import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
/**
 * Reports whether an IPv4 address, given by its first two octets, belongs to a
 * range that must never be fetched on behalf of a remote user.
 *
 * Covered ranges: 0.0.0.0/8 ("this host"), 10.0.0.0/8, 127.0.0.0/8 (loopback),
 * 169.254.0.0/16 (link-local, includes cloud metadata endpoints),
 * 172.16.0.0/12 and 192.168.0.0/16.
 */
function isInternalIpv4(first: number, second: number): boolean {
  return (
    first === 0 ||
    first === 10 ||
    first === 127 ||
    (first === 169 && second === 254) ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168)
  );
}

/**
 * Validates a user-supplied URL before the server fetches it.
 *
 * A URL is accepted only when it parses, uses `http:` or `https:`, and its
 * host is not an obviously internal target (localhost names, loopback,
 * private or link-local IP literals, including IPv4-mapped IPv6 literals).
 *
 * NOTE: this is a syntactic check on the host as written. It cannot stop a
 * public DNS name that resolves to an internal address, nor a redirect to one.
 *
 * @param urlString - The raw URL string received from the client.
 * @returns `true` when the URL is safe-looking enough to fetch, else `false`.
 */
function isValidUrl(urlString: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(urlString);
  } catch {
    return false;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return false;
  }

  // URL.hostname keeps the square brackets around IPv6 literals ("[::1]") and
  // may keep a trailing root dot ("localhost."); normalise both away so the
  // comparisons below actually match.
  const hostname = parsed.hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, '')
    .replace(/\.$/, '');

  if (hostname === '' || hostname === 'localhost' || hostname.endsWith('.localhost')) {
    return false;
  }

  // The URL parser canonicalises every IPv4 spelling (decimal, hex, short
  // forms) to dotted-quad, so a single pattern is enough here.
  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(hostname);
  if (ipv4) {
    return !isInternalIpv4(Number(ipv4[1]), Number(ipv4[2]));
  }

  if (hostname.includes(':')) {
    // IPv6 literal. Loopback and the unspecified address are never valid targets.
    if (hostname === '::1' || hostname === '::') {
      return false;
    }
    // IPv4-mapped addresses are serialised as ::ffff:HHHH:HHHH.
    const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(hostname);
    if (mapped) {
      const high = parseInt(mapped[1] ?? '', 16);
      return !isInternalIpv4(high >> 8, high & 0xff);
    }
    // Unique-local (fc00::/7) and link-local (fe80::/10) ranges.
    if (/^f[cd][0-9a-f]{2}:/.test(hostname) || /^fe[89ab][0-9a-f]:/.test(hostname)) {
      return false;
    }
  }

  return true;
}
export const POST: RequestHandler = async ({ request }) => {
const { url } = await request.json();
if (!url) {
return json({ error: 'URL is required' }, { status: 400 });
}
if (!isValidUrl(url)) {
return json({ error: 'Invalid URL' }, { status: 400 });
}
try {
const response = await fetch(url, {
headers: {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'no-cache',
'Pragma': 'no-cache'
}
});
if (!response.ok) {
return json({ error: 'Failed to fetch URL' }, { status: 400 });
}
const html = await response.text();
const baseUrl = new URL(url);
const origin = baseUrl.origin;
const imageUrls: string[] = [];
/**
 * Turns an image reference scraped from the fetched page into an absolute URL.
 *
 * References that already start with `http://` or `https://` are returned
 * unchanged. Anything else (protocol-relative `//host/x`, root-relative `/x`,
 * document-relative `x` or `../x`) is resolved against `baseUrl`, the URL of
 * the page being scraped, using standard URL resolution. This keeps the page's
 * own scheme for protocol-relative references and honours the page's path for
 * document-relative ones.
 *
 * @param imgUrl - Raw attribute/meta value extracted from the HTML.
 * @returns An absolute URL string.
 */
function toAbsoluteUrl(imgUrl: string): string {
  // Require the full scheme separator so relative paths such as "httpd/a.jpg"
  // are not mistaken for absolute URLs.
  if (/^https?:\/\//i.test(imgUrl)) {
    return imgUrl;
  }
  try {
    return new URL(imgUrl, baseUrl).href;
  } catch {
    // Unparseable reference: fall back to a plain origin-relative join so the
    // caller still receives a string instead of an exception.
    return `${baseUrl.origin}/${imgUrl.replace(/^\/+/, '')}`;
  }
}
/**
 * Heuristic filter that rejects image URLs unlikely to show the product itself.
 *
 * A URL is rejected when, compared case-insensitively, it:
 * - contains a keyword associated with site chrome or social widgets
 *   (logo, icon, banner, nav, ...),
 * - points at an SVG file (query string and fragment are ignored),
 * - contains an inline `data:image` payload, or
 * - looks like a loading placeholder (loading, spinner, skeleton).
 *
 * Matching is by plain substring, so it can produce false rejections for
 * URLs that merely contain one of the keywords inside a longer word.
 *
 * @param url - Candidate image URL.
 * @returns `true` when none of the rejection rules apply.
 */
function isLikelyProductImage(url: string): boolean {
  const lower = url.toLowerCase();

  const badPatterns = [
    'logo', 'icon', 'sprite', 'favicon', 'banner', 'footer',
    'header', 'background', 'pattern', 'placeholder', 'thumbnail-small',
    'btn', 'button', 'menu', 'nav', 'navigation', 'social',
    'instagram', 'facebook', 'twitter', 'linkedin', 'pinterest'
  ];
  if (badPatterns.some((pattern) => lower.includes(pattern))) {
    return false;
  }

  // Test the extension on the lower-cased path only, so "x.SVG" and
  // "x.svg?v=2" are recognised as SVGs too.
  const cut = lower.search(/[?#]/);
  const pathPart = cut === -1 ? lower : lower.slice(0, cut);
  if (pathPart.endsWith('.svg')) {
    return false;
  }

  if (lower.includes('data:image')) {
    return false;
  }

  if (lower.includes('loading') || lower.includes('spinner') || lower.includes('skeleton')) {
    return false;
  }

  return true;
}
let match;
// Priority 1: OpenGraph and Twitter meta tags (main product image)
const ogImageRegex = /]+property=["']og:image["'][^>]+content=["']([^"'>]+)["']/gi;
const twitterImageRegex = /]+name=["']twitter:image["'][^>]+content=["']([^"'>]+)["']/gi;
while ((match = ogImageRegex.exec(html)) !== null) {
const url = toAbsoluteUrl(match[1]);
if (isLikelyProductImage(url) && !imageUrls.includes(url)) {
imageUrls.push(url);
}
}
while ((match = twitterImageRegex.exec(html)) !== null) {
const url = toAbsoluteUrl(match[1]);
if (isLikelyProductImage(url) && !imageUrls.includes(url)) {
imageUrls.push(url);
}
}
// Priority 2: Look for JSON-LD structured data (very common in modern e-commerce)
const jsonLdRegex = /