diff --git a/src/lib/components/wishlist/WishlistItem.svelte b/src/lib/components/wishlist/WishlistItem.svelte
index 4944a4c..c2a2e44 100644
--- a/src/lib/components/wishlist/WishlistItem.svelte
+++ b/src/lib/components/wishlist/WishlistItem.svelte
@@ -66,9 +66,10 @@
{#if showImage && item.imageUrl}

e.currentTarget.src = item.imageUrl}
/>
{/if}
diff --git a/src/routes/api/image-proxy/+server.ts b/src/routes/api/image-proxy/+server.ts
new file mode 100644
index 0000000..6ce75f3
--- /dev/null
+++ b/src/routes/api/image-proxy/+server.ts
@@ -0,0 +1,129 @@
+import type { RequestHandler } from './$types';
+
+/** URL schemes the proxy will fetch; everything else (file:, data:, ftp:, ...) is refused. */
+const ALLOWED_PROTOCOLS = new Set(['http:', 'https:']);
+
+/**
+ * Reports whether `hostname` is `localhost` or a literal loopback, private-range,
+ * or link-local IP address.
+ *
+ * Only the literal host is inspected: a public DNS name that resolves to an
+ * internal address is not detected here.
+ */
+function isInternalHostname(hostname: string): boolean {
+  const host = hostname.toLowerCase().replace(/\.$/, '');
+
+  if (host === 'localhost' || host.endsWith('.localhost')) {
+    return true;
+  }
+
+  // URL.hostname keeps the square brackets around IPv6 literals.
+  if (host.startsWith('[')) {
+    const v6 = host.slice(1, -1);
+    return (
+      v6 === '::' ||
+      v6 === '::1' ||
+      v6.startsWith('::ffff:') || // IPv4-mapped addresses
+      /^f[cd][0-9a-f]{2}:/.test(v6) || // fc00::/7 unique local
+      /^fe[89ab][0-9a-f]:/.test(v6) // fe80::/10 link-local
+    );
+  }
+
+  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
+  if (ipv4 === null) {
+    return false;
+  }
+
+  const first = Number(ipv4[1]);
+  const second = Number(ipv4[2]);
+  return (
+    first === 0 ||
+    first === 10 ||
+    first === 127 ||
+    (first === 169 && second === 254) ||
+    (first === 172 && second >= 16 && second <= 31) ||
+    (first === 192 && second === 168)
+  );
+}
+
+/**
+ * GET /api/image-proxy?url={absolute image URL}
+ *
+ * Fetches a remote image server-side with browser-like request headers and
+ * relays the bytes to the client.
+ *
+ * Responses:
+ * - 400 when `url` is missing, malformed, not http(s), or names an internal host
+ * - the upstream status when the upstream request is not OK
+ * - 502 when the upstream body is not an `image/*` resource
+ * - 500 when the fetch itself throws
+ * - 200 with the image bytes otherwise
+ */
+export const GET: RequestHandler = async ({ url }) => {
+  const imageUrl = url.searchParams.get('url');
+
+  if (!imageUrl) {
+    return new Response('Image URL is required', { status: 400 });
+  }
+
+  // Parse up front so a malformed URL is a client error (400), not a 500.
+  let target: URL;
+  try {
+    target = new URL(imageUrl);
+  } catch {
+    return new Response('Invalid image URL', { status: 400 });
+  }
+
+  // The URL is caller-controlled: refuse non-web schemes and obvious internal
+  // hosts so the endpoint cannot be used to reach local services.
+  if (!ALLOWED_PROTOCOLS.has(target.protocol) || isInternalHostname(target.hostname)) {
+    return new Response('Image URL is not allowed', { status: 400 });
+  }
+
+  try {
+    // Browser-like headers; the Referer is the image's own origin.
+    // NOTE(review): fetch follows redirects by default, so an allowed host can
+    // still redirect to an internal one. Consider `redirect: 'manual'` with
+    // per-hop validation if that matters for this deployment.
+    const response = await fetch(target.href, {
+      headers: {
+        'User-Agent':
+          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9',
+        'Referer': target.origin,
+        'Sec-Fetch-Dest': 'image',
+        'Sec-Fetch-Mode': 'no-cors',
+        'Sec-Fetch-Site': 'cross-site'
+      }
+    });
+
+    if (!response.ok) {
+      return new Response('Failed to fetch image', { status: response.status });
+    }
+
+    const contentType = response.headers.get('content-type') || 'image/jpeg';
+
+    // Never relay HTML or other active content from this origin.
+    if (!contentType.toLowerCase().startsWith('image/')) {
+      return new Response('URL did not return an image', { status: 502 });
+    }
+
+    const imageBuffer = await response.arrayBuffer();
+
+    return new Response(imageBuffer, {
+      headers: {
+        'Content-Type': contentType,
+        'Cache-Control': 'public, max-age=86400', // Cache for 1 day
+        'Access-Control-Allow-Origin': '*',
+        // Keep browsers from sniffing the body into another type, and keep an
+        // image document opened directly (e.g. SVG) from running script.
+        'X-Content-Type-Options': 'nosniff',
+        'Content-Security-Policy': "default-src 'none'; style-src 'unsafe-inline'; sandbox"
+      }
+    });
+  } catch (error) {
+    console.error('Image proxy error:', error);
+    return new Response('Failed to proxy image', { status: 500 });
+  }
+};
diff --git a/src/routes/api/scrape-images/+server.ts b/src/routes/api/scrape-images/+server.ts
index b075d70..a1400d3 100644
--- a/src/routes/api/scrape-images/+server.ts
+++ b/src/routes/api/scrape-images/+server.ts
@@ -11,7 +11,11 @@ export const POST: RequestHandler = async ({ request }) => {
try {
const response = await fetch(url, {
headers: {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+ 'User-Agent':
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
+ 'Accept-Language': 'en-US,en;q=0.9',
+ 'Referer': 'https://www.google.com/'
}
});
@@ -24,9 +28,13 @@ export const POST: RequestHandler = async ({ request }) => {
const origin = baseUrl.origin;
const imageUrls: string[] = [];
- const imgRegex = /
![]()
]+src="([^">]+)"/g;
- const ogImageRegex = /
]+property="og:image"[^>]+content="([^">]+)"/g;
- const twitterImageRegex = /
]+name="twitter:image"[^>]+content="([^">]+)"/g;
+ // Match various image source patterns
+ const imgRegex = /
![]()
]+src=["']([^"'>]+)["']/gi;
+ const srcsetRegex = /
![]()
]+srcset=["']([^"'>]+)["']/gi;
+ const dataSrcRegex = /
![]()
]+data-src=["']([^"'>]+)["']/gi;
+ const ogImageRegex = /
]+property=["']og:image["'][^>]+content=["']([^"'>]+)["']/gi;
+ const twitterImageRegex = /
]+name=["']twitter:image["'][^>]+content=["']([^"'>]+)["']/gi;
+ const jsonLdRegex = /"image"\s*:\s*"([^"]+)"/gi;
function toAbsoluteUrl(imgUrl: string): string {
if (imgUrl.startsWith('http')) {
@@ -43,32 +51,70 @@ export const POST: RequestHandler = async ({ request }) => {
let match;
+ // Priority 1: OpenGraph and Twitter meta tags (usually the best product images)
while ((match = ogImageRegex.exec(html)) !== null) {
imageUrls.push(toAbsoluteUrl(match[1]));
}
while ((match = twitterImageRegex.exec(html)) !== null) {
- imageUrls.push(toAbsoluteUrl(match[1]));
+ const url = toAbsoluteUrl(match[1]);
+ if (!imageUrls.includes(url)) {
+ imageUrls.push(url);
+ }
}
+ // Priority 2: JSON-LD structured data (common for e-commerce)
+ while ((match = jsonLdRegex.exec(html)) !== null) {
+ const url = toAbsoluteUrl(match[1]);
+ if (!imageUrls.includes(url)) {
+ imageUrls.push(url);
+ }
+ }
+
+ // Priority 3: data-src attributes (lazy loaded images)
+ while ((match = dataSrcRegex.exec(html)) !== null) {
+ const url = toAbsoluteUrl(match[1]);
+ if (!imageUrls.includes(url)) {
+ imageUrls.push(url);
+ }
+ }
+
+ // Priority 4: srcset attributes (responsive images)
+ while ((match = srcsetRegex.exec(html)) !== null) {
+ const srcsetValue = match[1];
+ // srcset can have multiple URLs with sizes, extract them
+ const srcsetUrls = srcsetValue.split(',').map((s) => {
+ const parts = s.trim().split(/\s+/);
+ return parts[0]; // Get the URL part before size descriptor
+ });
+ for (const srcsetUrl of srcsetUrls) {
+ const url = toAbsoluteUrl(srcsetUrl);
+ if (!imageUrls.includes(url)) {
+ imageUrls.push(url);
+ }
+ }
+ }
+
+ // Priority 5: Regular img src attributes
while ((match = imgRegex.exec(html)) !== null) {
- const imgUrl = match[1];
- const fullUrl = toAbsoluteUrl(imgUrl);
- if (!imageUrls.includes(fullUrl)) {
- imageUrls.push(fullUrl);
+ const url = toAbsoluteUrl(match[1]);
+ if (!imageUrls.includes(url)) {
+ imageUrls.push(url);
}
}
const filteredImages = imageUrls.filter(
(url) =>
- !url.includes('logo') &&
- !url.includes('icon') &&
- !url.includes('sprite') &&
+ !url.toLowerCase().includes('logo') &&
+ !url.toLowerCase().includes('icon') &&
+ !url.toLowerCase().includes('sprite') &&
+ !url.toLowerCase().includes('favicon') &&
!url.endsWith('.svg') &&
- url.length < 500
+ url.length < 1000 && // Increased limit for modern CDN URLs
+ !url.includes('data:image') // Skip data URLs
);
- return json({ images: filteredImages.slice(0, 20) });
+ return json({ images: filteredImages.slice(0, 30) });
} catch (error) {
return json({ error: 'Failed to scrape images' }, { status: 500 });
}