initial production version
This commit is contained in:
75
src/routes/api/scrape-images/+server.ts
Normal file
75
src/routes/api/scrape-images/+server.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
import { json } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
|
||||
/** Maximum number of image URLs returned to the client. */
const MAX_IMAGES = 20;

/** URLs at or above this length are dropped (typically tracking or inlined payloads). */
const MAX_URL_LENGTH = 500;

/** Substrings that mark a URL as page chrome rather than content imagery. */
const EXCLUDED_URL_MARKERS = ['logo', 'icon', 'sprite'];

/** Browser-like User-Agent sent with the page request. */
const SCRAPER_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';

const META_TAG_PATTERN = /<meta\b[^>]*>/gi;
const IMG_TAG_PATTERN = /<img\b[^>]*>/gi;

/**
 * Parses `raw` (optionally relative to `base`) and returns it only when it is
 * an http(s) URL.
 *
 * @returns The parsed URL, or `undefined` when `raw` cannot be parsed or uses
 * another scheme (`data:`, `javascript:`, `file:`, ...).
 */
function toHttpUrl(raw: string, base?: URL): URL | undefined {
	try {
		const parsed = new URL(raw, base);
		return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? parsed : undefined;
	} catch {
		return undefined;
	}
}

/**
 * Reads one attribute value out of the raw text of a single HTML tag.
 *
 * Accepts double-quoted, single-quoted and unquoted values, matches the
 * attribute name case-insensitively, and decodes `&amp;` (the entity that
 * routinely appears inside URL query strings).
 *
 * @param tag Raw tag text, e.g. `<img src="a.png">`.
 * @param name Attribute name; must be a plain identifier such as `src` or
 * `data-src` because it is interpolated into a regular expression.
 * @returns The trimmed value, or `undefined` when the attribute is absent.
 */
function readAttribute(tag: string, name: string): string | undefined {
	// The leading \s keeps `src` from matching the tail of `data-src`.
	const pattern = new RegExp(
		`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`,
		'i'
	);
	const match = pattern.exec(tag);
	if (match === null) {
		return undefined;
	}
	const raw = match[1] ?? match[2] ?? match[3] ?? '';
	return raw.replace(/&amp;/gi, '&').trim();
}

/**
 * Collects candidate image URLs from an HTML document.
 *
 * Order of the result: `og:image` entries, then `twitter:image` entries, then
 * `<img>` sources, each in document order. Every candidate is resolved against
 * `pageUrl`, non-http(s) results are discarded, and duplicates are removed.
 *
 * @param html Raw HTML text of the page.
 * @param pageUrl URL the HTML was served from; the base for relative paths.
 * @returns De-duplicated absolute image URLs.
 */
function extractImageUrls(html: string, pageUrl: URL): string[] {
	const openGraph: string[] = [];
	const twitter: string[] = [];
	for (const match of html.matchAll(META_TAG_PATTERN)) {
		const tag = match[0];
		const content = readAttribute(tag, 'content');
		if (content === undefined) {
			continue;
		}
		// Sites use `property` and `name` interchangeably for these tags.
		const key = (readAttribute(tag, 'property') ?? readAttribute(tag, 'name'))?.toLowerCase();
		if (key === 'og:image') {
			openGraph.push(content);
		} else if (key === 'twitter:image') {
			twitter.push(content);
		}
	}

	const inline: string[] = [];
	for (const match of html.matchAll(IMG_TAG_PATTERN)) {
		const tag = match[0];
		// `||` on purpose: an empty `src` placeholder falls back to the lazy-load attribute.
		const source = readAttribute(tag, 'src') || readAttribute(tag, 'data-src');
		if (source) {
			inline.push(source);
		}
	}

	const unique = new Set<string>();
	for (const candidate of [...openGraph, ...twitter, ...inline]) {
		const resolved = toHttpUrl(candidate, pageUrl);
		if (resolved !== undefined) {
			unique.add(resolved.href);
		}
	}
	return [...unique];
}

/**
 * Heuristic filter that drops logos, icons, sprites, SVGs and over-long URLs.
 *
 * @param url Absolute image URL.
 * @returns `true` when the URL should be kept.
 */
function isLikelyContentImage(url: string): boolean {
	if (url.length >= MAX_URL_LENGTH) {
		return false;
	}
	if (EXCLUDED_URL_MARKERS.some((marker) => url.includes(marker))) {
		return false;
	}
	// Compare the extension without query string or fragment so `a.svg?v=2` is caught too.
	const withoutQuery = url.replace(/[?#].*$/, '');
	return !withoutQuery.toLowerCase().endsWith('.svg');
}

/**
 * Extracts the `url` field from a parsed JSON request body.
 *
 * @returns The trimmed URL string, or `undefined` when the body is not an
 * object or `url` is missing, empty, or not a string.
 */
function readUrlField(body: unknown): string | undefined {
	if (typeof body !== 'object' || body === null) {
		return undefined;
	}
	// Safe after the object check above; the value itself is still treated as unknown.
	const candidate = (body as Record<string, unknown>).url;
	if (typeof candidate !== 'string') {
		return undefined;
	}
	const trimmed = candidate.trim();
	return trimmed === '' ? undefined : trimmed;
}

/**
 * POST handler: downloads the page named by `url` in the JSON body and returns
 * up to 20 image URLs found in it as `{ images: string[] }`.
 *
 * Responses:
 * - 400 `{ error: 'URL is required' }` when the body is not valid JSON or has no usable `url`.
 * - 400 `{ error: 'Invalid URL' }` when `url` is not an absolute http(s) URL.
 * - 400 `{ error: 'Failed to fetch URL' }` when the remote server answers with a non-2xx status.
 * - 500 `{ error: 'Failed to scrape images' }` on network or other unexpected failures.
 *
 * NOTE(security): this endpoint fetches a caller-supplied URL from the server.
 * Only the scheme is restricted here; requests to private or internal hosts are
 * NOT blocked. Add host allow-listing or network-level egress rules if this
 * route is reachable by untrusted clients.
 */
export const POST: RequestHandler = async ({ request }) => {
	let requestedUrl: string | undefined;
	try {
		const body: unknown = await request.json();
		requestedUrl = readUrlField(body);
	} catch {
		// Malformed JSON is a client error, not a server failure.
		requestedUrl = undefined;
	}

	if (requestedUrl === undefined) {
		return json({ error: 'URL is required' }, { status: 400 });
	}

	const target = toHttpUrl(requestedUrl);
	if (target === undefined) {
		return json({ error: 'Invalid URL' }, { status: 400 });
	}

	try {
		const response = await fetch(target.href, {
			headers: {
				'User-Agent': SCRAPER_USER_AGENT
			}
		});

		if (!response.ok) {
			return json({ error: 'Failed to fetch URL' }, { status: 400 });
		}

		const html = await response.text();
		// After redirects, relative paths belong to the final URL, not the requested one.
		const pageUrl = toHttpUrl(response.url) ?? target;
		const images = extractImageUrls(html, pageUrl).filter(isLikelyContentImage);

		return json({ images: images.slice(0, MAX_IMAGES) });
	} catch (error) {
		console.error('Failed to scrape images from', target.href, error);
		return json({ error: 'Failed to scrape images' }, { status: 500 });
	}
};
|
||||
Reference in New Issue
Block a user