import express from "express"; import * as cheerio from "cheerio"; import { randomUUID } from "crypto"; import axios from "axios"; import archiver from "archiver"; import mime from "mime"; import { rateLimit } from "express-rate-limit"; import "dotenv/config"; const app = express(); const PORT = 3000; const useRateLimit = process.env.USE_RATE_LIMIT === "true"; const getImagesLimiter = rateLimit({ windowMs: 60 * 1000, // 1 min limit: 100, standardHeaders: "draft-8", legacyHeaders: false, }); const downloadLimiter = rateLimit({ windowMs: 60 * 1000, // 1 min limit: 5, standardHeaders: "draft-8", legacyHeaders: false, }); const headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", "Accept-Language": "en-US,en;q=0.9", }; let cachedImagesUrls = {}; app.set("trust proxy", 1); app.use(express.static("public")); if (useRateLimit) { app.use("/api/getImagesURL", getImagesLimiter); app.use("/api/downloadImages", downloadLimiter); } app.listen(PORT, () => { console.log(`Server launched on http://localhost:${PORT}`); }); app.get("/api/getImagesURL", async (req, res) => { try { let { q, offset = 1, count = 1000, smart = true } = req.query; offset = Number(offset); count = Number(count); smart = smart === "true"; if (!q) { return res.status(400).send("Missing 'q' parameter"); } let imagesUrls = []; let noNewCount = 0; do { const url = `https://www.bing.com/images/async?q=${encodeURIComponent(q)}&first=${String(offset)}`; const response = await fetch(url, { headers: headers }); const html = await response.text(); const urls = extractImageUrls(html); offset += urls.length; let newImageCount = 0; for (const url of urls) { if (!imagesUrls.includes(url)) { if (smart == true) { try { const response = await fetch(url, { method: "HEAD", headers: headers }); const contentType = response.headers.get("content-type"); if (contentType && contentType.startsWith("image/")) { imagesUrls.push(url); newImageCount += 1; } } catch {} } else { imagesUrls.push(url); newImageCount += 1; } } } noNewCount = newImageCount == 0 ? noNewCount + 1 : 0; if (noNewCount > 32) { break; } } while (imagesUrls.length < count); imagesUrls = imagesUrls.slice(0, count); const uuid = randomUUID(); cachedImagesUrls[uuid] = imagesUrls; setTimeout( () => { delete cachedImagesUrls[uuid]; }, 10 * 60 * 1000, ); res.send({ uuid: uuid, urls: imagesUrls }); } catch (error) { console.error(error); } }); app.get("/api/downloadImages", async (req, res) => { try { let { uuid } = req.query; const imagesUrls = cachedImagesUrls[uuid]; if (!imagesUrls) { return res.status(400).send("Invalid 'uuid'"); } res.setHeader("Content-Type", "application/zip"); res.setHeader("Content-Disposition", `attachment; filename=${uuid}.zip`); const archive = archiver("zip", { zlib: { level: 9 } }); archive.pipe(res); let errorNumber = 0; for (let i = 0; i < imagesUrls.length; i++) { const url = imagesUrls[i]; try { const response = await axios.get(url, { responseType: "stream", timeout: 5000, headers: headers }); const contentType = response.headers["content-type"]; const extension = mime.getExtension(contentType) || url.split(".").pop(); archive.append(response.data, { name: `image-${i + 1 - errorNumber}.${extension}` }); //console.log(`image downloaded ${url}`); } catch (error) { errorNumber += 1; console.warn(`Unable to download image ${url} : ${error}`); } } await archive.finalize(); } catch (error) { console.error(error); } }); function extractImageUrls(html) { const document = cheerio.load(html); const urls = []; const imagesTags = document("a.iusc").toArray(); for (const tag of imagesTags) { const metadataString = document(tag).attr("m"); const imageData = JSON.parse(metadataString); let url = new URL(imageData.murl); url = url.origin + url.pathname; urls.push(url); } return urls; }