mirror of
https://github.com/Gabi-Zar/Images-Scrapper-JS.git
synced 2026-04-17 05:36:06 +02:00
Add headers and fix reverse proxy usage
This commit is contained in:
12
main.js
12
main.js
@@ -22,9 +22,13 @@ const downloadLimiter = rateLimit({
|
|||||||
standardHeaders: "draft-8",
|
standardHeaders: "draft-8",
|
||||||
legacyHeaders: false,
|
legacyHeaders: false,
|
||||||
});
|
});
|
||||||
|
const headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
};
|
||||||
let cachedImagesUrls = {};
|
let cachedImagesUrls = {};
|
||||||
|
|
||||||
|
app.set("trust proxy", 1);
|
||||||
app.use(express.static("public"));
|
app.use(express.static("public"));
|
||||||
if (useRateLimit) {
|
if (useRateLimit) {
|
||||||
app.use("/api/getImagesURL", getImagesLimiter);
|
app.use("/api/getImagesURL", getImagesLimiter);
|
||||||
@@ -50,7 +54,7 @@ app.get("/api/getImagesURL", async (req, res) => {
|
|||||||
let noNewCount = 0;
|
let noNewCount = 0;
|
||||||
do {
|
do {
|
||||||
const url = `https://www.bing.com/images/async?q=${encodeURIComponent(q)}&first=${String(offset)}`;
|
const url = `https://www.bing.com/images/async?q=${encodeURIComponent(q)}&first=${String(offset)}`;
|
||||||
const response = await fetch(url);
|
const response = await fetch(url, { headers: headers });
|
||||||
const html = await response.text();
|
const html = await response.text();
|
||||||
|
|
||||||
const urls = extractImageUrls(html);
|
const urls = extractImageUrls(html);
|
||||||
@@ -60,7 +64,7 @@ app.get("/api/getImagesURL", async (req, res) => {
|
|||||||
if (!imagesUrls.includes(url)) {
|
if (!imagesUrls.includes(url)) {
|
||||||
if (smart == true) {
|
if (smart == true) {
|
||||||
try {
|
try {
|
||||||
const response = await fetch(url, { method: "HEAD" });
|
const response = await fetch(url, { method: "HEAD", headers: headers });
|
||||||
const contentType = response.headers.get("content-type");
|
const contentType = response.headers.get("content-type");
|
||||||
if (contentType && contentType.startsWith("image/")) {
|
if (contentType && contentType.startsWith("image/")) {
|
||||||
imagesUrls.push(url);
|
imagesUrls.push(url);
|
||||||
@@ -114,7 +118,7 @@ app.get("/api/downloadImages", async (req, res) => {
|
|||||||
for (let i = 0; i < imagesUrls.length; i++) {
|
for (let i = 0; i < imagesUrls.length; i++) {
|
||||||
const url = imagesUrls[i];
|
const url = imagesUrls[i];
|
||||||
try {
|
try {
|
||||||
const response = await axios.get(url, { responseType: "stream", timeout: 5000 });
|
const response = await axios.get(url, { responseType: "stream", timeout: 5000, headers: headers });
|
||||||
const contentType = response.headers["content-type"];
|
const contentType = response.headers["content-type"];
|
||||||
const extension = mime.getExtension(contentType) || url.split(".").pop();
|
const extension = mime.getExtension(contentType) || url.split(".").pop();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user