import { Dataset, PuppeteerCrawler, log, } from 'crawlee';
/**
 * Crawls the given links with a Puppeteer-based Crawlee crawler and exports
 * the default Dataset to JSON when the run completes.
 *
 * @param {Function} cbRouterHandler - Crawlee request handler (typically a router) invoked for each loaded page.
 * @param {(string | object)[]} links - Start URLs or request objects to enqueue.
 * @returns {Promise<void>} Resolves after the crawl finishes and the dataset export is written.
 */
export const puppeteerCrawler = async (cbRouterHandler, links) => {
  const crawler = new PuppeteerCrawler({
    minConcurrency: 4,
    maxConcurrency: 20,
    maxRequestRetries: 3,
    requestHandlerTimeoutSecs: 30,
    // NOTE(review): headless:false opens a visible browser window — presumably
    // a debugging setting; confirm before running in production/CI.
    headless: false,
    requestHandler: cbRouterHandler,
    preNavigationHooks: [
      async (crawlingContext, gotoOptions) => {
        // Cap each page navigation at 15 s and treat it as done once the
        // network is mostly idle ('networkidle2').
        gotoOptions.timeout = 15_000;
        gotoOptions.waitUntil = 'networkidle2';
      },
    ],
    failedRequestHandler({ request, error }) {
      // Fix: the original destructured `error` but never logged it, discarding
      // the only diagnostic information about why the request kept failing.
      log.error(`Request ${request.url} failed too many times.`, {
        error: error?.message ?? String(error),
      });
    },
  });
  await crawler.run(links);
  await Dataset.exportToJSON('TEST');
};
// NOTE(review): this file previously contained a second, byte-identical copy
// of the import statement and the `puppeteerCrawler` export (an accidental
// paste). Duplicate `import` specifiers and a duplicate `const` binding of the
// same name are SyntaxErrors in an ES module, so the module could not load at
// all. The duplicate has been removed; the single definition above is
// canonical.