// Opens the start URL in a visible (non-headless) browser through a proxy and
// prints the cookies held by the browser context, without treating a 403
// response as a blocked session.
//
// NOTE: this script was previously pasted twice in a row, which declared
// `const crawler` twice in the same scope and made the file fail to parse
// ("Identifier 'crawler' has already been declared"). It is declared once here.
const crawler = new PlaywrightCrawler({
    // This is triggered only once on the first 403 response.
    async requestHandler({ request, page, log, response }) {
        // `response` may be absent, hence the optional call on status().
        log.info(`Processing ${response?.status()} ${request.url}...`);

        // Dump every cookie currently stored in this page's browser context.
        // NOTE(review): cookies are read *before* the 10 s pause below; if the
        // cookie of interest is only set after later redirects, the read may
        // need to happen after the wait instead. Confirm the intended order.
        const cookies = await page.context().cookies();
        console.log(cookies);

        // Keep the page open for 10 seconds before the handler returns.
        await page.waitForTimeout(10000);
    },

    // Run with a visible browser window.
    headless: false,

    // Disable the crawler's own retry-on-blocked handling.
    retryOnBlocked: false,

    sessionPoolOptions: {
        // Only 429 is listed as a blocked status code. Do not block 403.
        blockedStatusCodes: [429],
    },

    // Sometimes, website does multiple redirects before the cookie is ready,
    // so the handler is given an effectively unlimited time budget.
    requestHandlerTimeoutSecs: 99999,

    // Never retry a failed request.
    maxRequestRetries: 0,

    // Placeholder proxy URL; replace user, pass, host and port with real values.
    proxyConfiguration: new ProxyConfiguration({
        proxyUrls: ['http://user:pass@host:port'],
    }),
});

await crawler.run(['https://www.example.com']);