import { PlaywrightCrawler } from 'crawlee'
// Proxy support is currently disabled; re-enable by uncommenting and
// supplying real proxy URLs:
// const proxyConfiguration = new ProxyConfiguration({
//     proxyUrls: ['...'],
// })

/**
 * Headful Playwright crawler for inspecting browser-fingerprint results.
 * Retries are disabled and blocked-status detection is switched off so
 * every enqueued page is processed exactly once, without interference.
 */
const crawler: PlaywrightCrawler = new PlaywrightCrawler({
    launchContext: {
        launchOptions: {
            // Visible browser window so the fingerprint report can be read.
            headless: false,
            // channel: 'chrome',
            // viewport: null,
        },
    },
    // proxyConfiguration,
    // Fail fast: never retry a request after its first failure.
    maxRequestRetries: 0,
    maxRequestsPerCrawl: 5,
    sessionPoolOptions: {
        // Treat no HTTP status code as a "blocked" signal.
        blockedStatusCodes: [],
    },
    async requestHandler({ request, page, log }) {
        log.info(`Processing ${request.url}...`)
        // Hold the page open (100 s) so results can be inspected manually.
        await page.waitForTimeout(100000)
    },
    failedRequestHandler({ request, log }) {
        log.info(`Request ${request.url} failed too many times.`)
    },
    // browserPoolOptions: {
    //     useFingerprints: false,
    // },
})

// Seed the queue, run to completion, then report.
const startUrls = ['https://abrahamjuliot.github.io/creepjs/']
await crawler.addRequests(startUrls)
await crawler.run()
console.log('Crawler finished.')
// NOTE(review): removed an exact duplicate of the script above. The second
// copy re-imported PlaywrightCrawler and re-declared `const crawler`, which
// are duplicate-identifier compile errors — the file could not build at all.