// Constructs a PlaywrightCrawler from an inline options object. No assignment is
// visible on these lines, so whatever holds on to the instance lives outside
// this snippet.
new PlaywrightCrawler({
  // Concurrency scaling for request processing.
  autoscaledPoolOptions: {
    maxConcurrency: CONFIG.SOURCE_MAX_CONCURRENCY, // upper bound; here 6
    minConcurrency: CONFIG.SOURCE_MIN_CONCURRENCY, // lower bound; here 1
    autoscaleIntervalSecs: 5,
    // NOTE(review): null presumably turns periodic pool-state logging off — confirm in the Crawlee docs.
    loggingIntervalSecs: null,
  },

  // Options forwarded to the browser launcher.
  launchContext: {
    launchOptions: {
      headless: true,
      chromiumSandbox: false,
    },
  },

  // Browser pool tuning: short timeouts and small per-browser page budgets.
  // NOTE(review): these look far tighter than the library defaults — confirm they are intentional.
  browserPoolOptions: {
    maxOpenPagesPerBrowser: 5,
    retireBrowserAfterPageCount: 10,
    closeInactiveBrowserAfterSecs: 3,
    operationTimeoutSecs: 5,
  },

  // Per-request limits.
  maxRequestRetries: 3,
  requestHandlerTimeoutSecs: 60,

  // Off here: requests identified as blocked (e.g., by bot detection) are not retried automatically.
  retryOnBlocked: false,

  // Keep the crawler alive after every queued request has been handled (long-running crawl).
  keepAlive: true,

  // Handlers are bound so `this` inside them is the object that owns this code.
  requestHandler: this.requestHandler.bind(this), // invoked for each request
  failedRequestHandler: this.failedRequestHandler.bind(this), // invoked for each failed request
})
// Constructs a PlaywrightCrawler from an inline options object. No assignment is
// visible on these lines, so whatever holds on to the instance lives outside
// this snippet.
new PlaywrightCrawler({
  // Concurrency scaling for request processing.
  autoscaledPoolOptions: {
    maxConcurrency: CONFIG.SOURCE_MAX_CONCURRENCY, // upper bound; here 6
    minConcurrency: CONFIG.SOURCE_MIN_CONCURRENCY, // lower bound; here 1
    autoscaleIntervalSecs: 5,
    // NOTE(review): null presumably turns periodic pool-state logging off — confirm in the Crawlee docs.
    loggingIntervalSecs: null,
  },

  // Options forwarded to the browser launcher.
  launchContext: {
    launchOptions: {
      headless: true,
      chromiumSandbox: false,
    },
  },

  // Browser pool tuning: short timeouts and small per-browser page budgets.
  // NOTE(review): these look far tighter than the library defaults — confirm they are intentional.
  browserPoolOptions: {
    maxOpenPagesPerBrowser: 5,
    retireBrowserAfterPageCount: 10,
    closeInactiveBrowserAfterSecs: 3,
    operationTimeoutSecs: 5,
  },

  // Per-request limits.
  maxRequestRetries: 3,
  requestHandlerTimeoutSecs: 60,

  // Off here: requests identified as blocked (e.g., by bot detection) are not retried automatically.
  retryOnBlocked: false,

  // Keep the crawler alive after every queued request has been handled (long-running crawl).
  keepAlive: true,

  // Handlers are bound so `this` inside them is the object that owns this code.
  requestHandler: this.requestHandler.bind(this), // invoked for each request
  failedRequestHandler: this.failedRequestHandler.bind(this), // invoked for each failed request
})