const crawler = new PuppeteerCrawler({
launchContext: {
launchOptions: {
headless: true,
args: [
"--no-sandbox", // Mitigates the "sandboxed" process issue in Docker containers,
"--ignore-certificate-errors",
"--disable-dev-shm-usage",
"--disable-infobars",
"--disable-extensions",
"--disable-setuid-sandbox",
"--ignore-certificate-errors",
"--disable-gpu", // Mitigates the "crashing GPU process" issue in Docker containers
],
},
},
maxRequestRetries: 1,
navigationTimeoutSecs: 60,
autoscaledPoolOptions: { minConcurrency: 30 },
maxSessionRotations: 5,
preNavigationHooks: [
async ({ blockRequests }, goToOptions) => {
if (goToOptions) goToOptions.waitUntil = "domcontentloaded"; // Set waitUntil here
await blockRequests({
urlPatterns: [
...
],
});
},
],
proxyConfiguration,
requestHandler: router,
});
await crawler.run(startUrls);
await Actor.exit();
const crawler = new PuppeteerCrawler({
launchContext: {
launchOptions: {
headless: true,
args: [
"--no-sandbox", // Mitigates the "sandboxed" process issue in Docker containers,
"--ignore-certificate-errors",
"--disable-dev-shm-usage",
"--disable-infobars",
"--disable-extensions",
"--disable-setuid-sandbox",
"--ignore-certificate-errors",
"--disable-gpu", // Mitigates the "crashing GPU process" issue in Docker containers
],
},
},
maxRequestRetries: 1,
navigationTimeoutSecs: 60,
autoscaledPoolOptions: { minConcurrency: 30 },
maxSessionRotations: 5,
preNavigationHooks: [
async ({ blockRequests }, goToOptions) => {
if (goToOptions) goToOptions.waitUntil = "domcontentloaded"; // Set waitUntil here
await blockRequests({
urlPatterns: [
...
],
});
},
],
proxyConfiguration,
requestHandler: router,
});
await crawler.run(startUrls);
await Actor.exit();