// Register the stealth plugin on the shared chromium launcher exactly once.
chromium.use(stealthPlugin());

// Route each request to the handler registered for its label.
const router = createPlaywrightRouter();
router.addHandler(
  requestLabels.SPIDER,
  spiderDiscoveryHandlerFactory(container),
);
router.addHandler(requestLabels.ARTICLE, articleHandlerFactory(container));

// Per-message crawler configuration: storage lives in a directory named
// after the message id, so each message gets its own storage location.
const config = new Configuration({
  storageClient: new MemoryStorage({
    localDataDirectory: `./storage/${message.messageId}`,
    writeMetadata: true,
    persistStorage: true,
  }),
  persistStateIntervalMillis: 5000,
  persistStorage: true,
  // NOTE(review): presumably kept false so previously persisted storage for
  // this message id is reused rather than wiped on start — confirm.
  purgeOnStart: false,
  headless: false,
});

const crawler = new PlaywrightCrawler(
  {
    launchContext: {
      launcher: chromium,
    },
    requestHandler: router,
    // Only the error is used here; the first argument is ignored.
    errorHandler: (_context, error) => {
      logger.error(`${error.name}\n${error.message}`);
    },
    // A missing or non-positive maxRequests is passed on as `undefined`,
    // i.e. no explicit request cap is supplied to the crawler.
    maxRequestsPerCrawl:
      body.config.maxRequests > 0 ? body.config.maxRequests : undefined,
    useSessionPool: true,
    persistCookiesPerSession: true,
  },
  config,
);