Crawlee does not work with cron job
I'm running a cron job on a Node server, but it doesn't execute after the first run.
I tried crawler.teardown(), with no luck, since its description is: "Function for cleaning up after all requests are processed."
const crawler = new JSDOMCrawler({
proxyConfiguration,
requestList,
async requestHandler({ request, window }) {
// await page.goto(request.url);
console.log("request", request.userData.url);
// const title = page.locator('article .entry .entry-title a');
// const count = await title.count();
const links = window.document.querySelectorAll('article .entry .entry-title a');
let position = {}
links.forEach((link, index) => {
// console.log("link", request.userData.url);
if (link.getAttribute("href") === request.userData.url) {
position = {
keyword: request.userData.keyword,
position: index + 1,
}
}
})
result[request.userData.plugin] = {
...result[request.userData.plugin],
url: request.userData.url,
pluginName: request.userData.plugin,
date: moment().format('ll'),
keywordsData: [...result[request.userData.plugin]?.keywordsData, position]
};
// console.log("result", result);
}
})
await crawler.run();
crawler.teardown(): "Function for cleaning up after all requests are processed."
Cron('*/10 * * * * *', async () => {
// A new PlaywrightCrawler is constructed each time this callback runs;
// odHandler (not shown here) is registered as its request handler.
const crawler = new PlaywrightCrawler({
requestHandler: odHandler,
})
// Start the crawl with the single URL taken from the OD_URL environment variable.
// NOTE(review): the log output in this document reports "requestsTotal":0 for a run;
// presumably the URL was already marked as handled in request storage shared with
// the earlier run — confirm against the Crawlee request queue documentation.
await crawler.run([process.env.OD_URL])
})
INFO PlaywrightCrawler: Initializing the crawler.
INFO PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO PlaywrightCrawler: Final request statistics: {"requestsFinished":0,"requestsFailed":0,"retryHistogram":[],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":null,"requestsFinishedPerMinute":0,"requestsFailedPerMinute":0,"requestTotalDurationMillis":0,"requestsTotal":0,"crawlerRuntimeMillis":192}
INFO PlaywrightCrawler: Finished! Total 0 requests: 0 succeeded, 0 failed. {"terminal":true}
// npm i croner
import { Cron } from "croner";
// Schedule: minute 0 of every third hour, evaluated in the Europe/Amsterdam timezone.
const everyThreeHours = '0 */3 * * *';
const scheduleOptions = { timezone: 'Europe/Amsterdam' };

Cron(everyThreeHours, scheduleOptions, async () => {
  // Your code here
});