import { Actor } from 'apify';
import { CheerioCrawler, downloadListOfUrls, EnqueueStrategy } from 'crawlee';
// Entry point: initialise the Actor, crawl the site starting from the URLs
// listed in its sitemap, then shut the Actor down.
await Actor.init();

const crawler = new CheerioCrawler({
    /**
     * Called once for every request the crawler processes.
     *
     * Logs the URL being handled and enqueues the links found on the page
     * that match the product glob below.
     *
     * @param {object} context - Crawling context supplied by the crawler.
     * @param {object} context.request - The request currently being handled.
     * @param {Function} context.enqueueLinks - Helper that enqueues links from the page.
     * @returns {Promise<void>}
     */
    async requestHandler({ request, enqueueLinks }) {
        console.log(request.url);
        await enqueueLinks({
            globs: ['https://www.something/produto/*'],
        });
    },
});

// Seed the crawler with the URLs extracted from the sitemap file.
// NOTE(review): this points at a sitemap *index*; presumably its entries are
// child sitemaps rather than product pages — confirm the seeded URLs are the
// ones you actually want to crawl.
const listOfUrls = await downloadListOfUrls({ url: 'https://www.something/sitemap_index.xml' });
await crawler.addRequests(listOfUrls);

await crawler.run();

await Actor.exit();