Collecting URLs from a nested XML sitemap
I need help crawling URLs from a nested XML sitemap using Crawlee's downloadListOfUrls. Below is my sample code:
// Collect product-page URLs from a nested sitemap: the root sitemap lists
// child sitemaps, and each matching child sitemap lists the actual pages.
const urls = await downloadListOfUrls({ url: sitemapUrl });

// Accumulate matches across every child sitemap. Declaring this outside the
// loop keeps earlier results from being overwritten and guarantees that
// addRequests() receives an array even when nothing matches.
const filteredUrls = [];

for (const url of urls) {
    // Sitemap entries are normally absolute URLs ("https://host/..."), so a
    // path fragment is never found at index 0; test for containment instead.
    // NOTE(review): fragment kept from the original; confirm it matches the
    // real child-sitemap URLs (some shops use "/sitemap_products_1.xml").
    if (!url.includes('/sitemap_products_1/')) {
        continue;
    }

    log.info(`got url: ${url}`);
    log.info(`got ListUrl: ${url}`);

    const listOfUrls = await downloadListOfUrls({ url });
    log.info(`got pdpurl: ${listOfUrls}`);

    for (const u of listOfUrls) {
        // Same reasoning as above: match the path fragment anywhere in the URL.
        const keep = u.includes('/products/');
        log.debug(keep ? `keeping ${u}` : `rejecting ${u}`);
        if (keep) {
            filteredUrls.push(u);
        }
    }
}

await crawler.addRequests(filteredUrls);
The log statement below produces no output:
// The statement that printed nothing; template-literal backticks restored.
log.info(`got pdpurl: ${listOfUrls}`);