# Create key values stores for batches
scheduled_batches = await prepare_requests_from_mongo(crawler_name)
processed_batches = await KeyValueStore.open(
name=f'{crawler_name}-processed_batches'
)
# Create crawler
crawler = await create_playwright_crawler(crawler_name)
# Iterate over the batches
async for key_info in scheduled_batches.iterate_keys():
urls: List[str] = await scheduled_batches.get_value(key_info.key)
requests = [
Request.from_url(
url,
user_data={
'page_tags': [PageTag.HOME.value],
'chosen_page_tag': PageTag.HOME.value,
'label': PageTag.HOME.value,
},
)
for url in urls
]
LOGGER.info(f'Processing batch {key_info.key}')
await crawler.run(requests)
await scheduled_batches.set_value(key_info.key, None)
await processed_batches.set_value(key_info.key, urls)
# NOTE(review): this entire section is a byte-for-byte duplicate of the
# batch-processing section directly above — presumably a copy/paste error.
# Confirm whether a deliberate second pass (re-reading newly scheduled
# batches from Mongo) was intended; if not, delete one copy.
# Create key-value stores for batches: "scheduled" holds URL lists still to
# crawl; "processed" records batches that completed successfully.
scheduled_batches = await prepare_requests_from_mongo(crawler_name)
processed_batches = await KeyValueStore.open(
    name=f'{crawler_name}-processed_batches'
)
# Create crawler
crawler = await create_playwright_crawler(crawler_name)
# Iterate over the batches: crawl each one, then move it from the
# scheduled store to the processed store.
async for key_info in scheduled_batches.iterate_keys():
    # NOTE(review): get_value may return None for a missing key — the
    # comprehension below would then raise TypeError; verify upstream
    # guarantees every iterated key still has a value.
    urls: List[str] = await scheduled_batches.get_value(key_info.key)
    # Build one Request per URL, tagged with the HOME page tag in user_data.
    requests = [
        Request.from_url(
            url,
            user_data={
                'page_tags': [PageTag.HOME.value],
                'chosen_page_tag': PageTag.HOME.value,
                'label': PageTag.HOME.value,
            },
        )
        for url in urls
    ]
    LOGGER.info(f'Processing batch {key_info.key}')
    await crawler.run(requests)
    # Setting None presumably clears the key from the scheduled store —
    # TODO(review): confirm set_value(key, None) semantics for this store.
    # NOTE(review): the scheduled entry is cleared before the processed
    # record is written; a crash between these two writes would lose the
    # batch from both stores — consider recording "processed" first.
    await scheduled_batches.set_value(key_info.key, None)
    await processed_batches.set_value(key_info.key, urls)