from datetime import timedelta
from crawlee.beautifulsoup_crawler import (
BeautifulSoupCrawler,
BeautifulSoupCrawlingContext,
)
import asyncio
async def task():
    """Crawl the UniCredit homepage once and report the number of links found.

    Builds a ``BeautifulSoupCrawler`` with a 10-second per-request handler
    timeout and no retries, registers a default handler that counts the
    ``<a>`` tags on each fetched page, then runs the crawl.
    """
    crawler = BeautifulSoupCrawler(
        # Fail fast: abort a handler after 10 seconds and never retry a request.
        request_handler_timeout=timedelta(seconds=10),
        max_request_retries=0,
    )

    @crawler.router.default_handler
    async def _request_handler(context: BeautifulSoupCrawlingContext) -> None:
        # Count every anchor tag in the parsed page.
        # (The original also assigned context.request.url to an unused local;
        # that dead assignment has been removed.)
        links = context.soup.find_all("a")
        print(f"Found {len(links)} links")

    await crawler.run(["https://www.unicreditgroup.eu/"])
# Script entry point: drive the async crawl to completion on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(task())
from datetime import timedelta
from crawlee.beautifulsoup_crawler import (
BeautifulSoupCrawler,
BeautifulSoupCrawlingContext,
)
import asyncio
async def task():
    """Fetch https://www.unicreditgroup.eu/ and print how many links it contains.

    The crawler aborts any request handler that runs longer than ten
    seconds and performs no retries on failure.
    """
    crawler_settings = {
        "request_handler_timeout": timedelta(seconds=10),
        "max_request_retries": 0,
    }
    crawler = BeautifulSoupCrawler(**crawler_settings)

    async def _request_handler(context: BeautifulSoupCrawlingContext):
        url = context.request.url  # NOTE(review): assigned but never used — confirm it can be dropped
        links = context.soup.find_all("a")
        print(f"Found {len(links)} links")

    # Register the handler explicitly instead of via decorator syntax.
    crawler.router.default_handler(_request_handler)

    await crawler.run(["https://www.unicreditgroup.eu/"])
# NOTE(review): this entry-point guard duplicates the one earlier in the file,
# so running the script executes the crawl a second time — confirm the
# duplication is intentional.
if __name__ == "__main__":
    asyncio.run(task())