Hey @Jaogmar
Here's an example with Loguru, but you can use the standard `logging` module in the same way (see the sketch after the example).
```python
import asyncio
from datetime import timedelta

from loguru import logger

from crawlee.crawlers import (
    BeautifulSoupCrawler,
    BeautifulSoupCrawlingContext,
)

# Send Loguru output to a file with a custom timestamp format.
logger.add('app.log', format='{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}')


async def main() -> None:
    crawler = BeautifulSoupCrawler(
        max_request_retries=1,
        request_handler_timeout=timedelta(seconds=30),
        max_requests_per_crawl=10,
        # Disable Crawlee's own logging setup so it doesn't override Loguru,
        # and hand the crawler your logger instead.
        configure_logging=False,
        _logger=logger,
    )

    @crawler.router.default_handler
    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')
        data = {
            'url': context.request.url,
            'title': context.soup.title.string if context.soup.title else None,
            'h1s': [h1.text for h1 in context.soup.find_all('h1')],
            'h2s': [h2.text for h2 in context.soup.find_all('h2')],
            'h3s': [h3.text for h3 in context.soup.find_all('h3')],
        }
        await context.push_data(data)

    crawler.log.info('Test')
    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
```
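For the standard library, the setup looks roughly like this — a minimal sketch assuming the same crawler options as above, with `logging.basicConfig` and a named logger standing in for the Loguru calls:

```python
import asyncio
import logging
from datetime import timedelta

from crawlee.crawlers import BeautifulSoupCrawler

# Configure the standard logging module to write to a file
# with a format similar to the Loguru one above.
logging.basicConfig(
    filename='app.log',
    format='%(asctime)s | %(levelname)s | %(message)s',
    datefmt='%Y-%m-%d at %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger('crawler')  # any logger name works here


async def main() -> None:
    crawler = BeautifulSoupCrawler(
        max_request_retries=1,
        request_handler_timeout=timedelta(seconds=30),
        max_requests_per_crawl=10,
        configure_logging=False,
        _logger=logger,  # same parameter as in the Loguru example
    )
    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
```

The `_logger` parameter natively expects a standard `logging.Logger`, which is why the Loguru logger can be dropped in the same way. In both variants, `configure_logging=False` keeps Crawlee from installing its own handlers on top of yours.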