Apify Discord Mirror

Updated 3 days ago

How can I change/save the logger that the context provides

At a glance
The handler's context provides a context.log, but I want to change the logger that is used so I can save the logs. I am using Crawlee without the Apify CLI.
[Attachment: image.png]
Marked as solution
Hey @Jaogmar

Here is an example with Loguru, but you can also use the standard logging module in the same way:

Python
import asyncio
from datetime import timedelta

from loguru import logger

from crawlee.crawlers import (
    BeautifulSoupCrawler,
    BeautifulSoupCrawlingContext,
)

# Add a file sink so all log records are also written to app.log.
logger.add('app.log', format='{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}')

async def main() -> None:
    crawler = BeautifulSoupCrawler(
        max_request_retries=1,
        request_handler_timeout=timedelta(seconds=30),
        max_requests_per_crawl=10,
        configure_logging=False,  # disable Crawlee's default logging setup
        _logger=logger,  # use the Loguru logger instead
    )

    @crawler.router.default_handler
    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

        data = {
            'url': context.request.url,
            'title': context.soup.title.string if context.soup.title else None,
            'h1s': [h1.text for h1 in context.soup.find_all('h1')],
            'h2s': [h2.text for h2 in context.soup.find_all('h2')],
            'h3s': [h3.text for h3 in context.soup.find_all('h3')],
        }

        await context.push_data(data)

    crawler.log.info('Test')
    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
7 comments
Thank you, bro! I forgot to close this thread.
I initialized the crawler class with the logger the same way you did.
The problem was that I was being careless and forgot to add the correct handlers to the logger,
so when I used context.log.info, nothing happened.
Glad you've solved it! 🙂