Strange behaviour when using rq_client()
I have been struggling with tests for a while and finally reduced the problem to a simple test whose behaviour I don't understand. Is this expected (and I am missing something), or is it a bug?
This test fails
async def test_failing():
    """Reproduction reported as failing.

    The request is enqueued through a request-queue client obtained from the
    storage client, and the crawler is then built with that same storage
    client and run without any further requests being added.
    """
    memory_storage = MemoryStorageClient()
    rq_client = await memory_storage.create_rq_client()
    seed_request = Request.from_url("https://crawlee.dev")
    await rq_client.add_batch_of_requests([seed_request])

    # One task at a time keeps the reproduction as simple as possible.
    single_task = ConcurrencySettings(desired_concurrency=1, max_concurrency=1)
    crawler = BasicCrawler(
        concurrency_settings=single_task,
        max_crawl_depth=2,
        storage_client=memory_storage,
    )

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        """No-op handler; the test only checks that a request was finished."""

    run_stats = await crawler.run()
    assert run_stats.requests_finished > 0
but this one passes
async def test_success():
    """Reproduction reported as passing.

    The crawler is built first and the request is handed to it via
    ``crawler.add_requests`` before ``crawler.run()`` is awaited.
    """
    memory_storage = MemoryStorageClient()
    seed_request = Request.from_url("https://crawlee.dev")

    # One task at a time keeps the reproduction as simple as possible.
    single_task = ConcurrencySettings(desired_concurrency=1, max_concurrency=1)
    crawler = BasicCrawler(
        concurrency_settings=single_task,
        max_crawl_depth=2,
        storage_client=memory_storage,
    )

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        """No-op handler; the test only checks that a request was finished."""

    await crawler.add_requests([seed_request])
    run_stats = await crawler.run()
    assert run_stats.requests_finished > 0
The only difference is that in the first test I add the request through a request queue client, whereas in the second I add it through the crawler. If I add it like this instead:
# Alternative way of enqueueing the request (also reported to fail):
# open a RequestQueue with no arguments and add the request to it directly.
rq = await RequestQueue.open()
await rq.add_request(req)
it also fails. Thanks in advance
This test fails
async def test_failing():
    """Reproduction reported as failing.

    The request is enqueued through a request-queue client obtained from the
    storage client, and the crawler is then built with that same storage
    client and run without any further requests being added.
    """
    memory_storage = MemoryStorageClient()
    rq_client = await memory_storage.create_rq_client()
    seed_request = Request.from_url("https://crawlee.dev")
    await rq_client.add_batch_of_requests([seed_request])

    # One task at a time keeps the reproduction as simple as possible.
    single_task = ConcurrencySettings(desired_concurrency=1, max_concurrency=1)
    crawler = BasicCrawler(
        concurrency_settings=single_task,
        max_crawl_depth=2,
        storage_client=memory_storage,
    )

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        """No-op handler; the test only checks that a request was finished."""

    run_stats = await crawler.run()
    assert run_stats.requests_finished > 0
but this one passes
async def test_success():
    """Reproduction reported as passing.

    The crawler is built first and the request is handed to it via
    ``crawler.add_requests`` before ``crawler.run()`` is awaited.
    """
    memory_storage = MemoryStorageClient()
    seed_request = Request.from_url("https://crawlee.dev")

    # One task at a time keeps the reproduction as simple as possible.
    single_task = ConcurrencySettings(desired_concurrency=1, max_concurrency=1)
    crawler = BasicCrawler(
        concurrency_settings=single_task,
        max_crawl_depth=2,
        storage_client=memory_storage,
    )

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        """No-op handler; the test only checks that a request was finished."""

    await crawler.add_requests([seed_request])
    run_stats = await crawler.run()
    assert run_stats.requests_finished > 0
The only difference is that in the first test I add the request through a request queue client, whereas in the second I add it through the crawler. If I add it like this instead:
# Alternative way of enqueueing the request (also reported to fail):
# open a RequestQueue with no arguments and add the request to it directly.
rq = await RequestQueue.open()
await rq.add_request(req)
it also fails. Thanks in advance