This is the official developer community of Apify and Crawlee.
- Build AI agents & multi-agent systems (GPT-4o, Claude 3, CrewAI, AutoGen)
- Automate workflows with Zapier, n8n, Make.com, and custom Python scripts
- Develop NLP-powered chatbots and memory-based assistants
- Integrate voice & speech systems (TTS/STT, Voiceflow, OpenAI Realtime)
- Deploy custom LLMs and fine-tuned AI models
- Work on Web3, smart contracts, and AI-integrated blockchain games
Traceback:
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1366, in __run_task_function
if not (await self._is_allowed_based_on_robots_txt_file(request.url)):
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1566, in _is_allowed_based_on_robots_txt_file
robots_txt_file = await self._get_robots_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1589, in _get_robots_txt_file_for_url
robots_txt_file = await self._find_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1599, in _find_txt_file_for_url
return await RobotsTxtFile.find(url, self._http_client)
File "crawlee/_utils/robots.py", line 48, in find
return await cls.load(str(robots_url), http_client, proxy_info)
File "crawlee/_utils/robots.py", line 59, in load
response = await http_client.send_request(url, proxy_info=proxy_info)
File "crawlee/http_clients/_impit.py", line 167, in send_request
response = await client.request(
impit.ConnectError: Failed to connect to the server.
Reason: hyper_util::client::legacy::Error(
Connect,
ConnectError(
"dns error",
Custom {
kind: Uncategorized,
error: "failed to lookup address information: Name or service not known",
},
),
)
exited with code 1

Traceback:
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1366, in __run_task_function
if not (await self._is_allowed_based_on_robots_txt_file(request.url)):
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1566, in _is_allowed_based_on_robots_txt_file
robots_txt_file = await self._get_robots_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1589, in _get_robots_txt_file_for_url
robots_txt_file = await self._find_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1599, in _find_txt_file_for_url
return await RobotsTxtFile.find(url, self._http_client)
File "crawlee/_utils/robots.py", line 48, in find
return await cls.load(str(robots_url), http_client, proxy_info)
File "crawlee/_utils/robots.py", line 59, in load
response = await http_client.send_request(url, proxy_info=proxy_info)
File "crawlee/http_clients/_impit.py", line 167, in send_request
response = await client.request(
impit.ConnectError: Failed to connect to the server.
Reason: hyper_util::client::legacy::Error(
Connect,
ConnectError(
"dns error",
Custom {
kind: Uncategorized,
error: "failed to lookup address information: Name or service not known",
},
),
)
exited with code 1

crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
playwright_crawler_specific_kwargs={
"browser_type": "firefox",
"headless": True,
},
max_session_rotations=10,
retry_on_blocked=True,
max_request_retries=5,
keep_alive=True,
respect_robots_txt_file=True,
)

crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
playwright_crawler_specific_kwargs={
"browser_type": "firefox",
"headless": True,
},
max_session_rotations=10,
retry_on_blocked=True,
max_request_retries=5,
keep_alive=True,
respect_robots_txt_file=True,
)
python-client: I can only get the post description, but I cannot fetch the title of the post itself (shown in bold).
`maxItems` to be of type `number` but received type `string` in object)". Can anybody help me, please?
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1366, in __run_task_function
if not (await self._is_allowed_based_on_robots_txt_file(request.url)):
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1566, in _is_allowed_based_on_robots_txt_file
robots_txt_file = await self._get_robots_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1589, in _get_robots_txt_file_for_url
robots_txt_file = await self._find_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1599, in _find_txt_file_for_url
return await RobotsTxtFile.find(url, self._http_client)
File "crawlee/_utils/robots.py", line 48, in find
return await cls.load(str(robots_url), http_client, proxy_info)
File "crawlee/_utils/robots.py", line 59, in load
response = await http_client.send_request(url, proxy_info=proxy_info)
File "crawlee/http_clients/_impit.py", line 167, in send_request
response = await client.request(
impit.ConnectError: Failed to connect to the server.
Reason: hyper_util::client::legacy::Error(
Connect,
ConnectError(
"dns error",
Custom {
kind: Uncategorized,
error: "failed to lookup address information: Name or service not known",
},
),
)
exited with code 1

Traceback:
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1366, in __run_task_function
if not (await self._is_allowed_based_on_robots_txt_file(request.url)):
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1566, in _is_allowed_based_on_robots_txt_file
robots_txt_file = await self._get_robots_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1589, in _get_robots_txt_file_for_url
robots_txt_file = await self._find_txt_file_for_url(url)
File "crawlee/crawlers/_basic/_basic_crawler.py", line 1599, in _find_txt_file_for_url
return await RobotsTxtFile.find(url, self._http_client)
File "crawlee/_utils/robots.py", line 48, in find
return await cls.load(str(robots_url), http_client, proxy_info)
File "crawlee/_utils/robots.py", line 59, in load
response = await http_client.send_request(url, proxy_info=proxy_info)
File "crawlee/http_clients/_impit.py", line 167, in send_request
response = await client.request(
impit.ConnectError: Failed to connect to the server.
Reason: hyper_util::client::legacy::Error(
Connect,
ConnectError(
"dns error",
Custom {
kind: Uncategorized,
error: "failed to lookup address information: Name or service not known",
},
),
)
exited with code 1

crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
playwright_crawler_specific_kwargs={
"browser_type": "firefox",
"headless": True,
},
max_session_rotations=10,
retry_on_blocked=True,
max_request_retries=5,
keep_alive=True,
respect_robots_txt_file=True,
)

crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
playwright_crawler_specific_kwargs={
"browser_type": "firefox",
"headless": True,
},
max_session_rotations=10,
retry_on_blocked=True,
max_request_retries=5,
keep_alive=True,
respect_robots_txt_file=True,
)

python-client, maxItems, number, string