Function isn't running when deployed on Apify
Hello everyone,
I have the following code, which works perfectly when I am not using Apify; however, once I run it on Apify, the second function never executes. I am using the Apify template for Scrapy.
Thanks for the help.
I have the following code, which works perfectly when I am not using Apify; however, once I run it on Apify, the second function never executes. I am using the Apify template for Scrapy.
Thanks for the help.
from typing import Generator
from scrapy.responsetypes import Response
from apify import Actor
from urllib.parse import urljoin
import nest_asyncio
import scrapy
from itemadapter import ItemAdapter
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.utils.reactor import install_reactor
from scrapy.http import Response, Request
class TitleSpider(scrapy.Spider):
    """Scrape product names from the A.P.C. men's shirts listing page.

    ``parse`` collects product-detail links from the listing page and
    schedules ``second_page`` for each one; ``second_page`` extracts the
    product name and yields it as an item.
    """

    name = 'example'
    allowed_domains = ['apc.fr']
    start_urls = [
        "https://www.apc.fr/men/men-shirts.html",
    ]

    def parse(self, response: Response):
        """Extract product links from the listing and follow each of them.

        Yields one request per product; each is handled by ``second_page``.
        """
        Actor.log.info(f'TitleSpider is parsing {response}...')
        for li in response.css('li.product-item'):
            product_link = li.css('.product-link a::attr(href)').get()
            if product_link:
                # response.follow resolves the href against the current page
                # URL, so relative links produce valid absolute requests that
                # are not silently dropped (a bare scrapy.Request with a
                # relative URL is rejected, which makes the second callback
                # appear to never run — the likely cause of the Apify issue).
                yield response.follow(product_link, callback=self.second_page)

    def second_page(self, response: Response):
        """Parse a product-detail page and yield the product name.

        Yielding a dict item (instead of only printing) lets the Scrapy/Apify
        item pipeline persist the result to the dataset; print output goes
        only to the container log.
        """
        Actor.log.info(f'Second fonction is parsing {response}...')
        productname = response.css('h1.product-name::text').get()
        yield {'product_name': productname}
from scrapy.responsetypes import Response
from apify import Actor
from urllib.parse import urljoin
import nest_asyncio
import scrapy
from itemadapter import ItemAdapter
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.utils.reactor import install_reactor
from scrapy.http import Response, Request
class TitleSpider(scrapy.Spider):
    """Scrape product names from the A.P.C. men's shirts listing page.

    ``parse`` collects product-detail links from the listing page and
    schedules ``second_page`` for each one; ``second_page`` extracts the
    product name and yields it as an item.
    """

    name = 'example'
    allowed_domains = ['apc.fr']
    start_urls = [
        "https://www.apc.fr/men/men-shirts.html",
    ]

    def parse(self, response: Response):
        """Extract product links from the listing and follow each of them.

        Yields one request per product; each is handled by ``second_page``.
        """
        Actor.log.info(f'TitleSpider is parsing {response}...')
        for li in response.css('li.product-item'):
            product_link = li.css('.product-link a::attr(href)').get()
            if product_link:
                # response.follow resolves the href against the current page
                # URL, so relative links produce valid absolute requests that
                # are not silently dropped (a bare scrapy.Request with a
                # relative URL is rejected, which makes the second callback
                # appear to never run — the likely cause of the Apify issue).
                yield response.follow(product_link, callback=self.second_page)

    def second_page(self, response: Response):
        """Parse a product-detail page and yield the product name.

        Yielding a dict item (instead of only printing) lets the Scrapy/Apify
        item pipeline persist the result to the dataset; print output goes
        only to the container log.
        """
        Actor.log.info(f'Second fonction is parsing {response}...')
        productname = response.css('h1.product-name::text').get()
        yield {'product_name': productname}