Scraping skips large blocks of text on the page; I have tried changing the input, without success.
I am trying to scrape the text at this URL: https://www.svila.it/en/our-story/
This is my input:
def run_text_scraper(self, url):
run_input = {
"startUrls": [{"url": url}],
"crawlerType": "playwright:chrome",
"excludeUrlGlobs": [
"https://**.**/**/Terms-of-Use*",
"https://**.**/**/terms-of-use*",
],
"maxCrawlDepth": 0,
"maxCrawlPages": 20,
"initialConcurrency": 2,
"maxConcurrency": 200,
"initialCookies": [],
"proxyConfiguration": {
"useApifyProxy": True
},
"dynamicContentWaitSecs": 10,
"maxScrollHeightPixels": 5000,
"removeElementsCssSelector": "dummy_keep_everything",
"removeCookieWarnings": True,
"clickElementsCssSelector": "[aria-expanded="false"]",
"htmlTransformer": "readableText",
"readableTextCharThreshold": 100,
"aggressivePrune": False,
"debugMode": False,
"saveHtml": False,
"saveMarkdown": False,
"saveFiles": False,
"saveScreenshots": False,
"maxResults": 20
}
return self.run_scraper("apify/website-content-crawler", run_input)
This is the output:
[{
"url": "https://www.svila.it/en/our-story/",
"crawl": {
"loadedUrl": "https://www.svila.it/en/our-story/",
"loadedTime": "2023-07-06T09:28:27.957Z",
"referrerUrl": "https://www.svila.it/en/our-story/",
"depth": 0,
"httpStatusCode": 200
},
"metadata": {
"canonicalUrl": "https://www.svila.it/en/our-story/",
"title": "Svila produces frozen pizzas since 1974",
"description": "Svila was... story!",
"author": null,
"keywords": null,
"languageCode": "en-US"
},
"screenshotUrl": null,
"text": "Svila is a manufacturer of frozen pizzas in Italy.......the world."
}]
This is my input:
def run_text_scraper(self, url):
run_input = {
"startUrls": [{"url": url}],
"crawlerType": "playwright:chrome",
"excludeUrlGlobs": [
"https://**.**/**/Terms-of-Use*",
"https://**.**/**/terms-of-use*",
],
"maxCrawlDepth": 0,
"maxCrawlPages": 20,
"initialConcurrency": 2,
"maxConcurrency": 200,
"initialCookies": [],
"proxyConfiguration": {
"useApifyProxy": True
},
"dynamicContentWaitSecs": 10,
"maxScrollHeightPixels": 5000,
"removeElementsCssSelector": "dummy_keep_everything",
"removeCookieWarnings": True,
"clickElementsCssSelector": "[aria-expanded="false"]",
"htmlTransformer": "readableText",
"readableTextCharThreshold": 100,
"aggressivePrune": False,
"debugMode": False,
"saveHtml": False,
"saveMarkdown": False,
"saveFiles": False,
"saveScreenshots": False,
"maxResults": 20
}
return self.run_scraper("apify/website-content-crawler", run_input)
This is the output:
[{
"url": "https://www.svila.it/en/our-story/",
"crawl": {
"loadedUrl": "https://www.svila.it/en/our-story/",
"loadedTime": "2023-07-06T09:28:27.957Z",
"referrerUrl": "https://www.svila.it/en/our-story/",
"depth": 0,
"httpStatusCode": 200
},
"metadata": {
"canonicalUrl": "https://www.svila.it/en/our-story/",
"title": "Svila produces frozen pizzas since 1974",
"description": "Svila was... story!",
"author": null,
"keywords": null,
"languageCode": "en-US"
},
"screenshotUrl": null,
"text": "Svila is a manufacturer of frozen pizzas in Italy.......the world."
}]