import { Actor, log, ProxyConfiguration } from 'apify';
import { HttpCrawler } from 'crawlee';
await Actor.init();

// Multi-page scraping is reserved for paying users; checked again before paginating below.
const { userIsPaying } = Actor.getEnv();
if (!userIsPaying) {
    log.info('You need a paid Apify plan to scrape multiple pages');
}

const { keyword } = await Actor.getInput() ?? {};
// Fail fast on missing input instead of silently searching for the literal string "undefined".
if (!keyword) {
    throw new Error('Missing required input field "keyword".');
}

// Single place that builds a search URL for a keyword/page pair (used for the
// start request and for pagination), so the format cannot drift between call sites.
const buildSearchUrl = (kw, page) => `https://example.com/search?keyword=${encodeURIComponent(kw)}&page=${page}`;

const proxyConfiguration = new ProxyConfiguration();

const crawler = new HttpCrawler({
    proxyConfiguration,
    requestHandler: async ({ json, request, pushData, addRequests }) => {
        // Stop the whole run once the pay-per-event budget is exhausted.
        const chargeLimit = Actor.getChargingManager().calculateMaxEventChargeCountWithinLimit('apify-default-dataset-item');
        if (chargeLimit <= 0) {
            log.warning('Reached the maximum allowed cost for this run. Increase the maximum cost per run to scrape more.');
            await crawler.autoscaledPool?.abort();
            return;
        }

        if (request.label === 'SEARCH') {
            const { listings = [], page = 1, totalPages = 1 } = json;

            // Enqueue all listing detail pages in ONE batched call. addRequests()
            // returns a promise — await it so enqueue failures are not swallowed.
            await addRequests(listings.map((listing) => ({
                url: listing.url,
                label: 'LISTING',
            })));

            // From page 1, enqueue the remaining result pages (paid plans only).
            // Carry the keyword in userData so every SEARCH request is self-contained.
            if (page === 1 && totalPages > 1 && userIsPaying) {
                const pageRequests = [];
                for (let nextPage = 2; nextPage <= totalPages; nextPage++) {
                    pageRequests.push({
                        url: buildSearchUrl(request.userData.keyword, nextPage),
                        label: 'SEARCH',
                        userData: { keyword: request.userData.keyword },
                    });
                }
                await addRequests(pageRequests);
            }
        } else {
            // LISTING page: store the raw JSON payload as one dataset item.
            await pushData(json);
        }
    },
});

await crawler.run([{
    url: buildSearchUrl(keyword, 1),
    label: 'SEARCH',
    userData: { keyword },
}]);

await Actor.exit();
import { Actor, log, ProxyConfiguration } from 'apify';
import { HttpCrawler } from 'crawlee';
await Actor.init();

// Multi-page scraping is reserved for paying users; checked again before paginating below.
const { userIsPaying } = Actor.getEnv();
if (!userIsPaying) {
    log.info('You need a paid Apify plan to scrape multiple pages');
}

const { keyword } = await Actor.getInput() ?? {};
// Fail fast on missing input instead of silently searching for the literal string "undefined".
if (!keyword) {
    throw new Error('Missing required input field "keyword".');
}

// Single place that builds a search URL for a keyword/page pair (used for the
// start request and for pagination), so the format cannot drift between call sites.
const buildSearchUrl = (kw, page) => `https://example.com/search?keyword=${encodeURIComponent(kw)}&page=${page}`;

const proxyConfiguration = new ProxyConfiguration();

const crawler = new HttpCrawler({
    proxyConfiguration,
    requestHandler: async ({ json, request, pushData, addRequests }) => {
        // Stop the whole run once the pay-per-event budget is exhausted.
        const chargeLimit = Actor.getChargingManager().calculateMaxEventChargeCountWithinLimit('apify-default-dataset-item');
        if (chargeLimit <= 0) {
            log.warning('Reached the maximum allowed cost for this run. Increase the maximum cost per run to scrape more.');
            await crawler.autoscaledPool?.abort();
            return;
        }

        if (request.label === 'SEARCH') {
            const { listings = [], page = 1, totalPages = 1 } = json;

            // Enqueue all listing detail pages in ONE batched call. addRequests()
            // returns a promise — await it so enqueue failures are not swallowed.
            await addRequests(listings.map((listing) => ({
                url: listing.url,
                label: 'LISTING',
            })));

            // From page 1, enqueue the remaining result pages (paid plans only).
            // Carry the keyword in userData so every SEARCH request is self-contained.
            if (page === 1 && totalPages > 1 && userIsPaying) {
                const pageRequests = [];
                for (let nextPage = 2; nextPage <= totalPages; nextPage++) {
                    pageRequests.push({
                        url: buildSearchUrl(request.userData.keyword, nextPage),
                        label: 'SEARCH',
                        userData: { keyword: request.userData.keyword },
                    });
                }
                await addRequests(pageRequests);
            }
        } else {
            // LISTING page: store the raw JSON payload as one dataset item.
            await pushData(json);
        }
    },
});

await crawler.run([{
    url: buildSearchUrl(keyword, 1),
    label: 'SEARCH',
    userData: { keyword },
}]);

await Actor.exit();