import { PlaywrightCrawler } from 'crawlee';
import { Actor } from 'apify';
import fs from 'fs';
// Path of the JSON file used as Actor input outside production.
const LOCAL_CONFIG_PATH = './Actor.config';

/**
 * Load the Actor input.
 *
 * When NODE_ENV is 'production' the input is obtained via Actor.getInput().
 * Otherwise it is parsed as JSON from the local LOCAL_CONFIG_PATH file.
 *
 * @returns {Promise<any>} The input value (may be null/undefined if the
 *   platform provides no input).
 * @throws {Error} If the local config file is missing or is not valid JSON.
 */
async function loadInput() {
  if (process.env.NODE_ENV === 'production') {
    return Actor.getInput();
  }

  // Read directly instead of existsSync() + readFileSync(): avoids the
  // check-then-use race and a redundant filesystem call.
  let raw;
  try {
    raw = fs.readFileSync(LOCAL_CONFIG_PATH, 'utf-8');
  } catch (err) {
    if (err?.code === 'ENOENT') {
      throw new Error('Local Actor.config file not found.', { cause: err });
    }
    throw err;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    // Name the offending file; a bare SyntaxError gives no hint of its origin.
    throw new Error(
      `Local Actor.config file is not valid JSON: ${err.message}`,
      { cause: err },
    );
  }
}

(async () => {
  // Initialize the Actor
  await Actor.init();

  try {
    const input = await loadInput();
    console.log('Input configuration:', input);

    // Fail early with a clear message instead of a TypeError on a null input
    // or an undefined entry in the crawler's start request list.
    const startUrl = input?.startUrl;
    if (startUrl == null || startUrl === '') {
      throw new Error('Input configuration must contain a non-empty "startUrl".');
    }

    // Create a PlaywrightCrawler
    const crawler = new PlaywrightCrawler({
      requestHandler: async ({ request, page, enqueueLinks, pushData, log }) => {
        const title = await page.title();
        log.info(`Title of ${request.loadedUrl} is '${title}'`);
        // Save results as JSON
        await pushData({ title, url: request.loadedUrl });
        // Extract links and add them to the queue
        await enqueueLinks();
      },
      headless: true,
      maxRequestsPerCrawl: 20,
    });

    // Add the start URL from the input configuration and start the crawl
    await crawler.run([startUrl]);

    // Clean up and exit the Actor
    await Actor.exit();
  } catch (err) {
    // Log the full error (with stack and cause), then end the run as failed
    // through the Actor API rather than a bare process.exit(1).
    console.error(err);
    await Actor.fail(err instanceof Error ? err.message : String(err));
  }
})().catch((err) => {
  // Last-resort handler (e.g. Actor.init() or Actor.fail() itself rejected) so
  // the promise is never left floating as an unhandled rejection.
  console.error(err);
  process.exit(1);
});