import { PlaywrightCrawler } from 'crawlee';

/**
 * Builds a PlaywrightCrawler whose request handler logs the URL and the
 * document title of every page it opens.
 *
 * Chromium is launched with `--ignore-certificate-errors`.
 *
 * @returns {PlaywrightCrawler} A freshly configured crawler instance.
 */
function createTitleLoggingCrawler() {
    return new PlaywrightCrawler({
        requestHandler: async ({ request, page, log }) => {
            const title = await page.title();
            log.info(`URL: ${request.url}\nTITLE: ${title}`);
        },
        launchContext: {
            launchOptions: {
                args: ['--ignore-certificate-errors'],
            },
        },
    });
}

/**
 * Crawls two pages of the inspection portal with two separate crawlers and
 * logs each page's title.
 *
 * Both URLs are identical up to the `#` fragment. Crawlee derives a request's
 * default `uniqueKey` from the URL with the fragment removed, so without an
 * explicit key the second request is treated as a duplicate of the first and
 * the second crawler finishes with zero requests. Passing the full URL as
 * `uniqueKey` keeps the two requests distinct.
 *
 * @returns {Promise<void>} Resolves once both crawlers have finished.
 */
export async function runExample() {
    const testPage1 = 'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
    const testPage2 = 'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

    const crawler1 = createTitleLoggingCrawler();
    const crawler2 = createTitleLoggingCrawler();

    // Explicit uniqueKeys: the URLs differ only in their fragment.
    await crawler1.run([{ url: testPage1, uniqueKey: testPage1 }]);
    await crawler2.run([{ url: testPage2, uniqueKey: testPage2 }]);
}

// Surface failures instead of leaving the promise floating.
runExample().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
INFO PlaywrightCrawler: Starting the crawler.
INFO PlaywrightCrawler: URL: https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5
TITLE:
INFO PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO PlaywrightCrawler: Final request statistics: {"requestsFinished":1,"requestsFailed":0,"retryHistogram":[1],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":782,"requestsFinishedPerMinute":55,"requestsFailedPerMinute":0,"requestTotalDurationMillis":782,"requestsTotal":1,"crawlerRuntimeMillis":1083}
INFO PlaywrightCrawler: Finished! Total 1 requests: 1 succeeded, 0 failed. {"terminal":true}
INFO PlaywrightCrawler: Starting the crawler.
INFO PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO PlaywrightCrawler: Final request statistics: {"requestsFinished":0,"requestsFailed":0,"retryHistogram":[],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":null,"requestsFinishedPerMinute":0,"requestsFailedPerMinute":0,"requestTotalDurationMillis":0,"requestsTotal":0,"crawlerRuntimeMillis":238}
INFO PlaywrightCrawler: Finished! Total 0 requests: 0 succeeded, 0 failed. {"terminal":true}
// Open a named RequestList holding the single facility page to crawl.
const requestList1 = await RequestList.open('my-request-list1', [
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5',
]);

// The crawler reads its requests from the list above, so run() takes no URLs.
const crawler1 = new PlaywrightCrawler({
    requestList: requestList1,
    requestHandler: async ({ request, page, log }) => {
        // ... handler body elided in the original snippet
    },
});

await crawler1.run();
import { createPlaywrightRouter, PlaywrightCrawler } from 'crawlee';

/**
 * Crawls the facility page, then enqueues and crawls one of its inspection
 * pages through a labelled route, logging the title of each page.
 *
 * Both URLs are identical up to the `#` fragment. Crawlee derives a request's
 * default `uniqueKey` from the URL with the fragment removed, so the enqueued
 * inspection page would be treated as a duplicate of the start URL and the
 * `ROUTE_2` handler would never run. `transformRequestFunction` therefore sets
 * `uniqueKey` to the full URL, fragment included.
 *
 * @returns {Promise<void>} Resolves once the crawler has finished.
 */
export async function runExample() {
    const testPage1 = 'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
    const testPage2 = 'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

    const router = createPlaywrightRouter();

    // Start page: log it, then queue the inspection page under ROUTE_2.
    router.addDefaultHandler(async ({ page, log, request, enqueueLinks }) => {
        const title = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${title}`);

        await enqueueLinks({
            label: 'ROUTE_2',
            urls: [testPage2],
            // Keep the fragment in the deduplication key (see function docs).
            transformRequestFunction: (requestOptions) => ({
                ...requestOptions,
                uniqueKey: requestOptions.url,
            }),
        });
    });

    // Inspection page: just log it.
    router.addHandler('ROUTE_2', async ({ page, log, request }) => {
        const title = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${title}`);
    });

    const crawler1 = new PlaywrightCrawler({
        requestHandler: router,
        launchContext: {
            launchOptions: {
                args: ['--ignore-certificate-errors'],
            },
        },
    });

    await crawler1.run([testPage1]);
}

// Surface failures instead of leaving the promise floating.
runExample().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
import { createPlaywrightRouter, PlaywrightCrawler } from 'crawlee';

/**
 * Crawls the facility page, then adds one of its inspection pages to the
 * crawler under the `ROUTE_2` label, logging the title of each page.
 *
 * Both URLs are identical up to the `#` fragment. Crawlee derives a request's
 * default `uniqueKey` from the URL with the fragment removed, so the added
 * inspection page would be treated as a duplicate of the start URL and the
 * `ROUTE_2` handler would never run. The request is therefore given the full
 * URL as an explicit `uniqueKey`.
 *
 * @returns {Promise<void>} Resolves once the crawler has finished.
 */
export async function runExample() {
    const testPage1 = 'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
    const testPage2 = 'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

    const router = createPlaywrightRouter();

    // Start page: log it, then queue the inspection page under ROUTE_2.
    router.addDefaultHandler(async ({ page, log, request, crawler }) => {
        const title = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${title}`);

        await crawler.addRequests([
            {
                label: 'ROUTE_2',
                url: testPage2,
                // Keep the fragment in the deduplication key (see function docs).
                uniqueKey: testPage2,
            },
        ]);
    });

    // Inspection page: just log it.
    router.addHandler('ROUTE_2', async ({ page, log, request }) => {
        const title = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${title}`);
    });

    const crawler1 = new PlaywrightCrawler({
        requestHandler: router,
        launchContext: {
            launchOptions: {
                args: ['--ignore-certificate-errors'],
            },
        },
    });

    await crawler1.run([testPage1]);
}

// Surface failures instead of leaving the promise floating.
runExample().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
// Adds the ROUTE_2 request with an explicit uniqueKey equal to its full URL.
// NOTE(review): presumably needed because the default uniqueKey ignores the '#' fragment,
// so this request would otherwise be deduplicated — confirm against the Crawlee Request docs.
await crawler.addRequests([{ label: 'ROUTE_2', url: testPage2, uniqueKey:testPage2, }]); });