Hi, I'm using the proxy config with 100 proxies.
The goal is to let the scraper run with, say, 4 sessions concurrently, each using a different proxy.
In each run, however, I see that it picks a single session ID (and therefore a single proxy) and sends all requests through that same one.
(It's a different one on each run, but within a run it is always a single IP.)
import { ProxyConfiguration } from 'crawlee';
import { SMART_PROXY_DATACENTER_IPS } from '../utils/proxies.js';
import ApplicationRouter from './ApplicationRouter.js';
/**
 * Diagnostic router that loads https://ifconfig.co a few times and logs the
 * IP address shown on the page, so the proxy used by each request can be
 * compared across requests.
 */
export default class TestProxies extends ApplicationRouter {
  /**
   * Sets the version and name prefix, then derives the dataset name from both.
   */
  async setup() {
    Object.assign(this, { version: 1, prefix: 'TestProxies' });
    this.datasetName = `${this.prefix}_dataset_V${this.version}`;
  }

  /**
   * Builds the start requests: four URLs on ifconfig.co that differ only in
   * the `a` query parameter (1 through 4), all labelled "page".
   *
   * @returns {Promise<Array<{url: string, label: string}>>} The start requests.
   */
  async getInitialPages() {
    const pageCount = 4;
    return Array.from({ length: pageCount }, (_, index) => ({
      url: `https://ifconfig.co/?a=${index + 1}`,
      label: 'page',
    }));
  }

  /**
   * @returns {string} The request queue name, derived from the prefix.
   */
  getRequestQueueName() {
    const queueName = `${this.prefix}_queue`;
    return queueName;
  }

  /**
   * @returns {string} The root URL of the site being visited.
   */
  getPageRoot() {
    const pageRoot = 'https://ifconfig.co';
    return pageRoot;
  }

  /**
   * Entry handler: reads the text of the `#output` element, logs it as the
   * proxy IP, then sleeps via `this.sleep(4000)`.
   */
  async visitPage() {
    const outputSelector = { css: '#output' };
    const reportedIp = await this.text(outputSelector);
    this.debug('Proxy IP is', reportedIp);
    await this.sleep(4000);
  }

  /**
   * Assembles the crawler options: retry and concurrency limits, session
   * pool settings, and a proxy configuration built from the imported
   * SMART_PROXY_DATACENTER_IPS list.
   *
   * @returns {Promise<object>} The crawler options object.
   */
  async getCrawlerOptions() {
    const sessionOptions = {
      maxUsageCount: 150,
      // 23 minutes. The original note says IPs rotate after 30 minutes, so
      // sessions presumably are meant to expire before that; TODO confirm.
      maxAgeSecs: 23 * 60,
    };

    const sessionPoolOptions = {
      maxPoolSize: 25,
      sessionOptions,
      persistStateKeyValueStoreId: `${this.prefix}_V${this.version}_sessions`,
      persistStateKey: `${this.prefix}_V${this.version}_my-session-pool`,
    };

    const proxyConfiguration = new ProxyConfiguration({
      proxyUrls: SMART_PROXY_DATACENTER_IPS,
    });

    return {
      maxRequestRetries: 3,
      maxConcurrency: 2,
      useSessionPool: true,
      sessionPoolOptions,
      proxyConfiguration,
    };
  }
}