conscious-sapphire
conscious-sapphire13mo ago

Cheerio not persisting cookies

Cheerio is not able to persist cookies that are set in the session. I have `persistCookiesPerSession: true` set, and I have verified in the requestHandler that the cookie is being saved in the session. But when I print out the request headers, the cookie header is not present. The session in preNavigationHooks also does not contain the cookies.
// Reproduction from the question: a CheerioCrawler configured with the session
// pool enabled and per-session cookie persistence turned on.
const crawler = new CheerioCrawler({
minConcurrency: 1,
maxConcurrency: 10,
requestHandlerTimeoutSecs: 30,
maxRequestRetries: 10,
useSessionPool: true,
persistCookiesPerSession: true,
preNavigationHooks: [
// Pre-navigation hook: sets header-generator options on gotOptions, then logs
// the request URL, the session's cookies for that URL, and the outgoing headers.
async ({ request, session }, gotOptions) => {
gotOptions.useHeaderGenerator = true;
gotOptions.headerGeneratorOptions = {
browsers: [{ name: 'firefox', minVersion: 115, maxVersion: 115 }],
devices: ['desktop'],
operatingSystems: ['windows'],
locales: ['en-US', 'en'],
};
console.log('START PRE HOOK');
console.log(request.url);

// THIS IS EMPTY ON SECOND REQUEST
console.log(session?.getCookies(request.url));
console.log(gotOptions.headers);
console.log('END PRE HOOK');
},
],
// Request handler: when the response carries a `refresh` header (and a session
// exists), logs diagnostics, waits, and re-enqueues the same URL.
requestHandler: async ({ response, request, session, log, addRequests }) => {
// The header value is used below as a number of seconds to wait.
const refresh = response.headers?.refresh;
if (refresh && session) {
console.log(response.request.options.headers);
log.info(`Access queue detected, waiting for ${refresh} seconds...`);

// Cookies are present here
console.log(session.getCookies(request.url));
// Sleeps one second less than the header value.
// NOTE(review): parseInt has no radix and no NaN guard; a non-numeric header
// value would pass NaN to sleep — confirm the expected header format.
await sleep((parseInt(refresh) - 1) * 1000);
// Re-enqueues the same URL under a time-based uniqueKey, presumably so the
// queue does not treat it as a duplicate of the current request.
// NOTE(review): nothing here ties the new request to this session; per the
// reply in this thread, it may be handled with a different session from the
// pool, which would explain the empty cookies in the hook.
await addRequests([{ url: request.url, uniqueKey: new Date().toString() }]);
}
},
});
// Reproduction from the question: a CheerioCrawler configured with the session
// pool enabled and per-session cookie persistence turned on.
const crawler = new CheerioCrawler({
minConcurrency: 1,
maxConcurrency: 10,
requestHandlerTimeoutSecs: 30,
maxRequestRetries: 10,
useSessionPool: true,
persistCookiesPerSession: true,
preNavigationHooks: [
// Pre-navigation hook: sets header-generator options on gotOptions, then logs
// the request URL, the session's cookies for that URL, and the outgoing headers.
async ({ request, session }, gotOptions) => {
gotOptions.useHeaderGenerator = true;
gotOptions.headerGeneratorOptions = {
browsers: [{ name: 'firefox', minVersion: 115, maxVersion: 115 }],
devices: ['desktop'],
operatingSystems: ['windows'],
locales: ['en-US', 'en'],
};
console.log('START PRE HOOK');
console.log(request.url);

// THIS IS EMPTY ON SECOND REQUEST
console.log(session?.getCookies(request.url));
console.log(gotOptions.headers);
console.log('END PRE HOOK');
},
],
// Request handler: when the response carries a `refresh` header (and a session
// exists), logs diagnostics, waits, and re-enqueues the same URL.
requestHandler: async ({ response, request, session, log, addRequests }) => {
// The header value is used below as a number of seconds to wait.
const refresh = response.headers?.refresh;
if (refresh && session) {
console.log(response.request.options.headers);
log.info(`Access queue detected, waiting for ${refresh} seconds...`);

// Cookies are present here
console.log(session.getCookies(request.url));
// Sleeps one second less than the header value.
// NOTE(review): parseInt has no radix and no NaN guard; a non-numeric header
// value would pass NaN to sleep — confirm the expected header format.
await sleep((parseInt(refresh) - 1) * 1000);
// Re-enqueues the same URL under a time-based uniqueKey, presumably so the
// queue does not treat it as a duplicate of the current request.
// NOTE(review): nothing here ties the new request to this session; per the
// reply in this thread, it may be handled with a different session from the
// pool, which would explain the empty cookies in the hook.
await addRequests([{ url: request.url, uniqueKey: new Date().toString() }]);
}
},
});
1 Reply
exotic-emerald
exotic-emerald13mo ago
Hello @Tay, this is probably caused by the two requests using different sessions, because the session pool picks a random session from the pool.

Did you find this page helpful?