absent-sapphire
absent-sapphire•13mo ago

How can I override the default logs of Crawlee?

Hello, I wonder how to override the default logs of the crawler. This is how they look: these logs come from the basic-crawler library: (https://github.com/apify/crawlee/blob/3ffcf56d744ac527ed8d883be3b1a62356a5930c/packages/basic-crawler/src/internals/basic-crawler.ts#L891) I am using Playwright, and this is how I manage to override the default logs with my custom logger:
//playwright-winston-proxy-logger.ts
import { Log } from 'crawlee'

import type { Logger } from 'winston'

type AdditionalData = Record<string, unknown> | null

/**
 * Adapter that lets a winston logger be passed wherever Crawlee expects a `Log`.
 *
 * Only the leveled convenience methods (`debug`, `info`, `warning`, `error`,
 * `exception`) are redirected here; each one forwards the message to the
 * winston method of the matching severity.
 */
export class WinstonLoggerProxy extends Log {
  /**
   * @param logger winston logger that receives every forwarded message
   */
  constructor(private logger: Logger) {
    super()
  }

  /** Sends a debug message to winston; `data` is attached only when it is truthy. */
  debug(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.debug(message)
      return
    }
    this.logger.debug(message, data)
  }

  /** Sends an info message to winston; `data` is attached only when it is truthy. */
  info(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.info(message)
      return
    }
    this.logger.info(message, data)
  }

  /** Sends a Crawlee warning to winston's `warn`; `data` is attached only when it is truthy. */
  warning(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.warn(message)
      return
    }
    this.logger.warn(message, data)
  }

  /** Sends an error message to winston; `data` is attached only when it is truthy. */
  error(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.error(message)
      return
    }
    this.logger.error(message, data)
  }

  /**
   * Logs an exception through winston's `error`, placing the exception object
   * in the metadata under the `exception` key next to any extra `data` fields.
   */
  exception(exception: Error, message: string, data?: AdditionalData): void {
    const meta = data ? { exception, ...data } : { exception }
    this.logger.error(message, meta)
  }
}
//playwright-winston-proxy-logger.ts
import { Log } from 'crawlee'

import type { Logger } from 'winston'

type AdditionalData = Record<string, unknown> | null

/**
 * Adapter that lets a winston logger be passed wherever Crawlee expects a `Log`.
 *
 * Only the leveled convenience methods (`debug`, `info`, `warning`, `error`,
 * `exception`) are redirected here; each one forwards the message to the
 * winston method of the matching severity.
 */
export class WinstonLoggerProxy extends Log {
  /**
   * @param logger winston logger that receives every forwarded message
   */
  constructor(private logger: Logger) {
    super()
  }

  /** Sends a debug message to winston; `data` is attached only when it is truthy. */
  debug(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.debug(message)
      return
    }
    this.logger.debug(message, data)
  }

  /** Sends an info message to winston; `data` is attached only when it is truthy. */
  info(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.info(message)
      return
    }
    this.logger.info(message, data)
  }

  /** Sends a Crawlee warning to winston's `warn`; `data` is attached only when it is truthy. */
  warning(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.warn(message)
      return
    }
    this.logger.warn(message, data)
  }

  /** Sends an error message to winston; `data` is attached only when it is truthy. */
  error(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.error(message)
      return
    }
    this.logger.error(message, data)
  }

  /**
   * Logs an exception through winston's `error`, placing the exception object
   * in the metadata under the `exception` key next to any extra `data` fields.
   */
  exception(exception: Error, message: string, data?: AdditionalData): void {
    const meta = data ? { exception, ...data } : { exception }
    this.logger.error(message, meta)
  }
}
and that's how I use it:
...
private createCrawler = (): PlaywrightCrawler => {
const loggerCrawler = new WinstonLoggerProxy(
createLogger({ module: 'PLAYWRIGHT' })
)

return new PlaywrightCrawler({
log: loggerCrawler, // Provide the custom logger proxy
...
...
private createCrawler = (): PlaywrightCrawler => {
const loggerCrawler = new WinstonLoggerProxy(
createLogger({ module: 'PLAYWRIGHT' })
)

return new PlaywrightCrawler({
log: loggerCrawler, // Provide the custom logger proxy
...
GitHub
crawlee/packages/basic-crawler/src/internals/basic-crawler.ts at 3f...
Crawlee—A web scraping and browser automation library for Node.js to build reliable crawlers. In JavaScript and TypeScript. Extract data for AI, LLMs, RAG, or GPTs. Download HTML, PDF, JPG, PNG, an...
No description
6 Replies
Hall
Hall•13mo ago
Post created!
This post has been synced with the Apify community site and will be indexed by search engines
absent-sapphire
absent-sapphireOP•13mo ago
Solution: The solution is to add the `internal` method to your proxy class. Here is the full code for the winston -> crawlee log proxy:
import { Log, LogLevel } from 'crawlee'

import type { Logger } from 'winston'

type AdditionalData = Record<string, unknown> | null

/**
 * Adapter that lets a winston logger be passed wherever Crawlee expects a `Log`.
 *
 * The leveled convenience methods forward straight to winston, and `internal`
 * is overridden as well so that DEBUG and INFO records passed to it also end
 * up in winston.
 */
export class WinstonLoggerProxy extends Log {
  /**
   * @param logger winston logger that receives every forwarded message
   */
  constructor(private logger: Logger) {
    super()
  }

  /** Sends a debug message to winston; `data` is attached only when it is truthy. */
  debug(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.debug(message)
      return
    }
    this.logger.debug(message, data)
  }

  /** Sends an info message to winston; `data` is attached only when it is truthy. */
  info(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.info(message)
      return
    }
    this.logger.info(message, data)
  }

  /**
   * Forwards a record identified by its numeric Crawlee level.
   *
   * Only `LogLevel.DEBUG` and `LogLevel.INFO` are handled; a record with any
   * other level produces no output from this method. The metadata object
   * always carries `exception` and additionally carries `data` when `data`
   * is truthy.
   */
  internal(
    level: LogLevel,
    message: string,
    data?: any,
    exception?: any
  ): void {
    const meta = data ? { data, exception } : { exception }

    if (level === LogLevel.DEBUG) {
      this.logger.debug(message, meta)
    } else if (level === LogLevel.INFO) {
      this.logger.info(message, meta)
    }
  }

  /** Sends a Crawlee warning to winston's `warn`; `data` is attached only when it is truthy. */
  warning(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.warn(message)
      return
    }
    this.logger.warn(message, data)
  }

  /** Sends an error message to winston; `data` is attached only when it is truthy. */
  error(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.error(message)
      return
    }
    this.logger.error(message, data)
  }

  /**
   * Logs an exception through winston's `error`, placing the exception object
   * in the metadata under the `exception` key next to any extra `data` fields.
   */
  exception(exception: Error, message: string, data?: AdditionalData): void {
    const meta = data ? { exception, ...data } : { exception }
    this.logger.error(message, meta)
  }
}
import { Log, LogLevel } from 'crawlee'

import type { Logger } from 'winston'

type AdditionalData = Record<string, unknown> | null

/**
 * Adapter that lets a winston logger be passed wherever Crawlee expects a `Log`.
 *
 * The leveled convenience methods forward straight to winston, and `internal`
 * is overridden as well so that DEBUG and INFO records passed to it also end
 * up in winston.
 */
export class WinstonLoggerProxy extends Log {
  /**
   * @param logger winston logger that receives every forwarded message
   */
  constructor(private logger: Logger) {
    super()
  }

  /** Sends a debug message to winston; `data` is attached only when it is truthy. */
  debug(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.debug(message)
      return
    }
    this.logger.debug(message, data)
  }

  /** Sends an info message to winston; `data` is attached only when it is truthy. */
  info(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.info(message)
      return
    }
    this.logger.info(message, data)
  }

  /**
   * Forwards a record identified by its numeric Crawlee level.
   *
   * Only `LogLevel.DEBUG` and `LogLevel.INFO` are handled; a record with any
   * other level produces no output from this method. The metadata object
   * always carries `exception` and additionally carries `data` when `data`
   * is truthy.
   */
  internal(
    level: LogLevel,
    message: string,
    data?: any,
    exception?: any
  ): void {
    const meta = data ? { data, exception } : { exception }

    if (level === LogLevel.DEBUG) {
      this.logger.debug(message, meta)
    } else if (level === LogLevel.INFO) {
      this.logger.info(message, meta)
    }
  }

  /** Sends a Crawlee warning to winston's `warn`; `data` is attached only when it is truthy. */
  warning(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.warn(message)
      return
    }
    this.logger.warn(message, data)
  }

  /** Sends an error message to winston; `data` is attached only when it is truthy. */
  error(message: string, data?: AdditionalData): void {
    if (!data) {
      this.logger.error(message)
      return
    }
    this.logger.error(message, data)
  }

  /**
   * Logs an exception through winston's `error`, placing the exception object
   * in the metadata under the `exception` key next to any extra `data` fields.
   */
  exception(exception: Error, message: string, data?: AdditionalData): void {
    const meta = data ? { exception, ...data } : { exception }
    this.logger.error(message, meta)
  }
}
I managed to override some logs, but I am still struggling to make the statistics logs work as well.
absent-sapphire
absent-sapphireOP•13mo ago
I managed to do this by overriding the `child` method and passing the data object to my Winston logger, but what concerns me are these 3 repeating logs:
[PLAYWRIGHT] [ #1 ] [2024-08-29 13:50:00] INFO : Attempting to run a task.
[PLAYWRIGHT] [ #1 ] [2024-08-29 13:50:00] INFO : Checking for ready tasks.
[PLAYWRIGHT] [ #1 ] [2024-08-29 13:50:00] INFO : Task will not run. No tasks are ready.
[PLAYWRIGHT] [ #1 ] [2024-08-29 13:50:00] INFO : Attempting to run a task.
[PLAYWRIGHT] [ #1 ] [2024-08-29 13:50:00] INFO : Checking for ready tasks.
[PLAYWRIGHT] [ #1 ] [2024-08-29 13:50:00] INFO : Task will not run. No tasks are ready.
I don't have anything like that in my code; these messages come from the library: "Attempting to run a task.": https://github.com/apify/crawlee/blob/c69a34a616feda0824c88f9ec18871bff0b212c0/packages/core/src/autoscaling/autoscaled_pool.ts#L473 "Checking for ready tasks.": https://github.com/apify/crawlee/blob/c69a34a616feda0824c88f9ec18871bff0b212c0/packages/core/src/autoscaling/autoscaled_pool.ts#L504 "Task will not run. No tasks are ready.": https://github.com/apify/crawlee/blob/c69a34a616feda0824c88f9ec18871bff0b212c0/packages/core/src/autoscaling/autoscaled_pool.ts#L519 @Marco hi, I see you are a developer at Apify, maybe you know the answer to my question.
GitHub
crawlee/packages/core/src/autoscaling/autoscaled_pool.ts at c69a34a...
Crawlee—A web scraping and browser automation library for Node.js to build reliable crawlers. In JavaScript and TypeScript. Extract data for AI, LLMs, RAG, or GPTs. Download HTML, PDF, JPG, PNG, an...
absent-sapphire
absent-sapphireOP•13mo ago
That's how it looks:
No description
absent-sapphire
absent-sapphireOP•13mo ago
Okay, fixed: there was a small issue with the switch and the handling of the PERF log level. I was treating PERF messages as INFO, when I should treat them as DEBUG (or add logic to ignore those strings):
import { Log, LogLevel } from 'crawlee'

import type { LoggerOptions } from 'crawlee'
import type { Logger } from 'winston'

type AdditionalData = Record<string, unknown> | null

/**
 * Crawlee `Log` subclass that routes log records to a winston logger.
 *
 * `internal` is overridden so that every record passed to it is sent to the
 * winston method selected by `getLogMethod`, and `child` wraps a winston
 * child logger in a new proxy so derived loggers keep using winston too.
 */
export class WinstonLoggerProxy extends Log {
  private logger: Logger

  /**
   * @param logger winston logger that receives the forwarded records
   * @param options optional Crawlee logger options handed to the `Log` base class
   */
  constructor(logger: Logger, options?: Partial<LoggerOptions>) {
    super(options)
    this.logger = logger
  }

  /** Returns the level reported by the base `Log` implementation. */
  getLevel(): number {
    return super.getLevel()
  }

  /** Passes the new level to the base `Log` implementation. */
  setLevel(level: LogLevel): void {
    super.setLevel(level)
  }

  /** Returns the options reported by the base `Log` implementation. */
  getOptions(): Required<LoggerOptions> {
    return super.getOptions()
  }

  /** Passes the new options to the base `Log` implementation. */
  setOptions(options: Partial<LoggerOptions>): void {
    super.setOptions(options)
  }

  /**
   * Creates a derived proxy: the winston logger's `child` is called with the
   * given options, and the result is wrapped in a new `WinstonLoggerProxy`
   * whose options are this proxy's options overlaid with `options`.
   */
  child(options: Partial<LoggerOptions>): WinstonLoggerProxy {
    const childLogger = this.logger.child(options)

    return new WinstonLoggerProxy(childLogger, {
      ...this.getOptions(),
      ...options,
    })
  }

  /**
   * Forwards a record identified by its numeric Crawlee level to winston.
   *
   * When either `data` or `exception` is truthy, both are attached as
   * metadata under the `data` and `exception` keys; otherwise only the
   * message is logged.
   */
  internal(
    level: LogLevel,
    message: string,
    data?: any,
    exception?: any
  ): void {
    const logMethod = this.getLogMethod(level)
    if (data || exception) {
      this.logger[logMethod](message, { data, exception })
    } else {
      this.logger[logMethod](message)
    }
  }
  // ... (further members were elided at this point in the original post)

  /**
   * Maps a Crawlee log level to the name of the winston method to call.
   *
   * The return type lists exactly the four method names produced below. A
   * plain `keyof Logger` would also admit non-callable winston members, which
   * makes the `this.logger[logMethod](...)` call in `internal` fail type
   * checking.
   */
  private getLogMethod(level: LogLevel): 'debug' | 'info' | 'warn' | 'error' {
    switch (level) {
      case LogLevel.DEBUG:
      case LogLevel.PERF: // this fixed it: PERF records go to debug instead of being treated as info
        return 'debug'
      case LogLevel.INFO:
        return 'info'
      case LogLevel.WARNING:
      case LogLevel.SOFT_FAIL:
        return 'warn'
      case LogLevel.ERROR:
        return 'error'
      default:
        // Any level not listed above falls back to info.
        return 'info'
    }
  }
}
import { Log, LogLevel } from 'crawlee'

import type { LoggerOptions } from 'crawlee'
import type { Logger } from 'winston'

type AdditionalData = Record<string, unknown> | null

/**
 * Crawlee `Log` subclass that routes log records to a winston logger.
 *
 * `internal` is overridden so that every record passed to it is sent to the
 * winston method selected by `getLogMethod`, and `child` wraps a winston
 * child logger in a new proxy so derived loggers keep using winston too.
 */
export class WinstonLoggerProxy extends Log {
  private logger: Logger

  /**
   * @param logger winston logger that receives the forwarded records
   * @param options optional Crawlee logger options handed to the `Log` base class
   */
  constructor(logger: Logger, options?: Partial<LoggerOptions>) {
    super(options)
    this.logger = logger
  }

  /** Returns the level reported by the base `Log` implementation. */
  getLevel(): number {
    return super.getLevel()
  }

  /** Passes the new level to the base `Log` implementation. */
  setLevel(level: LogLevel): void {
    super.setLevel(level)
  }

  /** Returns the options reported by the base `Log` implementation. */
  getOptions(): Required<LoggerOptions> {
    return super.getOptions()
  }

  /** Passes the new options to the base `Log` implementation. */
  setOptions(options: Partial<LoggerOptions>): void {
    super.setOptions(options)
  }

  /**
   * Creates a derived proxy: the winston logger's `child` is called with the
   * given options, and the result is wrapped in a new `WinstonLoggerProxy`
   * whose options are this proxy's options overlaid with `options`.
   */
  child(options: Partial<LoggerOptions>): WinstonLoggerProxy {
    const childLogger = this.logger.child(options)

    return new WinstonLoggerProxy(childLogger, {
      ...this.getOptions(),
      ...options,
    })
  }

  /**
   * Forwards a record identified by its numeric Crawlee level to winston.
   *
   * When either `data` or `exception` is truthy, both are attached as
   * metadata under the `data` and `exception` keys; otherwise only the
   * message is logged.
   */
  internal(
    level: LogLevel,
    message: string,
    data?: any,
    exception?: any
  ): void {
    const logMethod = this.getLogMethod(level)
    if (data || exception) {
      this.logger[logMethod](message, { data, exception })
    } else {
      this.logger[logMethod](message)
    }
  }
  // ... (further members were elided at this point in the original post)

  /**
   * Maps a Crawlee log level to the name of the winston method to call.
   *
   * The return type lists exactly the four method names produced below. A
   * plain `keyof Logger` would also admit non-callable winston members, which
   * makes the `this.logger[logMethod](...)` call in `internal` fail type
   * checking.
   */
  private getLogMethod(level: LogLevel): 'debug' | 'info' | 'warn' | 'error' {
    switch (level) {
      case LogLevel.DEBUG:
      case LogLevel.PERF: // this fixed it: PERF records go to debug instead of being treated as info
        return 'debug'
      case LogLevel.INFO:
        return 'info'
      case LogLevel.WARNING:
      case LogLevel.SOFT_FAIL:
        return 'warn'
      case LogLevel.ERROR:
        return 'error'
      default:
        // Any level not listed above falls back to info.
        return 'info'
    }
  }
}
MEE6
MEE6•13mo ago
@Wojciech just advanced to level 3! Thanks for your contributions! 🎉

Did you find this page helpful?