// Type declarations for the `Spider` crawler class and the event types it exposes.
//
// NOTE(review): throughout this declaration several generic types appear without
// type arguments (`Record`, `Partial`, `AsyncEventEmitter`, most `Promise`s) and the
// name `T` is referenced without a visible type-parameter declaration. It looks like
// the generic parameter/argument lists were lost somewhere between the generator and
// this copy — confirm against the generating source before relying on these types.
import { AsyncEventEmitter } from '@vladfrangu/async_event_emitter';
import { Request, PlaywrightCrawlingContext, PlaywrightDirectNavigationOptions, SystemInfo } from 'crawlee';
import { FinalStatistics } from 'crawlee';
import { PlaywrightCrawler, PlaywrightCrawlerOptions, Configuration, RequestOptions } from 'crawlee';
import { NormalizedUrl } from '@autogram/url-tools';
import { UniqueUrl } from '../model/index.js';
import { InternalSpiderOptions, SpiderOptions, SpiderContext, SpiderStatus } from './index.js';

/**
 * Maps each Spider event name to the tuple of arguments its listeners receive.
 *
 * NOTE(review): `Record` is written without key/value type arguments here — TODO confirm
 * the intended base record type.
 */
type SpiderEventMap = Record & {
    /** Listeners receive a single status object combining `SystemInfo` and `SpiderStatus`. */
    systemInfo: [status: SystemInfo & SpiderStatus];
    /** Listeners receive the current `SpiderStatus` followed by a URL string. */
    progress: [status: SpiderStatus, url: string];
    /** Listeners receive a single status object combining `SpiderStatus` and `FinalStatistics`. */
    end: [status: SpiderStatus & FinalStatistics];
    /** Listeners receive a reason string. */
    aborting: [reason: string];
    /** Listeners receive a reason string. */
    exiting: [reason: string];
};

/** Union of the event names declared in `SpiderEventMap`. */
type SpiderEventType = keyof SpiderEventMap;

/**
 * Argument tuple for one event, looked up in `SpiderEventMap`.
 *
 * NOTE(review): `T` has no visible declaration on this alias — presumably a type
 * parameter constrained to `SpiderEventType`; verify.
 */
type SpiderEventParams = SpiderEventMap[T];

/**
 * Listener signature: called with an event's argument tuple; its return value is typed `unknown`.
 *
 * NOTE(review): `SpiderEventParams` is used without a type argument — presumably this
 * alias is generic over the event name as well; verify.
 */
type SpiderEventListener = (...args: SpiderEventParams) => unknown;

/** Any of the value shapes accepted by `Spider.run()` as a request. */
type RequestValue = string | Request | RequestOptions | NormalizedUrl | UniqueUrl;

/**
 * A `PlaywrightCrawler` subclass that adds Spider-specific options, a `status`
 * object, and typed `on`/`off` event subscription.
 */
export declare class Spider extends PlaywrightCrawler {
    /** Spider options, typed as `InternalSpiderOptions`. */
    spiderOptions: InternalSpiderOptions;
    /** Crawler options, typed as `PlaywrightCrawlerOptions`. */
    crawlerOptions: PlaywrightCrawlerOptions;
    /** Status object, typed as `SpiderStatus`. */
    status: SpiderStatus;
    /**
     * Event emitter instance.
     *
     * NOTE(review): `AsyncEventEmitter` has no type argument here — presumably
     * parameterized by `SpiderEventMap`; verify.
     */
    protected _events: AsyncEventEmitter;

    /**
     * @param options Optional Spider options.
     *   NOTE(review): `Partial` has no type argument — presumably `SpiderOptions`,
     *   which is imported but not otherwise referenced in this file; verify.
     * @param config Optional `Configuration` object.
     */
    constructor(options?: Partial, config?: Configuration);

    /**
     * Request-handler hook taking the Playwright crawling context.
     *
     * NOTE(review): the `Promise` result type is missing — TODO confirm.
     */
    protected _runRequestHandler(context: PlaywrightCrawlingContext): Promise;

    /**
     * Navigation hook taking the crawling context and the direct-navigation options.
     *
     * NOTE(review): the `Promise` result type is missing — TODO confirm.
     */
    protected _navigationHandler(crawlingContext: PlaywrightCrawlingContext, gotoOptions: PlaywrightDirectNavigationOptions): Promise;

    /**
     * Respond to an internal Spider event.
     *
     * - `systemInfo`: Fired at regular intervals, summarizing memory and server load
     * - `progress`: Fired when a specific request has been processed
     * - `end`: Fired when the last request in the queue has been processed
     *
     * `SpiderEventMap` also declares `aborting` and `exiting`, each carrying a
     * reason string.
     *
     * NOTE(review): `T` is not declared on this signature — presumably a method-level
     * type parameter tying `event` to the listener's argument tuple; verify.
     *
     * @param event Name of the event to listen for.
     * @param listener Callback invoked with that event's arguments.
     * @returns This instance.
     */
    on(event: T, listener: SpiderEventListener): this;

    /**
     * Counterpart to `on()` with the same parameters; presumably removes a
     * previously registered listener (implementation not visible here).
     *
     * NOTE(review): `T` is not declared on this signature either; verify.
     *
     * @param event Name of the event.
     * @param listener The listener callback.
     * @returns This instance.
     */
    off(event: T, listener: SpiderEventListener): this;

    /**
     * Takes the `request` and `requestMeta` properties of a `SpiderContext` plus an
     * optional error flag. Presumably updates `status`; the implementation is not
     * visible in this declaration.
     *
     * @param error Optional boolean flag — presumably marks the request as failed; verify.
     */
    protected updateStats({ request, requestMeta }: SpiderContext, error?: boolean): void;

    /**
     * Context cleanup hook taking a `SpiderContext`.
     *
     * NOTE(review): the `Promise` result type is missing — TODO confirm.
     */
    protected _cleanupContext(context: SpiderContext): Promise;

    /**
     * Enqueue a set of URLs and crawl them using the current Spider options.
     *
     * NOTE(review): the `Promise` result type is missing — TODO confirm.
     *
     * @param requests Optional single request value or array of request values.
     */
    run(requests?: RequestValue | RequestValue[]): Promise;

    /**
     * Resume the crawl with saved-but-unvisited URLs.
     *
     * NOTE(review): the four `requestsBy*` fields are typed as bare `Record` with no
     * key/value type arguments — TODO confirm the intended types.
     *
     * @param urls The `UniqueUrl` records to crawl.
     * @returns A promise for an object holding the request counters, timing figures,
     *   and per-status/type/host/label breakdowns listed below.
     */
    resume(urls: UniqueUrl[]): Promise<{
        requestsFinished: number;
        requestsFailed: number;
        retryHistogram: number[];
        requestAvgFailedDurationMillis: number;
        requestAvgFinishedDurationMillis: number;
        requestsFinishedPerMinute: number;
        requestsFailedPerMinute: number;
        requestTotalDurationMillis: number;
        requestsTotal: number;
        crawlerRuntimeMillis: number;
        requestsByStatus: Record;
        requestsByType: Record;
        requestsByHost: Record;
        requestsByLabel: Record;
        total: number;
        finished: number;
        failed: number;
        lastError?: string;
        startTime: number;
        finishTime: number;
    }>;
}
export {};
//# sourceMappingURL=spider.d.ts.map