import { EventEmitter } from 'eventemitter3';
import {
  ScraperEngine,
  ScraperDefinition,
  ScraperResult,
  ScraperExecutionOptions,
  ScraperHook,
  HookHandler,
  ScraperPlugin,
  ScraperEvents,
  ScraperEngineConfig,
  Logger
} from './types';

/**
 * @file Provides the core implementation of the ScraperEngine, orchestrating
 * the entire scraping lifecycle including browser management, hook execution,
 * plugin integration, and data parsing.
 */

/**
 * The main engine for managing and executing scrapers.
 * It handles the browser pool, plugin lifecycle, global and scraper-specific hooks,
 * and the overall execution flow of scraper definitions.
 * Emits various events throughout the scraping lifecycle (see {@link ScraperEvents}).
 *
 * NOTE(review): the generic arguments in this declaration were lost at some point
 * and have been reconstructed from the `@template` tags, the `{@link}` references
 * and the imported type names. Confirm the arity of `ScraperDefinition` and
 * `ScraperResult` against `./types`.
 * NOTE(review): the base class is left as plain `EventEmitter`; it may originally
 * have been parameterized with `ScraperEvents` — confirm against the implementation.
 */
export declare class CrawleeScraperEngine extends EventEmitter implements ScraperEngine {
  private browserPool;
  private logger;
  private config;
  private definitions;
  private plugins;
  private globalHooks;

  /**
   * Creates an instance of the CrawleeScraperEngine.
   * @param config The configuration object for the scraper engine. See {@link ScraperEngineConfig}.
   * @param logger An instance of a logger conforming to the {@link Logger} interface.
   */
  constructor(config: ScraperEngineConfig, logger: Logger);

  /**
   * Executes a registered scraper definition with the given input and runtime options.
   * This method orchestrates the entire scraping lifecycle for a single task, including:
   * - Input validation.
   * - Acquiring a browser instance from the pool.
   * - Executing `beforeRequest`, scraper `parse`, `afterRequest`, `onSuccess` hooks.
   * - Handling retries with `onRetry` hooks upon failure.
   * - Managing errors with `onError` hooks.
   * - Output validation.
   * - Releasing the browser instance.
   * Emits `scraper:start`, `scraper:success`, `scraper:error`, and `scraper:retry` events.
   *
   * @template Input The type of the input data the scraper expects.
   * @template Output The type of the data the scraper's `parse` function will return.
   * @param definition The {@link ScraperDefinition} to execute.
   * @param input The input data to pass to the scraper.
   * @param options Optional. Partial {@link ScraperExecutionOptions} that can override
   * default and definition-specific options for this execution.
   * @returns A Promise that resolves to a {@link ScraperResult} containing the outcome of the execution.
   */
  execute<Input, Output>(
    definition: ScraperDefinition<Input, Output>,
    input: Input,
    options?: Partial<ScraperExecutionOptions>
  ): Promise<ScraperResult<Output>>;

  /**
   * Registers a scraper definition with the engine, making it available for execution.
   * If a definition with the same ID already exists, it will be overwritten.
   * @template Input The type of the input data the scraper definition expects.
   * @template Output The type of the data the scraper definition will output.
   * @param definition The {@link ScraperDefinition} to register.
   */
  register<Input, Output>(definition: ScraperDefinition<Input, Output>): void;

  /**
   * Retrieves a registered scraper definition by its ID.
   * @param id The unique identifier of the scraper definition.
   * @returns The {@link ScraperDefinition} if found, otherwise `undefined`.
   */
  // NOTE(review): relies on `ScraperDefinition` having default type arguments — confirm.
  getDefinition(id: string): ScraperDefinition | undefined;

  /**
   * Lists all currently registered scraper definitions.
   * @returns An array of {@link ScraperDefinition} objects.
   */
  // NOTE(review): relies on `ScraperDefinition` having default type arguments — confirm.
  listDefinitions(): ScraperDefinition[];

  /**
   * Installs a plugin, allowing it to extend the engine's functionality.
   * The plugin's `install` method will be called with this engine instance.
   * @param plugin The {@link ScraperPlugin} instance to install.
   */
  use(plugin: ScraperPlugin): void;

  /**
   * Adds a global hook handler for a specified lifecycle event.
   * Global hooks are executed for all scrapers managed by this engine.
   * @param hook The {@link ScraperHook} event type (e.g., 'beforeRequest', 'onError').
   * @param handler The {@link HookHandler} function to execute when the event occurs.
   */
  addHook(hook: ScraperHook, handler: HookHandler): void;

  /**
   * Removes a previously added global hook handler.
   * @param hook The {@link ScraperHook} event type.
   * @param handler The specific {@link HookHandler} function to remove.
   * It must be the same function reference that was originally added.
   */
  removeHook(hook: ScraperHook, handler: HookHandler): void;

  /**
   * Gracefully shuts down the scraper engine.
   * This includes uninstalling all plugins that have an `uninstall` method
   * and shutting down the browser pool, closing all browser instances.
   * @returns A Promise that resolves when shutdown is complete.
   */
  shutdown(): Promise<void>;

  /**
   * Execute the actual scraper logic
   */
  private executeScraper;

  /**
   * Handle navigation based on the navigation strategy
   */
  private handleNavigation;

  /**
   * Handle wait strategy
   */
  private handleWaitStrategy;

  /**
   * Execute hooks for a specific event
   */
  private executeHooks;

  /**
   * Build API URL with parameters
   */
  private buildApiUrl;

  /**
   * Create success result
   */
  private createSuccessResult;

  /**
   * Create error result
   */
  private createErrorResult;

  /**
   * Initialize global hooks from config
   */
  private initializeGlobalHooks;

  /**
   * Delay execution
   */
  private delay;
}
//# sourceMappingURL=scraper.d.ts.map