import { ConsoleLogger } from '@tarko/mcp-agent';
import { Page } from '@agent-infra/browser';

/**
 * Content extraction result with pagination information.
 */
export interface PaginatedContentResult {
  /** The extracted content in markdown format. */
  content: string;
  /** Total number of pages the extracted content was split into. */
  totalPages: number;
  /** Page number this result corresponds to. */
  currentPage: number;
  /** Whether there are more pages available after `currentPage`. */
  hasMorePages: boolean;
  /** Original page title, when one is available. */
  title?: string;
}

/**
 * PaginatedContentExtractor - Memory-efficient content extraction with pagination support
 *
 * This class leverages the Mozilla Readability algorithm to extract the main content
 * from web pages while supporting pagination to prevent memory issues on large pages.
 *
 * Key features:
 * - Uses Readability to isolate valuable content from web pages
 * - Converts HTML to markdown for better token efficiency
 * - Implements pagination to limit memory usage
 * - Provides detailed pagination metadata
 */
export declare class PaginatedContentExtractor {
  private readonly logger;
  private readonly pageSize;

  /**
   * Create a new paginated content extractor.
   *
   * @param logger - Logger instance for debugging and error reporting
   * @param pageSize - Maximum number of characters per page. Optional; the
   *   default is defined by the implementation and is not visible in this
   *   declaration.
   */
  constructor(logger: ConsoleLogger, pageSize?: number);

  /**
   * Extract content from a web page with pagination support.
   *
   * @param page - Puppeteer page object
   * @param pageNumber - Page number to extract (1-based index). Optional;
   *   NOTE(review): presumably defaults to the first page — confirm against
   *   the implementation.
   * @returns Promise resolving to the paginated content result
   */
  extractContent(page: Page, pageNumber?: number): Promise<PaginatedContentResult>;
}
//# sourceMappingURL=content-extractor.d.ts.map