import { ConsoleLogger } from '@tarko/mcp-agent';
import { Page } from '@agent-infra/browser';
/**
 * Content extraction result with pagination information.
 *
 * This is the shape that `PaginatedContentExtractor.extractContent` resolves to:
 * one page of extracted content plus the metadata a caller needs to decide
 * whether (and which) further pages to request.
 */
export interface PaginatedContentResult {
    /** The extracted content for the current page, in markdown format */
    content: string;
    /** Total number of pages the extracted content was split into */
    totalPages: number;
    /**
     * Page number that `content` corresponds to.
     * NOTE(review): presumably 1-based, matching the `pageNumber` argument of
     * `extractContent` — confirm against the implementation.
     */
    currentPage: number;
    /**
     * Whether there are more pages available after `currentPage`.
     * NOTE(review): presumably equivalent to `currentPage < totalPages` — confirm.
     */
    hasMorePages: boolean;
    /** Original page title; optional, so it may be absent from the result */
    title?: string;
}
/**
 * PaginatedContentExtractor - Memory-efficient content extraction with pagination support
 *
 * This class leverages the Mozilla Readability algorithm to extract the main content
 * from web pages while supporting pagination to prevent memory issues on large pages.
 *
 * Key features:
 * - Uses Readability to isolate valuable content from web pages
 * - Converts HTML to markdown for better token efficiency
 * - Implements pagination to limit memory usage
 * - Provides detailed pagination metadata
 *
 * NOTE: this is an ambient declaration only; the behavior described above lives
 * in the compiled implementation and cannot be verified from this file.
 */
export declare class PaginatedContentExtractor {
    /** Logger supplied to the constructor (private; type elided in the declaration output) */
    private readonly logger;
    /** Per-page size limit supplied to the constructor (private; type elided in the declaration output) */
    private readonly pageSize;
    /**
     * Create a new paginated content extractor
     *
     * @param logger - Logger instance for debugging and error reporting
     * @param pageSize - Maximum number of characters per page. Optional; the
     *   default used when omitted is set by the implementation and is not
     *   visible in this declaration.
     */
    constructor(logger: ConsoleLogger, pageSize?: number);
    /**
     * Extract content from a web page with pagination support
     *
     * @param page - Browser page object (the `Page` type exported by
     *   `@agent-infra/browser`) to extract content from
     * @param pageNumber - Page number to extract (1-based index). Optional;
     *   presumably defaults to the first page when omitted — confirm against
     *   the implementation.
     * @returns Promise resolving to the requested page of content together with
     *   its pagination metadata
     */
    extractContent(page: Page, pageNumber?: number): Promise<PaginatedContentResult>;
}
//# sourceMappingURL=content-extractor.d.ts.map