import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
import type { ExecutionTools } from '../../execution/ExecutionTools';
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
import type { Converter } from '../_common/Converter';
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
import type { Scraper, ScraperSourceHandler } from '../_common/Scraper';
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
/**
 * Scraper (and converter) for `.docx` and `.odt` documents
 *
 * @see `documentationUrl` for more details
 *
 * @public exported from `@promptbook/documents`
 */
export declare class DocumentScraper implements Converter, Scraper {
    /**
     * NOTE(review): presumably holds the `tools` passed to the constructor - confirm in the implementation
     */
    private readonly tools;
    /**
     * NOTE(review): presumably holds the `options` passed to the constructor - confirm in the implementation
     */
    private readonly options;
    /**
     * Internally used markdown scraper
     */
    private readonly markdownScraper;
    constructor(
        tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>,
        options: PrepareAndScrapeOptions,
    );
    /**
     * Scraper metadata - it includes title, mime types, etc.
     */
    get metadata(): ScraperAndConverterMetadata;
    /**
     * Converts the `.docx` or `.odt` file to a `.md` file and returns the intermediate source
     *
     * Note: The `$` prefix indicates that this function is not pure - it leaves files on the disk
     *       and you are responsible for cleaning them up by calling the `destroy` method of the returned object
     */
    $convert(
        source: ScraperSourceHandler,
    ): Promise<ScraperIntermediateSource>;
    /**
     * Scrapes the document and returns the knowledge pieces (without `sources` and `preparationIds`)
     * or `null` if it can't scrape it
     */
    scrape(
        source: ScraperSourceHandler,
    ): Promise<
        readonly Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>[] | null
    >;
}