import { FileReader, Document } from '@llamaindex/core/schema';
import { Opts } from 'string-strip-html';

/**
 * Reader that extracts the significant text from an arbitrary HTML document.
 *
 * Extraction rules:
 * - The contents of any `head`, `script`, `style`, and `xml` tags are removed completely.
 * - For `a[href]` tags, the URL is extracted along with the inner text of the tag.
 * - All other tags are removed, and their inner text is kept intact.
 * - HTML entities (e.g., `&amp;`) are not decoded.
 *
 * This is a type declaration only; the implementation lives elsewhere.
 */
declare class HTMLReader extends FileReader<Document> {
    /**
     * Public entry point for this reader.
     * Required by the BaseReader interface.
     * @param fileContent - The raw content of the file, as bytes. How the bytes are
     * decoded to text is not visible in this declaration.
     * @returns A Promise eventually yielding zero or one `Document` parsed from the
     * HTML content of the specified file.
     */
    loadDataAsContent(fileContent: Uint8Array): Promise<Document[]>;
    /**
     * Wrapper around the string-strip-html library.
     * @param html - Raw HTML content to be parsed.
     * @param options - Optional object of options for the underlying library.
     * NOTE(review): presumably falls back to {@link HTMLReader.getOptions} when
     * omitted; confirm against the implementation.
     * @see getOptions
     * @returns A Promise resolving to the HTML content, stripped of unwanted tags and attributes.
     */
    parseContent(html: string, options?: Partial<Opts>): Promise<string>;
    /**
     * Provides the configuration options this reader passes to the string-strip-html library.
     * @see https://codsen.com/os/string-strip-html/examples
     * @returns An object of options for the underlying library.
     */
    getOptions(): Partial<Opts>;
}

export { HTMLReader };