import { TextTools, Resource } from '../../index.js';
import { HtmlToTextOptions } from './get-plaintext.js';
/**
 * The result shape resolved by {@link getPageContent}.
 *
 * @remarks
 * Because this interface extends `Record<string, unknown>`, arbitrary additional
 * keys are permitted alongside the two declared properties; their values must be
 * narrowed before use.
 */
export interface PageContent extends Record<string, unknown> {
    /**
     * Text of the extracted content.
     * NOTE(review): presumably the plaintext rendering of the page's primary content — confirm against the implementation.
     */
    text?: string;
    /**
     * Readability score for the content.
     * NOTE(review): presumably only populated when {@link PageContentOptions.readability} is enabled — confirm against the implementation.
     */
    readability?: TextTools.ReadabilityScore;
}
/**
 * Options to control the extraction of core content from an HTML page.
 *
 * @remarks
 * Every property is optional; see the `@defaultValue` tag on each property for
 * the documented default where one is given.
 */
export interface PageContentOptions {
    /**
     * Configuration used when generating a plaintext version of the HTML.
     *
     * @remarks
     * Other options like {@link PageContentOptions.selector | selector},
     * {@link PageContentOptions.allowMultipleContentElements | allowMultipleContentElements}, and
     * {@link PageContentOptions.defaultToFullDocument | defaultToFullDocument} will override the
     * equivalent values in this configuration object.
     *
     * @see {@link https://github.com/html-to-text/node-html-to-text/tree/master/packages/html-to-text | Html-To-Text docs} for details
     */
    htmlToText?: HtmlToTextOptions;
    /**
     * One or more CSS selectors used to find the markup's primary content.
     *
     * @remarks
     * This option is preferred over {@link HtmlToTextOptions.baseElements.selectors | baseElements.selectors}
     * on the {@link PageContentOptions.htmlToText | htmlToText} option. HtmlToText is good, but its support for
     * some selectors is limited and can generate surprising results. Whenever possible, use this option instead.
     */
    selector?: string | string[];
    /**
     * Allow multiple page elements to be treated as the markup's 'primary content'.
     *
     * @remarks
     * Setting this to `true` is equivalent to setting {@link HtmlToTextOptions.limits.maxBaseElements | limits.maxBaseElements}
     * on the {@link PageContentOptions.htmlToText | htmlToText} option to `1`.
     *
     * NOTE(review): the statement above looks inverted — a `maxBaseElements` limit of `1`
     * would restrict extraction to a single element, which matches `false` (the default)
     * rather than `true`. Confirm against the implementation and correct this remark.
     *
     * @defaultValue `false`
     */
    allowMultipleContentElements?: boolean;
    /**
     * Fall back to the full text of the page if the specified selectors have no
     * matches. This will include headers, footers, navigation elements, etc.
     *
     * @remarks
     * Setting this to `true` is equivalent to setting {@link HtmlToTextOptions.baseElements.returnDomByDefault | baseElements.returnDomByDefault}
     * on the {@link PageContentOptions.htmlToText | htmlToText} option.
     *
     * @defaultValue `false`
     */
    defaultToFullDocument?: boolean;
    /**
     * Trim surrounding whitespace around the content's plaintext.
     *
     * @defaultValue `true`
     */
    trim?: boolean;
    /**
     * Calculate the readability score for the page's main content.
     *
     * @remarks
     * Accepts either a boolean or a {@link TextTools.ReadabilityScoreOptions} object.
     * NOTE(review): presumably an options object enables scoring and customizes how the
     * score is calculated — confirm against the implementation.
     *
     * @defaultValue `true`
     */
    readability?: boolean | TextTools.ReadabilityScoreOptions;
}
/**
 * Extract the core content of an HTML page and return its plaintext, with
 * optional configuration options.
 *
 * @param input - The page to extract content from: a string, a `cheerio.Root`, or a
 * {@link Resource}. NOTE(review): presumably the string form is raw HTML markup and the
 * `Resource` form supplies its stored markup — confirm against the implementation.
 * @param customOptions - Optional {@link PageContentOptions} controlling content
 * selection, plaintext conversion, trimming, and readability scoring.
 * @returns A promise resolving to the extracted {@link PageContent}, or `undefined`.
 * NOTE(review): the conditions that produce `undefined` are not visible in this
 * declaration — callers must handle it; confirm the cases against the implementation.
 */
export declare function getPageContent(input: string | cheerio.Root | Resource, customOptions?: PageContentOptions): Promise<PageContent | undefined>;
//# sourceMappingURL=get-page-content.d.ts.map