/**
 * HTML entity decoding and tag stripping utilities for EDINET XBRL data.
 *
 * EDINET XBRL files contain text blocks whose HTML markup is entity-escaped
 * (e.g., `&lt;p&gt;` standing for `<p>`, or `&apos;` standing for `'`).
 * This module provides functions to decode these entities and strip HTML tags
 * to produce clean text.
 *
 * The declarations below carry type signatures only; the implementation is
 * not part of this file.
 */

/**
 * Decodes HTML entities in a string.
 * Supports common named entities such as `&lt;`, `&gt;`, `&amp;`, `&quot;`
 * and `&apos;` (decoding to `<`, `>`, `&`, `"` and `'`), as well as numeric
 * entities.
 *
 * NOTE(review): the exact set of supported entities is defined by the
 * implementation, which is not visible here — confirm before relying on
 * less common entities.
 *
 * @param text - Text containing HTML entities
 * @returns Text with HTML entities decoded
 */
export declare function decodeHtmlEntities(text: string): string;

/**
 * Removes HTML tags from a string.
 * This is a simple regex-based approach suitable for cleaning XBRL text blocks.
 *
 * IMPORTANT: This function is for EXTRACTING text content from HTML, not for
 * sanitizing untrusted input for HTML rendering. The output is plain text and
 * should not be rendered as HTML. For HTML sanitization, use a dedicated
 * library like DOMPurify.
 *
 * @param text - Text containing HTML tags
 * @returns Text with HTML tags removed
 */
export declare function stripHtmlTags(text: string): string;

/**
 * Decodes HTML entities and removes HTML tags from text.
 * This is the main function to clean EDINET XBRL text blocks.
 *
 * The parameter may be `undefined`, and so may the result.
 * NOTE(review): presumably `undefined` input yields `undefined` output —
 * verify against the implementation.
 *
 * @param text - Text with HTML entities and tags, or `undefined`
 * @returns Clean text without HTML entities or tags, or `undefined`
 */
export declare function cleanHtml(text: string | undefined): string | undefined;