/**
 * Unicode Text Segmentation (UAX #29)
 * Grapheme cluster and word boundaries
 *
 * Ambient declarations only: this file describes the public surface
 * (enums, result shapes, and function signatures) and contains no
 * implementation.
 */
import type { GlyphInfo } from "../types.ts";

/**
 * Grapheme cluster break property
 *
 * Member notes are written as block doc comments (not trailing `//`
 * comments) so that they can never swallow the following members if the
 * file's line breaks are lost.
 */
export declare enum GraphemeBreakProperty {
    Other = 0,
    CR = 1,
    LF = 2,
    Control = 3,
    Extend = 4,
    ZWJ = 5,
    Regional_Indicator = 6,
    Prepend = 7,
    SpacingMark = 8,
    /** Hangul L */
    L = 9,
    /** Hangul V */
    V = 10,
    /** Hangul T */
    T = 11,
    /** Hangul LV */
    LV = 12,
    /** Hangul LVT */
    LVT = 13,
    Extended_Pictographic = 14
}

/**
 * Word break property
 */
export declare enum WordBreakProperty {
    Other = 0,
    CR = 1,
    LF = 2,
    Newline = 3,
    Extend = 4,
    ZWJ = 5,
    Regional_Indicator = 6,
    Format = 7,
    Katakana = 8,
    Hebrew_Letter = 9,
    ALetter = 10,
    Single_Quote = 11,
    Double_Quote = 12,
    MidNumLet = 13,
    MidLetter = 14,
    MidNum = 15,
    Numeric = 16,
    ExtendNumLet = 17,
    WSegSpace = 18,
    Extended_Pictographic = 19
}

/**
 * Get grapheme break property for codepoint
 * @param cp Unicode codepoint to check
 * @returns Grapheme break property for the codepoint
 */
export declare function getGraphemeBreakProperty(cp: number): GraphemeBreakProperty;

/**
 * Get word break property for codepoint
 * @param cp Unicode codepoint to check
 * @returns Word break property for the codepoint
 */
export declare function getWordBreakProperty(cp: number): WordBreakProperty;

/**
 * Grapheme cluster boundary result
 */
export interface GraphemeBoundaries {
    /** Boundary positions (indices where clusters end) */
    boundaries: number[];
    /** Grapheme break properties */
    properties: GraphemeBreakProperty[];
}

/**
 * Find grapheme cluster boundaries in codepoints
 * @param codepoints Array of Unicode codepoints
 * @returns Object containing boundary positions and grapheme break properties
 */
export declare function findGraphemeBoundaries(codepoints: number[]): GraphemeBoundaries;

/**
 * Word boundary result
 */
export interface WordBoundaries {
    /** Boundary positions */
    boundaries: number[];
    /** Word break properties */
    properties: WordBreakProperty[];
}

/**
 * Find word boundaries in codepoints
 * @param codepoints Array of Unicode codepoints
 * @returns Object containing boundary positions and word break properties
 */
export declare function findWordBoundaries(codepoints: number[]): WordBoundaries;

/**
 * Split text into grapheme clusters
 * @param text Text string to split
 * @returns Array of grapheme cluster strings
 */
export declare function splitGraphemes(text: string): string[];

/**
 * Split text into words
 * @param text Text string to split
 * @returns Array of word strings (whitespace-only segments are filtered out)
 */
export declare function splitWords(text: string): string[];

/**
 * Count grapheme clusters in text
 * @param text Text string to analyze
 * @returns Number of grapheme clusters in the text
 */
export declare function countGraphemes(text: string): number;

/**
 * Analyze grapheme boundaries for glyph infos
 * @param infos Array of glyph information objects
 * @returns Object containing boundary positions and grapheme break properties
 */
export declare function analyzeGraphemesForGlyphs(infos: GlyphInfo[]): GraphemeBoundaries;

/**
 * Analyze word boundaries for glyph infos
 * @param infos Array of glyph information objects
 * @returns Object containing boundary positions and word break properties
 */
export declare function analyzeWordsForGlyphs(infos: GlyphInfo[]): WordBoundaries;