// Type definitions for @willwade/noisy-channel-predictor // Project: https://github.com/willwade/noisy-channel-correction // Definitions by: Will Wade /** * Configuration options for the predictor. */ export interface PredictorConfig { /** Maximum context length for PPM (default: 5) */ maxOrder?: number; /** Enable error-tolerant mode (default: false) */ errorTolerant?: boolean; /** Maximum edit distance for fuzzy matching (default: 2) */ maxEditDistance?: number; /** Minimum similarity score 0-1 (default: 0.5) */ minSimilarity?: number; /** Use keyboard-aware distance (default: false) */ keyboardAware?: boolean; /** Custom adjacency map for keyboard-aware distance */ keyboardAdjacencyMap?: Record; /** Case-sensitive matching (default: false) */ caseSensitive?: boolean; /** Maximum number of predictions to return (default: 10) */ maxPredictions?: number; /** Update model as text is entered (default: false) */ adaptive?: boolean; /** Optional word list for word prediction */ lexicon?: string[]; /** PPM smoothing alpha (default: 0.49) */ ppmAlpha?: number; /** PPM smoothing beta (default: 0.77) */ ppmBeta?: number; /** Enable inference-time exclusion in PPM (default: true) */ ppmUseExclusion?: boolean; /** Enable single-count updates in PPM (default: true) */ ppmUpdateExclusion?: boolean; /** Maximum trie nodes per corpus model (0 = unlimited) */ ppmMaxNodes?: number; } /** * PPM parameter options. */ export interface PPMOptions { /** Smoothing alpha (default: 0.49) */ alpha?: number; /** Smoothing beta (default: 0.77) */ beta?: number; /** Enable exclusion at inference time */ useExclusion?: boolean; /** Enable "single counting" updates */ updateExclusion?: boolean; /** Maximum trie nodes for the model (0 = unlimited) */ maxNodes?: number; } /** * Prediction result. */ export interface Prediction { /** Predicted text */ text: string; /** Probability score (0-1) */ probability: number; /** Edit distance (only in error-tolerant mode) */ distance?: number; /** Similarity score (only in error-tolerant mode) */ similarity?: number; } /** * Options for adding a training corpus. */ export interface CorpusOptions { /** Human-readable description of the corpus */ description?: string; /** Whether this corpus should be active (default: true) */ enabled?: boolean; /** Optional word list specific to this corpus (e.g., French words for French corpus) */ lexicon?: string[]; } /** * Information about a training corpus. */ export interface CorpusInfo { /** Corpus identifier */ key: string; /** Human-readable description */ description: string; /** Whether corpus is currently enabled */ enabled: boolean; } /** * Bigram statistics. */ export interface BigramStats { /** Number of unique bigrams learned */ uniqueBigrams: number; /** Total bigram occurrences */ totalBigrams: number; } /** * Predictor class providing word and letter prediction. */ export class Predictor { /** * Create a new predictor. * @param config Configuration options */ constructor(config?: PredictorConfig); /** * Train the default corpus on text. * For multi-corpus training, use addTrainingCorpus() instead. * @param text Training text */ train(text: string): void; /** * Add a new training corpus with a unique identifier. * @param corpusKey Unique identifier for this corpus (e.g., 'medical', 'personal') * @param text Training text for this corpus * @param options Optional configuration */ addTrainingCorpus(corpusKey: string, text: string, options?: CorpusOptions): void; /** * Enable specific training corpora for predictions. * Disables all other corpora. * @param corpusKeys Single corpus key or array of corpus keys to use */ useCorpora(corpusKeys: string | string[]): void; /** * Enable all loaded training corpora for predictions. */ useAllCorpora(): void; /** * Get list of available corpus keys. * @param onlyEnabled If true, only return enabled corpora * @returns Array of corpus keys */ getCorpora(onlyEnabled?: boolean): string[]; /** * Get information about a specific corpus. * @param corpusKey Corpus identifier * @returns Corpus information */ getCorpusInfo(corpusKey: string): CorpusInfo; /** * Remove a training corpus. * Cannot remove the 'default' corpus. * @param corpusKey Corpus identifier to remove */ removeCorpus(corpusKey: string): void; /** * Reset the prediction context. */ resetContext(): void; /** * Add text to the current context. * @param text Text to add to context * @param update Whether to update the model (defaults to config.adaptive) */ addToContext(text: string, update?: boolean): void; /** * Get character/letter predictions. * Merges predictions from all active training corpora. * @param context Optional context string (uses current context if not provided) * @returns Array of character predictions */ predictNextCharacter(context?: string): Prediction[]; /** * Get word completion predictions. * @param partialWord Partial word to complete * @param precedingContext Optional preceding context * @returns Array of word predictions */ predictWordCompletion(partialWord: string, precedingContext?: string): Prediction[]; /** * Predict next word based on bigram frequencies. * @param currentWord The current/last word typed * @param maxPredictions Maximum number of predictions to return (default: 10) * @returns Array of next-word predictions sorted by probability */ predictNextWord(currentWord: string, maxPredictions?: number): Prediction[]; /** * Export learned bigrams as text. * @returns Bigrams in text format (one per line: "word1 word2 count") */ exportBigrams(): string; /** * Import bigrams from text. * @param bigramText Bigrams in text format */ importBigrams(bigramText: string): void; /** * Clear all learned bigrams. */ clearBigrams(): void; /** * Get bigram statistics. * @returns Bigram statistics */ getBigramStats(): BigramStats; /** * Get PPM stats per corpus (node counts and budget counters). */ getPPMStats(): Record; /** * Get current configuration. * @returns Current configuration */ getConfig(): PredictorConfig; /** * Update configuration. * @param newConfig Configuration updates */ updateConfig(newConfig: Partial): void; } /** * PPM Language Model class (for advanced usage). */ export class PPMLanguageModel { constructor(vocab: Vocabulary, maxOrder: number, options?: PPMOptions); setParameters(options: PPMOptions): void; getStats(): { numNodes: number; maxNodes: number; skippedNodeAdds: number }; createContext(): any; cloneContext(context: any): any; addSymbolToContext(context: any, symbol: number): void; addSymbolAndUpdate(context: any, symbol: number): void; getProbs(context: any): number[]; printToConsole(): void; } /** * Vocabulary class (for advanced usage). */ export class Vocabulary { constructor(); addSymbol(symbol: string): number; getSymbol(symbol: string): number; getSymbolOrOOV(symbol: string): number; size(): number; symbols_: string[]; } /** * Fuzzy matcher utilities. */ export namespace fuzzyMatcher { /** * Calculate Levenshtein distance between two strings. */ function levenshteinDistance(str1: string, str2: string): number; /** * Calculate similarity score between two strings (0-1). */ function similarityScore(str1: string, str2: string): number; /** * Check if a string starts with a prefix. */ function startsWith(str: string, prefix: string, caseSensitive?: boolean): boolean; /** * Filter and rank strings by similarity to a target string. */ function fuzzyMatch( target: string, candidates: string[], maxDistance?: number, minSimilarity?: number ): Array<{ text: string; distance: number; similarity: number }>; /** * Get keyboard adjacency map for QWERTY layout. */ function getQwertyAdjacency(): Record; /** * Check if two characters are adjacent on a QWERTY keyboard. */ function areKeysAdjacent( char1: string, char2: string, adjacency?: Record ): boolean; /** * Calculate keyboard-aware edit distance. */ function keyboardAwareDistance( str1: string, str2: string, adjacency?: Record ): number; } /** * Word tokenizer utilities. */ export namespace wordTokenizer { /** * Tokenize text into words. */ function tokenize(text: string): string[]; /** * Get the last partial word from text. */ function getLastPartialWord(text: string): string; /** * Get the context (all words except the last partial word). */ function getContext(text: string): string; /** * Check if text ends with a word boundary. */ function endsWithWordBoundary(text: string): boolean; /** * Normalize text for prediction. */ function normalize(text: string, lowercase?: boolean): string; /** * Split text into characters. */ function toCharArray(text: string): string[]; /** * Join an array of characters into a string. */ function fromCharArray(chars: string[]): string; /** * Get n-grams from text. */ function getNgrams(text: string, n: number): string[]; /** * Remove punctuation from text. */ function removePunctuation(text: string): string; /** * Check if a character is alphanumeric. */ function isAlphanumeric(char: string): boolean; /** * Check if a character is whitespace. */ function isWhitespace(char: string): boolean; } /** * Create a new predictor instance with the given configuration. * @param config Configuration options * @returns Predictor instance */ export function createPredictor(config?: PredictorConfig): Predictor; /** * Create a predictor with strict mode (exact matching only). * @param config Configuration options * @returns Predictor instance in strict mode */ export function createStrictPredictor(config?: PredictorConfig): Predictor; /** * Create a predictor with error-tolerant mode enabled. * @param config Configuration options * @returns Predictor instance in error-tolerant mode */ export function createErrorTolerantPredictor(config?: PredictorConfig): Predictor; /** * Calculate edit distance between two strings. * @param str1 First string * @param str2 Second string * @returns Edit distance */ export function levenshteinDistance(str1: string, str2: string): number; /** * Calculate similarity score between two strings. * @param str1 First string * @param str2 Second string * @returns Similarity score (0-1) */ export function similarityScore(str1: string, str2: string): number;