/**
 * Column reordering based on entropy analysis
 *
 * Reorders columns to optimize tokenizer performance by placing
 * low-entropy (repetitive) columns first, which provides better
 * context for BPE tokenizers.
 */
import type { ColumnReorderResult } from './types.js';
/**
 * Column reorderer with entropy-based optimization
 */
export declare class ColumnReorderer {
    /**
     * Calculate Shannon entropy for column values
     *
     * H(X) = -Σ p(x) * log₂(p(x))
     *
     * Lower entropy = more repetitive = better for left context
     * Higher entropy = more unique = worse for left context
     *
     * @param values - Array of values in the column
     * @returns Entropy value (0 = all same, higher = more diverse)
     */
    calculateEntropy(values: any[]): number;
    /**
     * Analyze all columns and calculate their entropies
     *
     * @param data - Array of objects
     * @param columns - Column names to analyze
     * @returns Map of column name to entropy value
     */
    analyzeColumns(data: any[], columns: string[]): Map<string, number>;
    /**
     * Reorder columns by ascending entropy (low to high)
     *
     * Low entropy columns (repetitive) come first to provide
     * better context for tokenizers.
     *
     * @param data - Array of objects
     * @param columns - Original column order
     * @returns Reordering result with new order and mapping
     */
    reorderColumns(data: any[], columns: string[]): ColumnReorderResult;
    /**
     * Generate column mapping directive
     *
     * Format: `@colmap 2,0,1,3,4`
     * Represents original indices in optimized order
     *
     * @param mapping - Array of original indices
     * @returns TONL directive string
     */
    generateMappingDirective(mapping: number[]): string;
    /**
     * Parse column mapping directive
     *
     * @param directive - TONL directive like "@colmap 2,0,1"
     * @returns Array of original indices
     */
    parseMappingDirective(directive: string): number[];
    /**
     * Restore original column order using mapping
     *
     * @param reorderedColumns - Columns in optimized order
     * @param mapping - Original indices in optimized order
     * @returns Columns in original order
     */
    restoreOriginalOrder(reorderedColumns: string[], mapping: number[]): string[];
    /**
     * Reorder row data according to column mapping
     *
     * @param row - Object with data
     * @param originalColumns - Original column order
     * @param reorderedColumns - New column order
     * @returns Object with reordered fields
     */
    reorderRow(row: any, originalColumns: string[], reorderedColumns: string[]): any;
    /**
     * Calculate potential token savings from reordering
     *
     * This is an estimate based on entropy reduction in left-context.
     * Lower entropy first = better compression in BPE tokenizers.
     *
     * @param entropies - Map of column name to entropy value
     * @param originalOrder - Original column order
     * @param optimizedOrder - Optimized column order
     * @returns Estimated token savings as a fraction (0-1)
     */
    estimateSavings(entropies: Map<string, number>, originalOrder: string[], optimizedOrder: string[]): number;
    /**
     * Check if reordering would be beneficial
     *
     * @param data - Array of objects
     * @param columns - Column names
     * @param minSavingsThreshold - Minimum savings to apply reordering (default: 0.05 = 5%)
     * @returns True if reordering is recommended
     */
    shouldReorder(data: any[], columns: string[], minSavingsThreshold?: number): boolean;
}
//# sourceMappingURL=column-reorder.d.ts.map