/**
 * Log Mel-filterbank feature extraction.
 * Exact port of the AudioUtils used by EfficientWord-Net InBrowser.
 * - 64 Mel filters
 * - 512-point FFT
 * - 25 ms window / 10 ms step at 16 kHz
 *
 * Produces a [149 × 64] spectrogram from a 1.5 s (24 000-sample) input,
 * matching the [1, 1, 149, 64] tensor expected by the ONNX Siamese model.
 */
export declare class AudioUtils {
    private _nfft;
    private _nfilt;
    private _sampleRate;
    private _melFilters;
    private _fft;
    /**
     * All arguments are optional. Their defaults are not visible in this
     * declaration — presumably the values listed in the class description
     * (16 kHz, 512-point FFT, 64 filters); confirm against the implementation.
     */
    constructor(sampleRate?: number, nfft?: number, nfilt?: number);
    private _hzToMel;
    private _melToHz;
    private _createMelFilterbank;
    /** Returns a flat Float32Array of shape [numFrames × nfilt]. */
    logfbank(signal: Float32Array): Float32Array;
    /**
     * NOTE(review): judging by the name, returns the highest cosine similarity
     * between `embedding` and any row of `refs` — confirm against the implementation.
     */
    maxCosineSim(embedding: Float32Array, refs: number[][]): number;
}

/** A named command, the triggers attached to it, and an optional match callback. */
export declare interface Command {
    name: string;
    triggers: Trigger[];
    /**
     * Optional callback receiving a trigger name and a confidence value.
     * NOTE(review): presumably invoked by {@link Detector} when one of `triggers`
     * is recognised — confirm against the implementation.
     */
    onMatch?: (trigger: TriggerName, confidence: number) => any;
}

/*
 * Default locations and storage keys.
 * NOTE(review): presumably used when the matching DetectorConfig /
 * EnrollmentSessionConfig option is omitted — confirm against the implementation.
 */
export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";

export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";

export declare const DEFAULT_ORT_CDN_URL = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs";

export declare const DEFAULT_REFS_STORAGE_KEY = "mellon-refs";

export declare const DEFAULT_THRESHOLD_STORAGE_KEY = "mellon-threshold";

export declare const DEFAULT_WASM_PATHS = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";

/**
 * Detector built from a list of {@link Command}s and an optional
 * {@link DetectorConfig}.
 *
 * Lifecycle, as far as this declaration shows: call {@link Detector.init}
 * before {@link Detector.start}; {@link Detector.stop} is the counterpart of
 * `start`.
 */
export declare class Detector {
    private _refsStorageKey;
    private _thresholdStorageKey;
    private _audioProcessorPath;
    private _modelPath;
    private _wasmPaths;
    private _ortCdnUrl;
    private _started;
    private _inferring;
    private _audioCtx;
    private _stream;
    private _refEmbeddings;
    private _lastMatchAt;
    private _lastInferenceAt;
    private _audioUtils;
    private _commands;
    private _threshold;
    private _initPromise;
    private _log;
    /**
     * Readable and writable numeric threshold.
     * NOTE(review): presumably the minimum similarity required for a match — confirm.
     */
    get threshold(): number;
    set threshold(value: number);
    /** NOTE(review): presumably `true` between `start()` and `stop()` — confirm. */
    get listening(): boolean;
    constructor(commands: Command[], config?: DetectorConfig);
    /**
     * Streams `url`, calling `onProgress(downloaded, total)` after each chunk.
     * Falls back to a single-shot fetch when the body stream is unavailable.
     */
    private _trackFetch;
    private _init;
    /**
     * Loads the ONNX model and all reference embeddings.
     * Must be called before {@link start}.
     * Safe to call multiple times — the work is only done once.
     *
     * @param onProgress - optional callback invoked as each asset is loaded
     * @returns a promise with no resolved value
     */
    init(onProgress?: ProgressCallback): Promise<void>;
    /** Adds (or replaces) the reference embeddings for a word without restarting. */
    addCustomWord(ref: WordRef): void;
    /** Asynchronous; resolves with no value. */
    start(): Promise<void>;
    /** Asynchronous; resolves with no value. */
    stop(): Promise<void>;
    private _handleBuffer;
}

/** Optional settings accepted by the {@link Detector} constructor. */
export declare interface DetectorConfig {
    refsStorageKey?: string;
    thresholdStorageKey?: string;
    wasmPaths?: string;
    modelPath?: string;
    audioProcessorPath?: string;
    ortCdnUrl?: string;
    audioUtils?: AudioUtils;
    /** Enable console logging. Pass `true` for info+warn+error, or a custom logger. Defaults to silent. */
    log?: boolean | {
        info?: (...a: unknown[]) => void;
        warn?: (...a: unknown[]) => void;
        error?: (...a: unknown[]) => void;
    };
}

/**
 * Records voice samples and generates an EfficientWord-Net reference file
 * (compatible with the JSON format used by the ONNX Siamese model).
 *
 * Usage:
 *   const session = new EnrollmentSession('suivant');
 *   const count = await session.recordSample(); // repeat ≥ 3 times
 *   const ref = await session.generateRef();    // needs ≥ 3 samples
 *   Storage.saveWord(ref);
 *   detector.addCustomWord(ref);                // `detector` is a Detector instance
 */
export declare class EnrollmentSession {
    private _config;
    private _wordName;
    private _samples;
    private _audioUtils;
    constructor(wordName: string, config?: EnrollmentSessionConfig);
    /** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
    recordSample(): Promise<number>;
    /** Returns the raw PCM Float32Array for the sample at the given index (16 kHz). */
    getSample(index: number): Float32Array;
    /** Removes the sample at the given index. Returns the new sample count. */
    deleteSample(index: number): number;
    /**
     * Runs ONNX inference on every recorded sample to produce reference embeddings.
     *
     * @returns the generated {@link WordRef}
     */
    generateRef(): Promise<WordRef>;
}

/** Optional settings accepted by the {@link EnrollmentSession} constructor. */
export declare interface EnrollmentSessionConfig {
    wasmPaths?: string;
    modelPath?: string;
    ortCdnUrl?: string;
    audioUtils?: AudioUtils;
}

/**
 * Called during {@link Detector.init} to report real download progress.
 * @param downloaded - total bytes received so far across all assets
 * @param total - sum of known Content-Length values for all assets;
 *   may still be 0 early on (before first header is received)
 */
export declare type ProgressCallback = (downloaded: number, total: number) => void;

/**
 * Static helpers for loading, saving and deleting {@link WordRef}s under an
 * optional storage key. Exported under the name `Storage`.
 * NOTE(review): the backing store is not visible in this declaration.
 */
declare class Storage_2 {
    static loadWords(storageKey?: string): WordRef[];
    static saveWord(ref: WordRef, storageKey?: string): void;
    static deleteWord(wordName: string, storageKey?: string): void;
}
export { Storage_2 as Storage }

/** A trigger name plus an optional path to its default reference. */
export declare interface Trigger {
    name: TriggerName;
    defaultRefPath?: string;
}

export declare type TriggerName = string;

/** Reference embeddings for one word. */
export declare interface WordRef {
    word_name: TriggerName;
    model_type?: string;
    embeddings: number[][];
}

export { }