/**
 * TypeScript definitions for Speech ASR
 */

/** Options accepted by the {@link SpeechASR} constructor. Every field is optional. */
export interface SpeechASROptions {
  /** VAD mode: 'silero', 'ten', or 'off' */
  vadMode?: 'silero' | 'ten' | 'off';
  /** VAD configuration */
  vadConfig?: VADConfig | null;
  /** Online model: 'zipformer' or 'paraformer' */
  onlineModel?: 'zipformer' | 'paraformer';
  /** Model file paths */
  modelPaths?: ModelPaths;
  /** Two-pass recognition configuration */
  twoPass?: TwoPassConfig;
  /** Translation configuration */
  translation?: TranslationConfig;
  /** Audio sample rate (default: 16000) */
  sampleRate?: number;
  /** Audio buffer size (default: 4096) */
  bufferSize?: number;
  /** Callback when final result is available */
  onResult?: (text: string) => void;
  /** Callback for partial/interim results */
  onPartial?: (text: string) => void;
  /** Callback when a segment is completed */
  onSegment?: (segment: Segment) => void;
  /** Error callback */
  onError?: (error: ASRError) => void;
  /** Callback when ASR is ready */
  onReady?: () => void;
}

/** Tuning parameters for voice activity detection (VAD). */
export interface VADConfig {
  /** Minimum silence duration in ms */
  minSilenceDuration?: number;
  /** Speech pad duration in ms */
  speechPadDuration?: number;
  /** Threshold for speech detection */
  threshold?: number;
}

/**
 * Locations of the model and runtime files.
 * Either set `m_path` to a directory, or provide the individual file paths.
 */
export interface ModelPaths {
  /**
   * Model directory path (recommended)
   * SDK will automatically append file names like:
   * - sherpa-onnx-wasm-main-asr.data
   * - sherpa-onnx-wasm-main-asr.js
   * - sherpa-onnx-wasm-main-asr.wasm
   * - sherpa-onnx-asr.js
   * - sherpa-onnx-vad.js (when vadMode='silero')
   * - offline-worker.js (when twoPass.enabled=true)
   */
  m_path?: string;
  /** sherpa-onnx *.data model bundle (required if m_path is not set) */
  data?: string;
  /** sherpa-onnx wasm js wrapper */
  wasmJs?: string;
  /** sherpa-onnx wasm runtime */
  wasm?: string;
  /** sherpa-onnx helper functions (sherpa-onnx-asr.js) */
  asrJs?: string;
  /** sherpa-onnx VAD helper (sherpa-onnx-vad.js) - required when vadMode='silero' */
  vadJs?: string;
  /** Offline worker script (offline-worker.js) - required when twoPass.enabled=true */
  offlineWorker?: string;
}

/** Configuration of the offline second recognition pass. */
export interface TwoPassConfig {
  /** Enable two-pass recognition */
  enabled?: boolean;
  /** Backend: 'wasm' or 'webgpu' */
  backend?: 'wasm' | 'webgpu';
  /**
   * Mode:
   * - 'auto': run two-pass for each segment (default)
   * - 'manual-stop': only run two-pass once after stop()
   */
  mode?: 'auto' | 'manual-stop';
  /**
   * When true and mode='manual-stop', skip online first-pass to save CPU and
   * only run offline two-pass on stop().
   */
  disableOnlineInManual?: boolean;
  /**
   * Warm up offline worker/models right after init() to reduce the latency
   * of the first two-pass invocation. Default: true.
   */
  autoWarmup?: boolean;
}

/** Configuration of the optional translation step. */
export interface TranslationConfig {
  /** Enable translation */
  enabled?: boolean;
  /** API endpoint */
  endpoint?: string;
  /** API key */
  apiKey?: string;
  /** Model name */
  model?: string;
  /** Translation direction: 'en-zh' or 'zh-en' */
  direction?: 'en-zh' | 'zh-en';
}

/** A single recognized segment, as passed to `onSegment`. */
export interface Segment {
  /** Segment text */
  text: string;
  /** Start time in seconds */
  startTime?: number;
  /** End time in seconds */
  endTime?: number;
  /** Confidence score */
  confidence?: number;
}

/** Error payload passed to `onError`. */
export interface ASRError {
  /** Error message */
  message: string;
  /** Original error object */
  error?: Error;
}

/** Result of a recognition run. */
export interface RecognitionResult {
  /** Recognition text */
  text: string;
  /** List of segments */
  segments?: Segment[];
  /** Translation (if enabled) */
  translation?: string;
}

/**
 * Easy ASR - Real-time Speech Recognition SDK
 *
 * NOTE(review): the promise-returning methods were previously declared with a
 * bare `Promise` (no type argument), which does not compile. The type
 * arguments below are inferred from the declarations in this file; confirm
 * them against the implementation.
 */
export declare class SpeechASR {
  /**
   * Create a new SpeechASR instance
   * @param options Configuration options
   */
  constructor(options?: SpeechASROptions);

  /**
   * Initialize the ASR engine
   * Must be called before start() or transcribeFile()
   */
  init(): Promise<void>;

  /**
   * Start recording and real-time recognition
   * Requires microphone permission
   */
  start(): Promise<void>;

  /**
   * Stop recording and recognition
   */
  stop(): void;

  /**
   * Transcribe an audio file
   * @param audioFile Audio file (File or Blob)
   * @returns Transcription result
   */
  // NOTE(review): presumably resolves to RecognitionResult, the only result
  // type declared in this file — verify against the implementation.
  transcribeFile(audioFile: File | Blob): Promise<RecognitionResult>;

  /**
   * Get the current recognition result
   */
  getResult(): string;

  /**
   * Clear all recognition results
   */
  clear(): void;

  /**
   * Destroy the ASR instance and free resources
   */
  destroy(): void;

  /** Whether ASR is ready */
  readonly isReady: boolean;

  /** Whether currently recording */
  readonly isRecording: boolean;
}

/** Alias of {@link SpeechASROptions}. */
export type EasyASROptions = SpeechASROptions;

export default SpeechASR;