/**
 * TypeScript definitions for Speech ASR
 */

/** Options accepted by the {@link SpeechASR} constructor. Every field is optional. */
export interface SpeechASROptions {
  /** VAD mode: 'silero', 'ten', or 'off' */
  vadMode?: 'silero' | 'ten' | 'off';
  /** VAD configuration (may be explicitly `null`) */
  vadConfig?: VADConfig | null;
  /** Online model: 'zipformer' or 'paraformer' */
  onlineModel?: 'zipformer' | 'paraformer';
  /** Model file paths */
  modelPaths?: ModelPaths;
  /** Two-pass recognition configuration */
  twoPass?: TwoPassConfig;
  /** Translation configuration */
  translation?: TranslationConfig;
  /** Audio sample rate (default: 16000) */
  sampleRate?: number;
  /** Audio buffer size (default: 4096) */
  bufferSize?: number;
  /** Optional: override the base URL for worker/WASM assets (default: auto-detected) */
  assetBaseUrl?: string;
  /** Callback when final result is available */
  onResult?: (text: string) => void;
  /** Callback for partial/interim results */
  onPartial?: (text: string) => void;
  /** Callback when a segment is completed */
  onSegment?: (segment: Segment) => void;
  /** Error callback */
  onError?: (error: ASRError) => void;
  /** Callback when ASR is ready */
  onReady?: () => void;
}

/** Tuning parameters for voice activity detection. */
export interface VADConfig {
  /** Minimum silence duration in ms */
  minSilenceDuration?: number;
  /** Speech pad duration in ms */
  speechPadDuration?: number;
  /** Threshold for speech detection */
  threshold?: number;
}

/** Locations of the model bundle and the SDK's script/WASM resources. */
export interface ModelPaths {
  /** sherpa-onnx *.data model bundle (required) */
  data: string;
  /** Optional: Custom base path for SDK resources (default: auto-detected from node_modules) */
  basePath?: string;
  /** Optional: Path to sherpa-onnx-wasm-main-asr.js (for Vite/Webpack environments with explicit imports) */
  wasmJs?: string;
  /** Optional: Path to sherpa-onnx-wasm-main-asr.wasm (for Vite/Webpack environments with explicit imports) */
  wasm?: string;
  /** Optional: Path to sherpa-onnx-asr.js (for Vite/Webpack environments with explicit imports) */
  asrJs?: string;
  /** Optional: Path to sherpa-onnx-vad.js (for Vite/Webpack environments with explicit imports) */
  vadJs?: string;
  /** Optional: Path to offline-worker.js (for Vite/Webpack environments with explicit imports) */
  offlineWorker?: string;
}

/** Settings for two-pass recognition. */
export interface TwoPassConfig {
  /** Enable two-pass recognition */
  enabled?: boolean;
  /** Backend: 'wasm' or 'webgpu' */
  backend?: 'wasm' | 'webgpu';
}

/** Settings for translating recognized text. */
export interface TranslationConfig {
  /** Enable translation */
  enabled?: boolean;
  /** API endpoint */
  endpoint?: string;
  /** API key */
  apiKey?: string;
  /** Model name */
  model?: string;
  /** Translation direction: 'en-zh' or 'zh-en' */
  direction?: 'en-zh' | 'zh-en';
}

/** A single recognized segment, as passed to `onSegment`. */
export interface Segment {
  /** Segment text */
  text: string;
  /** Start time in seconds */
  startTime?: number;
  /** End time in seconds */
  endTime?: number;
  /** Confidence score */
  confidence?: number;
}

/** Error payload passed to `onError`. */
export interface ASRError {
  /** Error message */
  message: string;
  /** Original error object */
  error?: Error;
}

/** Result of a recognition run. */
export interface RecognitionResult {
  /** Recognition text */
  text: string;
  /** List of segments */
  segments?: Segment[];
  /** Translation (if enabled) */
  translation?: string;
}

/**
 * Easy ASR - Real-time Speech Recognition SDK
 */
export class SpeechASR {
  /**
   * Create a new SpeechASR instance
   * @param options Configuration options
   */
  constructor(options?: SpeechASROptions);

  /**
   * Initialize the ASR engine.
   * Must be called before start() or transcribeFile().
   * @returns A promise that settles when initialization finishes
   */
  init(): Promise<void>;

  /**
   * Start recording and real-time recognition.
   * Requires microphone permission.
   * @returns A promise that settles once start-up finishes
   */
  start(): Promise<void>;

  /**
   * Stop recording and recognition
   */
  stop(): void;

  /**
   * Transcribe an audio file
   * @param audioFile Audio file (File or Blob)
   * @returns Transcription result
   */
  // NOTE(review): the type argument was missing from this declaration; it is
  // restored as RecognitionResult, the only result-shaped type declared in
  // this file. Confirm against the implementation.
  transcribeFile(audioFile: File | Blob): Promise<RecognitionResult>;

  /**
   * Get the current recognition result
   */
  getResult(): string;

  /**
   * Clear all recognition results
   */
  clear(): void;

  /**
   * Destroy the ASR instance and free resources
   */
  destroy(): void;

  /** Whether ASR is ready */
  readonly isReady: boolean;

  /** Whether currently recording */
  readonly isRecording: boolean;
}

/**
 * Alias of {@link SpeechASROptions}.
 * NOTE(review): presumably kept for compatibility with the "Easy ASR" naming — confirm.
 */
export type EasyASROptions = SpeechASROptions;

export default SpeechASR;