//#region ../core/src/types/errors.d.ts
/**
 * Error codes for Real-time (WebSocket) API errors.
 */
type RealtimeErrorCode =
  | 'auth_error'
  | 'bad_request'
  | 'quota_exceeded'
  | 'connection_error'
  | 'network_error'
  | 'aborted'
  | 'state_error'
  | 'realtime_error';
/**
 * Error codes for HTTP client errors.
 */
type HttpErrorCode = 'network_error' | 'timeout' | 'aborted' | 'http_error' | 'parse_error';
/**
 * HTTP methods supported by the client.
 */
type HttpMethod = 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE' | 'HEAD';
/**
 * Error details for SonioxHttpError.
 */
interface HttpErrorDetails {
  code: HttpErrorCode;
  message: string;
  url: string;
  method: HttpMethod;
  statusCode?: number | undefined;
  /** Response headers as name/value pairs. */
  headers?: Record<string, string> | undefined;
  /** Response body text (capped at 4KB) */
  bodyText?: string | undefined;
  cause?: unknown;
}
/**
 * All possible SDK error codes (real-time + HTTP-specific codes).
 */
type SonioxErrorCode = RealtimeErrorCode | 'soniox_error' | HttpErrorCode;
//#endregion
//#region ../core/src/errors.d.ts
declare class SonioxError extends Error {
  /**
   * Error code describing the type of error.
   * Declared as an open union (`SonioxErrorCode | (string & {})`) at the base
   * level so that subclasses (e.g. HTTP errors) can redeclare `code` with
   * their own error code unions.
   */
  readonly code: SonioxErrorCode | (string & {});
  /**
   * HTTP status code when applicable (e.g., 401 for auth errors, 500 for server errors).
   */
  readonly statusCode: number | undefined;
  /**
   * The underlying error that caused this error, if any.
   */
  readonly cause: unknown;
  constructor(message: string, code?: SonioxErrorCode | (string & {}), statusCode?: number, cause?: unknown);
  /**
   * Creates a human-readable string representation.
   */
  toString(): string;
  /**
   * Converts to a plain object for logging/serialization.
   */
  toJSON(): Record<string, unknown>;
}
//#endregion
//#region ../core/src/http-errors.d.ts
/**
 * HTTP error class for all HTTP-related failures (REST API).
 *
 * Thrown when HTTP requests fail due to network issues, timeouts,
 * server errors, or response parsing failures.
*/
declare class SonioxHttpError extends SonioxError {
  /** Categorized HTTP error code */
  readonly code: HttpErrorCode;
  /** Request URL */
  readonly url: string;
  /** HTTP method */
  readonly method: HttpMethod;
  /** Response headers (only for http_error) */
  readonly headers: Record<string, string> | undefined;
  /** Response body text, capped at 4KB (only for http_error/parse_error) */
  readonly bodyText: string | undefined;
  constructor(details: HttpErrorDetails);
  /**
   * Creates a human-readable string representation.
   */
  toString(): string;
  /**
   * Converts to a plain object for logging/serialization.
   */
  toJSON(): Record<string, unknown>;
}
/**
 * Checks whether an error is an AbortError.
 * Returns a plain boolean; unlike the `isSonioxError` / `isSonioxHttpError`
 * guards it does not narrow the type of `error`.
 */
declare function isAbortError(error: unknown): boolean;
/**
 * Type guard to check if an error is any SonioxError (base class).
 * This catches all SDK errors including HTTP and real-time errors.
 */
declare function isSonioxError(error: unknown): error is SonioxError;
/**
 * Type guard to check if an error is a SonioxHttpError.
 */
declare function isSonioxHttpError(error: unknown): error is SonioxHttpError;
/**
 * Checks if an error is a 404 Not Found error.
 * Returns a plain boolean; it does not narrow the type of `error`.
 */
declare function isNotFoundError(error: unknown): boolean;
//#endregion
//#region ../core/src/types/transcriptions.d.ts
/**
 * Shared transcription types used by both @soniox/node and @soniox/client.
 */
/**
 * Key-value pair for general context information.
 */
type ContextGeneralEntry = {
  /**
   * The key describing the context type (e.g., "domain", "topic", "doctor").
   */
  key: string;
  /**
   * The value for the context key.
   */
  value: string;
};
/**
 * Custom translation term mapping.
 */
type ContextTranslationTerm = {
  /**
   * The source term to translate.
   */
  source: string;
  /**
   * The target translation for the term.
   */
  target: string;
};
/**
 * Additional context to improve transcription and translation accuracy.
 * All sections are optional - include only what's relevant for your use case.
*/
type TranscriptionContext = {
  /**
   * Structured key/value entries describing domain, topic, intent, participant names, etc.
   */
  general?: ContextGeneralEntry[] | undefined;
  /**
   * Longer free-form background text: prior interaction history, reference documents, or meeting notes.
   */
  text?: string | undefined;
  /**
   * Domain-specific or uncommon words that should be recognized.
   */
  terms?: string[] | undefined;
  /**
   * Custom translations to apply to ambiguous terms.
   */
  translation_terms?: ContextTranslationTerm[] | undefined;
};
/**
 * One-way translation configuration.
 * Every spoken language is translated into one target language.
 */
type OneWayTranslationConfig = {
  /**
   * Translation type discriminator.
   */
  type: 'one_way';
  /**
   * Language code to translate into (e.g., "fr", "es", "de").
   */
  target_language: string;
};
/**
 * Two-way translation configuration.
 * Speech is translated between the two languages given here.
 */
type TwoWayTranslationConfig = {
  /**
   * Translation type discriminator.
   */
  type: 'two_way';
  /**
   * First language code.
   */
  language_a: string;
  /**
   * Second language code.
   */
  language_b: string;
};
/**
 * Translation configuration (one-way or two-way, discriminated by `type`).
 */
type TranslationConfig = OneWayTranslationConfig | TwoWayTranslationConfig;
/**
 * Token fields that can be used to group tokens into segments.
 */
type SegmentGroupKey = 'speaker' | 'language';
//#endregion
//#region ../core/src/types/realtime.d.ts
/**
 * Audio formats supported for real-time transcription.
 */
type AudioFormat =
  | 'pcm_s8'
  | 'pcm_s8le'
  | 'pcm_s8be'
  | 'pcm_s16le'
  | 'pcm_s16be'
  | 'pcm_s24le'
  | 'pcm_s24be'
  | 'pcm_s32le'
  | 'pcm_s32be'
  | 'pcm_u8'
  | 'pcm_u8le'
  | 'pcm_u8be'
  | 'pcm_u16le'
  | 'pcm_u16be'
  | 'pcm_u24le'
  | 'pcm_u24be'
  | 'pcm_u32le'
  | 'pcm_u32be'
  | 'pcm_f32le'
  | 'pcm_f32be'
  | 'pcm_f64le'
  | 'pcm_f64be'
  | 'mulaw'
  | 'alaw'
  | 'aac'
  | 'aiff'
  | 'amr'
  | 'asf'
  | 'wav'
  | 'mp3'
  | 'flac'
  | 'ogg'
  | 'webm';
/**
 * Configuration sent to the Soniox WebSocket API when a session starts.
 */
type SttSessionConfig = {
  /**
   * Speech-to-text model to use.
   */
  model: string;
  /**
   * Audio format. Use 'auto' for automatic detection of container formats.
   * For raw PCM formats, sample_rate and num_channels must be set as well.
   * @default 'auto'
   */
  audio_format?: 'auto' | AudioFormat | undefined;
  /**
   * Sample rate in Hz (required for PCM formats).
   */
  sample_rate?: number | undefined;
  /**
   * Number of audio channels (required for raw audio formats).
   */
  num_channels?: number | undefined;
  /**
   * Languages expected in the audio (ISO language codes).
   */
  language_hints?: string[] | undefined;
  /**
   * When true, recognition is strongly biased toward the language hints.
   * Best-effort only, not a hard guarantee.
   */
  language_hints_strict?: boolean | undefined;
  /**
   * Enable speaker identification.
   */
  enable_speaker_diarization?: boolean | undefined;
  /**
   * Enable automatic language detection.
   */
  enable_language_identification?: boolean | undefined;
  /**
   * Enable endpoint detection for utterance boundaries.
   * Useful for voice AI agents.
   */
  enable_endpoint_detection?: boolean | undefined;
  /**
   * Maximum delay between the end of speech and the returned endpoint.
   * Allowed values are between 500ms and 3000ms. The default value is 2000ms.
   */
  max_endpoint_delay_ms?: number | undefined;
  /**
   * Controls how aggressively endpoints are detected.
   *
   * Adjusts how likely the model is to emit an endpoint. Higher values make
   * endpoints more likely, which can finalize segments sooner. Lower values
   * make endpoints less likely, which can help the system wait longer before
   * finalizing.
   *
   * Allowed values are between -1.0 and 1.0. The default value is 0.0.
   */
  endpoint_sensitivity?: number | undefined;
  /**
   * Optional tracking identifier (max 256 chars).
   */
  client_reference_id?: string | undefined;
  /**
   * Additional context to improve transcription accuracy.
   */
  context?: TranscriptionContext | undefined;
  /**
   * Translation configuration.
   */
  translation?: TranslationConfig | undefined;
};
/**
 * SDK-level session options (not sent to the server).
*/
type SttSessionOptions = {
  /**
   * AbortSignal used to cancel the session.
   */
  signal?: AbortSignal | undefined;
  /**
   * How often keepalive messages are sent while paused (milliseconds).
   * @default 5000
   */
  keepalive_interval_ms?: number | undefined;
  /**
   * Maximum time to wait for the WebSocket connection to open (milliseconds).
   * When the connection is not established within this time, a
   * {@link ConnectionError} with message "Connection timed out" is thrown.
   * @default 20000
   */
  connect_timeout_ms?: number | undefined;
};
/**
 * One token of a real-time transcription.
 */
type RealtimeToken = {
  /**
   * The transcribed text.
   */
  text: string;
  /**
   * Start time in milliseconds relative to audio start.
   */
  start_ms?: number | undefined;
  /**
   * End time in milliseconds relative to audio start.
   */
  end_ms?: number | undefined;
  /**
   * Confidence score (0.0 to 1.0).
   */
  confidence: number;
  /**
   * Whether the token is finalized.
   */
  is_final: boolean;
  /**
   * Speaker identifier (if diarization enabled).
   */
  speaker?: string | undefined;
  /**
   * Detected language code (if language identification enabled).
   */
  language?: string | undefined;
  /**
   * Translation status of this token.
   */
  translation_status?: 'none' | 'original' | 'translation' | undefined;
  /**
   * Source language for translated tokens.
   */
  source_language?: string | undefined;
};
/**
 * A run of contiguous real-time tokens grouped by speaker/language.
 */
type RealtimeSegment = {
  /**
   * Text of all tokens in this segment, concatenated.
   */
  text: string;
  /**
   * Segment start time in milliseconds (from first token).
   */
  start_ms?: number | undefined;
  /**
   * Segment end time in milliseconds (from last token).
   */
  end_ms?: number | undefined;
  /**
   * Speaker identifier (if diarization enabled).
   */
  speaker?: string | undefined;
  /**
   * Detected language code (if language identification enabled).
   */
  language?: string | undefined;
  /**
   * Original tokens in this segment.
   */
  tokens: RealtimeToken[];
};
/**
 * Options for segmenting real-time tokens.
 */
type RealtimeSegmentOptions = {
  /**
   * Fields to group by. A new segment starts when any of these fields changes.
   * @default ['speaker', 'language']
   */
  group_by?: SegmentGroupKey[] | undefined;
  /**
   * When true, only tokens marked as final are included.
   * @default false
   */
  final_only?: boolean | undefined;
};
/**
 * Options for rolling real-time segmentation buffers.
 */
type RealtimeSegmentBufferOptions = {
  /**
   * Fields to group by. A new segment starts when any of these fields changes.
   * @default ['speaker', 'language']
   */
  group_by?: SegmentGroupKey[] | undefined;
  /**
   * When true, only tokens marked as final are buffered.
   * @default true
   */
  final_only?: boolean | undefined;
  /**
   * Maximum number of tokens to keep in the buffer.
   * @default 2000
   */
  max_tokens?: number | undefined;
  /**
   * Maximum time window to keep in milliseconds (requires token timings).
   */
  max_ms?: number | undefined;
};
/**
 * One utterance assembled from real-time segments.
 */
type RealtimeUtterance = {
  /**
   * Text of all segments in this utterance, concatenated.
   */
  text: string;
  /**
   * Segments included in this utterance.
   */
  segments: RealtimeSegment[];
  /**
   * Tokens included in this utterance.
   */
  tokens: RealtimeToken[];
  /**
   * Utterance start time in milliseconds (from first segment).
   */
  start_ms?: number | undefined;
  /**
   * Utterance end time in milliseconds (from last segment).
   */
  end_ms?: number | undefined;
  /**
   * Speaker identifier when consistent across segments.
   */
  speaker?: string | undefined;
  /**
   * Detected language code when consistent across segments.
   */
  language?: string | undefined;
  /**
   * Milliseconds of audio that have been finalized at flush time.
   */
  final_audio_proc_ms?: number | undefined;
  /**
   * Total milliseconds of audio processed at flush time.
   */
  total_audio_proc_ms?: number | undefined;
};
/**
 * Options for buffering real-time utterances.
*/
type RealtimeUtteranceBufferOptions = {
  /**
   * Fields to group by. A new segment starts when any of these fields changes.
   * @default ['speaker', 'language']
   */
  group_by?: SegmentGroupKey[] | undefined;
  /**
   * When true, only tokens marked as final are buffered.
   * @default true
   */
  final_only?: boolean | undefined;
  /**
   * Maximum number of tokens to keep in the buffer.
   * @default 2000
   */
  max_tokens?: number | undefined;
  /**
   * Maximum time window to keep in milliseconds (requires token timings).
   */
  max_ms?: number | undefined;
};
/**
 * A result message from the real-time WebSocket.
 */
type RealtimeResult = {
  /**
   * Tokens in this result.
   */
  tokens: RealtimeToken[];
  /**
   * Milliseconds of audio that have been finalized.
   */
  final_audio_proc_ms: number;
  /**
   * Total milliseconds of audio processed.
   */
  total_audio_proc_ms: number;
  /**
   * Whether this is the final result (session ending).
   */
  finished?: boolean | undefined;
};
/**
 * Typed event for async iterator consumption.
 * Discriminated by `kind`; only `'result'` events carry `data`.
 */
type RealtimeEvent =
  | {
      kind: 'result';
      data: RealtimeResult;
    }
  | {
      kind: 'endpoint';
    }
  | {
      kind: 'finalized';
    }
  | {
      kind: 'finished';
    };
/**
 * Session lifecycle states.
 */
type SttSessionState =
  | 'idle'
  | 'connecting'
  | 'connected'
  | 'finishing'
  | 'finished'
  | 'canceled'
  | 'closed'
  | 'error';
/**
 * Reason for a state transition.
 *
 * Provided as an optional field on `state_change` events so consumers
 * can distinguish user-initiated actions from automatic reconnections,
 * connection failures, etc.
 */
type StateChangeReason =
  | 'user_action'
  | 'connected'
  | 'connection_lost'
  | 'reconnecting'
  | 'reconnected'
  | 'error'
  | 'finished';
/**
 * Event handlers for the STT session.
 */
type SttSessionEvents = {
  /**
   * Parsed result received.
   */
  result: (result: RealtimeResult) => void;
  /**
   * Individual token received.
   */
  token: (token: RealtimeToken) => void;
  /**
   * Error occurred.
   */
  error: (error: Error) => void;
  /**
   * Endpoint detected (`<end>` token).
   */
  endpoint: () => void;
  /**
   * Finalization complete (`<fin>` token).
   */
  finalized: () => void;
  /**
   * Session finished (server signaled end of stream).
   */
  finished: () => void;
  /**
   * Session connected and ready.
   */
  connected: () => void;
  /**
   * Session disconnected.
   */
  disconnected: (reason?: string) => void;
  /**
   * Session state transition.
   */
  state_change: (update: {
    old_state: SttSessionState;
    new_state: SttSessionState;
    reason?: StateChangeReason;
  }) => void;
};
/**
 * Audio data types accepted by sendAudio.
 */
type AudioData = Uint8Array | ArrayBuffer;
/**
 * Options for streaming audio from an async iterable source.
 */
type SendStreamOptions = {
  /**
   * Delay in milliseconds between sending chunks.
   * Useful for simulating real-time pace when streaming pre-recorded files.
   * Not needed for live audio sources.
   */
  pace_ms?: number | undefined;
  /**
   * When true, calls finish() automatically after the stream ends.
   * @default false
   */
  finish?: boolean | undefined;
};
//#endregion
//#region ../core/src/realtime/errors.d.ts
/**
 * Base error class for all real-time (WebSocket) SDK errors.
 */
declare class RealtimeError extends SonioxError {
  /** Real-time error code */
  readonly code: RealtimeErrorCode;
  /**
   * Original response payload for debugging.
   * Contains the raw WebSocket message that caused the error.
   */
  readonly raw: unknown;
  constructor(message: string, code?: RealtimeErrorCode, statusCode?: number, raw?: unknown);
  /**
   * Creates a human-readable string representation.
   */
  toString(): string;
  /**
   * Converts to a plain object for logging/serialization.
   */
  toJSON(): Record<string, unknown>;
}
/**
 * Authentication error (401).
 * Thrown when the API key is invalid or expired.
 */
declare class AuthError extends RealtimeError {
  constructor(message: string, statusCode?: number, raw?: unknown);
}
/**
 * Bad request error (400).
 * Thrown for invalid configuration or parameters.
 */
declare class BadRequestError extends RealtimeError {
  constructor(message: string, statusCode?: number, raw?: unknown);
}
/**
 * Quota error (402, 429).
* Thrown when rate limits are exceeded or quota is exhausted.
 */
declare class QuotaError extends RealtimeError {
  constructor(message: string, statusCode?: number, raw?: unknown);
}
/**
 * Connection error.
 * Thrown for WebSocket connection failures and transport errors.
 */
declare class ConnectionError extends RealtimeError {
  constructor(message: string, raw?: unknown);
}
/**
 * Network error.
 * Thrown for server-side network issues (408, 500, 503).
 */
declare class NetworkError extends RealtimeError {
  constructor(message: string, statusCode?: number, raw?: unknown);
}
/**
 * Abort error.
 * Thrown when an operation is cancelled via AbortSignal.
 */
declare class AbortError extends RealtimeError {
  constructor(message?: string);
}
/**
 * State error.
 * Thrown when an operation is attempted in an invalid state.
 */
declare class StateError extends RealtimeError {
  constructor(message: string);
}
/**
 * Whether an error is safe to retry via automatic reconnection.
 *
 * Retriable: {@link ConnectionError}, {@link NetworkError} (transient transport/server issues).
 * Non-retriable: {@link AuthError}, {@link BadRequestError}, {@link QuotaError},
 * {@link AbortError}, {@link StateError} (permanent or user-initiated).
 */
declare function isRetriableError(error: unknown): boolean;
//#endregion
//#region ../core/src/types/tts.d.ts
/**
 * Supported audio formats for Text-to-Speech output.
 * The trailing `(string & {})` keeps the union open to other format strings
 * while preserving editor completion for the listed literals.
 */
type TtsAudioFormat =
  | 'pcm_f32le'
  | 'pcm_s16le'
  | 'pcm_mulaw'
  | 'pcm_alaw'
  | 'wav'
  | 'aac'
  | 'mp3'
  | 'opus'
  | 'flac'
  | (string & {});
/**
 * Input for creating a TTS stream. All fields are optional and are merged
 * with `tts_defaults` from the resolved connection config. After merging,
 * `model`, `language`, `voice`, and `audio_format` must be present.
 */
type TtsStreamInput = {
  /**
   * Text-to-Speech model to use.
   * @example 'tts-rt-v1'
   */
  model?: string | undefined;
  /**
   * Language code for speech generation.
   * @example 'en'
   */
  language?: string | undefined;
  /**
   * Voice identifier.
   * @example 'Adrian'
   */
  voice?: string | undefined;
  /**
   * Output audio format.
   * @example 'wav'
   */
  audio_format?: TtsAudioFormat | undefined;
  /**
   * Output sample rate in Hz. Required for raw PCM formats.
   */
  sample_rate?: number | undefined;
  /**
   * Codec bitrate in bps (for compressed formats).
   */
  bitrate?: number | undefined;
  /**
   * Client-generated stream identifier. Must be unique among active streams
   * on the same connection. Auto-generated if omitted.
   */
  stream_id?: string | undefined;
};
/**
 * Fully resolved TTS stream config sent over the WebSocket.
 * All required fields are present after merging input with defaults.
 */
type TtsStreamConfig = {
  model: string;
  language: string;
  voice: string;
  audio_format: string;
  sample_rate?: number | undefined;
  bitrate?: number | undefined;
  stream_id: string;
};
/**
 * Events emitted by a TTS WebSocket connection.
 */
type TtsConnectionEvents = {
  /**
   * A connection-level error occurred. Always a {@link RealtimeError}
   * subclass (e.g. {@link ConnectionError}, {@link NetworkError},
   * {@link AuthError}).
   */
  error: (error: RealtimeError) => void;
  /** The WebSocket connection was closed. */
  close: () => void;
};
/**
 * Options for creating a TTS connection.
 */
type TtsConnectionOptions = {
  /**
   * Interval for sending keepalive messages (milliseconds).
   * @default 5000
   * @minimum 1000
   */
  keepalive_interval_ms?: number | undefined;
  /**
   * Maximum time to wait for the WebSocket connection to open (milliseconds).
   * @default 20000
   */
  connect_timeout_ms?: number | undefined;
};
/**
 * Events emitted by a TTS stream.
 */
type TtsStreamEvents = {
  /** Decoded audio chunk received. */
  audio: (chunk: Uint8Array) => void;
  /** Server marked the final audio payload for this stream. */
  audioEnd: () => void;
  /** Stream has been fully terminated by the server. */
  terminated: () => void;
  /**
   * A stream-level error occurred. Always a {@link RealtimeError}
   * subclass mapped from the server `error_code` / `error_message`.
   */
  error: (error: RealtimeError) => void;
};
/**
 * Lifecycle states for a TTS stream.
 */
type TtsStreamState = 'active' | 'finishing' | 'ended' | 'error';
/**
 * Raw JSON event received from the TTS WebSocket server.
 */
type TtsEvent = {
  stream_id?: string | undefined;
  audio?: string | undefined;
  audio_end?: boolean | undefined;
  terminated?: boolean | undefined;
  error_code?: number | undefined;
  error_message?: string | undefined;
};
/**
 * Options for REST TTS generation (`generate` / `generateStream`).
 */
type GenerateSpeechOptions = {
  /** Input text to generate as speech. */
  text: string;
  /** Text-to-Speech model to use. @default 'tts-rt-v1' */
  model?: string | undefined;
  /** Language code. @default 'en' */
  language?: string | undefined;
  /** Voice identifier. */
  voice: string;
  /**
   * Output audio format.
   * @default 'wav'
   */
  audio_format?: string | undefined;
  /** Output sample rate in Hz. Required for raw PCM formats. */
  sample_rate?: number | undefined;
  /** Codec bitrate in bps (for compressed formats). */
  bitrate?: number | undefined;
  /** Optional AbortSignal for cancellation. */
  signal?: AbortSignal | undefined;
};
/**
 * A language supported by a Text-to-Speech model.
 */
type TtsLanguage = {
  /** ISO language code. */
  code: string;
  /** Human-readable language name. */
  name: string;
};
/**
 * Voice gender metadata returned by the TTS models API.
 */
type TtsVoiceGender = 'male' | 'female' | 'neutral';
/**
 * A Text-to-Speech voice.
 */
type TtsVoice = {
  /** Unique identifier of the voice. */
  id: string;
  /** Human-readable voice description. */
  description: string;
  /** Voice gender metadata. */
  gender: TtsVoiceGender;
};
/**
 * A Text-to-Speech model.
 */
type TtsModel = {
  /** Unique identifier of the model. */
  id: string;
  /** If this is an alias, the id of the aliased model. */
  aliased_model_id?: string | null;
  /** Name of the model. */
  name: string;
  /** Languages supported by this model. */
  languages: TtsLanguage[];
  /** Voices supported by this model. */
  voices: TtsVoice[];
};
//#endregion
//#region ../core/src/connection.d.ts
/**
 * Context passed to the config resolver function by the SDK.
 *
 * `usage` indicates what the resolved config will be used for, so the
 * server can generate a temporary API key with the correct scope.
 * `params` is a freeform bag for any custom data the developer wants
 * to forward to their backend.
 */
type ConfigContext = {
  /** What the config will be used for. Set by the SDK internally. */
  usage?: 'transcribe_websocket' | 'tts_rt' | undefined;
  /** Freeform data the developer can forward to their backend. */
  params?: Record<string, unknown> | undefined;
};
/**
 * Soniox deployment region.
 *
 * Defined regions:
 * - `'eu'` — European Union (`*.eu.soniox.com`)
 * - `'jp'` — Japan (`*.jp.soniox.com`)
 * - `undefined` — Default (United States). The US region has no subdomain.
 *
 * A region name (other than `'us'`) is shorthand for setting `base_domain`
 * to `{region}.soniox.com`. The string `'us'` is accepted and normalized to
 * the default (United States) base domain; there is no `us.soniox.com` host.
 *
 * The type stays open (`string & {}`) for forward compatibility with regions
 * added after this SDK version was published, but passing an unknown region
 * simply prepends it as a subdomain and may not resolve.
 *
 * @see https://soniox.com/docs/stt/data-residency
 */
type SonioxRegion = 'eu' | 'jp' | (string & {});
/**
 * Connection configuration for Soniox APIs.
 *
 * Can be provided as a plain object (sync) or returned from an async function
 * to support fetching configuration from a server at runtime.
 */
type SonioxConnectionConfig = {
  /** API key for authentication. */
  api_key: string;
  /**
   * Deployment region. Determines which regional endpoints are used.
   * Leave `undefined` for the default (US) region.
   *
   * Shorthand for `base_domain: '{region}.soniox.com'`.
   * `base_domain` takes precedence when both are provided.
   *
   * @see https://soniox.com/docs/stt/data-residency
   */
  region?: SonioxRegion | undefined;
  /**
   * Base domain for all Soniox service URLs.
   *
   * A single override that derives all four service endpoints:
   * - `api_domain` → `https://api.{base_domain}`
   * - `stt_ws_url` → `wss://stt-rt.{base_domain}/transcribe-websocket`
   * - `tts_api_url` → `https://tts-rt.{base_domain}`
   * - `tts_ws_url` → `wss://tts-rt.{base_domain}/tts-websocket`
   *
   * Takes precedence over `region`. Individual URL fields (`api_domain`,
   * `stt_ws_url`, etc.) still take final precedence over this value.
   *
   * @example 'eu.soniox.com'
   */
  base_domain?: string | undefined;
  /**
   * REST API domain override (e.g. `'https://api.eu.soniox.com'`).
   * When set, takes precedence over the region-derived domain.
   */
  api_domain?: string | undefined;
  /**
   * STT WebSocket URL override (e.g. `'wss://stt-rt.eu.soniox.com/transcribe-websocket'`).
   * When set, takes precedence over the region-derived URL.
   */
  stt_ws_url?: string | undefined;
  /**
   * TTS REST API URL override (e.g. `'https://tts-rt.eu.soniox.com'`).
   * When set, takes precedence over the region-derived URL.
   */
  tts_api_url?: string | undefined;
  /**
   * TTS WebSocket URL override (e.g. `'wss://tts-rt.eu.soniox.com/tts-websocket'`).
   * When set, takes precedence over the region-derived URL.
   */
  tts_ws_url?: string | undefined;
  /**
   * Server-provided STT session defaults (model, language hints, context, etc.).
   *
   * Available to the `session_config` function passed to `client.realtime.record()`,
   * allowing server-driven defaults. Not applied automatically — the caller must
   * explicitly spread them.
   */
  stt_defaults?: Partial<SttSessionConfig> | undefined;
  /**
   * Server-provided TTS stream defaults (model, voice, language, audio_format, etc.).
   *
   * Automatically merged as the base layer when opening TTS streams.
   * Caller-provided fields override these defaults.
   */
  tts_defaults?: Partial<TtsStreamInput> | undefined;
  /**
   * @deprecated Use `stt_defaults` instead. Kept as an alias for backward
   * compatibility; the resolver treats it as equivalent to `stt_defaults`
   * when that field is absent. Planned for removal in the next major version.
   */
  session_defaults?: Partial<SttSessionConfig> | undefined;
};
/**
 * Fully resolved connection configuration with all URLs determined.
 */
type ResolvedConnectionConfig = {
  api_key: string;
  api_domain: string;
  stt_ws_url: string;
  tts_api_url: string;
  tts_ws_url: string;
  /** Server-provided STT session defaults (empty object when not provided). */
  stt_defaults: Partial<SttSessionConfig>;
  /** Server-provided TTS stream defaults (empty object when not provided). */
  tts_defaults: Partial<TtsStreamInput>;
  /**
   * @deprecated Use `stt_defaults` instead. Kept in the resolver output as
   * an alias for backward compatibility; planned for removal in the next
   * major version.
   */
  session_defaults: Partial<SttSessionConfig>;
};
/**
 * Resolve a {@link SonioxConnectionConfig} into fully qualified URLs.
 *
 * Resolution priority (highest → lowest) for each URL:
 * 1. Explicit field (`api_domain`, `stt_ws_url`, `tts_api_url`, `tts_ws_url`)
 * 2. Derived from `base_domain`
 * 3. Derived from `region` → `{region}.soniox.com`
 * 4.
Default US base domain (`soniox.com`)
 */
declare function resolveConnectionConfig(config: SonioxConnectionConfig): ResolvedConnectionConfig;
//#endregion
//#region ../core/src/realtime/stt.d.ts
/**
 * Real-time Speech-to-Text session
 *
 * Provides WebSocket-based streaming transcription with support for:
 * - Event-based and async iterator consumption
 * - Pause/resume with automatic keepalive while paused
 * - AbortSignal cancellation
 *
 * @example
 * ```typescript
 * const session = new RealtimeSttSession(apiKey, wsUrl, { model: 'stt-rt-v5' });
 *
 * session.on('result', (result) => {
 *   console.log(result.tokens.map(t => t.text).join(''));
 * });
 *
 * await session.connect();
 * session.sendAudio(audioChunk);
 * await session.finish();
 * ```
 */
declare class RealtimeSttSession implements AsyncIterable<RealtimeEvent> {
  private readonly emitter;
  private readonly eventQueue;
  private iteratorAttached;
  private readonly apiKey;
  private readonly wsBaseUrl;
  private readonly config;
  private readonly keepaliveIntervalMs;
  private readonly connectTimeoutMs;
  private readonly signal;
  private ws;
  private _state;
  private _paused;
  private _pauseWarned;
  private keepaliveInterval;
  private finishResolver;
  private finishRejecter;
  private abortHandler;
  constructor(apiKey: string, wsBaseUrl: string, config: SttSessionConfig, options?: SttSessionOptions);
  /**
   * Current session state.
   */
  get state(): SttSessionState;
  /**
   * Whether the session is currently paused.
   */
  get paused(): boolean;
  /**
   * Connect to the Soniox WebSocket API.
   *
   * @throws {@link AbortError} If aborted
   * @throws {@link ConnectionError} If connection fails
   * @throws {@link StateError} If already connected
   */
  connect(): Promise<void>;
  /**
   * Send audio data to the server.
   *
   * @param data - Audio data as Uint8Array or ArrayBuffer
   * @throws {@link AbortError} If aborted
   * @throws {@link StateError} If not connected
   */
  sendAudio(data: AudioData): void;
  /**
   * Stream audio data from an async iterable source.
   *
   * @param stream - Async iterable yielding audio chunks
   * @param options - Optional pacing and auto-finish settings
   * @throws {@link AbortError} If aborted during streaming
   * @throws {@link StateError} If not connected
   */
  sendStream(stream: AsyncIterable<AudioData>, options?: SendStreamOptions): Promise<void>;
  /**
   * Pauses audio transmission and starts automatic keepalive messages.
   */
  pause(): void;
  /**
   * Resume audio transmission.
   */
  resume(): void;
  /**
   * Requests the server to finalize the current transcription.
   */
  finalize(options?: {
    trailing_silence_ms?: number;
  }): void;
  /**
   * Send a keepalive message.
   */
  keepAlive(): void;
  /**
   * Gracefully finish the session.
   */
  finish(): Promise<void>;
  /**
   * Close (cancel) the session immediately without waiting.
   */
  close(): void;
  /**
   * Register an event handler.
   */
  on<E extends keyof SttSessionEvents>(event: E, handler: SttSessionEvents[E]): this;
  /**
   * Register a one-time event handler.
   */
  once<E extends keyof SttSessionEvents>(event: E, handler: SttSessionEvents[E]): this;
  /**
   * Remove an event handler.
   */
  off<E extends keyof SttSessionEvents>(event: E, handler: SttSessionEvents[E]): this;
  /**
   * Async iterator for consuming events.
   *
   * The returned iterator's `return()` resets the internal iterator-attach
   * flag and drops any buffered events, so consumers that exit `for await`
   * early (via `break` etc.) stop accruing memory while the session keeps
   * running.
   */
  [Symbol.asyncIterator](): AsyncIterator<RealtimeEvent>;
  /**
   * @internal Debug-only: forcefully close the underlying WebSocket to
   * simulate an unexpected network disconnection.
   */
  __debugForceDisconnect(): void;
  /**
   * Push an event to the async iterator queue only when a consumer has
   * attached via `[Symbol.asyncIterator]()`. Listener-only consumers
   * (the documented `.on()` pattern) never drain the queue, so pushing
   * unconditionally would leak buffered events on long-running sessions.
   */
  private enqueueIfIterating;
  private createWebSocket;
  private handleMessage;
  private handleClose;
  private handleError;
  private handleAbort;
  private setState;
  private cleanup;
  private isTerminalState;
  private checkAborted;
  private settleFinish;
  private sendMessage;
  private startKeepalive;
  private stopKeepalive;
  private updateKeepalive;
}
//#endregion
//#region ../core/src/realtime/emitter.d.ts
/**
 * A minimal, runtime-agnostic typed event emitter.
 * Does not depend on Node.js EventEmitter.
 *
 * `Events` maps each event name to its handler signature; `on`/`once`/`off`
 * and `emit` are checked against that map.
 */
declare class TypedEmitter<Events extends Record<string, (...args: any[]) => void>> {
  private listeners;
  private readonly errorEvent;
  /**
   * Register an event handler.
   */
  on<E extends keyof Events>(event: E, handler: Events[E]): this;
  /**
   * Register a one-time event handler.
   */
  once<E extends keyof Events>(event: E, handler: Events[E]): this;
  /**
   * Remove an event handler.
   */
  off<E extends keyof Events>(event: E, handler: Events[E]): this;
  /**
   * Emit an event to all registered handlers.
   * Handler errors do not prevent other handlers from running.
   * Errors are reported to an `error` event if present, otherwise rethrown async.
   */
  emit<E extends keyof Events>(event: E, ...args: Parameters<Events[E]>): void;
  /**
   * Remove all event handlers.
   */
  removeAllListeners(event?: keyof Events): void;
  private reportListenerError;
  private normalizeError;
  private scheduleThrow;
}
//#endregion
//#region ../core/src/realtime/tts.d.ts
/**
 * Handle for one TTS stream on a WebSocket connection.
 *
 * Emits typed events and supports async iteration over decoded audio chunks.
*
 * @example Event-based
 * ```typescript
 * stream.on('audio', (chunk) => process(chunk));
 * stream.on('terminated', () => console.log('done'));
 * stream.sendText("Hello world");
 * stream.finish();
 * ```
 *
 * @example Async iteration
 * ```typescript
 * stream.sendText("Hello world");
 * stream.finish();
 * for await (const chunk of stream) {
 *   process(chunk);
 * }
 * ```
 */
declare class RealtimeTtsStream extends TypedEmitter<TtsStreamEvents> implements AsyncIterable<Uint8Array> {
  readonly streamId: string;
  private _state;
  private readonly audioQueue;
  private iteratorAttached;
  private readonly connection;
  private readonly ownsConnection;
  /** @internal */
  constructor(streamId: string, connection: RealtimeTtsConnection, ownsConnection: boolean);
  /** Current stream lifecycle state. */
  get state(): TtsStreamState;
  /**
   * Send one text chunk to the TTS stream.
   *
   * @param text - Text to synthesize
   * @param options.end - If true, signals this is the final text chunk
   */
  sendText(text: string, options?: {
    end?: boolean;
  }): void;
  /**
   * Pipe an async iterable of text chunks into the stream.
   * Automatically calls {@link finish} when the iterable completes.
   *
   * Designed for concurrent use: call `sendStream()` and consume audio
   * via `for await` or events simultaneously.
   *
   * @example LLM token piping
   * ```typescript
   * stream.sendStream(llmTokenStream);
   * for await (const audio of stream) { forward(audio); }
   * ```
   */
  sendStream(source: AsyncIterable<string>): Promise<void>;
  /**
   * Signal that no more text will be sent for this stream.
   * The server will finish generating audio and send `terminated`.
   */
  finish(): void;
  /**
   * Cancel this stream. The server will stop generating and send `terminated`.
   */
  cancel(): void;
  /**
   * Close this stream. For single-stream usage (created via `tts(input)`),
   * also closes the underlying WebSocket connection.
   */
  close(): void;
  /**
   * Async iterator that yields decoded audio chunks.
   *
   * The returned iterator's `return()` resets the internal iterator-attach
   * flag and drops any buffered audio, so consumers that exit `for await`
   * early (via `break` etc.) stop accruing memory while the stream keeps
   * receiving server audio.
   */
  [Symbol.asyncIterator](): AsyncIterator<Uint8Array>;
  /**
   * Push an audio chunk to the async iterator queue only when a consumer
   * has attached via `[Symbol.asyncIterator]()`. Listener-only consumers
   * (the documented `.on('audio', ...)` pattern) never drain the queue,
   * so pushing unconditionally would leak buffered chunks.
   */
  private enqueueIfIterating;
  /** @internal Dispatch a server event to this stream. */
  _handleEvent(event: TtsEvent): void;
  /** @internal Force-end this stream (connection closing). */
  _forceEnd(): void;
  private _endStream;
}
/**
 * WebSocket connection for real-time Text-to-Speech.
 *
 * Supports up to 5 concurrent streams multiplexed by `stream_id`.
 * The connection automatically sends keepalive messages while open.
 *
 * @example Multi-stream
 * ```typescript
 * const conn = new RealtimeTtsConnection(apiKey, wsUrl, ttsDefaults);
 * await conn.connect();
 *
 * const s1 = await conn.stream({ model, voice, language, audio_format });
 * s1.sendText("Hello");
 * s1.finish();
 * for await (const chunk of s1) { ... }
 *
 * conn.close();
 * ```
 */
declare class RealtimeTtsConnection extends TypedEmitter<TtsConnectionEvents> {
  private readonly apiKey;
  private readonly wsUrl;
  private readonly ttsDefaults;
  private readonly keepaliveIntervalMs;
  private readonly connectTimeoutMs;
  private ws;
  private connected;
  private connecting;
  private keepaliveTimer;
  private readonly activeStreams;
  constructor(apiKey: string, wsUrl: string, ttsDefaults?: Partial<TtsStreamInput>, options?: TtsConnectionOptions);
  /** Whether the WebSocket is connected. */
  get isConnected(): boolean;
  /**
   * Open the WebSocket connection and start keepalive.
   * Called automatically by {@link stream} if not yet connected.
   */
  connect(): Promise<void>;
  /**
   * Open a new TTS stream on this connection.
   * Auto-connects if the WebSocket is not yet open.
   *
   * @param input - Stream configuration (merged with tts_defaults)
   * @returns A ready-to-use stream handle
   */
  stream(input?: TtsStreamInput): Promise<RealtimeTtsStream>;
  /** @internal Open a stream, optionally marking it as connection-owning. */
  _openStream(input: TtsStreamInput, ownsConnection: boolean): Promise<RealtimeTtsStream>;
  /**
   * Close the WebSocket connection and terminate all active streams.
   */
  close(): void;
  /** @internal Send a JSON payload on the WebSocket. */
  _sendJson(payload: Record<string, unknown>): void;
  /** @internal Remove a stream from the active set. */
  _deactivateStream(streamId: string): void;
  private createWebSocket;
  private handleMessage;
  private startKeepalive;
  private stopKeepalive;
}
//#endregion
//#region ../core/src/realtime/segments.d.ts
/**
 * Groups real-time tokens into segments based on specified grouping keys.
 *
 * A new segment starts when any of the `group_by` fields changes.
 * Tokens are concatenated as-is.
 *
 * @param tokens - Array of real-time tokens to segment
 * @param options - Segmentation options
 * @param options.group_by - Fields to group by (default: ['speaker', 'language'])
 * @param options.final_only - When true, only finalized tokens are included
 * @returns Array of segments with combined text and timing (if available)
 */
declare function segmentRealtimeTokens(tokens: RealtimeToken[], options?: RealtimeSegmentOptions): RealtimeSegment[];
//#endregion
//#region ../core/src/realtime/segment-buffer.d.ts
/**
 * Rolling buffer for turning real-time results into stable segments.
 */
declare class RealtimeSegmentBuffer {
  private tokens;
  private readonly groupBy;
  private readonly finalOnly;
  private readonly maxTokens;
  private readonly maxMs;
  constructor(options?: RealtimeSegmentBufferOptions);
  /**
   * Number of tokens currently buffered.
   */
  get size(): number;
  /**
   * Add a real-time result and return stable segments.
   */
  add(result: RealtimeResult): RealtimeSegment[];
  /**
   * Clear all buffered tokens.
*/ reset(): void; /** * Flush all buffered tokens into segments and clear the buffer. * * Includes tokens that are not yet stable by final_audio_proc_ms. */ flushAll(): RealtimeSegment[]; private flushStable; private trim; } //#endregion //#region ../core/src/realtime/utterance-buffer.d.ts /** * Collects real-time results into utterances for endpoint-driven workflows. */ declare class RealtimeUtteranceBuffer { private readonly segmentBuffer; private pendingSegments; private lastFinalAudioProcMs; private lastTotalAudioProcMs; constructor(options?: RealtimeUtteranceBufferOptions); /** * Add a real-time result and collect stable segments. */ addResult(result: RealtimeResult): RealtimeSegment[]; /** * Mark an endpoint and flush the current utterance. */ markEndpoint(): RealtimeUtterance | undefined; /** * Clear buffered segments and tokens. */ reset(): void; } //#endregion //#region src/auth.d.ts /** * API key configuration and resolution for client-side usage * Every recording session fetches a fresh key */ /** * API key configuration. * * - `string` - A pre-fetched temporary API key (e.g., injected from SSR) * - `() => Promise` - An async function that fetches a fresh temporary key * from your backend. Called once per recording session. * * @deprecated Use {@link SonioxConnectionConfig} with `SonioxClientOptions.config` instead. * * @example * ```typescript * // Static key (for demos or SSR-injected keys) * const client = new SonioxClient({ api_key: 'temp:...' }); * * // Async function (recommended for production) * const client = new SonioxClient({ * api_key: async () => { * const res = await fetch('/api/get-temporary-key', { method: 'POST' }); * const { api_key } = await res.json(); * return api_key; * }, * }); * ``` * * Note: If you use Node.js, you can use the `SonioxNodeClient` to fetch a temporary API key via `client.auth.createTemporaryKey()`. */ type ApiKeyConfig = string | (() => Promise); /** * Resolves an ApiKeyConfig to a plain API key string. 
* @param config - The API key configuration * @returns The resolved API key string * @throws If the function rejects or returns a non-string value * @deprecated Use {@link SonioxConnectionConfig} with `SonioxClientOptions.config` instead. */ declare function resolveApiKey(config: ApiKeyConfig): Promise; //#endregion //#region src/permissions/types.d.ts /** * Platform-agnostic permission resolver interface */ /** * Unified permission status across all platforms. */ type PermissionStatus = 'granted' | 'denied' | 'prompt' | 'unavailable'; /** * Result of a permission check or request. */ type PermissionResult = { /** * Current permission status. */ status: PermissionStatus; /** * Whether the user can be prompted again. * `false` means permanently denied (e.g., browser "Block" or iOS settings). * Useful for showing "go to settings" instructions. */ can_request: boolean; }; /** * Permission types supported by the resolver. */ type PermissionType = 'microphone'; /** * Platform-agnostic permission resolver. * * Implementations handle platform-specific permission APIs: * - Browser: `navigator.permissions.query` + `getUserMedia` * - React Native: `expo-av` or `react-native-permissions` * * @example * ```typescript * // Check before recording * const mic = await resolver.check('microphone'); * if (mic.status === 'denied' && !mic.can_request) { * showGoToSettingsMessage(); * } * ``` */ interface PermissionResolver { /** * Check current permission status WITHOUT prompting the user. */ check(permission: PermissionType): Promise; /** * Request permission from the user (may show a system prompt). * On platforms where status is already 'granted', this is a no-op. */ request(permission: PermissionType): Promise; } //#endregion //#region src/audio/types.d.ts /** * Platform-agnostic audio source interface. * * Implementations handle platform-specific audio capture (browser mic, AudioWorklet, React Native, etc.). 
* Callbacks are passed into `start()` to guarantee they are attached before any data flows. */ /** * Callbacks for receiving audio data and errors from an AudioSource. */ type AudioSourceHandlers = { /** * Called when an audio chunk is available. * @param chunk - Raw audio data as ArrayBuffer */ onData: (chunk: ArrayBuffer) => void; /** * Called when a runtime error occurs during audio capture (after start). * @param error - The error that occurred */ onError: (error: Error) => void; /** * Called when the audio source is muted externally (e.g. OS-level or hardware mute). */ onMuted?: () => void; /** * Called when the audio source is unmuted after an external mute. */ onUnmuted?: () => void; }; /** * Platform-agnostic audio source interface. * * Implementations must: * - Begin capturing audio in `start()` and deliver chunks via `handlers.onData` * - Stop all capture and release resources in `stop()` * - Throw typed errors from `start()` if capture cannot begin (e.g., permission denied) * * @example * ```typescript * // Built-in browser source * const source = new MicrophoneSource(); * * // Custom source (e.g., React Native) * class MyAudioSource implements AudioSource { * async start(handlers: AudioSourceHandlers) { ... } * stop() { ... } * } * ``` */ interface AudioSource { /** * Start capturing audio. * * @param handlers - Callbacks for audio data and errors * @throws AudioPermissionError if microphone access is denied * @throws AudioDeviceError if no audio device is found * @throws AudioUnavailableError if audio capture is not supported */ start(handlers: AudioSourceHandlers): Promise; /** * Stop capturing audio and release all resources. * Safe to call multiple times. */ stop(): void; /** * Pause audio capture (optional). * When paused, no data should be delivered via onData. */ pause?(): void; /** * Resume audio capture after pause (optional). */ resume?(): void; /** * Reinitialize the audio encoder without releasing the underlying * capture device (optional). 
* * Called during reconnection so the new server session receives a * fresh audio stream with proper container headers. Implementations * that produce a header-less format (e.g. raw PCM) can omit this. */ restart?(): void; } //#endregion //#region src/recording.d.ts /** * Unified recording lifecycle states. */ type RecordingState = 'idle' | 'starting' | 'connecting' | 'recording' | 'paused' | 'reconnecting' | 'stopping' | 'stopped' | 'error' | 'canceled'; /** * Reconnection configuration for automatic WebSocket recovery. */ type ReconnectOptions = { /** * Enable automatic reconnection on retriable errors. * @default false */ auto_reconnect?: boolean | undefined; /** * Maximum number of consecutive reconnection attempts before giving up. * @default 3 */ max_reconnect_attempts?: number | undefined; /** * Base delay in milliseconds for exponential backoff (1x, 2x, 4x, ...). * @default 1000 */ reconnect_base_delay_ms?: number | undefined; /** * When true, clear accumulated transcript state (finalText, segments, * utterances) on reconnect. Window-tracking state is always reset. * @default false */ reset_transcript_on_reconnect?: boolean | undefined; }; /** * Payload for the `reconnecting` event. */ type ReconnectingEvent = { /** Current attempt number (1-based). */ attempt: number; /** Maximum attempts configured. */ max_attempts: number; /** Backoff delay before reconnect (ms). */ delay_ms: number; /** Call to cancel this reconnection attempt. */ preventDefault: () => void; }; /** * Events emitted by a Recording instance */ type RecordingEvents = { /** Parsed result received from the server. */ result: (result: RealtimeResult) => void; /** Individual token received. */ token: (token: RealtimeToken) => void; /** Error occurred during recording. */ error: (error: Error) => void; /** Endpoint detected (speaker finished talking). */ endpoint: () => void; /** Finalization complete. */ finalized: () => void; /** Recording finished (server acknowledged end of stream). 
 */
  finished: () => void;
  /** WebSocket connected and ready. */
  connected: () => void;
  /**
   * Recording state transition. The payload carries the previous state,
   * the new state and, optionally, the reason for the change.
   */
  state_change: (update: {
    old_state: RecordingState;
    new_state: RecordingState;
    reason?: StateChangeReason;
  }) => void;
  /** About to attempt a reconnection. Call `preventDefault()` to cancel. */
  reconnecting: (event: ReconnectingEvent) => void;
  /** Successfully reconnected after a drop. */
  reconnected: (event: {
    attempt: number;
  }) => void;
  /**
   * New STT session started (initial or after reconnect).
   * Consumers should reset any session-local tracking state (e.g. token
   * window comparisons). The `reset_transcript` flag indicates whether
   * accumulated transcript state should also be cleared.
   */
  session_restart: (event: {
    reset_transcript: boolean;
  }) => void;
  /** Audio source was muted externally (e.g. OS-level or hardware mute). */
  source_muted: () => void;
  /** Audio source was unmuted after an external mute. */
  source_unmuted: () => void;
};
/**
 * Options for creating a recording.
 *
 * Combines the flat STT session config, the reconnection options and the
 * recording-specific fields declared below.
 */
type RecordOptions = SttSessionConfig & ReconnectOptions & {
  /**
   * Audio source to use. Defaults to MicrophoneSource if not provided.
   */
  source?: AudioSource | undefined;
  /**
   * AbortSignal for cancellation
   */
  signal?: AbortSignal | undefined;
  /**
   * Maximum number of audio chunks to buffer while waiting for key/connection
   * @default 1000
   */
  buffer_queue_size?: number | undefined;
  /**
   * SDK-level session options (signal, etc.)
   */
  session_options?: SttSessionOptions | undefined;
  /**
   * Function that receives the resolved connection config (including
   * `stt_defaults` from the server) and returns the final session config.
   *
   * When provided, its return value is used as the session config,
   * and any flat session config fields on this object are ignored.
* * @example * ```typescript * client.realtime.record({ * session_config: (resolved) => ({ * ...resolved.stt_defaults, * enable_endpoint_detection: true, * }), * }); * ``` */ session_config?: ((resolved: ResolvedConnectionConfig) => SttSessionConfig) | undefined; }; /** * High-level recording orchestrator * * Manages the lifecycle of audio capture and real-time transcription: * 1. Starts audio source immediately (buffers chunks) * 2. Resolves connection config (API key + URLs, sync or async) * 3. Connects to the Soniox WebSocket API * 4. Drains buffered audio, then pipes live audio to the session * * @example * ```typescript * const recording = client.realtime.record({ model: 'stt-rt-v5' }); * recording.on('result', (r) => console.log(r.tokens)); * recording.on('error', (e) => console.error(e)); * * // Later: * await recording.stop(); * ``` */ /** @internal */ type SttConfigInput = SttSessionConfig | ((resolved: ResolvedConnectionConfig) => SttSessionConfig); declare class Recording { private readonly emitter; private readonly configResolver; private readonly sttConfigInput; private readonly sessionOptions; private readonly source; private readonly maxBufferSize; private readonly signal; private readonly autoReconnect; private readonly maxReconnectAttempts; private readonly reconnectBaseDelay; private readonly resetTranscriptOnReconnect; private session; private audioBuffer; private _state; private isBuffering; private _isSourceMuted; private _reconnectAttempt; private stopResolver; private stopRejecter; /** @internal */ constructor(configResolver: (context?: ConfigContext) => Promise, sttConfigInput: SttConfigInput, source: AudioSource, options?: { buffer_queue_size?: number; session_options?: SttSessionOptions; signal?: AbortSignal; auto_reconnect?: boolean; max_reconnect_attempts?: number; reconnect_base_delay_ms?: number; reset_transcript_on_reconnect?: boolean; }); /** * Current recording state */ get state(): RecordingState; /** * Register an event handler */ 
on(event: E, handler: RecordingEvents[E]): this; /** * Register a one-time event handler */ once(event: E, handler: RecordingEvents[E]): this; /** * Remove an event handler */ off(event: E, handler: RecordingEvents[E]): this; /** * Gracefully stop recording * * Stops the audio source and waits for the server to process all * buffered audio and return final results. * * @returns Promise that resolves when the server acknowledges completion */ stop(): Promise; /** * Immediately cancel recording without waiting for final results */ cancel(): void; /** * Request the server to finalize current non-final tokens. */ finalize(options?: { trailing_silence_ms?: number; }): void; /** * Pause recording. * * Pauses the audio source (stops microphone capture) and pauses the * session (activates automatic keepalive to prevent server disconnect). */ pause(): void; /** * Resume recording after pause. * * Resumes the audio source and session. Audio capture and transmission * continue from where they left off. If audio was buffered during a * reconnect while paused, the buffer is drained now. */ resume(): void; /** * @internal Debug-only: simulate an unexpected network disconnection. * Tears down the current session and feeds a retriable error into the * error handler so the reconnection logic kicks in exactly as it would * during a real connection drop. */ __debugForceDisconnect(): void; /** * Force a reconnection — tears down the current session and audio * encoder, then establishes a new session via the standard reconnect * flow (backoff, config re-resolution, buffer drain). * * Use this to recover from stale connections after platform lifecycle * events such as laptop sleep/wake (web `visibilitychange`) or app * backgrounding (React Native `AppState`). * * Requires `auto_reconnect` to be enabled. No-op when the recording * is not in `recording` or `paused` state. 
*/ reconnect(): void; private run; private handleAudioData; private handleSourceMuted; private handleSourceUnmuted; private isMuteTrackingState; private wireSessionEvents; private handleAbort; private handleError; private shouldReconnect; private attemptReconnect; /** * Check whether an in-flight reconnect should be aborted. * Handles both terminal states and a pending stop() request. */ private shouldAbortReconnect; private cleanup; private setState; private isTerminalState; private settleStop; } //#endregion //#region src/client.d.ts /** * Options for creating a SonioxClient instance. */ type SonioxClientOptions = { /** * Connection configuration — sync object or async function. * * When provided as a function, it is called once per recording session, * allowing you to fetch a fresh temporary API key and connection settings * from your backend at runtime. * * @example * ```typescript * // Sync config with region * const client = new SonioxClient({ * config: { api_key: tempKey, region: 'eu' }, * }); * * // Async config (recommended for production) * const client = new SonioxClient({ * config: async () => { * const res = await fetch('/api/soniox-config', { method: 'POST' }); * return await res.json(); // { api_key, region, ... } * }, * }); * ``` */ config?: SonioxConnectionConfig | ((context?: ConfigContext) => Promise) | undefined; /** * API key configuration. * * - `string` - A pre-fetched temporary API key (e.g., injected from SSR) * - `() => Promise` - Async function that fetches a fresh key from your backend * * @deprecated Use `config` instead. */ api_key?: ApiKeyConfig | undefined; /** * WebSocket URL for real-time connections. * @default 'wss://stt-rt.soniox.com/transcribe-websocket' * @deprecated Use `config.stt_ws_url` or `config.region` instead. */ ws_base_url?: string | undefined; /** * Optional permission resolver for pre-flight microphone permission checks. * Not set by default (SSR-safe, RN-safe). 
* * @example * ```typescript * import { BrowserPermissionResolver } from '@soniox/client'; * const client = new SonioxClient({ * config: { api_key: tempKey }, * permissions: new BrowserPermissionResolver(), * }); * ``` */ permissions?: PermissionResolver | undefined; /** * Default maximum number of audio chunks to buffer while waiting for key/connection. * Can be overridden per-recording. * @default 1000 */ buffer_queue_size?: number | undefined; /** * Default session options applied to all sessions. * Can be overridden per-recording. */ default_session_options?: SttSessionOptions | undefined; }; /** * Options for creating a low-level STT session. */ type SttOptions = { /** * Resolved API key string (temporary key). */ api_key: string; /** * Session options (signal, etc.). */ session_options?: SttSessionOptions | undefined; }; /** * Main entry point for the Soniox client SDK. * * @example * ```typescript * // Recommended: async config with region * const client = new SonioxClient({ * config: async () => { * const res = await fetch('/api/soniox-config', { method: 'POST' }); * return await res.json(); // { api_key, region } * }, * }); * * // High-level: record from microphone * const recording = client.realtime.record({ model: 'stt-rt-v5' }); * recording.on('result', (r) => console.log(r.tokens)); * await recording.stop(); * * // Low-level: direct session access * const session = client.realtime.stt({ model: 'stt-rt-v5' }, { api_key: key }); * await session.connect(); * ``` */ declare class SonioxClient { /** @internal */ readonly _configResolver: (context?: ConfigContext) => Promise; /** * STT WebSocket URL resolved at construction time from the client's `config`, * when that config is a plain object (not an async function). Used by the * synchronous low-level `client.realtime.stt()` factory so it honors the * configured region. Remains `undefined` for async-config clients — those * must supply `ws_base_url` or use `client.realtime.record()`. 
 *
   * @internal
   */
  private readonly preResolvedSttWsUrl;
  private readonly permissionResolver;
  private readonly defaultBufferQueueSize;
  private readonly defaultSessionOptions;
  private readonly wsBaseUrl;
  /**
   * Real-time API namespace: `record` (high-level recording), `stt`
   * (low-level STT session) and `tts` (real-time text-to-speech factory).
   */
  readonly realtime: {
    /**
     * Start a high-level recording session.
     *
     * Returns synchronously so callers can attach event listeners before
     * any async work (key fetch, mic access, connection) begins.
     *
     * @param options - Session config + recording options
     * @returns Recording instance
     */
    record: (options: RecordOptions) => Recording;
    /**
     * Create a low-level STT session.
     *
     * The WebSocket URL is derived from the client's `config` (respecting
     * `region` / `base_domain` / `stt_ws_url`) when `config` is a plain
     * object, or from `ws_base_url` on the legacy path. If `config` was
     * passed as an async function, call `client.realtime.record()` instead,
     * or pass `ws_base_url` explicitly to `SonioxClient`.
     *
     * @param config - Session configuration (sent to server)
     * @param options - API key and session options
     * @returns RealtimeSttSession instance
     * @throws {@link SonioxError} if the WebSocket URL cannot be resolved
     * synchronously (async-config client without `ws_base_url`).
     */
    stt: (config: SttSessionConfig, options: SttOptions) => RealtimeSttSession;
    /**
     * TTS factory — callable for single-stream, `.multiStream()` for multi-stream.
     *
     * Uses the client's config resolver to obtain credentials and TTS WebSocket URL.
* * @example Single stream * ```typescript * const stream = await client.realtime.tts({ * model: 'tts-rt-v1', * voice: 'Adrian', * language: 'en', * audio_format: 'wav', * }); * stream.sendText("Hello"); * stream.finish(); * for await (const chunk of stream) { process(chunk); } * ``` * * @example Multi-stream * ```typescript * const conn = await client.realtime.tts.multiStream(); * const s1 = await conn.stream({ * model: 'tts-rt-v1', * voice: 'Adrian', * language: 'en', * audio_format: 'wav', * }); * ``` */ tts: ClientTtsFactory; }; /** * REST TTS API namespace. * * @example * ```typescript * const audio = await client.tts.generate({ * text: 'Hello', * voice: 'Adrian', * language: 'en', * }); * ``` */ readonly tts: { /** * Generate speech audio from text. Returns the full audio as a `Uint8Array`. */ generate(options: GenerateSpeechOptions): Promise; /** * Generate speech audio as a streaming async iterable. * Yields `Uint8Array` chunks as they arrive. */ generateStream(options: GenerateSpeechOptions): AsyncIterable; }; constructor(options: SonioxClientOptions); /** * Permission resolver, if configured. * Returns `undefined` if no resolver was provided (SSR-safe). * * @example * ```typescript * const mic = await client.permissions?.check('microphone'); * if (mic?.status === 'denied') { * showSettingsMessage(); * } * ``` */ get permissions(): PermissionResolver | undefined; private createRecording; private createSession; private createSingleTtsStream; private createTtsConnection; private ttsRestGenerate; private ttsRestGenerateStream; } /** * Callable TTS factory with `.multiStream()` for multi-stream connections. */ interface ClientTtsFactory { (input?: TtsStreamInput): Promise; multiStream(): Promise; } //#endregion //#region src/audio/microphone.d.ts /** * Options for MicrophoneSource */ type MicrophoneSourceOptions = { /** * MediaTrackConstraints for the audio track. 
* @default { echoCancellation: false, noiseSuppression: false, autoGainControl: false, channelCount: 1, sampleRate: 16000 } */ constraints?: MediaTrackConstraints | undefined; /** * MediaRecorder options. * @see https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder/MediaRecorder */ recorderOptions?: MediaRecorderOptions | undefined; /** * Time interval in milliseconds between audio data chunks. * @default 60 */ timesliceMs?: number | undefined; }; /** * Browser microphone audio source * * Uses `navigator.mediaDevices.getUserMedia` to capture audio from the microphone * and `MediaRecorder` to encode it into chunks. * * @example * ```typescript * const source = new MicrophoneSource(); * await source.start({ * onData: (chunk) => session.sendAudio(chunk), * onError: (err) => console.error(err), * }); * // Later: * source.stop(); * ``` */ declare class MicrophoneSource implements AudioSource { private readonly constraints; private readonly recorderOptions; private readonly timesliceMs; private mediaRecorder; private stream; private boundOnData; private boundOnError; private boundOnMute; private boundOnUnmute; private startGeneration; constructor(options?: MicrophoneSourceOptions); /** * Request microphone access and start recording * * @throws AudioUnavailableError if getUserMedia or MediaRecorder is not supported * @throws AudioPermissionError if microphone access is denied * @throws AudioDeviceError if no microphone is found */ start(handlers: AudioSourceHandlers): Promise; /** * Stop recording and release all resources */ stop(): void; /** * Pause audio capture */ pause(): void; /** * Resume audio capture */ resume(): void; /** * Reinitialize the MediaRecorder on the existing stream so the next * chunks contain a fresh container header (required after reconnecting * to a new server session). 
*/ restart(): void; } //#endregion //#region src/audio/errors.d.ts /** * Error codes for audio-related errors */ type AudioErrorCode = 'permission_denied' | 'device_not_found' | 'audio_unavailable'; /** * Thrown when microphone access is denied by the user or blocked by the browser. * * Maps to `getUserMedia` `NotAllowedError` DOMException. */ declare class AudioPermissionError extends SonioxError { constructor(message?: string, cause?: unknown); } /** * Thrown when no audio input device is found * * Maps to `getUserMedia` `NotFoundError` DOMException. */ declare class AudioDeviceError extends SonioxError { constructor(message?: string, cause?: unknown); } /** * Thrown when audio capture is not supported in the current environment * * For example, when `getUserMedia` or `MediaRecorder` is not available. */ declare class AudioUnavailableError extends SonioxError { constructor(message?: string, cause?: unknown); } //#endregion //#region src/permissions/browser.d.ts /** * Browser permission resolver for checking and requesting microphone access. * * @example * ```typescript * const resolver = new BrowserPermissionResolver(); * const mic = await resolver.check('microphone'); * if (mic.status === 'prompt') { * const result = await resolver.request('microphone'); * if (result.status === 'denied') { * showDeniedMessage(); * } * } * ``` */ declare class BrowserPermissionResolver implements PermissionResolver { /** * Check current microphone permission status without prompting the user. */ check(permission: PermissionType): Promise; /** * Request microphone permission from the user. * This may show a browser permission prompt. 
*/ request(permission: PermissionType): Promise; private checkMicrophone; private requestMicrophone; } //#endregion export { AbortError, type ApiKeyConfig, type AudioData, AudioDeviceError, type AudioErrorCode, type AudioFormat, AudioPermissionError, type AudioSource, type AudioSourceHandlers, AudioUnavailableError, AuthError, BadRequestError, BrowserPermissionResolver, type ClientTtsFactory, type ConfigContext, ConnectionError, type ContextGeneralEntry, type ContextTranslationTerm, type GenerateSpeechOptions, type HttpErrorCode, type HttpErrorDetails, type HttpMethod, MicrophoneSource, type MicrophoneSourceOptions, NetworkError, type OneWayTranslationConfig, type PermissionResolver, type PermissionResult, type PermissionStatus, type PermissionType, QuotaError, RealtimeError, type RealtimeErrorCode, type RealtimeEvent, type RealtimeResult, type RealtimeSegment, RealtimeSegmentBuffer, type RealtimeSegmentBufferOptions, type RealtimeSegmentOptions, RealtimeSttSession, type RealtimeToken, RealtimeTtsConnection, RealtimeTtsStream, type RealtimeUtterance, RealtimeUtteranceBuffer, type RealtimeUtteranceBufferOptions, type ReconnectOptions, type ReconnectingEvent, type RecordOptions, Recording, type RecordingEvents, type RecordingState, type ResolvedConnectionConfig, type SegmentGroupKey, type SendStreamOptions, SonioxClient, type SonioxClientOptions, type SonioxConnectionConfig, SonioxError, type SonioxErrorCode, SonioxHttpError, type SonioxRegion, type StateChangeReason, StateError, type SttOptions, type SttSessionConfig, type SttSessionEvents, type SttSessionOptions, type SttSessionState, type TranscriptionContext, type TranslationConfig, type TtsAudioFormat, type TtsConnectionEvents, type TtsConnectionOptions, type TtsEvent, type TtsLanguage, type TtsModel, type TtsStreamConfig, type TtsStreamEvents, type TtsStreamInput, type TtsStreamState, type TtsVoice, type TtsVoiceGender, type TwoWayTranslationConfig, TypedEmitter, isAbortError, isNotFoundError, isRetriableError, 
isSonioxError, isSonioxHttpError, resolveApiKey, resolveConnectionConfig, segmentRealtimeTokens }; //# sourceMappingURL=index.d.cts.map