import { TranscriptionProvider, TranscriptResult, TranscriptUpdate, TranscriptToken } from '@saraudio/core';
import { Logger } from '@saraudio/utils';
import { S as SonioxOptions } from './schema-CCBKk_0d.mjs';
export { d as SONIOX_ASYNC_MODELS, e as SONIOX_MODEL_DEFINITIONS, f as SONIOX_REALTIME_MODELS, a as SonioxAsyncModelId, b as SonioxModelId, c as SonioxRealtimeModelId } from './schema-CCBKk_0d.mjs';
import '@saraudio/core/json';
import 'zod';

/** Provider type returned by {@link soniox}; an alias of the core `TranscriptionProvider`. */
type SonioxProvider = TranscriptionProvider;

/**
 * Resolved Soniox configuration consumed by the HTTP helper functions declared below.
 * NOTE(review): field semantics are not visible from this declaration file; names suggest
 * `raw` is the caller-supplied options and the remaining fields are derived values — confirm
 * against the implementation.
 */
interface SonioxResolvedConfig {
  raw: SonioxOptions;
  wsUrl: string;
  httpBase: string;
  sampleRate: number;
  channels: 1 | 2;
  audioFormat: string;
  queueBudgetMs: number;
  wsKeepaliveMs: number;
}

/**
 * Soniox HTTP API models (as of Nov 2025).
 * Covers: Temporary API Keys, Files Upload, Transcriptions (create/get), Transcript retrieval.
 * Sources:
 * - Auth (Temporary Keys): https://soniox.com/docs/stt/api-reference/auth/create_temporary_api_key
 * - Files (Upload): https://soniox.com/docs/stt/api-reference/files/upload_file
 * - Transcriptions: https://soniox.com/docs/stt/api-reference/transcriptions
 */

/**
 * Common error envelope used by Soniox REST endpoints.
 */
interface SonioxHttpErrorEnvelope {
  /** HTTP status code of the error. */
  status_code: number;
  /** Machine‑readable error type (e.g., 'unauthenticated', 'invalid_request', 'internal_error'). */
  error_type: string;
  /** Human‑readable message describing the error. */
  message: string;
  /** Optional list of validation error objects. */
  validation_errors?: ReadonlyArray<{
    /** Validation error type or code. */
    error_type?: string;
    /** Where the error occurred (field/path). */
    location?: string;
    /** Human‑readable validation message. */
    message?: string;
  }>;
  /** Request correlation identifier assigned by the server. */
  request_id?: string;
}

/**
 * POST /v1/auth/temporary-api-keys — Create a temporary API key for WebSocket usage.
 */
interface SonioxHttpCreateTempKeyRequest {
  /** Usage type; must be 'transcribe_websocket' for realtime streaming. */
  usage_type: 'transcribe_websocket';
  /** Expiration in seconds (1..3600). */
  expires_in_seconds: number;
  /** Optional client reference for tracing (≤ 256 chars). */
  client_reference_id?: string;
}

/**
 * Response of Create Temporary API Key.
 */
interface SonioxHttpCreateTempKeyResponse {
  /** The newly issued temporary API key string. */
  api_key: string;
  /** Expiration timestamp (RFC 3339, UTC). */
  expires_at: string;
}

/**
 * POST /v1/files — Upload a media file for batch transcription.
 * Multipart form: field 'file' is required; 'client_reference_id' optional.
 */
interface SonioxHttpUploadFileResponse {
  /** Server‑assigned file id (UUID). */
  id: string;
  /** Original or stored filename. */
  filename: string;
  /** File size in bytes. */
  size: number;
  /** Upload creation timestamp (UTC). */
  created_at: string;
  /** Optional echo of client reference id. */
  client_reference_id?: string | null;
}

/**
 * POST /v1/transcriptions — Create a transcription job for a URL or previously uploaded file.
 */

/**
 * Translation configuration accepted by the transcription request:
 * either one‑way (single target language) or two‑way (a language pair).
 */
type SonioxHttpTranslationConfig =
  | {
      type: 'one_way';
      target_language: string;
    }
  | {
      type: 'two_way';
      language_a: string;
      language_b: string;
    };

/** Request body for creating a transcription job. */
interface SonioxHttpCreateTranscriptionRequest {
  /** Async model identifier to use for batch processing. */
  model: string;
  /** HTTPS URL pointing to the audio content; mutually exclusive with file_id. */
  audio_url?: string;
  /** Uploaded file id (UUID); mutually exclusive with audio_url. */
  file_id?: string;
  /** Expected languages (BCP‑47). */
  language_hints?: ReadonlyArray<string>;
  /**
   * If true, restrict recognition to only the languages in `language_hints`.
   * If false/omitted, hints guide recognition but are not strict.
   */
  language_hints_strict?: boolean;
  /** Enable per‑segment speaker diarization. */
  enable_speaker_diarization?: boolean;
  /** Enable automatic language identification. */
  enable_language_identification?: boolean;
  /** Optional translation configuration (one‑way/two‑way). */
  translation?: SonioxHttpTranslationConfig;
  /**
   * Domain/context information to guide recognition and formatting.
   * NOTE(review): the object form is typed as an open string‑keyed record; confirm the
   * exact value type against the schema module.
   */
  context?: Record<string, unknown> | string;
  /** HTTPS callback URL to receive completion/error notifications. */
  webhook_url?: string;
  /** Webhook auth header name to include with callbacks. */
  webhook_auth_header_name?: string;
  /** Webhook auth header value to include with callbacks. */
  webhook_auth_header_value?: string;
  /** Optional client reference id for tracing. */
  client_reference_id?: string;
}

/**
 * Response to Create Transcription (201) and shape for subsequent GET /v1/transcriptions/{id}.
 */
interface SonioxHttpTranscriptionResource {
  /** Transcription job id (UUID). */
  id: string;
  /** Processing status. */
  status: 'queued' | 'processing' | 'completed' | 'error';
  /** Creation timestamp (UTC). */
  created_at: string;
  /** Model identifier used for the job. */
  model: string;
  /** Source audio URL if provided. */
  audio_url: string | null;
  /** Uploaded file id (UUID) if used. */
  file_id: string | null;
  /** Original filename for the uploaded file. */
  filename?: string | null;
  /** Echo of language hints. */
  language_hints: ReadonlyArray<string> | null;
  /**
   * Echo of context object or string.
   * NOTE(review): object form typed as an open string‑keyed record, mirroring the request.
   */
  context: Record<string, unknown> | string | null;
  /** Whether diarization was enabled. */
  enable_speaker_diarization: boolean;
  /** Whether language identification was enabled. */
  enable_language_identification: boolean;
  /** Duration of the audio in milliseconds, once known. */
  audio_duration_ms?: number;
  /** Machine‑readable error type when failed. */
  error_type?: string | null;
  /** Human‑readable error message when failed. */
  error_message?: string | null;
  /** Webhook URL if configured. */
  webhook_url?: string | null;
  /** Webhook auth header name if configured. */
  webhook_auth_header_name?: string | null;
  /** Webhook auth header value; may be masked in responses. */
  webhook_auth_header_value?: string | null;
  /** HTTP status code of the last webhook delivery attempt, if any. */
  webhook_status_code?: number | null;
  /** Client reference id for tracing. */
  client_reference_id?: string | null;
}

/**
 * GET /v1/transcriptions/{id}/transcript — Retrieve the finalized transcript and tokens.
 */
interface SonioxHttpTranscriptResponse {
  /** Transcription id (UUID). */
  id: string;
  /** Full transcript text for the processed audio. */
  text: string;
  /** Token‑level details including timings and confidence. */
  tokens: ReadonlyArray<{
    /** Token text. */
    text: string;
    /** Token start timestamp in milliseconds. */
    start_ms: number;
    /** Token end timestamp in milliseconds. */
    end_ms: number;
    /** Confidence score in [0.0 .. 1.0]. */
    confidence: number;
    /** Optional speaker label/index when diarization enabled. */
    speaker?: number | string;
    /** Optional language of the token (BCP‑47). */
    language?: string;
  }>;
}

/** Upload raw audio bytes as a file to Soniox Files API. Returns file id. */
declare function sonioxUploadFile(
  resolved: SonioxResolvedConfig,
  audio: Blob | ArrayBuffer | Uint8Array,
  opts?: {
    filename?: string;
    headers?: HeadersInit;
  },
  logger?: Logger,
): Promise<string>;

/** Create a transcription job for a given file id or audio URL. */
declare function sonioxCreateTranscription(
  resolved: SonioxResolvedConfig,
  request: SonioxHttpCreateTranscriptionRequest,
  headersInit?: HeadersInit,
): Promise<SonioxHttpTranscriptionResource>;

/** Get transcription job resource by id. */
declare function sonioxGetTranscription(
  resolved: SonioxResolvedConfig,
  id: string,
  headersInit?: HeadersInit,
): Promise<SonioxHttpTranscriptionResource>;

/** Retrieve finalized transcript (text/tokens) for a job id. */
declare function sonioxGetTranscript(
  resolved: SonioxResolvedConfig,
  id: string,
  headersInit?: HeadersInit,
): Promise<SonioxHttpTranscriptResponse>;

/**
 * Convenience: upload → create job → poll until completed → fetch transcript → map to TranscriptResult.
 * Not used by live path; suitable for batch flows.
 *
 * NOTE(review): the audio source is supplied via `audio`, so the request omits the
 * mutually exclusive source fields (`file_id`, `audio_url`); confirm the exact omitted
 * key set against the implementation.
 */
declare function sonioxTranscribeFile(
  resolved: SonioxResolvedConfig,
  audio: Blob | ArrayBuffer | Uint8Array,
  request: Omit<SonioxHttpCreateTranscriptionRequest, 'file_id' | 'audio_url'> & {
    filename?: string;
  },
  headersInit?: HeadersInit,
  logger?: Logger,
): Promise<TranscriptResult>;

/** Create a Soniox transcription provider from the given options. */
declare function soniox(options: SonioxOptions): SonioxProvider;

/**
 * Soniox Real‑Time WebSocket API model (as of Nov 2025).
 * Source: Soniox docs – Real‑time WebSocket API
 * https://soniox.com/docs/stt/api-reference/websocket-api
 */

/**
 * Client → Server: initial JSON configuration sent immediately after opening the WebSocket.
 * After this message, the client streams binary audio frames.
 */

/**
 * Translation configuration accepted in the WebSocket init message:
 * either one‑way (single target language) or two‑way (a language pair).
 */
type SonioxWsTranslationConfig =
  | {
      type: 'one_way';
      target_language: string;
    }
  | {
      type: 'two_way';
      language_a: string;
      language_b: string;
    };

/** Initial configuration message for a real‑time WebSocket stream. */
interface SonioxWsInitConfig {
  /** API key or temporary key used to authorize the stream. */
  api_key: string;
  /** Real‑time model identifier (e.g., "stt-rt-v3" or "stt-rt-preview"). */
  model: string;
  /** Input audio format; e.g., "pcm_s16le" for raw PCM16 little‑endian, or "auto" to auto‑detect. */
  audio_format?: string;
  /** Number of audio channels for raw formats (1 or 2). */
  num_channels?: number;
  /** Sample rate in Hz for raw formats (e.g., 16000). */
  sample_rate?: number;
  /** List of expected languages (BCP‑47) to guide recognition, e.g., ["en","es"]. */
  language_hints?: ReadonlyArray<string>;
  /**
   * If true, restrict recognition to only the languages in `language_hints`.
   * If false/omitted, hints guide recognition but are not strict.
   */
  language_hints_strict?: boolean;
  /** Enable automatic speaker diarization (speaker labels in outputs). */
  enable_speaker_diarization?: boolean;
  /** Enable automatic language identification for the stream. */
  enable_language_identification?: boolean;
  /** Enable server‑side endpoint detection (utterance segmentation). */
  enable_endpoint_detection?: boolean;
  /** Client‑supplied correlation id for tracking across systems (≤ 256 chars). */
  client_reference_id?: string;
  /**
   * Optional context to improve accuracy and formatting; exact structure is provider‑defined.
   * Common subfields include domain hints (key/value), free text, domain terms, translation terms.
   */
  context?: unknown;
  /**
   * Optional translation configuration.
   * One‑way example: { type: 'one_way', target_language: 'en' }
   * Two‑way example: { type: 'two_way', language_a: 'en', language_b: 'es' }
   */
  translation?: SonioxWsTranslationConfig;
}

/**
 * Server → Client: streaming response message containing partial and/or final tokens and progress.
 */
interface SonioxWsStreamResponse {
  /** Sequence of tokens (words/subwords) with metadata for this update. */
  tokens?: ReadonlyArray<SonioxWsToken>;
  /**
   * Duration of audio (ms) processed into final, immutable tokens at the time of this message.
   */
  final_audio_proc_ms?: number;
  /**
   * Duration of audio (ms) processed into final + non‑final tokens at the time of this message.
   */
  total_audio_proc_ms?: number;
}

/**
 * Token metadata included in Soniox WebSocket streaming messages.
 */
interface SonioxWsToken {
  /** Token text (word/subword). */
  text?: string;
  /** Start timestamp of the token in milliseconds (may be omitted for translation‑only tokens). */
  start_ms?: number;
  /** End timestamp of the token in milliseconds (may be omitted for translation‑only tokens). */
  end_ms?: number;
  /** Confidence score in [0.0 .. 1.0]. */
  confidence?: number;
  /** True when the token is finalized and will not change in later updates. */
  is_final?: boolean;
  /** Speaker label/index when diarization is enabled. */
  speaker?: number | string;
  /** Language of the token text (BCP‑47), when available. */
  language?: string;
  /** Source language for translation tokens, when available. */
  source_language?: string;
  /** Translation state marker for translation tokens (provider‑specific values). */
  translation_status?: string;
}

/**
 * Server → Client: terminal message that indicates the stream is finished.
 * The server will typically close the socket after this response.
 */
interface SonioxWsFinishedResponse {
  /** Usually empty at the end of stream; kept for schema consistency. */
  tokens?: ReadonlyArray<SonioxWsToken>;
  /** Final duration (ms) processed into final tokens. */
  final_audio_proc_ms: number;
  /** Total duration (ms) processed (final + non‑final). */
  total_audio_proc_ms: number;
  /** True indicates no further data will be sent. */
  finished: true;
}

/**
 * Server → Client: error message sent before closing the connection.
 */
interface SonioxWsErrorResponse {
  /** Often present (possibly empty) for schema consistency. */
  tokens?: ReadonlyArray<SonioxWsToken>;
  /** HTTP‑like status code (e.g., 400, 401, 402, 408, 429, 500, 503). */
  error_code: number;
  /** Human‑readable description of the error. */
  error_message: string;
}

/** Union of every message shape the WebSocket server may send. */
type SonioxRawMessage = SonioxWsStreamResponse | SonioxWsFinishedResponse | SonioxWsErrorResponse;

/** Soniox‑specific metadata attached to an individual transcript token (camelCase form). */
type SonioxTokenMetadata = {
  language?: string;
  sourceLanguage?: string;
  translationStatus?: string;
};

/** Soniox‑specific metadata attached to a transcript update (camelCase form). */
type SonioxUpdateMetadata = {
  finalAudioProcMs?: number;
  totalAudioProcMs?: number;
  finished?: boolean;
  /**
   * NOTE(review): typed as a string‑keyed map of label strings; the exact key/value
   * types are not visible from this declaration file — confirm against the implementation.
   */
  speakerLabels?: Record<string, string>;
};

/**
 * Type guard that narrows a `TranscriptUpdate` to one whose `providerId` is "soniox",
 * exposing Soniox‑typed `metadata` and `raw` fields.
 */
declare const isSonioxUpdate: (update: TranscriptUpdate) => update is TranscriptUpdate & {
  providerId: "soniox";
  metadata?: SonioxUpdateMetadata;
  raw?: SonioxRawMessage;
};

/** View a token's metadata as {@link SonioxTokenMetadata}; the result may be `null`. */
declare const asSonioxTokenMetadata: (token: TranscriptToken) => SonioxTokenMetadata | null;

/** View an update's metadata as {@link SonioxUpdateMetadata}; the result may be `null`. */
declare const asSonioxUpdateMetadata: (update: TranscriptUpdate) => SonioxUpdateMetadata | null;

export {
  type SonioxHttpCreateTempKeyRequest,
  type SonioxHttpCreateTempKeyResponse,
  type SonioxHttpCreateTranscriptionRequest,
  type SonioxHttpErrorEnvelope,
  type SonioxHttpTranscriptResponse,
  type SonioxHttpTranscriptionResource,
  type SonioxHttpTranslationConfig,
  type SonioxHttpUploadFileResponse,
  SonioxOptions,
  type SonioxProvider,
  type SonioxRawMessage,
  type SonioxTokenMetadata,
  type SonioxUpdateMetadata,
  type SonioxWsErrorResponse,
  type SonioxWsFinishedResponse,
  type SonioxWsInitConfig,
  type SonioxWsStreamResponse,
  type SonioxWsToken,
  type SonioxWsTranslationConfig,
  asSonioxTokenMetadata,
  asSonioxUpdateMetadata,
  isSonioxUpdate,
  soniox,
  sonioxCreateTranscription,
  sonioxGetTranscript,
  sonioxGetTranscription,
  sonioxTranscribeFile,
  sonioxUploadFile,
};