import type { SpeechToTextClient } from "../../api/resources/speechToText/client/Client";
import { RealtimeConnection } from "./connection";

/**
 * Audio formats selectable through {@link AudioOptions.audioFormat}.
 */
export declare enum AudioFormat {
    PCM_8000 = "pcm_8000",
    PCM_16000 = "pcm_16000",
    PCM_22050 = "pcm_22050",
    PCM_24000 = "pcm_24000",
    PCM_44100 = "pcm_44100",
    PCM_48000 = "pcm_48000",
    ULAW_8000 = "ulaw_8000"
}

/**
 * Strategies for committing transcriptions, selected through
 * `commitStrategy` in the connection options.
 */
export declare enum CommitStrategy {
    MANUAL = "manual",
    VAD = "vad"
}

/**
 * Options shared by both connection modes ({@link AudioOptions} and {@link UrlOptions}).
 * Only `modelId` is required.
 */
interface BaseOptions {
    /**
     * Strategy for committing transcriptions.
     * @default CommitStrategy.MANUAL
     */
    commitStrategy?: CommitStrategy;
    /**
     * Silence threshold in seconds for VAD (Voice Activity Detection).
     * Must be a positive number between 0.3 and 3.0
     */
    vadSilenceThresholdSecs?: number;
    /**
     * Threshold for voice activity detection.
     * Must be between 0.1 and 0.9.
     */
    vadThreshold?: number;
    /**
     * Minimum speech duration in milliseconds.
     * Must be a positive integer between 50 and 2000.
     */
    minSpeechDurationMs?: number;
    /**
     * Minimum silence duration in milliseconds.
     * Must be a positive integer between 50 and 2000.
     */
    minSilenceDurationMs?: number;
    /**
     * Model ID to use for transcription.
     * Must be a valid model ID.
     */
    modelId: string;
    /**
     * An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file.
     * Can sometimes improve transcription performance if known beforehand.
     */
    languageCode?: string;
    /**
     * Whether to receive a committed_transcript_with_timestamps event which includes word-level timestamps.
     * @default false
     */
    includeTimestamps?: boolean;
}

/**
 * Options for manual audio streaming, where the caller states the audio
 * format and sample rate up front.
 *
 * `url` is typed as `never` so that an options object cannot satisfy both
 * this interface and {@link UrlOptions} at once.
 */
export interface AudioOptions extends BaseOptions {
    /** Format of the audio that will be streamed. */
    audioFormat: AudioFormat;
    /** Sample rate of the audio that will be streamed. */
    sampleRate: number;
    url?: never;
}

/**
 * Options for streaming audio from a URL.
 * @remarks
 * **Node.js only**: Requires ffmpeg to be installed and available in PATH.
 * This will not work in browsers, Deno, or Cloudflare Workers.
 */
export interface UrlOptions extends BaseOptions {
    /** URL of the audio stream to transcribe. */
    url: string;
    audioFormat?: never;
    sampleRate?: never;
}

/**
 * Real-time speech-to-text transcription client.
 * @remarks
 * **Node.js only**: This class uses Node.js-specific APIs (WebSocket from 'ws', child_process).
 * It will not work in browsers, Deno, or Cloudflare Workers without modifications.
 */
export declare class ScribeRealtime {
    // Private implementation members. Their types and signatures are not
    // part of this declaration file; only their names are reserved here.
    private options;
    constructor(options?: SpeechToTextClient.Options);
    private getWebSocketUri;
    private checkFfmpegInstalled;
    private buildWebSocketUri;
    /**
     * Establishes a WebSocket connection for real-time speech-to-text transcription.
     *
     * @param options - Configuration options for the connection
     * @returns A promise that resolves to a RealtimeConnection instance
     *
     * @remarks
     * **Node.js only**: This method uses Node.js-specific APIs.
     *
     * When using `UrlOptions` with a URL, ffmpeg must be installed and available in PATH.
     * The SDK will automatically convert the stream to 16kHz mono PCM format.
     *
     * @example
     * ```typescript
     * // Manual audio streaming
     * const connection = await client.speechToText.realtime.connect({
     *     modelId: "scribe_v2_realtime",
     *     audioFormat: AudioFormat.PCM_16000,
     *     sampleRate: 16000,
     * });
     *
     * // Automatic URL streaming (requires ffmpeg)
     * const connection = await client.speechToText.realtime.connect({
     *     modelId: "scribe_v2_realtime",
     *     url: "https://example.com/stream.mp3",
     * });
     * ```
     */
    connect(options: AudioOptions | UrlOptions): Promise<RealtimeConnection>;
    private streamFromUrl;
}
export {};