import WebSocket from "ws";
import type { ChildProcess } from "node:child_process";
import type { AudioFormat, CommitStrategy } from "./scribe";

/**
 * Wire-format audio chunk message (`message_type: "input_audio_chunk"`).
 *
 * NOTE(review): the fields mirror the options accepted by
 * `RealtimeConnection.send` (`audioBase64`, `commit`, `sampleRate`,
 * `previousText`), so this is presumably the payload `send()` serializes —
 * confirm against the implementation.
 */
export interface InputAudioChunk {
  message_type: "input_audio_chunk";
  audio_base_64: string;
  commit: boolean;
  sample_rate: number;
  previous_text?: string;
}

/** Allowed values for {@link WordsItem.type}. */
export type WordsItemType = "word" | "spacing";

/**
 * One entry of `words` in a {@link CommittedTranscriptWithTimestampsMessage}.
 * Every field is optional.
 */
export interface WordsItem {
  text?: string;
  // NOTE(review): units of `start`/`end` are not visible here — presumably seconds; confirm.
  start?: number;
  end?: number;
  type?: WordsItemType;
  speaker_id?: string;
  logprob?: number;
  characters?: string[];
}

/**
 * Session configuration as carried by {@link SessionStartedMessage.config}.
 * Every field is optional.
 */
export interface Config {
  sample_rate?: number;
  audio_format?: AudioFormat;
  language_code?: string;
  vad_commit_strategy?: CommitStrategy;
  vad_silence_threshold_secs?: number;
  vad_threshold?: number;
  min_speech_duration_ms?: number;
  min_silence_duration_ms?: number;
  model_id?: string;
  disable_logging?: boolean;
  include_timestamps?: boolean;
}

/** Payload of the `session_started` event: the session id and its configuration. */
export interface SessionStartedMessage {
  message_type: "session_started";
  session_id: string;
  config: Config;
}

/** Payload of the `partial_transcript` event; the transcript is in `text`. */
export interface PartialTranscriptMessage {
  message_type: "partial_transcript";
  text: string;
}

/** Payload of the `committed_transcript` event; the transcript is in `text`. */
export interface CommittedTranscriptMessage {
  message_type: "committed_transcript";
  text: string;
}

/**
 * Payload of the `committed_transcript_with_timestamps` event: the transcript
 * text plus an optional language code and optional per-item `words` list.
 */
export interface CommittedTranscriptWithTimestampsMessage {
  message_type: "committed_transcript_with_timestamps";
  text: string;
  language_code?: string;
  words?: WordsItem[];
}

/** Server error message tagged `"error"`. */
export interface ErrorMessage {
  message_type: "error";
  error: string;
}

/** Server error message tagged `"auth_error"`. */
export interface AuthErrorMessage {
  message_type: "auth_error";
  error: string;
}

/** Server error message tagged `"quota_exceeded"`. */
export interface QuotaExceededErrorMessage {
  message_type: "quota_exceeded";
  error: string;
}

/** Server error message tagged `"commit_throttled"`. */
export interface CommitThrottledErrorMessage {
  message_type: "commit_throttled";
  error: string;
}

/** Server error message tagged `"transcriber_error"`. */
export interface TranscriberErrorMessage {
  message_type: "transcriber_error";
  error: string;
}

/** Server error message tagged `"unaccepted_terms_error"`. */
export interface UnacceptedTermsErrorMessage {
  message_type: "unaccepted_terms_error";
  error: string;
}

/** Server error message tagged `"rate_limited"`. */
export interface RateLimitedErrorMessage {
  message_type: "rate_limited";
  error: string;
}

/** Server error message tagged `"input_error"`. */
export interface InputErrorMessage {
  message_type: "input_error";
  error: string;
}

/** Server error message tagged `"queue_overflow"`. */
export interface QueueOverflowErrorMessage {
  message_type: "queue_overflow";
  error: string;
}

/** Server error message tagged `"resource_exhausted"`. */
export interface ResourceExhaustedErrorMessage {
  message_type: "resource_exhausted";
  error: string;
}

/** Server error message tagged `"session_time_limit_exceeded"`. */
export interface SessionTimeLimitExceededErrorMessage {
  message_type: "session_time_limit_exceeded";
  error: string;
}

/** Server error message tagged `"chunk_size_exceeded"`. */
export interface ChunkSizeExceededErrorMessage {
  message_type: "chunk_size_exceeded";
  error: string;
}

/** Server error message tagged `"insufficient_audio_activity"`. */
export interface InsufficientAudioActivityErrorMessage {
  message_type: "insufficient_audio_activity";
  error: string;
}

/**
 * Union type for all server error messages.
 * Discriminated by `message_type`; every member carries an `error` string.
 */
export type ServerErrorMessage =
  | ErrorMessage
  | AuthErrorMessage
  | QuotaExceededErrorMessage
  | CommitThrottledErrorMessage
  | TranscriberErrorMessage
  | UnacceptedTermsErrorMessage
  | RateLimitedErrorMessage
  | InputErrorMessage
  | QueueOverflowErrorMessage
  | ResourceExhaustedErrorMessage
  | SessionTimeLimitExceededErrorMessage
  | ChunkSizeExceededErrorMessage
  | InsufficientAudioActivityErrorMessage;

/**
 * Union of every message shape declared for the server side of the socket:
 * session start, transcripts, and all server errors.
 */
export type WebSocketMessage =
  | SessionStartedMessage
  | PartialTranscriptMessage
  | CommittedTranscriptMessage
  | CommittedTranscriptWithTimestampsMessage
  | ServerErrorMessage;

/**
 * Union type for all possible error payloads emitted by the ERROR event.
 * Includes server error messages and native WebSocket errors.
 */
export type RealtimeErrorPayload = ServerErrorMessage | Error;

/**
 * Events emitted by the RealtimeConnection.
 */
export declare enum RealtimeEvents {
  /** Emitted when the session is successfully started */
  SESSION_STARTED = "session_started",
  /** Emitted when a partial (interim) transcript is available */
  PARTIAL_TRANSCRIPT = "partial_transcript",
  /** Emitted when a committed transcript is available */
  COMMITTED_TRANSCRIPT = "committed_transcript",
  /** Emitted when a committed transcript with timestamps is available */
  COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS = "committed_transcript_with_timestamps",
  /** Emitted when an error occurs - can be any error message from the server or a native WebSocket error */
  ERROR = "error",
  /** Emitted when an auth error occurs */
  AUTH_ERROR = "auth_error",
  /** Emitted when a quota exceeded error occurs */
  QUOTA_EXCEEDED = "quota_exceeded",
  /** Emitted when the WebSocket connection is opened */
  OPEN = "open",
  /** Emitted when the WebSocket connection is closed */
  CLOSE = "close",
  /** Emitted when a commit throttled error occurs */
  COMMIT_THROTTLED = "commit_throttled",
  /** Emitted when a transcriber error occurs */
  TRANSCRIBER_ERROR = "transcriber_error",
  /** Emitted when an unaccepted terms error occurs */
  UNACCEPTED_TERMS_ERROR = "unaccepted_terms_error",
  /** Emitted when a rate limited error occurs */
  RATE_LIMITED = "rate_limited",
  /** Emitted when an input error occurs */
  INPUT_ERROR = "input_error",
  /** Emitted when a queue overflow error occurs */
  QUEUE_OVERFLOW = "queue_overflow",
  /** Emitted when a resource exhausted error occurs */
  RESOURCE_EXHAUSTED = "resource_exhausted",
  /** Emitted when a session time limit exceeded error occurs */
  SESSION_TIME_LIMIT_EXCEEDED = "session_time_limit_exceeded",
  /** Emitted when a chunk size exceeded error occurs */
  CHUNK_SIZE_EXCEEDED = "chunk_size_exceeded",
  /** Emitted when an insufficient audio activity error occurs */
  INSUFFICIENT_AUDIO_ACTIVITY = "insufficient_audio_activity"
}

/**
 * Type-safe event map for RealtimeConnection events.
 * Maps each {@link RealtimeEvents} member to the payload type its listeners
 * receive; `OPEN` and `CLOSE` carry no payload (`undefined`).
 */
export interface RealtimeEventMap {
  [RealtimeEvents.SESSION_STARTED]: SessionStartedMessage;
  [RealtimeEvents.PARTIAL_TRANSCRIPT]: PartialTranscriptMessage;
  [RealtimeEvents.COMMITTED_TRANSCRIPT]: CommittedTranscriptMessage;
  [RealtimeEvents.COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS]: CommittedTranscriptWithTimestampsMessage;
  [RealtimeEvents.ERROR]: RealtimeErrorPayload;
  [RealtimeEvents.AUTH_ERROR]: AuthErrorMessage;
  [RealtimeEvents.QUOTA_EXCEEDED]: QuotaExceededErrorMessage;
  [RealtimeEvents.OPEN]: undefined;
  [RealtimeEvents.CLOSE]: undefined;
  [RealtimeEvents.COMMIT_THROTTLED]: CommitThrottledErrorMessage;
  [RealtimeEvents.TRANSCRIBER_ERROR]: TranscriberErrorMessage;
  [RealtimeEvents.UNACCEPTED_TERMS_ERROR]: UnacceptedTermsErrorMessage;
  [RealtimeEvents.RATE_LIMITED]: RateLimitedErrorMessage;
  [RealtimeEvents.INPUT_ERROR]: InputErrorMessage;
  [RealtimeEvents.QUEUE_OVERFLOW]: QueueOverflowErrorMessage;
  [RealtimeEvents.RESOURCE_EXHAUSTED]: ResourceExhaustedErrorMessage;
  [RealtimeEvents.SESSION_TIME_LIMIT_EXCEEDED]: SessionTimeLimitExceededErrorMessage;
  [RealtimeEvents.CHUNK_SIZE_EXCEEDED]: ChunkSizeExceededErrorMessage;
  [RealtimeEvents.INSUFFICIENT_AUDIO_ACTIVITY]: InsufficientAudioActivityErrorMessage;
}

/**
 * Manages a real-time transcription WebSocket connection.
 *
 * @remarks
 * **Node.js only**: This class uses a Node.js-specific WebSocket implementation.
 *
 * @example
 * ```typescript
 * const connection = await client.speechToText.realtime.connect({
 *   modelId: "scribe_v2_realtime",
 *   audioFormat: AudioFormat.PCM_16000,
 *   sampleRate: 16000,
 * });
 *
 * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
 *   console.log("Session started");
 * });
 *
 * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
 *   console.log("Partial:", data.text);
 * });
 *
 * connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
 *   console.log("Final:", data.text);
 *   connection.close();
 * });
 *
 * // Send audio data
 * connection.send({ audioBase64: base64String });
 *
 * // Commit and close
 * connection.commit();
 * ```
 */
export declare class RealtimeConnection {
  private websocket;
  private eventEmitter;
  private ffmpegProcess;
  private currentSampleRate;

  constructor(sampleRate: number);

  /**
   * @internal
   * Used internally by ScribeRealtime to attach the WebSocket after connection is created.
   */
  setWebSocket(websocket: WebSocket): void;

  /**
   * @internal
   * Used internally by ScribeRealtime to attach ffmpeg process for cleanup.
   */
  setFfmpegProcess(ffmpegProcess: ChildProcess): void;

  /**
   * Attaches an event listener for the specified event.
   *
   * @typeParam E - The event being subscribed to; it selects the listener's
   * payload type through {@link RealtimeEventMap}.
   * @param event - The event to listen for (use RealtimeEvents enum)
   * @param listener - The callback function to execute when the event fires
   *
   * @example
   * ```typescript
   * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
   *   console.log("Session started", data);
   * });
   *
   * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
   *   console.log("Partial:", data.text);
   * });
   *
   * connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
   *   console.log("Final:", data.text);
   * });
   *
   * connection.on(RealtimeEvents.ERROR, (error) => {
   *   // error can be any error message type or native Error
   *   if ('message_type' in error) {
   *     console.error("Server error:", error.message_type, error.error);
   *   } else {
   *     console.error("WebSocket error:", error.message);
   *   }
   * });
   * ```
   */
  on<E extends RealtimeEvents>(
    event: E,
    listener: (data: RealtimeEventMap[E]) => void,
  ): void;

  /**
   * Removes an event listener for the specified event.
   *
   * @typeParam E - The event being unsubscribed from; it selects the listener's
   * payload type through {@link RealtimeEventMap}.
   * @param event - The event to stop listening for
   * @param listener - The callback function to remove
   *
   * @example
   * ```typescript
   * const handler = (data: PartialTranscriptMessage) => console.log(data);
   * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
   *
   * // Later, remove the listener
   * connection.off(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
   * ```
   */
  off<E extends RealtimeEvents>(
    event: E,
    listener: (data: RealtimeEventMap[E]) => void,
  ): void;

  /**
   * Sends audio data to the transcription service.
   *
   * @param data - Audio data configuration
   * @param data.audioBase64 - Base64-encoded audio data
   * @param data.commit - Whether to commit the transcription after this chunk. You likely want to use connection.commit() instead (default: false)
   * @param data.sampleRate - Sample rate of the audio (default: configured sample rate)
   * @param data.previousText - Send text context to the model. Can only be sent alongside the first audio chunk. If sent in a subsequent chunk, an error will be returned.
   * @throws {Error} If the WebSocket connection is not open
   *
   * @example
   * ```typescript
   * // Send audio chunk without committing
   * connection.send({
   *   audioBase64: base64EncodedAudio,
   * });
   *
   * // Send audio chunk with custom sample rate
   * connection.send({
   *   audioBase64: base64EncodedAudio,
   *   sampleRate: 16000,
   * });
   * ```
   */
  send(data: {
    audioBase64: string;
    commit?: boolean;
    sampleRate?: number;
    previousText?: string;
  }): void;

  /**
   * Commits the segment, triggering a COMMITTED_TRANSCRIPT event and clearing the buffer.
   * It's recommended to commit often when using CommitStrategy.MANUAL to keep latency low.
   *
   * @throws {Error} If the WebSocket connection is not open
   *
   * @remarks
   * Only needed when using CommitStrategy.MANUAL.
   * When using CommitStrategy.VAD, commits are handled automatically by the server.
   *
   * @example
   * ```typescript
   * // Send all audio chunks
   * for (const chunk of audioChunks) {
   *   connection.send({ audioBase64: chunk });
   * }
   *
   * // Finalize the transcription
   * connection.commit();
   * ```
   */
  commit(): void;

  /**
   * Closes the WebSocket connection and cleans up resources.
   * This will terminate any ongoing transcription and stop ffmpeg processes if running.
   *
   * @remarks
   * After calling close(), this connection cannot be reused.
   * Create a new connection if you need to start transcribing again.
   *
   * @example
   * ```typescript
   * connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
   *   console.log("Final:", data.text);
   *   connection.close();
   * });
   * ```
   */
  close(): void;

  private cleanup;
}