import type * as core from "../../../../../core/index.js";
import type * as SarvamAI from "../../../../index.js";

/**
 * Request shape for speech-to-text translation.
 *
 * Only `file` is required; every other field is optional.
 *
 * @example
 *     {
 *         file: fs.createReadStream("/path/to/your/file")
 *     }
 */
export interface SpeechToTextTranslationRequest {
    /**
     * The audio file to transcribe.
     *
     * Supported formats include WAV, MP3, AAC, AIFF, OGG, OPUS, FLAC, MP4/M4A,
     * AMR, WMA, WebM, and PCM formats. The API automatically detects most codec
     * formats; for PCM files (pcm_s16le, pcm_l16, pcm_raw) you must specify the
     * `input_audio_codec` parameter. PCM files are supported only at a 16kHz
     * sample rate.
     *
     * Works best at 16kHz. Multiple channels will be merged.
     */
    file: core.file.Uploadable;

    /**
     * Conversation context, passed as a prompt to boost model accuracy.
     *
     * The current system is at an experimentation stage and doesn't match the
     * prompt performance of large language models.
     */
    prompt?: string;

    /**
     * Model to be used for speech to text translation.
     *
     * - **saaras:v2.5** (default): Translation model that translates audio from
     *   any spoken Indic language to English.
     *   - Example: Hindi audio → English text output
     */
    model?: SarvamAI.SpeechToTextTranslateModel;

    /**
     * Audio codec/format of the input file.
     *
     * Codec formats are detected automatically by the API, but for PCM files
     * specifically (pcm_s16le, pcm_l16, pcm_raw) you must pass this parameter.
     * PCM files are supported only at a 16kHz sample rate.
     */
    input_audio_codec?: SarvamAI.InputAudioCodec;
}