import type * as SarvamAI from "../index.js";

/**
 * Settings that can be supplied for a speech-to-text job.
 *
 * Every property is optional and may also be passed explicitly as `undefined`.
 */
export interface SpeechToTextJobParameters {
    /**
     * BCP-47 code identifying the language of the input audio.
     *
     * **Available Options:**
     * - `unknown` (default): for audio whose language is not known; the API will auto-detect it.
     * - `hi-IN`: Hindi
     * - `bn-IN`: Bengali
     * - `kn-IN`: Kannada
     * - `ml-IN`: Malayalam
     * - `mr-IN`: Marathi
     * - `od-IN`: Odia
     * - `pa-IN`: Punjabi
     * - `ta-IN`: Tamil
     * - `te-IN`: Telugu
     * - `en-IN`: English
     * - `gu-IN`: Gujarati
     *
     * **Additional Options (saaras:v3 only):**
     * - `as-IN`: Assamese
     * - `ur-IN`: Urdu
     * - `ne-IN`: Nepali
     * - `kok-IN`: Konkani
     * - `ks-IN`: Kashmiri
     * - `sd-IN`: Sindhi
     * - `sa-IN`: Sanskrit
     * - `sat-IN`: Santali
     * - `mni-IN`: Manipuri
     * - `brx-IN`: Bodo
     * - `mai-IN`: Maithili
     * - `doi-IN`: Dogri
     */
    language_code?: SarvamAI.SpeechToTextLanguage | undefined;

    /**
     * Speech-to-text model to run.
     *
     * - **saarika:v2.5** (default): Transcribes audio in the spoken language.
     *
     * - **saaras:v3**: State-of-the-art model with flexible output formats.
     *   Supports multiple modes via the `mode` parameter:
     *   transcribe, translate, verbatim, translit, codemix.
     */
    model?: SarvamAI.SpeechToTextModel | undefined;

    /**
     * Output mode. **Only applicable when using the saaras:v3 model.**
     *
     * The outputs below are shown for the example audio: 'मेरा फोन नंबर है 9840950950'
     *
     * - **transcribe** (default): Standard transcription in the original language,
     *   with proper formatting and number normalization.
     *   - Output: `मेरा फोन नंबर है 9840950950`
     *
     * - **translate**: Translates speech from any supported Indic language to English.
     *   - Output: `My phone number is 9840950950`
     *
     * - **verbatim**: Exact word-for-word transcription without normalization,
     *   preserving filler words and spoken numbers as-is.
     *   - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
     *
     * - **translit**: Romanization - transliterates speech to Latin/Roman script only.
     *   - Output: `mera phone number hai 9840950950`
     *
     * - **codemix**: Code-mixed text, with English words in English and Indic words
     *   in native script.
     *   - Output: `मेरा phone number है 9840950950`
     */
    mode?: SarvamAI.Mode | undefined;

    /** Whether the response should include timestamps. */
    with_timestamps?: boolean | undefined;

    /**
     * Turns on speaker diarization, which identifies and separates the different
     * speakers in the audio. In beta mode.
     */
    with_diarization?: boolean | undefined;

    /**
     * Number of speakers to be detected in the audio.
     * Used when `with_diarization` is true.
     */
    num_speakers?: number | undefined;

    /**
     * Audio codec/format of the uploaded files.
     *
     * The API automatically detects most formats. For PCM files
     * (pcm_s16le, pcm_l16, pcm_raw) this parameter must be specified.
     * PCM files are supported only at a 16kHz sample rate.
     */
    input_audio_codec?: SarvamAI.InputAudioCodec | undefined;
}