import type * as SarvamAI from "../../../../index.js";
/**
 * Request parameters for converting text into streamed speech.
 *
 * Only `text` is required; every other field is optional. Several fields are
 * documented as model-specific:
 * - `pitch` and `loudness` are only supported for bulbul:v2.
 * - `temperature` and `dict_id` are only supported for bulbul:v3.
 * - `pace` accepts a different range per model, and `speaker` must match the
 *   chosen model version.
 *
 * @example
 *     {
 *         text: "x"
 *     }
 */
export interface TextToSpeechStreamRequest {
    /**
     * The text to be converted into streamed speech.
     *
     * **Features:**
     * - Max 3500 characters
     * - Supports code-mixed text (English and Indic languages)
     *
     * **Important Note:**
     * - For numbers larger than 4 digits, use commas (e.g., '10,000' instead of '10000')
     * - This ensures proper pronunciation as a whole number
     */
    text: string;
    /** The language code in BCP-47 format. */
    target_language_code?: SarvamAI.TextToSpeechLanguage;
    /**
     * The speaker voice to be used for the output audio.
     *
     * **Default:** shubh (for bulbul:v3), anushka (for bulbul:v2)
     *
     * **Note:** Speaker selection must match the chosen model version.
     * NOTE(review): presumably this means the speaker must be valid for the
     * value of `model` (documented default: bulbul:v2) — confirm against the API docs.
     *
     * **Important:** Speaker names are case-sensitive and must be lowercase (e.g., `ritu` not `Ritu`).
     */
    speaker?: SarvamAI.TextToSpeechSpeaker;
    /**
     * Controls the pitch of the audio. Range: -0.75 to 0.75. Default is 0.0.
     *
     * **Note:** Only supported for bulbul:v2.
     */
    pitch?: number;
    /**
     * Controls the speed of the audio. Default is 1.0.
     *
     * **Model-specific ranges:**
     * - **bulbul:v3:** 0.5 to 2.0
     * - **bulbul:v2:** 0.3 to 3.0
     */
    pace?: number;
    /**
     * Controls the loudness of the audio. Range: 0.3 to 3.0. Default is 1.0.
     *
     * **Note:** Only supported for bulbul:v2.
     */
    loudness?: number;
    /**
     * Specifies the sample rate of the output audio. Default is 22050 Hz.
     *
     * **Note:** OPUS codec only supports 8000, 12000, 16000, 24000, 48000 Hz.
     * NOTE(review): the documented default (22050 Hz) is not in the OPUS list, so an
     * explicit rate is presumably needed when an OPUS codec is requested — confirm.
     */
    speech_sample_rate?: SarvamAI.SpeechSampleRate;
    /** Controls whether normalization of English words and numeric entities is performed. Default is false. */
    enable_preprocessing?: boolean;
    /** Specifies the model to use for text-to-speech conversion. Default is bulbul:v2. */
    model?: SarvamAI.TextToSpeechModel;
    /**
     * Controls the randomness of the output. Range: 0.01 to 1.0. Default is 0.6.
     *
     * **Note:** Only supported for bulbul:v3.
     * NOTE(review): the documented default model is bulbul:v2, so using this field
     * presumably requires setting `model` to bulbul:v3 explicitly — confirm.
     */
    temperature?: number;
    /** Enable caching for the request. Default is false. Currently in beta. */
    enable_cached_responses?: boolean;
    /**
     * The ID of a pronunciation dictionary to apply during synthesis. When provided, matching words in the input text will be replaced with their custom pronunciations before generating speech.
     *
     * Create and manage dictionaries via the [Pronunciation Dictionary API](https://docs.sarvam.ai/api-reference-docs/pronunciation-dictionary/create). Only supported by **bulbul:v3**.
     * NOTE(review): as with other bulbul:v3-only fields, this presumably requires
     * setting `model` to bulbul:v3 explicitly, since the documented default is bulbul:v2 — confirm.
     */
    dict_id?: string;
    /** Specifies the codec for the streamed output audio (e.g., 'mp3'). */
    output_audio_codec?: SarvamAI.SpeechStreamCodec;
    /** Bitrate for the streamed output audio. Default is '128k'. */
    output_audio_bitrate?: SarvamAI.SpeechStreamBitrate;
}