/**
 * The first message a client must send once the WebSocket connection is established.
 *
 * It initializes the TTS parameters for the session. A new config message may be sent
 * at any later point in the WebSocket lifecycle to update them; when that happens, any
 * text still held in the buffer is automatically flushed and processed before the new
 * configuration takes effect.
 *
 * **Model-Specific Notes:**
 * - **bulbul:v2:** Supports pitch, loudness, pace (0.3-3.0). Default sample rate: 22050 Hz.
 * - **bulbul:v3:** Does NOT support pitch/loudness. Pace range: 0.5-2.0. Supports temperature. Default sample rate: 24000 Hz.
 */
export interface ConfigureConnection {
    /** Literal tag marking this message as a configuration message. */
    type: "config";
    /** The TTS settings carried by the message. */
    data: ConfigureConnection.Data;
}

export declare namespace ConfigureConnection {
    /** Payload of a `config` message: the TTS settings to initialize or update. */
    interface Data {
        /**
         * The model used for text-to-speech conversion.
         * - **bulbul:v2** (default): Standard TTS model with pitch/loudness support
         * - **bulbul:v3**: Advanced model with temperature control (no pitch/loudness)
         */
        model?: Data.Model | undefined;

        /** Language of the input text, given in BCP-47 format. */
        target_language_code: Data.TargetLanguageCode;

        /**
         * The speaker voice used for the output audio.
         *
         * **Model Compatibility:**
         * - **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
         * - **bulbul:v3:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
         *
         * **Note:** The selected speaker must match the chosen model version.
         */
        speaker: Data.Speaker;

        /**
         * Pitch of the audio. Lower values give a deeper voice, higher values a
         * sharper one. The suitable range is -0.75 to 0.75. Default is 0.0.
         *
         * **Note:** NOT supported for bulbul:v3. Will be ignored if provided.
         */
        pitch?: number | undefined;

        /**
         * Speed of the audio. Lower values slow the speech down, higher values
         * speed it up. Default is 1.0.
         *
         * **Model-specific ranges:**
         * - **bulbul:v2:** 0.3 to 3.0
         * - **bulbul:v3:** 0.5 to 2.0
         */
        pace?: number | undefined;

        /**
         * Loudness of the audio. Lower values give quieter audio, higher values
         * louder audio. The suitable range is 0.3 to 3.0. Default is 1.0.
         *
         * **Note:** NOT supported for bulbul:v3. Will be ignored if provided.
         */
        loudness?: number | undefined;

        /**
         * Randomness of the output. Lower values make the output more focused and
         * deterministic; higher values make it more random. The suitable range is
         * 0.01 to 1.0. Default is 0.6.
         *
         * **Note:** Only supported for bulbul:v3. Will be ignored for bulbul:v2.
         */
        temperature?: number | undefined;

        /**
         * Sample rate of the output audio. Supported values are
         * 8000, 16000, 22050, 24000 Hz.
         *
         * **Model-specific defaults:**
         * - **bulbul:v2:** 22050 Hz
         * - **bulbul:v3:** 24000 Hz
         */
        speech_sample_rate?: number | undefined;

        /**
         * Whether English words and numeric entities (e.g., numbers, dates) are
         * normalized. Set to true for better handling of mixed-language text.
         *
         * **Model-specific defaults:**
         * - **bulbul:v2:** false (optional)
         * - **bulbul:v3:** Always enabled (cannot be disabled)
         */
        enable_preprocessing?: boolean | undefined;

        // NOTE(review): the description below says "MP3 only", yet Data.OutputAudioCodec
        // admits eight codec values — confirm against the API reference which is current.
        /** Audio codec (currently supports MP3 only, optimized for real-time playback) */
        output_audio_codec?: Data.OutputAudioCodec | undefined;

        /** Audio bitrate (choose from 5 supported bitrate options) */
        output_audio_bitrate?: Data.OutputAudioBitrate | undefined;

        /**
         * ID of a pronunciation dictionary to apply during synthesis.
         * When provided, words in the input text that match dictionary entries are
         * replaced with their custom pronunciations before speech is generated.
         *
         * Dictionaries are created and managed via the
         * `/text-to-speech/pronunciation-dictionary` endpoints.
         *
         * **Note:** Only supported by **bulbul:v3**.
         */
        dict_id?: string | undefined;

        /** Minimum character length that triggers buffer flushing for TTS model processing */
        min_buffer_size?: number | undefined;

        /** Maximum length for sentence splitting (adjust based on content length) */
        max_chunk_length?: number | undefined;
    }

    /**
     * Value sets for the enumerated fields of {@link Data}. Each entry is declared
     * as a readonly constant map plus a same-named type that is the union of the
     * map's values.
     */
    namespace Data {
        /**
         * The model used for text-to-speech conversion.
         * - **bulbul:v2** (default): Standard TTS model with pitch/loudness support
         * - **bulbul:v3**: Advanced model with temperature control (no pitch/loudness)
         */
        const Model: {
            readonly BulbulV2: "bulbul:v2";
            readonly BulbulV3: "bulbul:v3";
        };
        type Model = (typeof Model)[keyof typeof Model];

        /** Language of the input text, given in BCP-47 format. */
        const TargetLanguageCode: {
            readonly BnIn: "bn-IN";
            readonly EnIn: "en-IN";
            readonly GuIn: "gu-IN";
            readonly HiIn: "hi-IN";
            readonly KnIn: "kn-IN";
            readonly MlIn: "ml-IN";
            readonly MrIn: "mr-IN";
            readonly OdIn: "od-IN";
            readonly PaIn: "pa-IN";
            readonly TaIn: "ta-IN";
            readonly TeIn: "te-IN";
        };
        type TargetLanguageCode = (typeof TargetLanguageCode)[keyof typeof TargetLanguageCode];

        /**
         * The speaker voice used for the output audio.
         *
         * **Model Compatibility:**
         * - **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
         * - **bulbul:v3:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
         *
         * **Note:** The selected speaker must match the chosen model version.
         */
        const Speaker: {
            readonly Anushka: "anushka";
            readonly Abhilash: "abhilash";
            readonly Manisha: "manisha";
            readonly Vidya: "vidya";
            readonly Arya: "arya";
            readonly Karun: "karun";
            readonly Hitesh: "hitesh";
            readonly Aditya: "aditya";
            readonly Ritu: "ritu";
            readonly Priya: "priya";
            readonly Neha: "neha";
            readonly Rahul: "rahul";
            readonly Pooja: "pooja";
            readonly Rohan: "rohan";
            readonly Simran: "simran";
            readonly Kavya: "kavya";
            readonly Amit: "amit";
            readonly Dev: "dev";
            readonly Ishita: "ishita";
            readonly Shreya: "shreya";
            readonly Ratan: "ratan";
            readonly Varun: "varun";
            readonly Manan: "manan";
            readonly Sumit: "sumit";
            readonly Roopa: "roopa";
            readonly Kabir: "kabir";
            readonly Aayan: "aayan";
            readonly Shubh: "shubh";
            readonly Ashutosh: "ashutosh";
            readonly Advait: "advait";
            readonly Amelia: "amelia";
            readonly Sophia: "sophia";
        };
        type Speaker = (typeof Speaker)[keyof typeof Speaker];

        // NOTE(review): the description below says "MP3 only", yet eight codec values
        // are declared here — confirm against the API reference which is current.
        /** Audio codec (currently supports MP3 only, optimized for real-time playback) */
        const OutputAudioCodec: {
            readonly Linear16: "linear16";
            readonly Mulaw: "mulaw";
            readonly Alaw: "alaw";
            readonly Opus: "opus";
            readonly Flac: "flac";
            readonly Aac: "aac";
            readonly Wav: "wav";
            readonly Mp3: "mp3";
        };
        type OutputAudioCodec = (typeof OutputAudioCodec)[keyof typeof OutputAudioCodec];

        /** Audio bitrate (choose from 5 supported bitrate options) */
        const OutputAudioBitrate: {
            readonly ThirtyTwoK: "32k";
            readonly SixtyFourK: "64k";
            readonly NinetySixK: "96k";
            readonly OneHundredTwentyEightK: "128k";
            readonly OneHundredNinetyTwoK: "192k";
        };
        type OutputAudioBitrate = (typeof OutputAudioBitrate)[keyof typeof OutputAudioBitrate];
    }
}