/**
 * Descriptive tags attached to a voice.
 */
interface VoiceTag {
  // NOTE(review): element type restored as `string` (type argument was missing) — confirm against the service payload.
  /** Content categories associated with the voice */
  ContentCategories: Array<string>;
  /** Personality descriptors associated with the voice */
  VoicePersonalities: Array<string>;
}

/**
 * A voice entry as returned by {@link getVoices}.
 */
interface Voice {
  Name: string;
  ShortName: string;
  Gender: "Female" | "Male";
  Locale: string;
  SuggestedCodec: string;
  FriendlyName: string;
  Status: "GA";
  VoiceTag: VoiceTag;
}

/**
 * Retrieves the available voices.
 *
 * NOTE(review): the resolved type was restored as `Array<Voice>` (type
 * argument was missing) — confirm against the implementation.
 *
 * @returns A promise that resolves with the list of voices.
 */
declare function getVoices(): Promise<Array<Voice>>;

/**
 * Metadata structure for audio processing
 */
interface AudioMetadata {
  /** Array of word boundary information */
  Metadata: [WordBoundary];
}

/**
 * Word boundary information from the TTS service
 */
interface WordBoundary {
  /** Type of boundary marker */
  Type: "WordBoundary";
  /** Detailed boundary data */
  Data: {
    /** Time offset in milliseconds */
    Offset: number;
    /** Duration in milliseconds */
    Duration: number;
    /** Text information */
    text: {
      /** The word text */
      Text: string;
      /** Length of the text */
      Length: number;
      /** Type of boundary */
      BoundaryType: "WordBoundary";
    };
  };
}

/**
 * Options for parsing and generating subtitles
 */
interface ParseSubtitleOptions {
  /**
   * Method to split the text into subtitle cues
   * - 'word': splits by word count
   * - 'duration': splits by time duration
   * @default 'word'
   */
  splitBy: "word" | "duration";
  /**
   * Number of words per subtitle cue when using 'word' splitBy
   * @default 10
   */
  wordsPerCue?: number;
  /**
   * Duration in milliseconds per subtitle cue when using 'duration' splitBy
   * @default 5000
   */
  durationPerCue?: number;
  /** Audio metadata used for timing information */
  metadata: Array<AudioMetadata>;
}

/**
 * Represents a single subtitle entry with timing information
 */
interface ParseSubtitleResult {
  /** The text content of the subtitle */
  text: string;
  /** Start time in milliseconds */
  start: number;
  /** End time in milliseconds */
  end: number;
  /** Duration in milliseconds */
  duration: number;
}

/**
 * Configuration options for synthesizing audio from text
 */
interface SynthesizeOptions {
  /** The text that will be synthesized as audio */
  text: string;
  /**
   * Voice persona used to read the message
   * @default 'en-US-AvaNeural'
   */
  voice?: string;
  /**
   * Language code for the voice
   * @default 'en-US'
   */
  language?: string;
  /**
   * Audio output format
   * @default 'audio-24khz-96kbitrate-mono-mp3'
   */
  outputFormat?: string;
  /**
   * Speaking rate/speed of the voice
   * @default 'default'
   */
  rate?: string;
  /**
   * Voice pitch adjustment
   * @default 'default'
   */
  pitch?: string;
  /**
   * Voice volume level
   * @default 'default'
   */
  volume?: string;
  /**
   * Subtitle generation options (excluding metadata)
   * @default { splitBy: 'word', wordsPerCue: 10, durationPerCue: 5000 }
   */
  subtitle?: Omit<ParseSubtitleOptions, "metadata">;
}

/**
 * Result of the text-to-speech synthesis process
 */
interface SynthesizeResult {
  /** Generated audio blob */
  audio: Blob;
  /** Generated subtitle cues */
  subtitle: Array<ParseSubtitleResult>;
}

/**
 * Asynchronously generates audio and subtitle data based on the provided options.
 *
 * @param options - The options for generating audio and subtitle data.
 * @returns A promise that resolves with the generated audio and subtitle data.
 */
declare function synthesize(options: SynthesizeOptions): Promise<SynthesizeResult>;

/**
 * Creates an async generator that yields chunks of synthesized audio data.
 * Each chunk is automatically processed to remove metadata headers.
 *
 * NOTE(review): the defaults listed below for `outputFormat`, `rate`, `pitch`
 * and `volume` differ from the `@default` values documented on
 * `SynthesizeOptions` ('audio-24khz-96kbitrate-mono-mp3' and 'default').
 * Confirm against the implementation which set is correct.
 *
 * @example
 * ```typescript
 * // Basic usage
 * const generator = synthesizeStream({ text: "Hello world" });
 * for await (const chunk of generator) {
 *   // chunk is a Uint8Array of raw audio data
 *   // Process or save the chunk as needed
 * }
 *
 * // Combining all chunks into one array
 * const chunks: Uint8Array[] = [];
 * for await (const chunk of synthesizeStream({ text: "Hello world" })) {
 *   chunks.push(chunk);
 * }
 * // Now chunks contains all the audio data
 * ```
 *
 * @param options - Configuration options for speech synthesis
 * @param options.text - The text to synthesize
 * @param [options.voice] - Voice persona to use (default: 'en-US-AvaNeural')
 * @param [options.language] - Language code (default: 'en-US')
 * @param [options.outputFormat] - Audio format (default: 'audio-24khz-48kbitrate-mono-mp3')
 * @param [options.rate] - Speaking rate (default: 1)
 * @param [options.pitch] - Voice pitch (default: 1)
 * @param [options.volume] - Audio volume (default: 1)
 * @returns An async generator yielding processed Uint8Array chunks of audio data
 */
declare function synthesizeStream(
  options: SynthesizeOptions,
): AsyncGenerator<Uint8Array, void, unknown>;

export {
  type AudioMetadata,
  type ParseSubtitleOptions,
  type ParseSubtitleResult,
  type SynthesizeOptions,
  type SynthesizeResult,
  type WordBoundary,
  getVoices,
  synthesize,
  synthesizeStream,
};