/**
 * Tag lists attached to a voice (used as the type of `Voice.VoiceTag`).
 */
interface VoiceTag {
    /** Content category labels for the voice. */
    ContentCategories: string[];
    /** Personality labels for the voice. */
    VoicePersonalities: string[];
}
/**
 * Describes a single voice, as returned (in a list) by `getVoices()`.
 */
interface Voice {
    /** Name of the voice. */
    Name: string;
    /**
     * Short name of the voice.
     * NOTE(review): presumably the identifier format accepted by
     * `SynthesizeOptions.voice` (documented default there: 'en-US-AvaNeural') — confirm.
     */
    ShortName: string;
    /** Gender of the voice; restricted to these two literals. */
    Gender: "Female" | "Male";
    /** Locale of the voice (presumably a tag such as 'en-US' — confirm). */
    Locale: string;
    /** Codec suggested for this voice. */
    SuggestedCodec: string;
    /** Human-friendly display name. */
    FriendlyName: string;
    /** Status of the voice; this type only admits the literal "GA". */
    Status: "GA";
    /** Content category and personality tags for the voice. */
    VoiceTag: VoiceTag;
}
/**
 * Asynchronously retrieves the list of voices.
 *
 * @returns A promise that resolves with an array of `Voice` descriptions.
 */
declare function getVoices(): Promise<Voice[]>;

/**
 * Metadata structure for audio processing.
 *
 * Supplied via `ParseSubtitleOptions.metadata` to provide timing information.
 */
interface AudioMetadata {
    /**
     * Word boundary information.
     * Typed as a one-element tuple, so exactly one `WordBoundary` entry is
     * expected per metadata object (not an arbitrary-length array).
     */
    Metadata: [WordBoundary];
}
/**
 * Word boundary information from the TTS service.
 *
 * Describes one spoken word: when it starts, how long it lasts, and its text.
 */
interface WordBoundary {
    /** Type of boundary marker; always the literal "WordBoundary". */
    Type: "WordBoundary";
    /** Detailed boundary data (timing plus the word text). */
    Data: {
        /**
         * Time offset of the word, documented as milliseconds.
         * NOTE(review): confirm the unit against the raw service payload.
         */
        Offset: number;
        /**
         * Duration of the word, documented as milliseconds.
         * NOTE(review): confirm the unit against the raw service payload.
         */
        Duration: number;
        /**
         * Text information.
         * Note the lower-case key, unlike the PascalCase sibling properties.
         */
        text: {
            /** The word text */
            Text: string;
            /** Length of the text */
            Length: number;
            /** Type of boundary; always the literal "WordBoundary". */
            BoundaryType: "WordBoundary";
        };
    };
}

/**
 * Settings that control how subtitle cues are built from audio metadata.
 */
interface ParseSubtitleOptions {
    /**
     * Strategy used to split the text into subtitle cues:
     * - 'word': a fixed number of words per cue (see `wordsPerCue`)
     * - 'duration': a fixed time span per cue (see `durationPerCue`)
     *
     * This property is required by the type.
     * NOTE(review): a `@default` on a required property is contradictory —
     * confirm whether a default is actually applied when it is omitted.
     * @default 'word'
     */
    splitBy: "word" | "duration";
    /**
     * Words per subtitle cue; applies when `splitBy` is 'word'.
     * @default 10
     */
    wordsPerCue?: number;
    /**
     * Milliseconds per subtitle cue; applies when `splitBy` is 'duration'.
     * @default 5000
     */
    durationPerCue?: number;
    /** Audio metadata objects that supply the timing information. */
    metadata: AudioMetadata[];
}
/**
 * Represents a single subtitle entry (cue) with timing information.
 *
 * `SynthesizeResult.subtitle` is an array of these entries.
 */
interface ParseSubtitleResult {
    /** The text content of the subtitle */
    text: string;
    /** Start time in milliseconds */
    start: number;
    /** End time in milliseconds */
    end: number;
    /** Duration in milliseconds (presumably `end - start` — confirm) */
    duration: number;
}

/**
 * Configuration options for synthesizing audio from text.
 *
 * Accepted by both `synthesize` and `synthesizeStream`. Only `text` is
 * required; every other property is optional.
 */
interface SynthesizeOptions {
    /** The text that will be synthesized as audio */
    text: string;
    /**
     * Voice persona used to read the message
     * @default 'en-US-AvaNeural'
     */
    voice?: string;
    /**
     * Language code for the voice
     * @default 'en-US'
     */
    language?: string;
    /**
     * Audio output format
     * @default 'audio-24khz-96kbitrate-mono-mp3'
     */
    outputFormat?: string;
    /**
     * Speaking rate/speed of the voice, given as a string
     * @default 'default'
     */
    rate?: string;
    /**
     * Voice pitch adjustment, given as a string
     * @default 'default'
     */
    pitch?: string;
    /**
     * Voice volume level, given as a string
     * @default 'default'
     */
    volume?: string;
    /**
     * Subtitle generation options (excluding metadata).
     * `Omit` only removes `metadata`, so `splitBy` stays required: an object
     * passed here must include `splitBy`.
     * @default { splitBy: 'word', wordsPerCue: 10, durationPerCue: 5000 }
     */
    subtitle?: Omit<ParseSubtitleOptions, "metadata">;
}
/**
 * Output of a completed text-to-speech synthesis call
 * (the resolved value of `synthesize`).
 */
interface SynthesizeResult {
    /** The synthesized audio, delivered as a single `Blob`. */
    audio: Blob;
    /** Subtitle cues generated alongside the audio. */
    subtitle: ParseSubtitleResult[];
}

/**
 * Asynchronously generates audio and subtitle data based on the provided options.
 *
 * @param options - The text to synthesize plus optional voice, format,
 *   rate/pitch/volume and subtitle settings.
 * @returns A promise that resolves with the generated audio (`Blob`) and the
 *   subtitle cues.
 */
declare function synthesize(options: SynthesizeOptions): Promise<SynthesizeResult>;

/**
 * Creates an async generator that yields chunks of synthesized audio data.
 * Each chunk is automatically processed to remove metadata headers.
 *
 * Takes the same `SynthesizeOptions` object as `synthesize`, but the declared
 * return type carries audio chunks only (no subtitle cues).
 *
 * @example
 * ```typescript
 * // Basic usage
 * const generator = synthesizeStream({ text: "Hello world" });
 * for await (const chunk of generator) {
 *   // chunk is a Uint8Array of raw audio data
 *   // Process or save the chunk as needed
 * }
 *
 * // Combining all chunks into one array
 * const chunks: Uint8Array[] = [];
 * for await (const chunk of synthesizeStream({ text: "Hello world" })) {
 *   chunks.push(chunk);
 * }
 * // Now chunks contains all the audio data
 * ```
 *
 * @param options - Configuration options for speech synthesis
 * @param options.text - The text to synthesize
 * @param [options.voice] - Voice persona to use (default: 'en-US-AvaNeural')
 * @param [options.language] - Language code (default: 'en-US')
 * @param [options.outputFormat] - Audio format.
 *   NOTE(review): this comment previously gave the default as
 *   'audio-24khz-48kbitrate-mono-mp3', while `SynthesizeOptions` documents
 *   'audio-24khz-96kbitrate-mono-mp3' — confirm which applies to streaming.
 * @param [options.rate] - Speaking rate, as a string.
 * @param [options.pitch] - Voice pitch, as a string.
 * @param [options.volume] - Audio volume, as a string.
 *   NOTE(review): rate, pitch and volume were previously documented here with
 *   a default of `1`, which does not match their `string` type;
 *   `SynthesizeOptions` documents the default as 'default' — confirm.
 * @returns An async generator yielding processed Uint8Array chunks of audio data
 */
declare function synthesizeStream(options: SynthesizeOptions): AsyncGenerator<Uint8Array>;

// Public API of this module: six type-only exports plus three functions.
// `Voice` and `VoiceTag` are not part of this export list even though
// `getVoices` resolves to a list of `Voice`.
// NOTE(review): consider exporting them so callers can name the element type.
export { type AudioMetadata, type ParseSubtitleOptions, type ParseSubtitleResult, type SynthesizeOptions, type SynthesizeResult, type WordBoundary, getVoices, synthesize, synthesizeStream };