/**
 * AudioAgent - Speech synthesis and transcription agent
 *
 * Wraps AI SDK's generateSpeech and transcribe functions for
 * text-to-speech and speech-to-text capabilities.
 *
 * Requires AI SDK: npm install ai @ai-sdk/openai
 *
 * @example Text-to-Speech
 * ```typescript
 * import { AudioAgent } from 'praisonai';
 *
 * const agent = new AudioAgent({
 *   provider: 'openai',
 *   voice: 'alloy'
 * });
 *
 * const result = await agent.speak('Hello, world!');
 * // result.audio holds the generated audio data
 * ```
 *
 * @example Speech-to-Text
 * ```typescript
 * const agent = new AudioAgent({ provider: 'openai' });
 *
 * const result = await agent.transcribe('./audio.mp3');
 * console.log(result.text); // "Hello, world!"
 * ```
 */

/**
 * Supported audio providers
 */
export type AudioProvider = 'openai' | 'elevenlabs' | 'google' | 'deepgram' | 'groq';

/**
 * Voice options by provider
 */
export type OpenAIVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
export type ElevenLabsVoice = string;

/**
 * Audio format options
 */
export type AudioFormat = 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';

/**
 * Configuration for AudioAgent
 */
export interface AudioAgentConfig {
  /** Name of the agent */
  name?: string;
  /** Audio provider (default: 'openai') */
  provider?: AudioProvider;
  /** Voice to use for TTS */
  voice?: string;
  /** TTS model to use (provider-specific) */
  model?: string;
  /** Audio output format */
  format?: AudioFormat;
  /** Speed multiplier for TTS (0.25 to 4.0) */
  speed?: number;
  /** Language for transcription */
  language?: string;
  /** Enable verbose logging */
  verbose?: boolean;
}

/**
 * Options for speak method
 */
export interface SpeakOptions {
  /** Override voice for this call */
  voice?: string;
  /** Override model for this call */
  model?: string;
  /** Override format for this call */
  format?: AudioFormat;
  /** Override speed for this call */
  speed?: number;
}

/**
 * Options for transcribe method
 */
export interface TranscribeOptions {
  /** Language hint for transcription */
  language?: string;
  /** Include word-level timestamps */
  timestamps?: boolean;
  /** Return detailed segments */
  segments?: boolean;
}

/**
 * Result from speak method
 */
export interface SpeakResult {
  /** Audio data as Buffer or ArrayBuffer */
  audio: Buffer | ArrayBuffer;
  /** Duration in seconds (if available) */
  duration?: number;
  /** Audio format */
  format: string;
}

/**
 * Result from transcribe method
 */
export interface TranscribeResult {
  /** Transcribed text */
  text: string;
  /** Detected language */
  language?: string;
  /** Duration in seconds */
  duration?: number;
  /** Word-level timestamps (if requested) */
  words?: Array<{
    word: string;
    start: number;
    end: number;
  }>;
  /** Segments (if requested) */
  segments?: Array<{
    text: string;
    start: number;
    end: number;
  }>;
}

/**
 * AudioAgent - Speech synthesis and transcription
 */
export declare class AudioAgent {
  readonly id: string;
  readonly name: string;
  private config;
  constructor(config?: AudioAgentConfig);
  /**
   * Get default TTS model for provider
   */
  private getDefaultModel;
  /**
   * Generate speech from text (Text-to-Speech)
   *
   * @param text - Text to convert to speech
   * @param options - Override options for this call
   * @returns Audio data with metadata
   *
   * @example
   * ```typescript
   * const result = await agent.speak('Hello, world!');
   * fs.writeFileSync('output.mp3', result.audio);
   * ```
   */
  speak(text: string, options?: SpeakOptions): Promise<SpeakResult>;
  /**
   * Get provider-specific speech model
   */
  private getSpeechModel;
  /**
   * Transcribe audio to text (Speech-to-Text)
   *
   * @param audioInput - Audio file path, URL, or Buffer
   * @param options - Transcription options
   * @returns Transcribed text with metadata
   *
   * @example From file
   * ```typescript
   * const result = await agent.transcribe('./audio.mp3');
   * console.log(result.text);
   * ```
   *
   * @example From Buffer
   * ```typescript
   * const audioBuffer = fs.readFileSync('./audio.mp3');
   * const result = await agent.transcribe(audioBuffer);
   * ```
   */
  transcribe(
    audioInput: string | Buffer | ArrayBuffer,
    options?: TranscribeOptions,
  ): Promise<TranscribeResult>;
  /**
   * Prepare audio input for transcription
   */
  private prepareAudioInput;
  /**
   * Get provider-specific transcription model
   */
  private getTranscriptionModel;
  /**
   * Chat method for agent-like interface
   * Determines whether to speak or transcribe based on input
   *
   * NOTE(review): the generated declaration had lost this Promise's type
   * argument; `string` is assumed here - confirm against the implementation.
   *
   * @param input - Input string handed to the agent
   */
  chat(input: string): Promise<string>;
}

/**
 * Factory function to create AudioAgent
 */
export declare function createAudioAgent(config?: AudioAgentConfig): AudioAgent;

export default AudioAgent;