import { tool } from '@strands-agents/sdk'
import { z } from 'zod'

/** Listening window used by `listen` when no positive `timeout` is supplied (ms). */
const DEFAULT_LISTEN_TIMEOUT_MS = 10000

/** Minimal structural view of one recognition alternative read by `listen`. */
interface RecognitionAlternative {
  transcript: string
  confidence: number
}

/** Minimal structural view of the event passed to `onresult`. */
interface RecognitionResultEvent {
  results: ArrayLike<ArrayLike<RecognitionAlternative>>
}

/** Minimal structural view of the event passed to `onerror`. */
interface RecognitionErrorEvent {
  error: string
}

/** The subset of the (vendor-prefixed) SpeechRecognition object this module uses. */
interface RecognitionLike {
  lang: string
  continuous: boolean
  interimResults: boolean
  onresult: ((event: RecognitionResultEvent) => void) | null
  onerror: ((event: RecognitionErrorEvent) => void) | null
  onend: (() => void) | null
  start(): void
  stop(): void
}

type RecognitionCtor = new () => RecognitionLike

/** Turns any thrown value into a message; non-Error throws are stringified instead of lost. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/** Serializes the `{ status: 'error', error }` payload shared by every audio tool. */
function errorResult(error: string): string {
  return JSON.stringify({ status: 'error', error })
}

/** True when a `window` global exists and exposes `speechSynthesis`. */
function hasSpeechSynthesis(): boolean {
  return typeof window !== 'undefined' && 'speechSynthesis' in window
}

/** Restricts `value` to the inclusive range [`min`, `max`]. */
function clamp(value: number, min: number, max: number): number {
  return Math.min(max, Math.max(min, value))
}

/**
 * speak — browser-native TTS via SpeechSynthesis.
 *
 * Cancels any speech already queued, then speaks `text`. `rate` and `pitch`
 * are clamped to the ranges advertised in the schema (0.1-10 and 0-2).
 * `voice` is matched exactly against a voice's name or lang; when nothing
 * matches, the utterance's voice is left unset.
 *
 * @returns A JSON string: `{ status: 'speaking', textLength }` on success or
 *   `{ status: 'error', error }` on failure.
 */
export const speakTool = tool({
  name: 'speak',
  description: 'Speak text aloud using browser TTS (SpeechSynthesis API).',
  inputSchema: z.object({
    text: z.string(),
    rate: z.number().optional().describe('Speech rate (0.1-10, default 1)'),
    pitch: z.number().optional().describe('Speech pitch (0-2, default 1)'),
    voice: z.string().optional().describe('Voice name or lang (e.g. en-US)'),
  }),
  callback: (input) => {
    try {
      if (!hasSpeechSynthesis()) return errorResult('SpeechSynthesis not supported')

      // Drop anything still queued so the new utterance starts right away.
      speechSynthesis.cancel()

      const utterance = new SpeechSynthesisUtterance(input.text)
      // Compare against undefined rather than truthiness: a pitch of 0 is a
      // valid setting and must not be silently skipped.
      if (input.rate !== undefined) utterance.rate = clamp(input.rate, 0.1, 10)
      if (input.pitch !== undefined) utterance.pitch = clamp(input.pitch, 0, 2)

      if (input.voice) {
        // NOTE: getVoices() can return an empty list until the browser has
        // loaded its voices; in that case no voice is assigned here.
        const match = speechSynthesis
          .getVoices()
          .find((v) => v.name === input.voice || v.lang === input.voice)
        if (match) utterance.voice = match
      }

      speechSynthesis.speak(utterance)
      return JSON.stringify({ status: 'speaking', textLength: input.text.length })
    } catch (err: unknown) {
      return errorResult(describeError(err))
    }
  },
})

/**
 * stop_speaking — cancels all queued and in-progress TTS playback.
 *
 * @returns A JSON string: `{ status: 'stopped' }` or `{ status: 'error', error }`.
 */
export const stopSpeakingTool = tool({
  name: 'stop_speaking',
  description: 'Stop all TTS playback',
  inputSchema: z.object({}),
  callback: () => {
    try {
      if (!hasSpeechSynthesis()) return errorResult('SpeechSynthesis not supported')
      speechSynthesis.cancel()
      return JSON.stringify({ status: 'stopped' })
    } catch (err: unknown) {
      return errorResult(describeError(err))
    }
  },
})

/**
 * list_voices — reports the voices currently returned by `speechSynthesis.getVoices()`.
 *
 * @returns A JSON string: `{ status: 'success', count, voices }` where each
 *   voice carries `name`, `lang`, `default` and `localService`, or
 *   `{ status: 'error', error }`.
 */
export const listVoicesTool = tool({
  name: 'list_voices',
  description: 'List available TTS voices',
  inputSchema: z.object({}),
  callback: () => {
    try {
      if (!hasSpeechSynthesis()) return errorResult('SpeechSynthesis not supported')
      const voices = speechSynthesis.getVoices().map((v) => ({
        name: v.name,
        lang: v.lang,
        default: v.default,
        localService: v.localService,
      }))
      return JSON.stringify({ status: 'success', count: voices.length, voices })
    } catch (err: unknown) {
      return errorResult(describeError(err))
    }
  },
})

/**
 * listen — start browser SpeechRecognition (Chrome/Edge).
 *
 * Runs a single, non-continuous recognition session without interim results
 * and resolves with the first outcome:
 * - `{ status: 'success', transcript, confidence }` when a result arrives,
 * - `{ status: 'error', error }` when the recognizer reports an error,
 * - `{ status: 'ended', note: 'No speech detected' }` when the session ends
 *   without a result (including after the timeout stops it).
 *
 * `lang` defaults to `en-US`; `timeout` defaults to 10000 ms when omitted or
 * not positive.
 */
export const listenTool = tool({
  name: 'listen',
  description: 'Listen to user speech via SpeechRecognition. Returns transcript. Chrome/Edge only.',
  inputSchema: z.object({
    lang: z.string().optional(),
    timeout: z.number().optional().describe('Max listen time in ms (default 10000)'),
  }),
  callback: async (input) => {
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the constructor is looked up under a vendor-prefixed name
      const host = window as any
      const Recognition: RecognitionCtor | undefined =
        host.SpeechRecognition || host.webkitSpeechRecognition
      if (!Recognition) return errorResult('SpeechRecognition not supported (Chrome/Edge only)')

      const timeoutMs =
        input.timeout !== undefined && input.timeout > 0 ? input.timeout : DEFAULT_LISTEN_TIMEOUT_MS

      return await new Promise<string>((resolve) => {
        const rec = new Recognition()
        rec.lang = input.lang || 'en-US'
        rec.continuous = false
        rec.interimResults = false

        let timer: ReturnType<typeof setTimeout> | undefined
        let settled = false

        // Resolve exactly once; later events (e.g. onend after onresult) are ignored.
        const finish = (payload: Record<string, unknown>): void => {
          if (settled) return
          settled = true
          // Release the pending timeout so it does not outlive the session.
          if (timer !== undefined) clearTimeout(timer)
          resolve(JSON.stringify(payload))
        }

        rec.onresult = (event) => {
          const best = event.results[0]?.[0]
          // An empty result list carries no transcript; let onend/onerror settle instead.
          if (!best) return
          finish({ status: 'success', transcript: best.transcript, confidence: best.confidence })
        }
        rec.onerror = (event) => finish({ status: 'error', error: event.error })
        rec.onend = () => finish({ status: 'ended', note: 'No speech detected' })

        rec.start()

        timer = setTimeout(() => {
          try {
            // Stopping lets the recognizer deliver onresult/onend, which settle the promise.
            rec.stop()
          } catch (err: unknown) {
            // If stop() itself fails no further event is guaranteed, so settle here.
            finish({ status: 'error', error: describeError(err) })
          }
        }, timeoutMs)
      })
    } catch (err: unknown) {
      return errorResult(describeError(err))
    }
  },
})

/** Every audio tool defined in this module, in registration order. */
export const AUDIO_TOOLS = [speakTool, stopSpeakingTool, listVoicesTool, listenTool]