/** A unit of speech handed to a {@link SpeechEngine}. */
interface SpeechRequest {
  /** Plain text to speak. The widget strips Markdown before calling the engine. */
  text: string;
  /** Preferred voice identifier (engine-specific; browser = voice name). */
  voice?: string;
  /** Speech rate (engine-specific range; browser: 0.1–10, default 1). */
  rate?: number;
  /** Speech pitch (engine-specific range; browser: 0–2, default 1). */
  pitch?: number;
}

/** Lifecycle callbacks a {@link SpeechEngine} invokes during playback. */
interface SpeechCallbacks {
  /** Audio has started playing. */
  onStart?: () => void;
  /** Playback finished naturally (or was canceled). */
  onEnd?: () => void;
  /** Playback failed. */
  onError?: (error: Error) => void;
}

/**
 * Pluggable text-to-speech engine behind the "Read aloud" message action.
 *
 * The widget ships a browser Web Speech API engine by default. Provide a hosted
 * engine (Runtype TTS, ElevenLabs, a server proxy, …) by implementing this
 * interface and returning it from {@link TextToSpeechConfig.createEngine}; a
 * server engine can stream audio into the realtime voice
 * {@link VoicePlaybackEngine}.
 */
interface SpeechEngine {
  /** Stable identifier, e.g. `"browser"`, `"runtype"`, or a custom id. */
  readonly id: string;
  /**
   * Whether {@link pause}/{@link resume} are supported. When `false`, the UI
   * offers play/stop only (tapping a playing message stops it).
   */
  readonly supportsPause: boolean;
  /** Begin speaking. Drives the lifecycle through `callbacks`. */
  speak(request: SpeechRequest, callbacks: SpeechCallbacks): void;
  /** Pause playback (no-op if unsupported). */
  pause(): void;
  /** Resume paused playback (no-op if unsupported). */
  resume(): void;
  /** Stop playback and discard any queued audio. */
  stop(): void;
  /** Optional teardown of engine resources. */
  destroy?(): void;
}

/**
 * Streaming PCM playback engine used by the realtime voice provider.
 *
 * The provider feeds raw PCM16 LE mono @ 24kHz (WAV header already stripped)
 * via `enqueue`, signals end-of-reply with `markStreamEnd`, and learns when
 * playback has fully drained via `onFinished`. The default implementation is
 * `AudioPlaybackManager`; an optional jitter-buffered AudioWorklet engine ships
 * from `@runtypelabs/persona/voice-worklet-player` and can be injected via the
 * `runtype.createPlaybackEngine` config hook.
 */
interface VoicePlaybackEngine {
  /** Enqueue a raw PCM16 LE mono @ 24kHz chunk (no WAV header). */
  enqueue(pcm: Uint8Array): void;
  /** Signal that no more chunks will arrive for the current reply. */
  markStreamEnd(): void;
  /** Immediately stop playback and discard queued audio. */
  flush(): void;
  /** Register a callback fired once all queued audio has finished playing. */
  onFinished(callback: () => void): void;
  /**
   * Release all audio resources. Implementations may tear down synchronously
   * or return a promise that settles once teardown completes.
   */
  destroy(): Promise<void> | void;
}

/**
 * A {@link VoicePlaybackEngine} that also supports pause/resume. Returned by
 * `createPcmStreamPlayer` (`@runtypelabs/persona/voice-worklet-player`): a
 * jitter-buffered AudioWorklet player for raw PCM16 / 24 kHz / mono streams.
 * Reuse it inside a hosted {@link SpeechEngine} to get gapless playback with a
 * configurable prebuffer and graceful underrun handling — feed each streamed
 * chunk to {@link VoicePlaybackEngine.enqueue} and the worklet does the rest.
 */
interface PcmStreamPlayer extends VoicePlaybackEngine {
  /** Pause playback; the audio clock suspends and {@link resume} continues in place. */
  pause(): void;
  /** Resume playback after {@link pause}. */
  resume(): void;
  /**
   * Register a callback fired once audible playback actually begins — i.e. the
   * prebuffer waterline filled and the first sample reached the output. Use this
   * (rather than "first chunk enqueued") to flip a UI from loading to playing, so
   * the spinner holds through the prebuffer. Fires once per playback session
   * (cleared by {@link VoicePlaybackEngine.flush}); a mid-reply underrun re-buffer
   * does not re-fire it.
   */
  onStarted(callback: () => void): void;
}

/** Options accepted by {@link createPcmStreamPlayer}. */
interface PcmStreamPlayerOptions {
  /**
   * Audio (ms) to buffer before the first sample, and to re-buffer after an
   * underrun. Higher = smoother on bursty/jittery streams, at the cost of a
   * slightly later first sound (latency ↔ smoothness). Default 150 — good for
   * realtime, server-paced audio; HTTP-pulled TTS often wants ~400–600.
   */
  prebufferMs?: number;
}

/**
 * Create a jitter-buffered AudioWorklet PCM player with pause/resume.
 *
 * Feed it raw PCM16 / 24 kHz / mono via `enqueue()`; it handles prebuffering,
 * gapless playback, and graceful underrun. Reuse it inside a hosted
 * {@link SpeechEngine} so streamed TTS plays smoothly:
 *
 * @param options - Optional player tuning; see {@link PcmStreamPlayerOptions}.
 * @returns A promise resolving to the ready {@link PcmStreamPlayer}.
 *
 * @example
 * import { createPcmStreamPlayer } from '@runtypelabs/persona/voice-worklet-player'
 * const player = await createPcmStreamPlayer({ prebufferMs: 500 })
 * // for each streamed chunk: player.enqueue(pcmChunk)
 * player.markStreamEnd()
 */
declare function createPcmStreamPlayer(
  options?: PcmStreamPlayerOptions,
): Promise<PcmStreamPlayer>;

/**
 * Realtime-named alias of {@link createPcmStreamPlayer} (default prebuffer),
 * typed as a plain {@link VoicePlaybackEngine}. Pass it to the realtime voice
 * provider's `createPlaybackEngine`:
 *
 * @returns A promise resolving to the playback engine.
 *
 * @example
 * import { createWorkletPlaybackEngine } from '@runtypelabs/persona/voice-worklet-player'
 *
 * initAgentWidget({ config: { voiceRecognition: { enabled: true, provider: {
 *   type: 'runtype',
 *   runtype: { agentId, createPlaybackEngine: createWorkletPlaybackEngine },
 * } } } })
 */
declare function createWorkletPlaybackEngine(): Promise<VoicePlaybackEngine>;

/** Constructor options for {@link RuntypeSpeechEngine}. */
interface RuntypeSpeechEngineOptions {
  /**
   * Runtype API host, e.g. `https://api.runtype.com` (typically the widget's
   * `apiUrl`). A trailing slash is tolerated.
   */
  host: string;
  /** Agent whose configured voice synthesizes the text. */
  agentId: string;
  /** Browser-safe client token — the same one the chat widget uses. */
  clientToken: string;
  /**
   * Default voice id, used when a `SpeechRequest` doesn't carry its own. When
   * omitted the agent's configured voice is used.
   */
  voice?: string;
  /**
   * Audio (ms) the player buffers before the first sample and after an
   * underrun. Runtype streams steadily, so the default (200) keeps first sound
   * close to time-to-first-byte while still riding out small hiccups. Applies to
   * the default {@link AudioPlaybackManager}; a custom `createPlaybackEngine` is
   * responsible for its own prebuffer.
   */
  prebufferMs?: number;
  /**
   * Factory for the streaming PCM player. Defaults to the in-bundle, main-thread
   * {@link AudioPlaybackManager} (with `prebufferMs`). Pass `createPcmStreamPlayer`
   * from `@runtypelabs/persona/voice-worklet-player` for the jitter-buffered
   * AudioWorklet player (it then ships in your bundle, not Persona's). May be
   * async — it is resolved on first playback, inside the user gesture.
   */
  createPlaybackEngine?: () => PcmStreamPlayer | Promise<PcmStreamPlayer>;
  /**
   * Optional hook for surfacing fetch/stream failures (a missing endpoint, an
   * expired token, an upstream 4xx) to a log or telemetry. The widget itself
   * only returns the read-aloud button to idle (or, with a fallback engine,
   * silently switches to the browser voice), so without this the reason is
   * invisible.
   */
  onError?: (error: Error) => void;
}

/** Streaming `SpeechEngine` backed by Runtype's `/v1/agents/:id/speak`. */
declare class RuntypeSpeechEngine implements SpeechEngine {
  // Private members carry no type information in this declaration output.
  private readonly opts;
  /** Engine identifier reported through {@link SpeechEngine.id}. */
  readonly id = "runtype-tts";
  /** This engine declares pause/resume support. */
  readonly supportsPause = true;
  private player;
  private playerPromise;
  private generation;
  /** @param opts - Connection, voice, and playback settings for the engine. */
  constructor(opts: RuntypeSpeechEngineOptions);
  private ensurePlayer;
  /** Begin speaking `request`, reporting lifecycle events through `callbacks`. */
  speak(request: SpeechRequest, callbacks: SpeechCallbacks): void;
  private run;
  /** Pause playback. */
  pause(): void;
  /** Resume paused playback. */
  resume(): void;
  /** Stop playback. */
  stop(): void;
  /** Tear down engine resources. */
  destroy(): void;
}

/** Options for {@link FallbackSpeechEngine}. */
interface FallbackSpeechEngineOptions {
  /**
   * Called once when the primary engine fails before audio starts and the
   * fallback takes over — so a silent downgrade is still observable in
   * dev/telemetry even though the user keeps hearing speech.
   */
  onFallback?: (error: Error) => void;
}

/** A `SpeechEngine` that falls back from `primary` to `fallback` per utterance. */
declare class FallbackSpeechEngine implements SpeechEngine {
  // Private members carry no type information in this declaration output.
  private readonly primary;
  private readonly fallback;
  private readonly options;
  /** Engine identifier reported through {@link SpeechEngine.id}. */
  readonly id = "fallback";
  private active;
  /**
   * @param primary - Engine tried first for each utterance.
   * @param fallback - Engine used when `primary` fails.
   * @param options - Optional hooks; see {@link FallbackSpeechEngineOptions}.
   */
  constructor(
    primary: SpeechEngine,
    fallback: SpeechEngine,
    options?: FallbackSpeechEngineOptions,
  );
  /** Whether pause/resume is currently offered (computed; see implementation). */
  get supportsPause(): boolean;
  /** Begin speaking `request`, reporting lifecycle events through `callbacks`. */
  speak(request: SpeechRequest, callbacks: SpeechCallbacks): void;
  /** Pause playback. */
  pause(): void;
  /** Resume paused playback. */
  resume(): void;
  /** Stop playback. */
  stop(): void;
  /** Tear down engine resources. */
  destroy(): void;
}

export {
  FallbackSpeechEngine,
  type FallbackSpeechEngineOptions,
  type PcmStreamPlayer,
  type PcmStreamPlayerOptions,
  RuntypeSpeechEngine,
  type RuntypeSpeechEngineOptions,
  type VoicePlaybackEngine,
  createPcmStreamPlayer,
  createWorkletPlaybackEngine,
};