import { Settings } from "./constants"; import MoonshineModel from "./model"; import { AudioNodeVAD } from "@ricky0123/vad-web"; import Log from "./log"; /** * Callbacks are invoked at different phases of the lifecycle as audio is transcribed. You can control the behavior of the application * in response to model loading, starting of transcription, stopping of transcription, and updates to the transcription of the audio stream. * * @property onPermissionsRequested() - called when permissions to a user resource (e.g., microphone) have been requested (but not necessarily granted yet) * * @property onError(error: MoonshineError) - called when an error occurs. * * @property onModelLoadStarted() - called when the {@link MoonshineModel} and VAD begin to load (or download, if hosted elsewhere) * * @property onModelLoaded() - called when the {@link MoonshineModel} and VAD are loaded. This means the Transcriber is now ready to use. * * @property onTranscribeStarted() - called once when transcription starts * * @property onTranscribeStopped() - called once when transcription stops * * @property onTranscriptionUpdated(text: string) - called every {@link Settings.STREAM_UPDATE_INTERVAL} milliseconds while * transcription is active if useVAD == false. Use this callback when you don't need long-running transcription - you only care about * the most recent transcription output. Note that the transcription output may be empty in some cases. * * @property onTranscriptionCommitted(text: string) - called every {@link Settings.STREAM_COMMIT_INTERVAL} milliseconds while * transcription is active and useVAD == false, or every {@link Settings.VAD_COMMIT_INTERVAL} when useVAD == true. Use this callback * for a long-running transcription of audio, like captioning a video or microphone stream. 
*
 * @property onSpeechStart() - called when the VAD model detects the start of speech
 *
 * @property onSpeechEnd() - called when the VAD model detects the end of speech
 *
 * @interface
 */
interface TranscriberCallbacks {
    /** Called when permissions to a user resource (e.g., microphone) have been requested. */
    onPermissionsRequested: () => any;
    /**
     * Called when an error occurs.
     *
     * The parameter is annotated `any` explicitly: an untyped parameter in a function type is an
     * implicit `any`, which is rejected under `noImplicitAny`/`strict`. `any` (rather than `unknown`)
     * keeps handlers that declare a specific error type assignable to this property.
     */
    onError: (error: any) => any;
    /** Called when the model and VAD begin to load. */
    onModelLoadStarted: () => any;
    /** Called when the model and VAD are loaded. */
    onModelLoaded: () => any;
    /** Called once when transcription starts. */
    onTranscribeStarted: () => any;
    /** Called once when transcription stops. */
    onTranscribeStopped: () => any;
    /** Called with the most recent transcription output while transcription is active. */
    onTranscriptionUpdated: (text: string) => any;
    /** Called with committed transcription output while transcription is active. */
    onTranscriptionCommitted: (text: string) => any;
    /** Called when the VAD model detects the start of speech. */
    onSpeechStart: () => any;
    /** Called when the VAD model detects the end of speech. */
    onSpeechEnd: () => any;
}

/**
 * Logging-only implementation of every {@link TranscriberCallbacks} member.
 *
 * Each callback writes its own name (and its argument, where it takes one) through `Log`;
 * `onError` uses `Log.error`, all others use `Log.log`. None of them returns a value.
 *
 * NOTE(review): presumably these serve as fallbacks for callbacks the caller does not supply —
 * confirm against the Transcriber constructor.
 */
const defaultTranscriberCallbacks: TranscriberCallbacks = {
    onPermissionsRequested() {
        Log.log("Transcriber.onPermissionsRequested()");
    },
    onError(error) {
        Log.error("Transcriber.onError(" + error + ")");
    },
    onModelLoadStarted() {
        Log.log("Transcriber.onModelLoadStarted()");
    },
    onModelLoaded() {
        Log.log("Transcriber.onModelLoaded()");
    },
    onTranscribeStarted() {
        Log.log("Transcriber.onTranscribeStarted()");
    },
    onTranscribeStopped() {
        Log.log("Transcriber.onTranscribeStopped()");
    },
    // The defaults accept `undefined` as well as a string, so they tolerate a missing transcript.
    onTranscriptionUpdated(text: string | undefined) {
        Log.log("Transcriber.onTranscriptionUpdated(" + text + ")");
    },
    onTranscriptionCommitted(text: string | undefined) {
        Log.log("Transcriber.onTranscriptionCommitted(" + text + ")");
    },
    onSpeechStart() {
        Log.log("Transcriber.onSpeechStart()");
    },
    onSpeechEnd() {
        Log.log("Transcriber.onSpeechEnd()");
    },
};

/**
 * Implements real-time transcription of an audio stream sourced from a WebAudio-compliant MediaStream object.
* * Read more about working with MediaStreams: {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaStream} */ class Transcriber { private vadModel: AudioNodeVAD; static model: MoonshineModel; callbacks: TranscriberCallbacks; private frameBuffer: Float32Array; private useVAD: boolean; private mediaStream: MediaStream; protected audioContext: AudioContext; public isActive: boolean = false; /** * Creates a transcriber for transcribing a MediaStream from any source. After creating the {@link Transcriber}, you must invoke * {@link Transcriber.attachStream} to provide a MediaStream that you want to transcribe. * * @param modelURL The URL that the underlying {@link MoonshineModel} weights should be loaded from, * relative to {@link Settings.BASE_ASSET_PATH.MOONSHINE}. * * @param callbacks A set of {@link TranscriberCallbacks} used to trigger behavior at different steps of the * transcription lifecycle. For transcription-only use cases, you should define the {@link TranscriberCallbacks} yourself; * when using the transcriber for voice control, you should create a {@link VoiceController} and pass it in. * * @param useVAD A boolean specifying whether or not to use Voice Activity Detection (VAD) on audio processed by the transcriber. * When set to `true`, the transcriber will only process speech at the end of each chunk of voice activity. 
* * @example * This basic example demonstrates the use of the transcriber with custom callbacks: * * ``` ts * import Transcriber from "@usefulsensors/moonshine-js"; * * var transcriber = new Transcriber( * "model/tiny", * { * onModelLoadStarted() { * console.log("onModelLoadStarted()"); * }, * onTranscribeStarted() { * console.log("onTranscribeStarted()"); * }, * onTranscribeStopped() { * console.log("onTranscribeStopped()"); * }, * onTranscriptionUpdated(text: string | undefined) { * console.log( * "onTranscriptionUpdated(" + text + ")" * ); * }, * onTranscriptionCommitted(text: string | undefined) { * console.log( * "onTranscriptionCommitted(" + text + ")" * ); * }, * } * ); * * // Get a MediaStream from somewhere (user mic, active tab, an