import { ChunkedArrayBufferStream } from 'microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common/Exports';
import { PcmRecorder } from 'microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common.browser/Exports';

import { AudioStreamNode, DeviceInfo, Format } from './CustomAudioInputStream';
import bytesPerSample from './bytesPerSample';
import createAudioConfig from './createAudioConfig';
import createAudioContext from './createAudioContext';
import getUserMedia from './getUserMedia';

// This is how often we flush the audio buffer to the network. Modifying this value will affect latency.
const DEFAULT_BUFFER_DURATION_IN_MS = 100;

// TODO: [P2] #3975 We should consider building our own PcmRecorder:
//       - Use Audio Worklet via blob URL
//       - Not hardcoding the sample rate or other values
// PcmRecorder always downscales to 16000 Hz. We cannot use the dynamic value from MediaConstraints or MediaTrackSettings.
const PCM_RECORDER_HARDCODED_SETTINGS: MediaTrackSettings = Object.freeze({
  channelCount: 1,
  sampleRate: 16000,
  sampleSize: 16
});

const PCM_RECORDER_HARDCODED_FORMAT: Format = Object.freeze({
  bitsPerSample: PCM_RECORDER_HARDCODED_SETTINGS.sampleSize,
  // `channelCount` is not on @types/web@0.0.54 yet, related to https://github.com/microsoft/TypeScript-DOM-lib-generator/issues/1290.
  // @ts-ignore
  channels: PCM_RECORDER_HARDCODED_SETTINGS.channelCount,
  samplesPerSec: PCM_RECORDER_HARDCODED_SETTINGS.sampleRate
});

type MicrophoneAudioInputStreamOptions = {
  /** Specifies the constraints for selecting an audio device. */
  audioConstraints?: true | MediaTrackConstraints;

  /** Specifies the `AudioContext` to use. This object must be primed and ready to use. */
  audioContext: AudioContext;

  /** Specifies the buffering delay on how often to flush audio data to network. Increasing the value will increase audio latency. Default is 100 ms. */
  bufferDurationInMS?: number;

  /** Specifies whether to display diagnostic information. */
  debug?: true;

  /** Specifies if telemetry data should be sent. If not specified, telemetry data will NOT be sent. */
  enableTelemetry?: true;

  /** Specifies the `AudioWorklet` URL for `PcmRecorder`. If not specified, will use script processor on UI thread instead. */
  pcmRecorderWorkletUrl?: string;
};

/**
 * Creates an audio config (via `createAudioConfig`) that records from the microphone through the Speech SDK `PcmRecorder`.
 *
 * A single `PcmRecorder` is created per call and shared across every `attach()`. Each `attach()` requests a fresh
 * `MediaStream` through `getUserMedia`.
 *
 * @param options See `MicrophoneAudioInputStreamOptions`. A falsy `bufferDurationInMS` (including `0`) falls back to
 * `DEFAULT_BUFFER_DURATION_IN_MS`.
 * @returns Whatever `createAudioConfig` returns for the given `attach` implementation and `debug` flag.
 */
function createMicrophoneAudioConfig(options: MicrophoneAudioInputStreamOptions) {
  const { audioConstraints, audioContext, debug, enableTelemetry, pcmRecorderWorkletUrl } = options;

  // `||` is deliberate: a buffer duration of 0 (or NaN) is not meaningful, so it falls back to the default.
  const bufferDurationInMS = options.bufferDurationInMS || DEFAULT_BUFFER_DURATION_IN_MS;

  // Related to #4523.
  // When bumping to recent version of `microsoft-cognitiveservices-speech-sdk@>=1.23.0`, pass `true` to the constructor.
  // const pcmRecorder = new PcmRecorder(true);
  const pcmRecorder = new PcmRecorder();

  if (pcmRecorderWorkletUrl) {
    pcmRecorder.setWorkletUrl(pcmRecorderWorkletUrl);
  }

  return createAudioConfig({
    /**
     * Acquires the microphone and starts recording into a new chunked output stream.
     *
     * @param audioNodeId ID passed to the output stream and echoed back by `audioStreamNode.id()`.
     * @throws {Error} If the acquired `MediaStream` has no audio track.
     */
    async attach(audioNodeId: string): Promise<{
      audioStreamNode: AudioStreamNode;
      deviceInfo: DeviceInfo;
      format: Format;
    }> {
      // We need to get a new MediaStream on every attach().
      // This is because PcmRecorder.releaseMediaResources() disconnected/stopped them.
      const mediaStream = await getUserMedia({ audio: audioConstraints, video: false });

      const [firstAudioTrack] = mediaStream.getAudioTracks();

      if (!firstAudioTrack) {
        throw new Error('No audio device is found.');
      }

      const outputStream = new ChunkedArrayBufferStream(
        // Speech SDK quirks: PcmRecorder hardcoded sample rate of 16000 Hz.
        // NOTE(review): this treats the result of bytesPerSample() as a per-second byte count scaled by the
        //               buffer duration in seconds; confirm against the bytesPerSample() helper.
        // eslint-disable-next-line no-magic-numbers
        bytesPerSample(PCM_RECORDER_HARDCODED_SETTINGS) * (bufferDurationInMS / 1000),
        audioNodeId
      );

      pcmRecorder.record(audioContext, mediaStream, outputStream);

      return {
        audioStreamNode: {
          // Speech SDK quirks: In SDK's original MicAudioSource implementation, it calls turnOff() during detach().
          //                    That means, it calls turnOff(), then detach(), then turnOff() again. Seems redundant.
          //                    When using with Direct Line Speech, turnOff() is never called.
          detach: (): Promise<void> => {
            // Speech SDK quirks: In SDK, it calls outputStream.close() in turnOff() before outputStream.readEnded() in detach().
            //                    I think it makes sense to call readEnded() before close().
            outputStream.readEnded();
            outputStream.close();

            // PcmRecorder.releaseMediaResources() will disconnect/stop the MediaStream.
            // We cannot use MediaStream again after turned off.
            pcmRecorder.releaseMediaResources(audioContext);

            // MediaStream will become inactive after all tracks are removed.
            mediaStream.getTracks().forEach(track => mediaStream.removeTrack(track));

            // Return a real promise so the declared `Promise<void>` contract holds for callers using `.then()`.
            // All teardown above has already run synchronously by this point.
            return Promise.resolve();
          },
          id: () => audioNodeId,
          read: () => outputStream.read()
        },
        deviceInfo: {
          manufacturer: 'Bot Framework Web Chat',
          // The device label and type are only reported when telemetry is enabled.
          model: enableTelemetry ? firstAudioTrack.label : '',
          type: enableTelemetry ? 'Microphones' : 'Unknown'
        },
        // Speech SDK quirks: PcmRecorder hardcoded sample rate of 16000 Hz.
        //                    We cannot obtain this number other than looking at their source code.
        //                    I.e. no getter property.
        // PcmRecorder always downscales to 16000 Hz. We cannot use the dynamic value from MediaConstraints or MediaTrackSettings.
        format: PCM_RECORDER_HARDCODED_FORMAT
      };
    },
    debug
  });
}

/**
 * Creates a microphone audio config together with the `AudioContext` it uses.
 *
 * @param audioContext Optional `AudioContext` to reuse. If falsy, a new one is created via `createAudioContext()`.
 * @param audioInputDeviceId Optional device ID; when truthy it is passed as the `deviceId` audio constraint,
 * otherwise the audio constraint is `true`.
 * @param enableTelemetry When truthy, telemetry is enabled on the underlying audio config.
 * @returns An object with the created `audioConfig` and the `audioContext` that was used.
 */
export default function createMicrophoneAudioConfigAndAudioContext({
  audioContext,
  audioInputDeviceId,
  enableTelemetry
}: {
  audioContext?: AudioContext;
  audioInputDeviceId?: string;
  enableTelemetry?: true;
}) {
  // Web Chat has an implementation of AudioConfig for microphone that would enable better support on Safari:
  // - Maintain same instance of `AudioContext` across recognitions;
  // - Resume suspended `AudioContext` on user gestures.
  //
  // This is filed as https://github.com/microsoft/cognitive-services-speech-sdk-js/issues/571.
  // Until the Speech SDK team takes our suggestion, we need to continue using a custom AudioConfig object to persist the blessing.
  const resolvedAudioContext = audioContext || createAudioContext();

  return {
    audioConfig: createMicrophoneAudioConfig({
      audioConstraints: audioInputDeviceId ? { deviceId: audioInputDeviceId } : true,
      audioContext: resolvedAudioContext,
      enableTelemetry: enableTelemetry ? true : undefined
    }),
    audioContext: resolvedAudioContext
  };
}