{"version":3,"sources":["src/common.speech/SynthesisTurn.ts"],"names":[],"mappings":"AAGA,OAAO,EAIH,iBAAiB,EACpB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAE1E,OAAO,EACH,kBAAkB,EAGlB,qBAAqB,EACxB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,kBAAkB,EAAgB,MAAM,6CAA6C,CAAC;AAC/F,OAAO,EAEH,oBAAoB,EAGvB,MAAM,sBAAsB,CAAC;AAE9B,MAAM,WAAW,yBAAyB;IACtC,UAAU,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,uBAAuB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IAC/B,OAAO,EAAE,yBAAyB,CAAC;IACnC,KAAK,EAAE,uBAAuB,CAAC;IAC/B,MAAM,EAAE;QACJ,gBAAgB,EAAE,MAAM,CAAC;KAC5B,CAAC;CACL;AAED,qBAAa,aAAa;IAEtB,IAAW,SAAS,IAAI,MAAM,CAE7B;IAED,IAAW,QAAQ,IAAI,MAAM,CAE5B;IAED,IAAW,QAAQ,CAAC,KAAK,EAAE,MAAM,EAEhC;IAED,IAAW,iBAAiB,IAAI,qBAAqB,CAEpD;IAED,IAAW,iBAAiB,CAAC,MAAM,EAAE,qBAAqB,EAEzD;IAED,IAAW,qBAAqB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEhD;IAED,IAAW,gBAAgB,IAAI,OAAO,CAErC;IAED,IAAW,cAAc,IAAI,OAAO,CAEnC;IAED,IAAW,iBAAiB,IAAI,MAAM,CAErC;IAED,IAAW,qBAAqB,IAAI,MAAM,CAEzC;IAGD,IAAW,aAAa,IAAI,MAAM,CAEjC;IAED,IAAW,aAAa,IAAI,MAAM,CAEjC;IAED,IAAW,eAAe,IAAI,kBAAkB,CAQ/C;IAED,OAAO,CAAC,cAAc,CAAkB;IACxC,OAAO,CAAC,oBAAoB,CAAS;IACrC,OAAO,CAAC,kBAAkB,CAAkB;IAC5C,OAAO,CAAC,oBAAoB,CAAkB;IAC9C,OAAO,CAAC,iBAAiB,CAAa;IACtC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,gBAAgB,CAAiB;IACzC,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,qBAAqB,CAA4B;IACzD,OAAO,CAAC,iBAAiB,CAAc;IACvC,OAAO,CAAC,2BAA2B,CAAc;IACjD,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,uBAAuB,CAAa;IAC5C,OAAO,CAAC,kBAAkB,CAAa;IACvC,OAAO,CAAC,2BAA2B,CAAa;IAChD,OAAO,CAAC,0BAA0B,CAAS;IAC3C,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,UAAU,CAAU;IAC5B,OAAO,CAAC,wBAAwB,CAAoB;IACpD,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,aAAa,CAAS;IAG9B,OAAO,CAAC,sBAAsB,CAAa;IAC3C,OAAO,CAAC,qBAAqB,CAAc;IAC3C,OAAO,CAAC,kBAAkB,CAAc;IACxC,OAAO,CAAC,oBAAoB,CAAc;IAC1C,OAAO,CAAC,kBAAkB,CAAc;;IAU3B,mBAAmB,IAAI,OAAO,CAAC,WAAW,CAAC;IAW3C,6BAA6B,IAAI,OAAO,CAAC,WAAW,CAAC;IAgB3D,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,CAAC,EAAE,iBAAiB,GAAG,IAAI;IA8BlH,oBAAoB,CAAC,gBAAgB,EAAE,MAAM,GAAG,IAAI;IAKpD,eAAe,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAMvC,8BAA8B,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAWxD,wBAAwB,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAKpD,wBAAwB,IAAI,IAAI;IAMhC,0BAA0B,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAqBtD,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAgB7C,mBAAmB,CAAC,QAAQ,EAAE,kBAAkB,GAAG,IAAI;IAIvD,wBAAwB,CAAC,QAAQ,EAAE,kBAAkB,GAAG,IAAI;IAM5D,YAAY,CAAC,QAAQ,EAAE,kBAAkB,GAAG,IAAI;IAI1C,wBAAwB,IAAI,OAAO,CAAC,qBAAqB,CAAC;IAsBhE,OAAO,IAAI,IAAI;IAOf,kBAAkB,IAAI,IAAI;IAIjC;;;OAGG;IACI,0BAA0B,IAAI,MAAM;IAM3C,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,oBAAoB,GAAG,IAAI;IAIpD;;;;OAIG;IACH,OAAO,CAAC,MAAM,CAAC,QAAQ;IAIvB,OAAO,CAAC,gBAAgB;IAwBxB,OAAO,CAAC,UAAU;YAaJ,sBAAsB;IAWpC;;;;OAIG;IACH,OAAO,CAAC,YAAY;CAGvB","file":"SynthesisTurn.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\r\n// Licensed under the MIT license.\r\n\r\nimport {\r\n    createNoDashGuid,\r\n    Deferred,\r\n    Events,\r\n    IAudioDestination\r\n} from \"../common/Exports.js\";\r\nimport { AudioOutputFormatImpl } from \"../sdk/Audio/AudioOutputFormat.js\";\r\nimport { PullAudioOutputStreamImpl } from \"../sdk/Audio/AudioOutputStream.js\";\r\nimport {\r\n    PropertyCollection,\r\n    PropertyId,\r\n    ResultReason,\r\n    SpeechSynthesisResult,\r\n} from \"../sdk/Exports.js\";\r\nimport { ISynthesisMetadata, MetadataType } from \"./ServiceMessages/SynthesisAudioMetadata.js\";\r\nimport {\r\n    ConnectingToSynthesisServiceEvent,\r\n    SpeechSynthesisEvent,\r\n    SynthesisStartedEvent,\r\n    SynthesisTriggeredEvent,\r\n} from \"./SynthesisEvents.js\";\r\n\r\nexport interface ISynthesisResponseContext {\r\n    serviceTag: string;\r\n}\r\n\r\nexport interface ISynthesisResponseAudio {\r\n    type: string;\r\n    streamId: string;\r\n}\r\n\r\nexport interface ISynthesisResponse {\r\n    context: ISynthesisResponseContext;\r\n    audio: ISynthesisResponseAudio;\r\n    webrtc: {\r\n        connectionString: string;\r\n    };\r\n}\r\n\r\nexport class SynthesisTurn {\r\n\r\n    public get requestId(): string {\r\n        return this.privRequestId;\r\n    }\r\n\r\n    public get streamId(): string {\r\n        return this.privStreamId;\r\n    }\r\n\r\n    public set streamId(value: string) {\r\n        this.privStreamId = value;\r\n    }\r\n\r\n    public get audioOutputFormat(): AudioOutputFormatImpl {\r\n        return this.privAudioOutputFormat;\r\n    }\r\n\r\n    public set audioOutputFormat(format: AudioOutputFormatImpl) {\r\n        this.privAudioOutputFormat = format;\r\n    }\r\n\r\n    public get turnCompletionPromise(): Promise<void> {\r\n        return this.privTurnDeferral.promise;\r\n    }\r\n\r\n    public get isSynthesisEnded(): boolean {\r\n        return this.privIsSynthesisEnded;\r\n    }\r\n\r\n    public get isSynthesizing(): boolean {\r\n        return this.privIsSynthesizing;\r\n    }\r\n\r\n    public get currentTextOffset(): number {\r\n        return this.privTextOffset;\r\n    }\r\n\r\n    public get currentSentenceOffset(): number {\r\n        return this.privSentenceOffset;\r\n    }\r\n\r\n    // The number of bytes received for current turn\r\n    public get bytesReceived(): number {\r\n        return this.privBytesReceived;\r\n    }\r\n\r\n    public get audioDuration(): number {\r\n        return this.privAudioDuration;\r\n    }\r\n\r\n    public get extraProperties(): PropertyCollection {\r\n        if (!!this.privWebRTCSDP) {\r\n            const properties = new PropertyCollection();\r\n            properties.setProperty(PropertyId.TalkingAvatarService_WebRTC_SDP, this.privWebRTCSDP);\r\n            return properties;\r\n        }\r\n\r\n        return undefined;\r\n    }\r\n\r\n    private privIsDisposed: boolean = false;\r\n    private privAuthFetchEventId: string;\r\n    private privIsSynthesizing: boolean = false;\r\n    private privIsSynthesisEnded: boolean = false;\r\n    private privBytesReceived: number = 0;\r\n    private privRequestId: string;\r\n    private privStreamId: string;\r\n    private privTurnDeferral: Deferred<void>;\r\n    private privInTurn: boolean = false;\r\n    private privAudioOutputFormat: AudioOutputFormatImpl;\r\n    private privAudioOutputStream: PullAudioOutputStreamImpl;\r\n    private privReceivedAudio: ArrayBuffer;\r\n    private privReceivedAudioWithHeader: ArrayBuffer;\r\n    private privTextOffset: number = 0;\r\n    private privNextSearchTextIndex: number = 0;\r\n    private privSentenceOffset: number = 0;\r\n    private privNextSearchSentenceIndex: number = 0;\r\n    private privPartialVisemeAnimation: string;\r\n    private privRawText: string;\r\n    private privIsSSML: boolean;\r\n    private privTurnAudioDestination: IAudioDestination;\r\n    private privAudioDuration: number;\r\n    private privWebRTCSDP: string;\r\n\r\n    // Latency tracking\r\n    private privSynthesisStartTime: number = 0;\r\n    private privConnectionLatency: number = -1;\r\n    private privNetworkLatency: number = -1;\r\n    private privFirstByteLatency: number = -1;\r\n    private privServiceLatency: number = -1;\r\n\r\n    public constructor() {\r\n        this.privRequestId = createNoDashGuid();\r\n        this.privTurnDeferral = new Deferred<void>();\r\n\r\n        // We're not in a turn, so resolve.\r\n        this.privTurnDeferral.resolve();\r\n    }\r\n\r\n    public async getAllReceivedAudio(): Promise<ArrayBuffer> {\r\n        if (!!this.privReceivedAudio) {\r\n            return Promise.resolve(this.privReceivedAudio);\r\n        }\r\n        if (!this.privIsSynthesisEnded) {\r\n            return null;\r\n        }\r\n        await this.readAllAudioFromStream();\r\n        return Promise.resolve(this.privReceivedAudio);\r\n    }\r\n\r\n    public async getAllReceivedAudioWithHeader(): Promise<ArrayBuffer> {\r\n        if (!!this.privReceivedAudioWithHeader) {\r\n            return this.privReceivedAudioWithHeader;\r\n        }\r\n        if (!this.privIsSynthesisEnded) {\r\n            return null;\r\n        }\r\n        if (this.audioOutputFormat.hasHeader) {\r\n            const audio: ArrayBuffer = await this.getAllReceivedAudio();\r\n            this.privReceivedAudioWithHeader = this.audioOutputFormat.addHeader(audio);\r\n            return this.privReceivedAudioWithHeader;\r\n        } else {\r\n            return this.getAllReceivedAudio();\r\n        }\r\n    }\r\n\r\n    public startNewSynthesis(requestId: string, rawText: string, isSSML: boolean, audioDestination?: IAudioDestination): void {\r\n        this.privIsSynthesisEnded = false;\r\n        this.privIsSynthesizing = true;\r\n        this.privRequestId = requestId;\r\n        this.privRawText = rawText;\r\n        this.privIsSSML = isSSML;\r\n        this.privAudioOutputStream = new PullAudioOutputStreamImpl();\r\n        this.privAudioOutputStream.format = this.privAudioOutputFormat;\r\n        this.privReceivedAudio = null;\r\n        this.privReceivedAudioWithHeader = null;\r\n        this.privBytesReceived = 0;\r\n        this.privTextOffset = 0;\r\n        this.privNextSearchTextIndex = 0;\r\n        this.privSentenceOffset = 0;\r\n        this.privNextSearchSentenceIndex = 0;\r\n        this.privPartialVisemeAnimation = \"\";\r\n        this.privWebRTCSDP = \"\";\r\n        // Reset latency tracking for this synthesis request\r\n        this.privSynthesisStartTime = Date.now();\r\n        this.privConnectionLatency = -1;\r\n        this.privNetworkLatency = -1;\r\n        this.privFirstByteLatency = -1;\r\n        this.privServiceLatency = -1;\r\n        if (audioDestination !== undefined) {\r\n            this.privTurnAudioDestination = audioDestination;\r\n            this.privTurnAudioDestination.format = this.privAudioOutputFormat;\r\n        }\r\n        this.onEvent(new SynthesisTriggeredEvent(this.requestId, undefined, audioDestination === undefined ? undefined : audioDestination.id()));\r\n    }\r\n\r\n    public onPreConnectionStart(authFetchEventId: string): void {\r\n        this.privAuthFetchEventId = authFetchEventId;\r\n        this.onEvent(new ConnectingToSynthesisServiceEvent(this.privRequestId, this.privAuthFetchEventId));\r\n    }\r\n\r\n    public onAuthCompleted(isError: boolean): void {\r\n        if (isError) {\r\n            this.onComplete();\r\n        }\r\n    }\r\n\r\n    public onConnectionEstablishCompleted(statusCode: number): void {\r\n        if (statusCode === 200) {\r\n            this.privConnectionLatency = Date.now() - this.privSynthesisStartTime;\r\n            this.onEvent(new SynthesisStartedEvent(this.requestId, this.privAuthFetchEventId));\r\n            this.privBytesReceived = 0;\r\n            return;\r\n        } else if (statusCode === 403) {\r\n            this.onComplete();\r\n        }\r\n    }\r\n\r\n    public onServiceResponseMessage(responseJson: string): void {\r\n        const response: ISynthesisResponse = JSON.parse(responseJson) as ISynthesisResponse;\r\n        this.streamId = response.audio.streamId;\r\n    }\r\n\r\n    public onServiceTurnEndResponse(): void {\r\n        this.privInTurn = false;\r\n        this.privTurnDeferral.resolve();\r\n        this.onComplete();\r\n    }\r\n\r\n    public onServiceTurnStartResponse(responseJson: string): void {\r\n        if (!!this.privTurnDeferral && !!this.privInTurn) {\r\n            // What? How are we starting a turn with another not done?\r\n            this.privTurnDeferral.reject(\"Another turn started before current completed.\");\r\n            // Avoid UnhandledPromiseRejection if privTurnDeferral is not being awaited\r\n            // eslint-disable-next-line @typescript-eslint/no-empty-function\r\n            this.privTurnDeferral.promise.then().catch((): void => { });\r\n        }\r\n        this.privInTurn = true;\r\n        this.privTurnDeferral = new Deferred<void>();\r\n        // If connection was reused, onConnectionEstablishCompleted was never called; treat as 0.\r\n        if (this.privConnectionLatency < 0) {\r\n            this.privConnectionLatency = 0;\r\n        }\r\n        this.privNetworkLatency = Date.now() - this.privSynthesisStartTime - this.privConnectionLatency;\r\n        const response: ISynthesisResponse = JSON.parse(responseJson) as ISynthesisResponse;\r\n        if (!!response.webrtc) {\r\n            this.privWebRTCSDP = response.webrtc.connectionString;\r\n        }\r\n    }\r\n\r\n    public onAudioChunkReceived(data: ArrayBuffer): void {\r\n        if (this.isSynthesizing) {\r\n            if (this.privFirstByteLatency < 0) {\r\n                this.privFirstByteLatency = Date.now() - this.privSynthesisStartTime;\r\n                if (this.privNetworkLatency >= 0) {\r\n                    this.privServiceLatency = this.privFirstByteLatency - this.privConnectionLatency - this.privNetworkLatency;\r\n                }\r\n            }\r\n            this.privAudioOutputStream.write(data);\r\n            this.privBytesReceived += data.byteLength;\r\n            if (this.privTurnAudioDestination !== undefined) {\r\n                this.privTurnAudioDestination.write(data);\r\n            }\r\n        }\r\n    }\r\n\r\n    public onTextBoundaryEvent(metadata: ISynthesisMetadata): void {\r\n        this.updateTextOffset(metadata.Data.text.Text, metadata.Type);\r\n    }\r\n\r\n    public onVisemeMetadataReceived(metadata: ISynthesisMetadata): void {\r\n        if (metadata.Data.AnimationChunk !== undefined) {\r\n            this.privPartialVisemeAnimation += metadata.Data.AnimationChunk;\r\n        }\r\n    }\r\n\r\n    public onSessionEnd(metadata: ISynthesisMetadata): void {\r\n        this.privAudioDuration = metadata.Data.Offset;\r\n    }\r\n\r\n    public async constructSynthesisResult(): Promise<SpeechSynthesisResult> {\r\n        const finishLatency = Date.now() - this.privSynthesisStartTime;\r\n        const audioBuffer: ArrayBuffer = await this.getAllReceivedAudioWithHeader();\r\n        const properties = new PropertyCollection();\r\n        if (!!this.privWebRTCSDP) {\r\n            properties.setProperty(PropertyId.TalkingAvatarService_WebRTC_SDP, this.privWebRTCSDP);\r\n        }\r\n        properties.setProperty(PropertyId.SpeechServiceResponse_SynthesisFirstByteLatencyMs, String(Math.max(0, this.privFirstByteLatency)));\r\n        properties.setProperty(PropertyId.SpeechServiceResponse_SynthesisFinishLatencyMs, String(finishLatency));\r\n        properties.setProperty(PropertyId.SpeechServiceResponse_SynthesisConnectionLatencyMs, String(Math.max(0, this.privConnectionLatency)));\r\n        properties.setProperty(PropertyId.SpeechServiceResponse_SynthesisNetworkLatencyMs, String(Math.max(0, this.privNetworkLatency)));\r\n        properties.setProperty(PropertyId.SpeechServiceResponse_SynthesisServiceLatencyMs, String(Math.max(0, this.privServiceLatency)));\r\n        return new SpeechSynthesisResult(\r\n            this.requestId,\r\n            ResultReason.SynthesizingAudioCompleted,\r\n            audioBuffer,\r\n            undefined,\r\n            properties,\r\n            this.audioDuration\r\n        );\r\n    }\r\n\r\n    public dispose(): void {\r\n        if (!this.privIsDisposed) {\r\n            // we should have completed by now. If we did not its an unknown error.\r\n            this.privIsDisposed = true;\r\n        }\r\n    }\r\n\r\n    public onStopSynthesizing(): void {\r\n        this.onComplete();\r\n    }\r\n\r\n    /**\r\n     * Gets the viseme animation string (merged from animation chunk), and clears the internal\r\n     * partial animation.\r\n     */\r\n    public getAndClearVisemeAnimation(): string {\r\n        const animation: string = this.privPartialVisemeAnimation;\r\n        this.privPartialVisemeAnimation = \"\";\r\n        return animation;\r\n    }\r\n\r\n    protected onEvent(event: SpeechSynthesisEvent): void {\r\n        Events.instance.onEvent(event);\r\n    }\r\n\r\n    /**\r\n     * Check if the text is an XML(SSML) tag\r\n     * @param text\r\n     * @private\r\n     */\r\n    private static isXmlTag(text: string): boolean {\r\n        return text.length >= 2 && text[0] === \"<\" && text[text.length - 1] === \">\";\r\n    }\r\n\r\n    private updateTextOffset(text: string, type: MetadataType): void {\r\n        if (type === MetadataType.WordBoundary) {\r\n            this.privTextOffset = this.privRawText.indexOf(text, this.privNextSearchTextIndex);\r\n            if (this.privTextOffset >= 0) {\r\n                this.privNextSearchTextIndex = this.privTextOffset + text.length;\r\n                if (this.privIsSSML) {\r\n                    if (this.withinXmlTag(this.privTextOffset) && !SynthesisTurn.isXmlTag(text)) {\r\n                        this.updateTextOffset(text, type);\r\n                    }\r\n                }\r\n            }\r\n        } else {\r\n            this.privSentenceOffset = this.privRawText.indexOf(text, this.privNextSearchSentenceIndex);\r\n            if (this.privSentenceOffset >= 0) {\r\n                this.privNextSearchSentenceIndex = this.privSentenceOffset + text.length;\r\n                if (this.privIsSSML) {\r\n                    if (this.withinXmlTag(this.privSentenceOffset) && !SynthesisTurn.isXmlTag(text)) {\r\n                        this.updateTextOffset(text, type);\r\n                    }\r\n                }\r\n            }\r\n        }\r\n    }\r\n\r\n    private onComplete(): void {\r\n        if (this.privIsSynthesizing) {\r\n            this.privIsSynthesizing = false;\r\n            this.privIsSynthesisEnded = true;\r\n            this.privAudioOutputStream.close();\r\n            this.privInTurn = false;\r\n            if (this.privTurnAudioDestination !== undefined) {\r\n                this.privTurnAudioDestination.close();\r\n                this.privTurnAudioDestination = undefined;\r\n            }\r\n        }\r\n    }\r\n\r\n    private async readAllAudioFromStream(): Promise<void> {\r\n        if (this.privIsSynthesisEnded) {\r\n            this.privReceivedAudio = new ArrayBuffer(this.bytesReceived);\r\n            try {\r\n                await this.privAudioOutputStream.read(this.privReceivedAudio);\r\n            } catch (e) {\r\n                this.privReceivedAudio = new ArrayBuffer(0);\r\n            }\r\n        }\r\n    }\r\n\r\n    /**\r\n     * Check if current idx is in XML(SSML) tag\r\n     * @param idx\r\n     * @private\r\n     */\r\n    private withinXmlTag(idx: number): boolean {\r\n        return this.privRawText.indexOf(\"<\", idx + 1) > this.privRawText.indexOf(\">\", idx + 1);\r\n    }\r\n}\r\n"]}