import { BaseStats, flatten } from '../stats';
import { SoundStateChangeHandler } from './sound-detector';
import { videoLoggerSystem } from '../logger';

/**
 * Speech detector for React Native.
 *
 * The microphone audio is looped through two local peer connections
 * (`pc1` sends, `pc2` receives) and the `audioLevel` reported in the
 * `media-source` stats of `pc1` is polled to decide whether the user speaks.
 *
 * Both peer connections are created once, at construction time, and are
 * closed when detection is stopped, so an instance is effectively single-use:
 * create a new detector to start a new detection session.
 */
export class RNSpeechDetector {
  private readonly pc1 = new RTCPeerConnection({});
  private readonly pc2 = new RTCPeerConnection({});
  private audioStream: MediaStream | undefined;
  private externalAudioStream: MediaStream | undefined;
  private isStopped = false;

  /**
   * @param externalAudioStream optional caller-owned audio stream. When
   * provided it is used instead of capturing a new one via `getUserMedia`,
   * and its tracks are NOT stopped when detection ends.
   */
  constructor(externalAudioStream?: MediaStream) {
    this.externalAudioStream = externalAudioStream;
  }

  /**
   * Starts the speech detection.
   *
   * @param onSoundDetectedStateChanged invoked whenever the detected state
   * flips between "sound detected" and "no sound detected".
   * @returns a cleanup function that stops the detection and releases the
   * allocated resources. If the setup fails, the error is logged, everything
   * allocated so far is released and a no-op function is returned.
   */
  public async start(
    onSoundDetectedStateChanged: SoundStateChangeHandler,
  ): Promise<() => void> {
    let detachListeners: (() => void) | undefined;
    let unsubscribe: (() => void) | undefined;

    try {
      this.isStopped = false;
      const audioStream =
        this.externalAudioStream != null
          ? this.externalAudioStream
          : await navigator.mediaDevices.getUserMedia({ audio: true });
      this.audioStream = audioStream;

      const onPc1IceCandidate = (e: RTCPeerConnectionIceEvent) => {
        this.forwardIceCandidate(this.pc2, e.candidate);
      };
      const onPc2IceCandidate = (e: RTCPeerConnectionIceEvent) => {
        this.forwardIceCandidate(this.pc1, e.candidate);
      };
      const onTrackPc2 = (e: RTCTrackEvent) => {
        // A track event may arrive without an associated stream,
        // in which case there is nothing to mute.
        e.streams[0]?.getTracks().forEach((track) => {
          // In RN, the remote track is automatically added to the audio output device
          // so we need to mute it to avoid hearing the audio back
          // @ts-expect-error _setVolume is a private method in react-native-webrtc
          track._setVolume(0);
        });
      };

      this.pc1.addEventListener('icecandidate', onPc1IceCandidate);
      this.pc2.addEventListener('icecandidate', onPc2IceCandidate);
      this.pc2.addEventListener('track', onTrackPc2);
      detachListeners = () => {
        this.pc1.removeEventListener('icecandidate', onPc1IceCandidate);
        this.pc2.removeEventListener('icecandidate', onPc2IceCandidate);
        this.pc2.removeEventListener('track', onTrackPc2);
      };

      audioStream
        .getTracks()
        .forEach((track) => this.pc1.addTrack(track, audioStream));

      // Local offer/answer exchange between the two peer connections
      const offer = await this.pc1.createOffer({});
      await this.pc2.setRemoteDescription(offer);
      await this.pc1.setLocalDescription(offer);
      const answer = await this.pc2.createAnswer();
      await this.pc1.setRemoteDescription(answer);
      await this.pc2.setLocalDescription(answer);

      unsubscribe = this.onSpeakingDetectedStateChange(
        onSoundDetectedStateChanged,
      );
      return () => {
        detachListeners?.();
        unsubscribe?.();
        this.stop();
      };
    } catch (error) {
      // Release whatever was set up before the failure
      detachListeners?.();
      unsubscribe?.();
      this.stop();

      const logger = videoLoggerSystem.getLogger('RNSpeechDetector');
      logger.error('error handling permissions: ', error);
      return () => {};
    }
  }

  /**
   * Stops the speech detection and releases all allocated resources.
   * Calling it again while already stopped is a no-op.
   */
  private stop() {
    if (this.isStopped) return;
    this.isStopped = true;

    this.pc1.close();
    this.pc2.close();

    if (this.externalAudioStream != null) {
      // The external stream is owned by the caller: only drop our references
      this.externalAudioStream = undefined;
      this.audioStream = undefined;
    } else {
      this.cleanupAudioStream();
    }
  }

  /**
   * Polls the audio level of `pc1` every 250ms and reports speaking state
   * changes through the provided handler.
   *
   * @param onSoundDetectedStateChanged handler notified on state changes.
   * @returns a function that stops the polling and cancels pending timers.
   * Once it has been called, the handler is never invoked again.
   */
  private onSpeakingDetectedStateChange(
    onSoundDetectedStateChanged: SoundStateChangeHandler,
  ) {
    const initialBaselineNoiseLevel = 0.13;
    let baselineNoiseLevel = initialBaselineNoiseLevel;
    let speechDetected = false;
    let isDisposed = false;
    // `ReturnType<typeof setTimeout>` is valid in both Node and RN typings
    let speechTimer: ReturnType<typeof setTimeout> | undefined;
    let silenceTimer: ReturnType<typeof setTimeout> | undefined;
    const audioLevelHistory: number[] = []; // Store recent audio levels for smoother detection
    const historyLength = 10;
    const silenceThreshold = 1.1;
    const resetThreshold = 0.9;
    const speechTimeout = 500; // Speech is reset to false 500ms after the last above-threshold sample
    const silenceTimeout = 5000; // Reset baseline after 5 seconds of silence

    const checkAudioLevel = async () => {
      try {
        const stats = await this.pc1.getStats();
        // This poll may have been in flight while the subscription was
        // disposed: do not notify the handler or arm new timers afterwards.
        if (isDisposed) return;

        const report = flatten(stats);
        // Audio levels are present inside stats of type `media-source` and of kind `audio`
        const audioMediaSourceStats = report.find(
          (stat) =>
            stat.type === 'media-source' &&
            (stat as RTCRtpStreamStats).kind === 'audio',
        ) as BaseStats | undefined;
        if (!audioMediaSourceStats) return;

        const { audioLevel } = audioMediaSourceStats;
        // Missing and zero levels are skipped and do not enter the history
        if (!audioLevel) return;

        // Update audio level history (with max historyLength sized array)
        audioLevelHistory.push(audioLevel);
        if (audioLevelHistory.length > historyLength) {
          audioLevelHistory.shift();
        }

        // Calculate average audio level
        const avgAudioLevel =
          audioLevelHistory.reduce((a, b) => a + b, 0) /
          audioLevelHistory.length;

        // Update baseline (if necessary) based on silence detection
        if (avgAudioLevel < baselineNoiseLevel * silenceThreshold) {
          if (!silenceTimer) {
            // The new baseline is derived from the average captured by the
            // poll that armed this timer.
            silenceTimer = setTimeout(() => {
              baselineNoiseLevel = Math.min(
                avgAudioLevel * resetThreshold,
                initialBaselineNoiseLevel,
              );
            }, silenceTimeout);
          }
        } else {
          clearTimeout(silenceTimer);
          silenceTimer = undefined;
        }

        // Speech detection with hysteresis
        if (avgAudioLevel > baselineNoiseLevel * 1.5) {
          if (!speechDetected) {
            speechDetected = true;
            onSoundDetectedStateChanged({
              isSoundDetected: true,
              audioLevel,
            });
          }

          // Every above-threshold sample postpones the "speech ended" report
          clearTimeout(speechTimer);

          speechTimer = setTimeout(() => {
            speechDetected = false;
            onSoundDetectedStateChanged({
              isSoundDetected: false,
              audioLevel: 0,
            });
          }, speechTimeout);
        }
      } catch (error) {
        // A failure of a poll that outlived the subscription is not reported
        if (isDisposed) return;
        const logger = videoLoggerSystem.getLogger('RNSpeechDetector');
        logger.error('error checking audio level from stats', error);
      }
    };

    const intervalId = setInterval(checkAudioLevel, 250);
    return () => {
      isDisposed = true;
      clearInterval(intervalId);
      clearTimeout(speechTimer);
      clearTimeout(silenceTimer);
    };
  }

  /**
   * Stops all tracks of the captured audio stream, releases the stream when
   * the runtime exposes `release()`, and drops the reference to it.
   */
  private cleanupAudioStream() {
    const stream = this.audioStream;
    if (!stream) {
      return;
    }
    // Drop the reference so a stopped/released stream is never touched again
    this.audioStream = undefined;
    stream.getTracks().forEach((track) => track.stop());
    if (
      // @ts-expect-error release() is present in react-native-webrtc
      typeof stream.release === 'function'
    ) {
      // @ts-expect-error called to dispose the stream in RN
      stream.release();
    }
  }

  /**
   * Adds an ICE candidate gathered by one peer connection to the other one.
   * Nothing is forwarded when the detector is stopped, when there is no
   * candidate, or when the destination is already closed.
   *
   * @param destination the peer connection that should receive the candidate.
   * @param candidate the gathered candidate, or `null`.
   */
  private forwardIceCandidate(
    destination: RTCPeerConnection,
    candidate: RTCIceCandidate | null,
  ) {
    if (
      this.isStopped ||
      !candidate ||
      destination.signalingState === 'closed'
    ) {
      return;
    }
    destination.addIceCandidate(candidate).catch(() => {
      // silently ignore the error
      const logger = videoLoggerSystem.getLogger('RNSpeechDetector');
      logger.info('cannot add ice candidate - ignoring');
    });
  }
}