import * as React from 'react';
import { ConnectionState, ParticipantKind, Track } from 'livekit-client';
import type { RemoteParticipant } from 'livekit-client';
import {
  ParticipantAgentAttributes,
  type ReceivedTranscriptionSegment,
  type TrackReference,
} from '@livekit/components-core';
import { useRemoteParticipants } from './useRemoteParticipants';
import { useParticipantTracks } from './useParticipantTracks';
import { useTrackTranscription } from './useTrackTranscription';
import { useConnectionState } from './useConnectionStatus';
import { useParticipantAttributes } from './useParticipantAttributes';
import { AgentState } from './useAgent';

/**
 * Shape of the value returned by `useVoiceAssistant`: the agent participant found in the
 * room together with its derived state, media tracks, transcriptions and attributes.
 * @beta
 */
export interface VoiceAssistant {
  /**
   * The agent participant: the first remote participant of kind `AGENT` that does not carry
   * the publish-on-behalf attribute. `undefined` when no such participant is in the room.
   */
  agent: RemoteParticipant | undefined;
  /**
   * The current state of the agent. It is `'disconnected'` while the room connection is
   * disconnected, `'connecting'` while the room is connecting, no agent has been found, or the
   * agent has not set its state attribute yet; otherwise it is the value of that attribute.
   */
  state: AgentState;
  /**
   * The microphone track published by the agent or associated avatar worker (if any).
   * The agent's own track takes precedence over the worker's.
   */
  audioTrack: TrackReference | undefined;
  /**
   * The camera track published by the agent or associated avatar worker (if any).
   * The agent's own track takes precedence over the worker's.
   */
  videoTrack: TrackReference | undefined;
  /**
   * The transcription segments received for `audioTrack` (if any).
   */
  agentTranscriptions: ReceivedTranscriptionSegment[];
  /**
   * The agent's participant attributes, or `undefined` when they are not available.
   */
  agentAttributes: RemoteParticipant['attributes'] | undefined;
}

/** Participant attribute key from which the agent's current state is read. */
const state_attribute = ParticipantAgentAttributes.AgentState;

/**
 * Finds the first agent participant in the room and exposes its state, its audio/video
 * tracks (falling back to those of a participant publishing on its behalf), the
 * transcriptions of the selected audio track and the agent's attributes.
 * @remarks This hook requires an agent running with livekit-agents \>= 0.9.0
 * @example
 * ```tsx
 * const { state, audioTrack, agentTranscriptions, agentAttributes } = useVoiceAssistant();
 * ```
 * @beta
 */
export function useVoiceAssistant(): VoiceAssistant {
  const participants = useRemoteParticipants();

  // The agent is the first agent-kind participant that does not publish on behalf of another.
  const agent = participants.find((candidate) => {
    if (candidate.kind !== ParticipantKind.AGENT) {
      return false;
    }
    return !(ParticipantAgentAttributes.PublishOnBehalf in candidate.attributes);
  });

  // The worker is the first agent-kind participant whose publish-on-behalf attribute
  // names the agent's identity.
  const worker = participants.find((candidate) => {
    if (candidate.kind !== ParticipantKind.AGENT) {
      return false;
    }
    return candidate.attributes[ParticipantAgentAttributes.PublishOnBehalf] === agent?.identity;
  });

  const mediaSources = [Track.Source.Microphone, Track.Source.Camera];
  const agentTracks = useParticipantTracks(mediaSources, agent?.identity);
  const workerTracks = useParticipantTracks(mediaSources, worker?.identity);

  // Prefer a track published by the agent itself; otherwise use the worker's.
  const selectTrack = (source: Track.Source): TrackReference | undefined =>
    agentTracks.find((ref) => ref.source === source) ??
    workerTracks.find((ref) => ref.source === source);

  const audioTrack = selectTrack(Track.Source.Microphone);
  const videoTrack = selectTrack(Track.Source.Camera);

  const transcription = useTrackTranscription(audioTrack);
  const connectionState = useConnectionState();
  const { attributes } = useParticipantAttributes({ participant: agent });

  const state = React.useMemo((): AgentState => {
    if (connectionState === ConnectionState.Disconnected) {
      return 'disconnected';
    }
    if (connectionState === ConnectionState.Connecting || !agent) {
      return 'connecting';
    }
    // Until the agent has set its state attribute it is still treated as connecting.
    const reportedState = attributes?.[state_attribute];
    return reportedState ? (reportedState as AgentState) : 'connecting';
  }, [attributes, agent, connectionState]);

  return {
    agent: agent,
    state: state,
    audioTrack: audioTrack,
    videoTrack: videoTrack,
    agentTranscriptions: transcription.segments,
    agentAttributes: attributes,
  };
}