import { initContract } from '@ts-rest/core';
import { z } from 'zod';
import { errorResponseSchema, insufficientTokensErrorSchema } from '../schemas/common.schemas';

// ts-rest contract builder; `c.router(...)` is used below to declare the voice endpoints.
const c = initContract();

// Every intent kind the voice pipeline can emit; kept as a literal tuple so `z.enum` infers the union.
const VOICE_INTENT_TYPES = [
  'record_sale',
  'stock_query',
  'restock',
  'credit_summary',
  'credit_payment',
] as const;

/**
 * A single transaction intent derived from a voice transcript.
 *
 * The shape matches the mobile `TransactionIntent` (record_sale, stock_query, restock,
 * credit_summary, credit_payment). Only `type` is required: every other field is optional
 * and the schema enforces no per-type field requirements. `productId` additionally accepts `null`.
 */
export const voiceIntentSchema = z.object({
  type: z.enum(VOICE_INTENT_TYPES),
  // Optional and nullable.
  productId: z.string().optional().nullable(),
  quantity: z.number().optional(),
  unitPrice: z.number().optional(),
  customerId: z.string().optional(),
  amount: z.number().optional(),
});


/** Static type inferred from {@link voiceIntentSchema}. */
export type VoiceIntent = z.infer<typeof voiceIntentSchema>;

/**
 * One entry of a `batchIntents` array in the parse/transcribe responses.
 * Only `record_sale` items are accepted; `productId` and `quantity` are required,
 * `unitPrice` is optional.
 */
const voiceBatchItemSchema = z.object({
  type: z.literal('record_sale'),
  productId: z.string(),
  quantity: z.number(),
  unitPrice: z.number().optional(),
});

/** Static type inferred from `voiceBatchItemSchema` (a single `record_sale` batch entry). */
export type VoiceBatchItem = z.infer<typeof voiceBatchItemSchema>;

/**
 * 200 response body of the transcript-parse endpoint.
 * Both fields are required keys but may be `null`: `intent` is a single parsed intent,
 * `batchIntents` is a list of `record_sale` batch items.
 */
export const voiceParseResponseSchema = z.object({
  intent: voiceIntentSchema.nullable(),
  batchIntents: z.array(voiceBatchItemSchema).nullable(),
});

/** Static type inferred from {@link voiceParseResponseSchema}. */
export type VoiceParseResponse = z.infer<typeof voiceParseResponseSchema>;

/**
 * 200 response body of the audio-transcribe endpoint: the recognised `transcript`
 * followed by the same nullable `intent` / `batchIntents` pair returned by the parse endpoint.
 */
export const voiceTranscribeResponseSchema = z.object({
  transcript: z.string(),
  // Reuse the parse-response fields (`intent`, `batchIntents`) so both payloads stay in step.
  ...voiceParseResponseSchema.shape,
});

/** Static type inferred from {@link voiceTranscribeResponseSchema}. */
export type VoiceTranscribeResponse = z.infer<typeof voiceTranscribeResponseSchema>;

/**
 * 200 response body of the ambient-extraction endpoint.
 * Holds one flat intent per row for ActionPreview-style review (ambient / multi-turn transcript);
 * each entry uses the same shape as {@link voiceIntentSchema}.
 */
export const voiceAmbientExtractResponseSchema = z.object({
  intents: z.array(voiceIntentSchema),
});

/** Static type inferred from {@link voiceAmbientExtractResponseSchema}. */
export type VoiceAmbientExtractResponse = z.infer<typeof voiceAmbientExtractResponseSchema>;

// Request body shared by the transcript-only endpoints (parse and ambient extraction).
const transcriptRequestBody = z.object({
  transcript: z.string(),
  // Comma-separated shop product names (used for LLM entity resolution).
  productHints: z.string().optional(),
});

// Error responses declared by every voice endpoint.
const sharedErrorResponses = {
  401: errorResponseSchema,
  402: insufficientTokensErrorSchema,
};

/**
 * ts-rest contract for the shopkeeper voice endpoints: transcript parsing,
 * audio transcription + parsing, and ambient-transcript intent extraction.
 * All three are POST routes under `/shopkeeper/voice`.
 */
export const shopkeeperVoiceContract = c.router({
  /**
   * Turn a transcript into an intent. Parsing is rule-based first, with an LLM fallback
   * when the rules yield null. The mobile client calls this when its local parse fails.
   */
  parseTranscript: {
    method: 'POST',
    path: '/shopkeeper/voice/parse',
    body: transcriptRequestBody,
    responses: {
      200: voiceParseResponseSchema,
      ...sharedErrorResponses,
    },
    summary: 'Parse voice transcript to transaction intent (keyword + optional LLM fallback)',
  },

  /**
   * Transcription and intent parsing in a single round trip.
   * The mobile client uploads base64-encoded WAV (16kHz mono); the API runs Whisper and then
   * VoiceParseService. Serves as the fallback when the local regex parse fails and Whisper
   * accuracy is needed. Every AI call stays server-side, so no OpenAI key lives on the device.
   */
  transcribeAudio: {
    method: 'POST',
    path: '/shopkeeper/voice/transcribe',
    body: z.object({
      audioBase64: z.string(),
      // 'en' | 'ha' | 'yo' | 'ig'
      language: z.string().optional(),
      // Comma-separated shop product names for the Whisper prompt.
      productHints: z.string().optional(),
    }),
    responses: {
      200: voiceTranscribeResponseSchema,
      ...sharedErrorResponses,
      422: errorResponseSchema,
    },
    summary: 'Transcribe audio via Whisper and parse intent — single round trip, AI stays server-side',
  },

  /**
   * Called once ambient listening ends, with the full rolling transcript. The LLM extracts every
   * concrete shop instruction (sales, stock, credit, restock) and ignores small talk.
   * Intent shapes are the same as for /voice/parse.
   */
  extractAmbientIntents: {
    method: 'POST',
    path: '/shopkeeper/voice/extract-ambient',
    body: transcriptRequestBody,
    responses: {
      200: voiceAmbientExtractResponseSchema,
      ...sharedErrorResponses,
    },
    summary:
      'Extract structured transaction intents from a long conversation transcript (ambient listening hand-off)',
  },
});