/**
 * CDP Edge — ML Clustering (Fase 1)
 * Handlers das rotas /api/segmentation/*
 */

import { tryParseJson } from '../utils.js';
import { Env } from '../../types.js';

// ── Tipos ───────────────────────────────────────────────────────────────────────
interface KmeansResult {
  assignments: number[];
  centroids: number[][];
}

interface ClusterStats {
  c: number;
  size: number;
  pct: number;
  avgLtv: number;
  avgEng: number;
  avgDays: number;
  topSource: string;
  topState: string;
  topIntent: string;
}

interface Cluster {
  cluster_id: number;
  name: string;
  size: number;
  percentage: number;
  action_recommendation: string;
  characteristics: {
    avg_ltv_class: number;
    avg_engagement_score: number;
    avg_intention_level: number;
    avg_days_since_lead: number;
    dominant_countries: string[];
    dominant_states: string[];
    dominant_utm_sources: string[];
    top_features: string[];
  };
}

interface ClusterInfo {
  cluster_id: number;
  name: string;
  action: string;
}

// ── Helpers K-means vetorial ──────────────────────────────────────────────────

function _cosDist(a: number[], b: number[]): number {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) { dot += a[i]*b[i]; na += a[i]*a[i]; nb += b[i]*b[i]; }
  return 1 - dot / (Math.sqrt(na) * Math.sqrt(nb) + 1e-10);
}

function _kmeansRun(vectors: number[][], k: number, maxIter = 25): KmeansResult {
  const n = vectors.length, dim = vectors[0].length;
  const centroids = [vectors[Math.floor(Math.random() * n)]];
  while (centroids.length < k) {
    const dists = vectors.map(v => Math.min(...centroids.map(c => _cosDist(v, c))));
    const sum = dists.reduce((a, b) => a + b, 0);
    let r = Math.random() * sum, cumul = 0;
    for (let i = 0; i < n; i++) { cumul += dists[i]; if (cumul >= r) { centroids.push(vectors[i]); break; } }
    if (centroids.length < k) centroids.push(vectors[Math.floor(Math.random() * n)]);
  }
  let assignments = new Array(n).fill(0);
  for (let iter = 0; iter < maxIter; iter++) {
    let changed = false;
    for (let i = 0; i < n; i++) {
      let best = 0, bestD = Infinity;
      for (let c = 0; c < k; c++) { const d = _cosDist(vectors[i], centroids[c]); if (d < bestD) { bestD = d; best = c; } }
      if (assignments[i] !== best) { assignments[i] = best; changed = true; }
    }
    if (!changed) break;
    for (let c = 0; c < k; c++) {
      const members = vectors.filter((_, i) => assignments[i] === c);
      if (!members.length) continue;
      for (let d = 0; d < dim; d++) centroids[c][d] = members.reduce((s, v) => s + v[d], 0) / members.length;
    }
  }
  return { assignments, centroids };
}

function _silhouette(vectors: number[][], assignments: number[], k: number): number {
  const n = vectors.length;
  let total = 0;
  for (let i = 0; i < n; i++) {
    const ci = assignments[i];
    const same = vectors.filter((_, j) => j !== i && assignments[j] === ci);
    const a = same.length ? same.reduce((s, v) => s + _cosDist(vectors[i], v), 0) / same.length : 0;
    let b = Infinity;
    for (let c = 0; c < k; c++) {
      if (c === ci) continue;
      const other = vectors.filter((_, j) => assignments[j] === c);
      if (other.length) b = Math.min(b, other.reduce((s, v) => s + _cosDist(vectors[i], v), 0) / other.length);
    }
    total += b === Infinity ? 0 : (b - a) / Math.max(a, b);
  }
  return Math.round((total / n) * 1000) / 1000;
}

function _buildLeadProfile(l: any): string {
  return [
    `LTV: ${l.predicted_ltv_class || 'desconhecido'}`,
    `engajamento: ${Math.round(l.engagement_score || 0)}`,
    `intenção: ${l.intention_level || 'desconhecida'}`,
    `origem: ${l.utm_source || 'direto'}`,
    `canal: ${l.utm_medium || 'desconhecido'}`,
    `país: ${l.country || 'BR'}`,
    `estado: ${l.state || ''}`,
    `hora: ${l.hour_of_day || 12}h`,
    (l.is_weekend ? 'fim-de-semana' : 'dia-útil'),
    `recência: ${l.days_since_lead || 0} dias`,
  ].filter(Boolean).join(', ');
}

// ── POST /api/segmentation/cluster ────────────────────────────────────────────
// Clustering real: embeddinggemma-300m → K-means vetorial → Granite para nomear
export async function handleSegmentationCluster(env: Env, request: Request, headers: Headers): Promise<Response> {
  if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });
  if (!env.AI) return new Response(JSON.stringify({ error: 'Workers AI não configurado' }), { status: 503, headers });

  const url            = new URL(request.url);
  const algorithm      = url.searchParams.get('algorithm') || 'kmeans';
  const nClusters      = Math.min(10, Math.max(2, parseInt(url.searchParams.get('n_clusters') || '5')));
  const clientVertical = url.searchParams.get('vertical') || 'general';
  const forceRecluster = url.searchParams.get('force') === 'true';

  if (!['kmeans', 'dbscan', 'hierarchical'].includes(algorithm)) {
    return new Response(JSON.stringify({ error: 'algorithm deve ser: kmeans, dbscan ou hierarchical', received: algorithm }), { status: 400, headers });
  }

  try {
    if (!forceRecluster) {
      const existing = await env.DB.prepare(`
        SELECT id, created_at, cluster_name FROM ml_segments
        WHERE clustering_algorithm = ? AND is_active = 1 AND client_vertical = ?
        ORDER BY created_at DESC LIMIT 1
      `).bind(algorithm, clientVertical).first();
      if (existing) {
        const ageDays = (Date.now() - new Date((existing as any).created_at).getTime()) / 864e5;
        if (ageDays < 7) {
          return new Response(JSON.stringify({
            success: true, message: 'Cluster existente ainda válido (< 7 dias). Use ?force=true para re-clustering.',
            cluster_id: (existing as any).id, cluster_name: (existing as any).cluster_name,
            age_days: Math.round(ageDays * 10) / 10, use_existing: true,
          }), { status: 200, headers });
        }
      }
    }

    const leadsRes = await env.DB.prepare(`
      SELECT id, predicted_ltv_class, engagement_score, intention_level,
             country, state, utm_source, utm_medium, bot_score,
             CAST(strftime('%H', created_at) AS INTEGER) AS hour_of_day,
             CAST(julianday('now') - julianday(created_at) AS INTEGER) AS days_since_lead,
             CASE WHEN strftime('%w', created_at) IN ('0','6') THEN 1 ELSE 0 END AS is_weekend
      FROM leads
      WHERE created_at >= datetime('now', '-6 months') AND (bot_score IS NULL OR bot_score < 2)
      ORDER BY RANDOM() LIMIT 2000
    `).all();

    const leads = leadsRes.results || [];
    if (leads.length < 50) {
      return new Response(JSON.stringify({ error: 'Dados insuficientes para clustering. Mínimo: 50 leads.', leads_found: leads.length, required: 50 }), { status: 400, headers });
    }

    const startTime = Date.now();
    const sample    = leads.slice(0, 100);
    const profiles  = sample.map(_buildLeadProfile);

    // Embeddings reais via embeddinggemma-300m
    const embRes = await env.AI.run('@cf/baai/bge-m3', { text: profiles });
    const vectors = (embRes as any).data as number[][];
    if (!vectors || vectors.length < nClusters) throw new Error(`embeddinggemma retornou ${vectors?.length ?? 0} vetores`);

    // K-means vetorial real
    const { assignments } = _kmeansRun(vectors, nClusters);
    const silhouetteScore = _silhouette(vectors, assignments, nClusters);

    // Agregação por cluster para nomear com Granite
    const clusterStats: (ClusterStats | null)[] = Array.from({ length: nClusters }, (_, c) => {
      const members = sample.filter((_, i) => assignments[i] === c);
      if (!members.length) return null;
      const ltvMap: Record<string, number> = { High: 1, Medium: 0.5, Low: 0 };
      const avgLtv  = members.reduce((s: number, l: any) => s + (ltvMap[l.predicted_ltv_class] ?? 0), 0) / members.length;
      const avgEng  = members.reduce((s: number, l: any) => s + (l.engagement_score || 0), 0) / members.length;
      const avgDays = members.reduce((s: number, l: any) => s + (l.days_since_lead || 0), 0) / members.length;
      const freq = (arr: string[]) => arr.length ? [...arr.reduce((m,s) => m.set(s,(m.get(s)||0)+1), new Map())].sort((a,b)=>b[1]-a[1])[0]?.[0] : null;
      return {
        c, size: members.length, pct: Math.round(members.length / sample.length * 100),
        avgLtv, avgEng, avgDays,
        topSource: freq(members.map((l: any) => l.utm_source).filter(Boolean)) || 'direto',
        topState:  freq(members.map((l: any) => l.state).filter(Boolean))      || 'BR',
        topIntent: freq(members.map((l: any) => l.intention_level).filter(Boolean)) || 'desconhecida',
      };
    }).filter(Boolean) as ClusterStats[];

    // Type guard function to filter null values
    function isNotNull<T>(value: T | null): value is T {
      return value !== null;
    }

    const validClusterStats = clusterStats.filter(isNotNull);

    // Granite apenas para nomear segmentos
    const namingPrompt =
`Você é especialista em segmentação de clientes. Dê um nome descritivo em português e uma recomendação de campanha para cada segmento. Retorne SOMENTE JSON válido:
{"segments":[{"cluster_id":0,"name":"...","action":"..."},...]}

${validClusterStats.map(s => `Cluster ${s.c}: LTV=${s.avgLtv.toFixed(2)}, engajamento=${s.avgEng.toFixed(0)}, intenção="${s.topIntent}", origem="${s.topSource}", estado="${s.topState}", recência=${s.avgDays.toFixed(0)} dias, tamanho=${s.size}`).join('\n')}`;

    const nameRes = await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', { messages: [{ role: 'user', content: namingPrompt }], max_tokens: 800 });
    let clusterNames: Record<number, ClusterInfo> = {};
    try {
      const m = ((nameRes as any)?.response || '').match(/\{[\s\S]*\}/);
      if (m) {
        const parsed = JSON.parse(m[0]);
        (parsed.segments || []).forEach((s: any) => {
          if (typeof s.cluster_id === 'number') {
            clusterNames[s.cluster_id] = {
              cluster_id: s.cluster_id,
              name: s.name || `Segmento ${s.cluster_id + 1}`,
              action: s.action || '',
            };
          }
        });
      }
    } catch { /* usa nomes fallback */ }

    const duration = Date.now() - startTime;

    const clusters: Cluster[] = validClusterStats.map(s => ({
      cluster_id: s.c,
      name: clusterNames[s.c]?.name || `Segmento ${s.c + 1}`,
      size: s.size, percentage: s.pct,
      action_recommendation: clusterNames[s.c]?.action || '',
      characteristics: {
        avg_ltv_class: s.avgLtv, avg_engagement_score: s.avgEng,
        avg_intention_level: s.avgLtv, avg_days_since_lead: s.avgDays,
        dominant_countries: ['BR'], dominant_states: [s.topState || 'BR'],
        dominant_utm_sources: [s.topSource || 'direto'], top_features: ['ltv', 'engagement', 'intention'],
      },
    }));

    await env.DB.prepare(`UPDATE ml_segments SET is_active = 0 WHERE clustering_algorithm = ? AND client_vertical = ? AND is_active = 1`).bind(algorithm, clientVertical).run();

    const now = new Date().toISOString();
    for (const cluster of clusters) {
      const ch = cluster.characteristics;
      await env.DB.prepare(`
        INSERT INTO ml_segments (
          cluster_id, cluster_name, clustering_algorithm, client_vertical, size, percentage,
          avg_ltv_class, avg_behavior_score, avg_engagement_score, avg_intention_level, avg_days_since_lead,
          dominant_countries, dominant_states, dominant_utm_sources, dominant_features,
          silhouette_score, action_recommendations, bid_recommendations, campaign_recommendations,
          is_active, created_at, updated_at
        ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?)
      `).bind(
        cluster.cluster_id, cluster.name, algorithm, clientVertical, cluster.size, cluster.percentage,
        ch.avg_ltv_class, ch.avg_engagement_score, ch.avg_engagement_score, ch.avg_intention_level, ch.avg_days_since_lead,
        JSON.stringify(ch.dominant_countries), JSON.stringify(ch.dominant_states),
        JSON.stringify(ch.dominant_utm_sources), JSON.stringify(ch.top_features),
        silhouetteScore,
        JSON.stringify([cluster.action_recommendation]), JSON.stringify([]), JSON.stringify([]),
        now, now,
      ).run();
    }

    try {
      await env.DB.prepare(`
        INSERT INTO ml_clustering_history (clustering_id, started_at, completed_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, parameters, results_summary)
        VALUES (0, ?, datetime('now'), ?, ?, ?, ?, ?, 'completed', ?, ?)
      `).bind(new Date(startTime).toISOString(), algorithm, leads.length, clusters.length, duration, Math.ceil(duration * 0.01),
        JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical, engine: 'embeddinggemma-300m+kmeans' }),
        JSON.stringify({ clusters: clusters.length, silhouette: silhouetteScore }),
      ).run();
    } catch (e: any) { console.error('[Segmentation] history log error:', e?.message || String(e)); }

    return new Response(JSON.stringify({
      success: true, algorithm, engine: 'embeddinggemma-300m + kmeans vetorial',
      n_clusters: clusters.length, client_vertical: clientVertical,
      leads_analyzed: leads.length, sample_embedded: sample.length,
      duration_ms: duration, silhouette_score: silhouetteScore,
      clusters, generated_at: now,
    }), { status: 200, headers });

  } catch (err: any) {
    console.error('[Segmentation] cluster error:', err?.message || String(err));
    try {
      if (env.DB) await env.DB.prepare(`
        INSERT INTO ml_clustering_history (clustering_id, started_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, error_message, parameters, results_summary)
        VALUES (0, datetime('now'), ?, 0, 0, 0, 0, 'failed', ?, ?, '{}')
      `).bind(algorithm, err?.message || String(err), JSON.stringify({ algorithm, n_clusters: nClusters })).run();
    } catch { /* não bloquear */ }
    return new Response(JSON.stringify({ error: 'Erro ao executar clustering', message: err?.message || String(err) }), { status: 500, headers });
  }
}

// ── GET /api/segmentation/list ────────────────────────────────────────────────
export async function handleSegmentationList(env: Env, request: Request, headers: Headers): Promise<Response> {
  if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });

  const url       = new URL(request.url);
  const algorithm = url.searchParams.get('algorithm') || null;
  const vertical  = url.searchParams.get('vertical')  || null;

  try {
    const conditions: string[] = ['is_active = 1'];
    const bindings: (string | number)[]   = [];
    if (algorithm) { conditions.push('clustering_algorithm = ?'); bindings.push(algorithm); }
    if (vertical)  { conditions.push('client_vertical = ?');      bindings.push(vertical); }

    const result   = await env.DB.prepare(`
      SELECT id, cluster_id, cluster_name, clustering_algorithm, client_vertical,
             size, percentage, avg_ltv_class, avg_behavior_score, avg_engagement_score,
             avg_intention_level, avg_days_since_lead, silhouette_score,
             dominant_countries, dominant_states, dominant_utm_sources, dominant_features,
             action_recommendations, bid_recommendations, campaign_recommendations,
             is_active, created_at, updated_at
      FROM ml_segments
      WHERE ${conditions.join(' AND ')}
      ORDER BY created_at DESC
      LIMIT 50
    `).bind(...bindings).all();

    const segments = (result.results || []).map((s: any) => ({
      ...s,
      dominant_countries:       tryParseJson(s.dominant_countries, []),
      dominant_states:          tryParseJson(s.dominant_states, []),
      dominant_utm_sources:     tryParseJson(s.dominant_utm_sources, []),
      dominant_features:        tryParseJson(s.dominant_features, []),
      action_recommendations:   tryParseJson(s.action_recommendations, []),
      bid_recommendations:      tryParseJson(s.bid_recommendations, []),
      campaign_recommendations: tryParseJson(s.campaign_recommendations, []),
    }));

    return new Response(JSON.stringify({ success: true, total: segments.length, segments }), { status: 200, headers });
  } catch (err: any) {
    console.error('[Segmentation] list error:', err?.message || String(err));
    return new Response(JSON.stringify({ error: err?.message || String(err) }), { status: 500, headers });
  }
}

// ── GET /api/segmentation/outliers ───────────────────────────────────────────
export async function handleSegmentationOutliers(env: Env, request: Request, headers: Headers): Promise<Response> {
  if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });

  const url   = new URL(request.url);
  const limit = Math.min(parseInt(url.searchParams.get('limit') || '50'), 200);
  const days  = parseInt(url.searchParams.get('days') || '30');

  try {
    const result = await env.DB.prepare(`
      SELECT msm.lead_id, msm.cluster_id, msm.confidence, msm.is_outlier, msm.outlier_reason, msm.assigned_at,
             l.email, l.phone, l.country, l.state, l.city, l.utm_source, l.bot_score, l.engagement_score, l.intention_level, l.created_at AS lead_created_at
      FROM ml_segment_members msm
      LEFT JOIN leads l ON CAST(msm.lead_id AS INTEGER) = l.id
      WHERE msm.is_outlier = 1 AND msm.assigned_at >= datetime('now', '-' || ? || ' days')
      ORDER BY msm.assigned_at DESC
      LIMIT ?
    `).bind(days, limit).all();

    return new Response(JSON.stringify({ success: true, total: (result.results || []).length, period_days: days, outliers: result.results || [] }), { status: 200, headers });
  } catch (err: any) {
    console.error('[Segmentation] outliers error:', err?.message || String(err));
    return new Response(JSON.stringify({ error: err?.message || String(err) }), { status: 500, headers });
  }
}

// ── PUT /api/segmentation/update ─────────────────────────────────────────────
export async function handleSegmentationUpdate(env: Env, request: Request, headers: Headers): Promise<Response> {
  if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });

  let body: any;
  try { body = await request.json(); }
  catch { return new Response(JSON.stringify({ error: 'JSON inválido no body da requisição' }), { status: 400, headers }); }

  const { cluster_id, action_recommendations, bid_recommendations, campaign_recommendations } = body;
  if (cluster_id === undefined || cluster_id === null) {
    return new Response(JSON.stringify({ error: 'cluster_id é obrigatório' }), { status: 400, headers });
  }

  try {
    const sets: string[] = [];
    const bindings: (string | number)[] = [];
    if (action_recommendations   !== undefined) { sets.push('action_recommendations = ?');   bindings.push(JSON.stringify(action_recommendations)); }
    if (bid_recommendations      !== undefined) { sets.push('bid_recommendations = ?');      bindings.push(JSON.stringify(bid_recommendations)); }
    if (campaign_recommendations !== undefined) { sets.push('campaign_recommendations = ?'); bindings.push(JSON.stringify(campaign_recommendations)); }

    if (sets.length === 0) {
      return new Response(JSON.stringify({ error: 'Nenhum campo válido para atualizar (action_recommendations, bid_recommendations, campaign_recommendations)' }), { status: 400, headers });
    }

    sets.push("updated_at = datetime('now')");
    bindings.push(cluster_id);

    await env.DB.prepare(`UPDATE ml_segments SET ${sets.join(', ')} WHERE id = ?`).bind(...bindings).run();
    return new Response(JSON.stringify({ success: true, cluster_id, fields_updated: sets.length - 1 }), { status: 200, headers });
  } catch (err: any) {
    console.error('[Segmentation] update error:', err?.message || String(err));
    return new Response(JSON.stringify({ error: err?.message || String(err) }), { status: 500, headers });
  }
}