/**
 * CDP Edge — Logistic Regression (pure TS, no external dependencies)
 * Trains a conversion-prediction model from real D1 data.
 *
 * Features used (all normalized to 0-1):
 *   utm_source, engagement_score, intention_level, recency,
 *   has_email, has_phone, is_br, hour_normalized
 */
import type { Env } from '../../types.js';
import type { D1Database, KVNamespace } from '@cloudflare/workers-types';

// ── Types ─────────────────────────────────────────────────────────────────────

/** One training example: a feature vector and its 0/1 label. */
export interface DatasetRow {
  features: number[];
  label: number;
}

/** Trained model plus the metadata computed by `trainLogisticRegression`. */
export interface LogisticModel {
  bias: number;
  weights: number[];
  accuracy: number;
  positiveRate: number;
  sampleSize: number;
  threshold: number;
  featureNames: string[];
  trainedAt: string;
}

/** Named view of the feature vector; the order matches `extractFeatures`. */
export interface ExtractedFeatures {
  utm_score: number;
  engagement: number;
  intention: number;
  recency: number;
  has_email: number;
  has_phone: number;
  is_br: number;
  hour: number;
}

// ── Feature Engineering ───────────────────────────────────────────────────────

const UTM_SCORES: Record<string, number> = {
  facebook: 0.90,
  instagram: 0.90,
  meta: 0.90,
  google: 0.82,
  youtube: 0.82,
  tiktok: 0.75,
  email: 0.68,
  sms: 0.68,
  organic: 0.30,
  direct: 0.20,
};

const INTENTION_SCORES: Record<string, number> = {
  comprador: 1.00,
  high_intent: 1.00,
  interessado: 0.60,
  nurture: 0.30,
  curioso: 0.15,
};

/** Feature names, in the same order as the vector built by `extractFeatures`. */
const FEATURE_NAMES = [
  'utm_score',
  'engagement',
  'intention',
  'recency',
  'has_email',
  'has_phone',
  'is_br',
  'hour',
];

/**
 * Looks up `key` among the table's OWN properties only.
 * A plain `table[key]` would also resolve inherited names such as
 * "constructor" or "toString", which yields a non-numeric feature.
 */
function lookupScore(table: Record<string, number>, key: string): number | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** Coerces `value` to a finite number; null/undefined/''/NaN/±Infinity give `fallback`. */
function toFiniteNumber(value: unknown, fallback: number): number {
  if (value === null || value === undefined || value === '') return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/** Clamps `value` into the closed interval [0, 1]. */
function clamp01(value: number): number {
  return Math.max(0, Math.min(1, value));
}

/**
 * Builds the 8-element feature vector for one row.
 *
 * @param row Object read for `utm_source`, `intention_level`, `days_since_lead`,
 *   `engagement_score`, `has_email`, `has_phone`, `is_br` and `hour`.
 * @returns Features in `FEATURE_NAMES` order, each clamped to [0, 1].
 */
export function extractFeatures(row: any): number[] {
  const src = (row.utm_source || '').toLowerCase().trim();
  const intention = (row.intention_level || '').toLowerCase().trim();
  const daysSince = toFiniteNumber(row.days_since_lead, 0);
  const engagement = toFiniteNumber(row.engagement_score, 0);
  // A missing hour defaults to 12; hour 0 (midnight) is a real value and is kept.
  const hour = toFiniteNumber(row.hour, 12);

  return [
    lookupScore(UTM_SCORES, src) ?? (src ? 0.10 : 0.05), // utm_score (unknown source 0.10, no source 0.05)
    clamp01(engagement / 5),                              // engagement (0-5 → 0-1)
    lookupScore(INTENTION_SCORES, intention) ?? 0,        // intention
    clamp01(1 - daysSince / 90),                          // recency (0 = 90+ days ago, 1 = today)
    row.has_email ? 1 : 0,                                // has_email
    row.has_phone ? 1 : 0,                                // has_phone
    row.is_br ? 1 : 0,                                    // is_br
    clamp01(hour / 23),                                   // hour normalized
  ];
}

// ── Sigmoid ───────────────────────────────────────────────────────────────────

/** Logistic function; saturates to exactly 0 or 1 when |z| > 20. */
function sigmoid(z: number): number {
  if (z > 20) return 1;
  if (z < -20) return 0;
  return 1 / (1 + Math.exp(-z));
}

/** Dot product over `features`; a missing weight counts as 0. */
function dot(weights: number[], features: number[]): number {
  return features.reduce((sum, f, i) => sum + (weights[i] || 0) * f, 0);
}

// ── Training ──────────────────────────────────────────────────────────────────

/** Minimum number of rows required to train. */
const MIN_TRAINING_SAMPLES = 50;
/** Minimum share of positive labels required to train. */
const MIN_POSITIVE_RATE = 0.03;

/**
 * Trains a logistic regression with batch gradient descent and L2 regularization.
 *
 * @param dataset Training rows; the feature count is taken from the first row.
 * @param opts.iterations Gradient-descent passes (default 200).
 * @param opts.learningRate Step size (default 0.1).
 * @param opts.lambda L2 strength (default 0.01). Pass 0 to disable regularization.
 * @returns The trained model, or `null` when there are fewer than 50 rows or
 *   fewer than 3% positive labels.
 */
export function trainLogisticRegression(
  dataset: DatasetRow[],
  opts: { iterations?: number; learningRate?: number; lambda?: number } = {},
): LogisticModel | null {
  if (!dataset || dataset.length < MIN_TRAINING_SAMPLES) {
    return null; // not enough data
  }

  // 0 is not a meaningful value for these two, so `||` keeps the default.
  const iterations = opts.iterations || 200;
  const learningRate = opts.learningRate || 0.1;
  // `??` rather than `||`: lambda = 0 is a legitimate request for no regularization.
  const lambda = opts.lambda ?? 0.01;

  const nFeatures = dataset[0].features.length;
  const n = dataset.length;

  const positives = dataset.filter((d) => d.label === 1).length;
  const positiveRate = positives / n;

  // Below 3% positives there is not enough purchase data to train on.
  if (positiveRate < MIN_POSITIVE_RATE) return null;

  let bias = 0;
  const weights: number[] = new Array(nFeatures).fill(0);

  for (let iter = 0; iter < iterations; iter++) {
    let dBias = 0;
    const dWeights: number[] = new Array(nFeatures).fill(0);

    for (const { features, label } of dataset) {
      const error = sigmoid(dot(weights, features) + bias) - label;
      dBias += error;
      for (let j = 0; j < nFeatures; j++) {
        dWeights[j] += error * features[j];
      }
    }

    bias -= learningRate * (dBias / n);
    for (let j = 0; j < nFeatures; j++) {
      // L2: penalize large weights to limit overfitting (the bias is not penalized).
      weights[j] -= learningRate * (dWeights[j] / n + lambda * weights[j]);
    }
  }

  // Accuracy is measured on the training set itself.
  // For imbalanced data the decision threshold is lowered, but never below 0.3.
  const threshold = positiveRate > 0.3 ? 0.5 : Math.max(0.3, positiveRate * 1.5);
  let correct = 0;
  for (const { features, label } of dataset) {
    const predicted = sigmoid(dot(weights, features) + bias) >= threshold ? 1 : 0;
    if (predicted === label) correct++;
  }

  return {
    bias,
    weights,
    accuracy: correct / n,
    positiveRate,
    sampleSize: n,
    threshold,
    featureNames: [...FEATURE_NAMES],
    trainedAt: new Date().toISOString(),
  };
}

// ── Inference ─────────────────────────────────────────────────────────────────

/**
 * Predicts a conversion score (0-100) from trained weights.
 *
 * @returns The sigmoid probability multiplied by 100 and rounded to an integer.
 */
export function predictWithWeights(model: LogisticModel, features: number[]): number {
  const z = dot(model.weights, features) + model.bias;
  return Math.round(sigmoid(z) * 100);
}

// ── Persistence helpers ───────────────────────────────────────────────────────

export const LTV_WEIGHTS_KV_KEY = 'ltv_weights_active';

/** KV cache lifetime for the active model: 7 days, in seconds. */
const LTV_WEIGHTS_KV_TTL_SECONDS = 604800;

/** Reduces an unknown thrown value to loggable fields. */
function describeError(err: unknown): { error: string; stack?: string } {
  if (err instanceof Error) {
    return { error: err.message || String(err), stack: err.stack };
  }
  return { error: String(err) };
}

/**
 * Runtime check that a parsed value can be used for inference: a non-empty
 * array of finite weights and a finite bias. JSON turns NaN into null, so a
 * model that was persisted with NaN weights is rejected here.
 */
function isUsableModel(value: unknown): value is LogisticModel {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Partial<LogisticModel>;
  return (
    Array.isArray(candidate.weights) &&
    candidate.weights.length > 0 &&
    candidate.weights.every((w) => typeof w === 'number' && Number.isFinite(w)) &&
    typeof candidate.bias === 'number' &&
    Number.isFinite(candidate.bias)
  );
}

/**
 * Loads the active model: first from KV (`env.GEO_CACHE`), then from D1 (`env.DB`).
 *
 * @returns The model, or `null` when neither store holds a usable one or the
 *   D1 lookup fails. Failures are logged and never thrown.
 */
export async function loadActiveWeights(env: Env): Promise<LogisticModel | null> {
  // 1. Try KV (cache ~7 days)
  if (env.GEO_CACHE) {
    try {
      const cached: unknown = await env.GEO_CACHE.get(LTV_WEIGHTS_KV_KEY, 'json');
      if (isUsableModel(cached)) return cached;
    } catch (err: unknown) {
      console.error('[Logistic] Error fetching LTV weights from KV cache:', {
        key: LTV_WEIGHTS_KV_KEY,
        ...describeError(err),
      });
    }
  }

  // 2. Fallback: D1
  if (!env.DB) return null;
  try {
    const row = await env.DB.prepare(
      `SELECT weights_json FROM ltv_model_weights WHERE is_active = 1 ORDER BY trained_at DESC LIMIT 1`,
    ).first<{ weights_json: string | null }>();
    if (!row || !row.weights_json) return null;

    const parsed: unknown = JSON.parse(row.weights_json);
    if (!isUsableModel(parsed)) {
      console.error('[Logistic] Active LTV weights row in D1 is not a usable model');
      return null;
    }

    // Populate KV for subsequent requests (best-effort; a failure is only logged).
    // NOTE(review): this put is not awaited — confirm the runtime lets it finish
    // after the caller responds (e.g. via ctx.waitUntil at the call site).
    if (env.GEO_CACHE) {
      env.GEO_CACHE.put(LTV_WEIGHTS_KV_KEY, JSON.stringify(parsed), {
        expirationTtl: LTV_WEIGHTS_KV_TTL_SECONDS,
      }).catch((err: unknown) => {
        console.error('[Logistic] Error caching LTV weights in KV:', {
          key: LTV_WEIGHTS_KV_KEY,
          ...describeError(err),
        });
      });
    }
    return parsed;
  } catch (err: unknown) {
    console.error('[Logistic] Error loading LTV weights from D1:', describeError(err));
    return null;
  }
}

/**
 * Persists `model` as the single active row in `ltv_model_weights`.
 *
 * The deactivate and insert statements are sent in one `DB.batch` call so that
 * a failed insert does not leave the table with no active model.
 * This function does not touch the KV entry under `LTV_WEIGHTS_KV_KEY`.
 */
export async function saveWeights(DB: D1Database, model: LogisticModel): Promise<void> {
  if (!DB || !model) return;

  const now = new Date().toISOString();

  // Deactivate the previous model
  const deactivatePrevious = DB.prepare(
    `UPDATE ltv_model_weights SET is_active = 0 WHERE is_active = 1`,
  );

  // Insert the new one as active
  const insertActive = DB.prepare(`
    INSERT INTO ltv_model_weights (trained_at, is_active, sample_size, positive_rate, accuracy, weights_json)
    VALUES (?, 1, ?, ?, ?, ?)
  `).bind(
    now,
    model.sampleSize,
    model.positiveRate,
    model.accuracy,
    JSON.stringify(model),
  );

  await DB.batch([deactivatePrevious, insertActive]);
}