// src/formatters/html-report-formatter-enhanced.ts // Enhanced HTML report with conversation timeline, concern tracking, and metric evolution import fs from 'fs'; import path from 'path'; import { AgentResult } from '../agents/agent.interface'; import { EvaluationHistoryEntry } from '../types/output.types'; import { SEVEN_PILLARS, getAgentWeight, calculateWeightedAverage, PillarName, } from '../constants/agent-weights.constants'; interface AgentEvaluation { agentName: string; agentRole?: string; // Technical key for weight lookup (e.g., "business-analyst") icon: string; color: string; round: number; summary: string; details: string; metrics?: Record; concernsRaised: string[]; referencesTo: string[]; } interface MetricEvolution { metric: string; rounds: Map; // Dynamic map of round number to value changed: boolean; } /** * Map agent role descriptions to short display names */ const AGENT_NAME_MAP: Record = { 'Evaluates business value, functional impact, and estimates ideal implementation time': 'Business Analyst', 'Evaluates test automation quality, testing frameworks, and automated test infrastructure': 'SDET', 'Explains implementation decisions, trade-offs, and estimates actual time spent': 'Developer Author', 'Evaluates architecture, design patterns, code complexity, and technical debt': 'Senior Architect', 'Reviews code quality, suggests improvements, and evaluates implementation details': 'Developer Reviewer', }; /** * Short descriptions for agent cards (displayed below agent names) */ const AGENT_DESCRIPTIONS: Record = { 'Business Analyst': 'Evaluates business value, functional impact, and ideal time estimates', SDET: 'Evaluates test automation quality, testing frameworks, and infrastructure maturity', 'Developer Author': 'Evaluates actual time spent, implementation approach, and development effort', 'Senior Architect': 'Evaluates code complexity, architecture design, and technical debt', 'Developer Reviewer': 'Evaluates code quality, best practices, and 
maintainability', }; /** * Metric metadata with reference values and formatting information */ const METRIC_METADATA: Record< string, { unit: string; scale: string; description: string; tooltip: string; format: (value: number) => string; } > = { functionalImpact: { unit: '/ 10', scale: 'Higher is better', description: 'Functional Impact', tooltip: 'How much value does this change add? (1-10 scale)', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)} / 10`, }, idealTimeHours: { unit: 'hours', scale: 'Ideal estimate', description: 'Ideal Time Estimate', tooltip: 'How many hours should this task ideally take?', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)}h`, }, testCoverage: { unit: '/ 10', scale: 'Higher is better', description: 'Test Coverage', tooltip: 'How well is the code covered by tests? (1-10 scale)', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)} / 10`, }, codeQuality: { unit: '/ 10', scale: 'Higher is better', description: 'Code Quality', tooltip: 'How well-written and maintainable is the code? (1-10 scale)', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)} / 10`, }, codeComplexity: { unit: '/ 10', scale: 'Lower is better', description: 'Code Complexity', tooltip: 'How complex is the implementation? (1-10, lower is simpler)', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)} / 10`, }, actualTimeHours: { unit: 'hours', scale: 'Actual effort', description: 'Actual Time Spent', tooltip: 'How many hours were actually spent on this task?', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)}h`, }, technicalDebtHours: { unit: 'hours', scale: 'Lower is better', description: 'Technical Debt', tooltip: 'How many hours of future work does this introduce? 
(lower is better)', format: (v: number) => `${(Math.round(v * 10) / 10).toFixed(1)}h`, }, };

/**
 * Resolve the short display name of the agent that produced a result.
 *
 * When the result carries an `agentName`, it is translated through
 * `AGENT_NAME_MAP` (long role description -> short name) and returned as-is
 * when the map has no entry for it. Otherwise the lower-cased summary and
 * details are scanned for role-specific keywords; the first rule that matches
 * wins, so the order of the rules below is significant.
 *
 * @param result - Agent result to classify.
 * @param idx - Zero-based position of the result, used only for the
 *   `Agent N` fallback label when nothing else matches.
 * @returns The display name, or `Agent ${idx + 1}` when detection fails.
 */
function detectAgentName(result: AgentResult, idx: number): string {
  // Prefer the name supplied with the result; content sniffing is less reliable.
  if (result.agentName) {
    return AGENT_NAME_MAP[result.agentName] || result.agentName;
  }

  const summaryText = result.summary?.toLowerCase() || '';
  const detailsText = result.details?.toLowerCase() || '';
  const haystack = summaryText + ' ' + detailsText;

  // Ordered keyword rules: earlier entries take precedence over later ones.
  const keywordRules: Array<[string, string[]]> = [
    ['Business Analyst', ['business analyst', 'functional impact', 'ideal time']],
    ['SDET', ['sdet', 'test automation', 'testing framework', 'test infrastructure']],
    ['Developer Author', ['developer author', 'actual time', 'spent about']],
    ['Senior Architect', ['senior architect', 'code complexity', 'technical debt']],
    ['Developer Reviewer', ['developer reviewer', 'code quality', 'refactoring']],
  ];

  for (const [displayName, keywords] of keywordRules) {
    if (keywords.some((keyword) => haystack.includes(keyword))) {
      return displayName;
    }
  }

  return `Agent ${idx + 1}`;
}

/**
 * Extract up to three concern-like sentence fragments from free-form details.
 *
 * A fragment starts at a trigger word and runs to the next `.`, `!` or `?`.
 * Trigger words are anchored on word boundaries so that, for example, the
 * "no" inside "know" or "notification" and the "issue" inside "reissue" are
 * not treated as concerns. "not" is listed explicitly so phrases such as
 * "not covered by tests" are still picked up.
 *
 * A fragment is skipped when an already collected concern contains it, which
 * removes exact duplicates as well as the shorter of two overlapping matches
 * produced by the two patterns for the same sentence.
 *
 * @param details - Free-form agent details text.
 * @returns At most three trimmed concern fragments, in discovery order
 *   (all matches of the first pattern, then those of the second).
 */
function extractConcerns(details: string): string[] {
  const concernPatterns = [
    /\b(?:concern|worried|issue|problem|risk)[^.!?]*[.!?]/gi,
    /\b(?:missing|lacking|no|not)\b[^.!?]*(?:test|coverage|validation)[^.!?]*[.!?]/gi,
  ];

  const concerns: string[] = [];
  for (const pattern of concernPatterns) {
    // String.prototype.match with a global regex returns null when nothing matches.
    for (const rawMatch of details.match(pattern) ?? []) {
      const candidate = rawMatch.trim();
      const alreadyCovered = concerns.some((existing) => existing.includes(candidate));
      if (!alreadyCovered) {
        concerns.push(candidate);
      }
    }
  }

  return concerns.slice(0, 3); // Limit to top 3 concerns
}

/** * Extract references to other agents */ function 
extractReferences(summary: string, details: string): string[] {
  // Lower-case the combined text once instead of once per agent name.
  const haystack = (summary + ' ' + details).toLowerCase();
  const agentNames = [
    'Business Analyst',
    'SDET',
    'Developer Author',
    'Senior Architect',
    'Developer Reviewer',
  ];

  // The candidate names are already distinct, so the result needs no extra de-duplication.
  return agentNames.filter((name) => haystack.includes(name.toLowerCase()));
}

/**
 * Group results by agent and assign each one a round number.
 *
 * Results are attributed to an agent via `detectAgentName`. The round of a
 * result is the 1-based count of results seen so far for the same agent, so
 * the n-th response of an agent is its round n regardless of how the
 * responses are interleaved in `results`.
 *
 * Logs the detected agents before grouping and a per-agent summary afterwards.
 *
 * @param results - Agent results in the order they were produced.
 * @returns Map from agent display name to its evaluations in round order.
 */
function groupResultsByAgent(results: AgentResult[]): Map<string, AgentEvaluation[]> {
  const iconMap: Record<string, string> = {
    'Business Analyst': '👔',
    SDET: '🤖',
    'Developer Author': '👨‍💻',
    'Senior Architect': '🏛️',
    'Developer Reviewer': '💻',
  };
  const colorMap: Record<string, string> = {
    'Business Analyst': 'info',
    SDET: 'warning',
    'Developer Author': 'success',
    'Senior Architect': 'primary',
    'Developer Reviewer': 'secondary',
  };

  // Detect each result's agent exactly once and reuse the name below.
  const namedResults = results.map((result, idx) => ({
    result,
    agentName: detectAgentName(result, idx),
  }));

  const uniqueAgents = Array.from(new Set(namedResults.map((entry) => entry.agentName)));
  console.log(`Detected ${uniqueAgents.length} unique agents:`, uniqueAgents);

  const grouped = new Map<string, AgentEvaluation[]>();
  for (const { result, agentName } of namedResults) {
    let agentEvaluations = grouped.get(agentName);
    if (!agentEvaluations) {
      agentEvaluations = [];
      grouped.set(agentName, agentEvaluations);
    }

    // Round = how many times this agent has been seen, including this result.
    const round = agentEvaluations.length + 1;

    // Use structured concerns from the agent if available, fall back to regex extraction.
    const concerns =
      result.concerns && result.concerns.length > 0
        ? result.concerns
        : extractConcerns(result.details || '');
    const references = extractReferences(result.summary || '', result.details || '');

    agentEvaluations.push({
      agentName,
      agentRole: result.agentRole, // Preserve technical key for weight lookup
      icon: iconMap[agentName] || '🤖',
      color: colorMap[agentName] || 'secondary',
      round,
      summary: result.summary || 'No summary provided',
      details: result.details || 'No details provided',
      metrics: result.metrics,
      concernsRaised: concerns,
      // An agent mentioning its own role is not a reference to another agent.
      referencesTo: references.filter((ref) => ref !== agentName),
    });
  }

  // Log summary
  grouped.forEach((evals, agent) => {
    console.log(
      `${agent}: ${evals.length} responses (rounds: ${evals.map((e) => e.round).join(', ')})`
    );
  });

  return grouped;
}

/**
 * Calculate metric evolution across rounds (dynamic for ANY number of rounds).
 *
 * For every pillar in `SEVEN_PILLARS` and every round, the scores of the
 * agents that reported that pillar are combined with
 * `calculateWeightedAverage`. A round with no contributor for a pillar is
 * left out of that pillar's `rounds` map. `changed` becomes true when any
 * later round differs from the first recorded round by more than 0.01.
 *
 * Contributors are identified by `agentRole` when present, falling back to
 * the display name, which is the same key `calculateConsensusValues` and
 * `buildMetricsTable` hand to `calculateWeightedAverage`.
 * NOTE(review): `calculateWeightedAverage` is defined outside this file;
 * it is assumed to look weights up by the `agentName` field it is given —
 * confirm against agent-weights.constants.
 *
 * @param groupedResults - Evaluations grouped by agent display name.
 * @returns One evolution record per pillar, in `SEVEN_PILLARS` order.
 */
function calculateMetricEvolution(
  groupedResults: Map<string, AgentEvaluation[]>
): MetricEvolution[] {
  // Group all evaluations by round
  const evaluationsByRound = new Map<number, AgentEvaluation[]>();
  groupedResults.forEach((evaluations) => {
    for (const evaluation of evaluations) {
      const sameRound = evaluationsByRound.get(evaluation.round);
      if (sameRound) {
        sameRound.push(evaluation);
      } else {
        evaluationsByRound.set(evaluation.round, [evaluation]);
      }
    }
  });
  const orderedRounds = Array.from(evaluationsByRound.keys()).sort((a, b) => a - b);

  return SEVEN_PILLARS.map((metric: PillarName) => {
    const metricEvolution: MetricEvolution = {
      metric,
      rounds: new Map<number, number>(),
      changed: false,
    };

    // Consensus of the earliest round that has data; later rounds are compared against it.
    let firstValue: number | undefined;

    for (const round of orderedRounds) {
      const contributors: Array<{ agentName: string; score: number | null }> = [];
      for (const evaluation of evaluationsByRound.get(round) ?? []) {
        if (evaluation.metrics && metric in evaluation.metrics) {
          contributors.push({
            // Technical role key first, display name as fallback.
            agentName: evaluation.agentRole || evaluation.agentName,
            score: evaluation.metrics[metric],
          });
        }
      }
      if (contributors.length === 0) {
        continue;
      }

      // `||` (not `??`) on purpose: a null or NaN consensus is recorded as 0.
      const consensusScore = calculateWeightedAverage(contributors, metric) || 0;
      metricEvolution.rounds.set(round, consensusScore);

      if (firstValue === undefined) {
        firstValue = consensusScore;
      } else if (Math.abs(firstValue - consensusScore) > 0.01) {
        metricEvolution.changed = true;
      }
    }

    return metricEvolution;
  });
}

/** * Calculate consensus values with contributor tracking */ function calculateConsensusValues(groupedResults: Map): Map< string, { value: number | null; contributors: Array<{ name: string; score: number | null; weight: number }>; } > { // Collect metrics const allMetrics = new Set(); groupedResults.forEach((evaluations) => { evaluations.forEach((evaluation) => { if (evaluation.metrics) { Object.keys(evaluation.metrics) .filter((metric) => SEVEN_PILLARS.includes(metric as PillarName)) .forEach((metric) => allMetrics.add(metric)); } }); }); // Build agent-metric matrix and agentName -> agentRole mapping const agentMetrics = new Map>(); const agentRoleMap = new Map(); groupedResults.forEach((evaluations, agentName) => { const latestEval = evaluations[evaluations.length - 1]; if (latestEval.metrics) { const filteredMetrics = Object.fromEntries( Object.entries(latestEval.metrics).filter( ([metric, value]) => SEVEN_PILLARS.includes(metric as PillarName) && (typeof 
value === 'number' || value === null) ) ); agentMetrics.set(agentName, new Map(Object.entries(filteredMetrics))); } if (latestEval.agentRole) { agentRoleMap.set(agentName, latestEval.agentRole); } }); // Calculate weighted averages for final values const finalValues = new Map< string, { value: number | null; contributors: Array<{ name: string; score: number | null; weight: number }>; } >(); allMetrics.forEach((metric) => { const contributors: Array<{ name: string; score: number | null; weight: number }> = []; agentMetrics.forEach((metrics, agentName) => { if (metrics.has(metric)) { const score = metrics.get(metric)!; // Can be number or null const agentKey = agentRoleMap.get(agentName) || agentName; const weight = getAgentWeight(agentKey, metric as PillarName); contributors.push({ name: agentName, score, weight }); } }); if (contributors.length > 0) { const weightedAvg = calculateWeightedAverage( contributors.map((c) => ({ agentName: agentRoleMap.get(c.name) || c.name, score: c.score, })), metric as PillarName ); finalValues.set(metric, { value: weightedAvg, contributors }); } }); return finalValues; } /** * Build comprehensive metrics table showing all agent contributions */ function buildMetricsTable(groupedResults: Map): string { // Collect metrics, filtering to ONLY the 7 pillars const allMetrics = new Set(); groupedResults.forEach((evaluations) => { evaluations.forEach((evaluation) => { if (evaluation.metrics) { Object.keys(evaluation.metrics) .filter((metric) => SEVEN_PILLARS.includes(metric as PillarName)) .forEach((metric) => allMetrics.add(metric)); } }); }); // Build agent-metric matrix and agentName -> agentRole mapping const agentMetrics = new Map>(); const agentRoleMap = new Map(); // Maps display name to technical key groupedResults.forEach((evaluations, agentName) => { const latestEval = evaluations[evaluations.length - 1]; // Use latest response if (latestEval.metrics) { // Filter metrics to ONLY the 7 pillars, allow both numbers and null const 
filteredMetrics = Object.fromEntries( Object.entries(latestEval.metrics).filter( ([metric, value]) => SEVEN_PILLARS.includes(metric as PillarName) && (typeof value === 'number' || value === null) ) ); agentMetrics.set(agentName, new Map(Object.entries(filteredMetrics))); } // Store agentRole for weight lookup if (latestEval.agentRole) { agentRoleMap.set(agentName, latestEval.agentRole); } }); // Calculate weighted averages for final values (using weights from constants) const finalValues = new Map< string, { value: number | null; contributors: Array<{ name: string; score: number | null; weight: number }>; } >(); allMetrics.forEach((metric) => { const contributors: Array<{ name: string; score: number | null; weight: number }> = []; agentMetrics.forEach((metrics, agentName) => { if (metrics.has(metric)) { const score = metrics.get(metric)!; // Can be number or null // Use agentRole (technical key) for weight lookup, fallback to agentName const agentKey = agentRoleMap.get(agentName) || agentName; const weight = getAgentWeight(agentKey, metric as PillarName); contributors.push({ name: agentName, score, weight }); } }); if (contributors.length > 0) { // Calculate weighted average using agentRole (technical keys) const weightedAvg = calculateWeightedAverage( contributors.map((c) => ({ agentName: agentRoleMap.get(c.name) || c.name, // Use agentRole for weight lookup score: c.score, })), metric as PillarName ); finalValues.set(metric, { value: weightedAvg, contributors }); } }); // Build HTML table const metricLabels = Array.from(allMetrics).map((m) => m .replace(/([A-Z])/g, ' $1') .replace(/^./, (str) => str.toUpperCase()) .trim() ); return `
📊 Comprehensive Metrics Analysis
${Array.from(agentMetrics.keys()) .map((agent) => ``) .join('')} ${Array.from(allMetrics) .map((metric, idx) => { const final = finalValues.get(metric); return ` ${Array.from(agentMetrics.keys()) .map((agent) => { const value = agentMetrics.get(agent)?.get(metric); // Use agentRole for weight lookup const agentKey = agentRoleMap.get(agent) || agent; const weight = getAgentWeight(agentKey, metric as PillarName); // Ensure weight is a number before calling toFixed if (typeof weight !== 'number') { return ``; } const weightPercent = (weight * 100).toFixed(1); const isPrimary = weight >= 0.4; // Primary expertise threshold const badgeClass = isPrimary ? 'badge bg-warning text-dark' : 'badge bg-secondary'; // Ensure value is a valid number if (value !== undefined && typeof value === 'number' && isFinite(value)) { return ``; } return ``; }) .join('')} `; }) .join('')}
Metric / Pillar${agent}Final Agreed
${metricLabels[idx]}-
${(Number(value) || 0).toFixed(2)}
${weightPercent}%
- ${final && typeof final.value === 'number' && isFinite(final.value) ? `${(Number(final.value) || 0).toFixed(2)}
(weighted avg from ${final.contributors.length} agent${final.contributors.length > 1 ? 's' : ''})` : '-
(no data)'}
📊 Weighted Scoring System:
Each agent evaluates all 7 pillars, but their expertise determines the weight of their opinion:
  • 40-45% = PRIMARY expertise (agent's specialization)
  • 15-21% = Secondary opinion (related expertise)
  • 8-14% = Tertiary opinion (general perspective)
Final Agreed: Calculated using weighted average where expert opinions carry more weight. Formula: Σ(agent_score × agent_weight) / Σ(agent_weight)
`; }

/**
 * Load evaluation history from `history.json` inside the output directory.
 *
 * This is best-effort: a missing file, unreadable file, invalid JSON or a
 * JSON document that is not an array all yield an empty history instead of
 * throwing. Array elements that are not objects are dropped, because callers
 * read properties such as `metrics` and `timestamp` from every entry.
 *
 * @param outputDir - Directory that contains `history.json`.
 * @returns The stored history entries, or `[]` when none can be loaded.
 */
function loadEvaluationHistory(outputDir: string): EvaluationHistoryEntry[] {
  try {
    const historyPath = path.join(outputDir, 'history.json');
    if (!fs.existsSync(historyPath)) {
      return [];
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(historyPath, 'utf-8'));
    if (!Array.isArray(parsed)) {
      return [];
    }
    return parsed.filter(
      (entry): entry is EvaluationHistoryEntry => entry !== null && typeof entry === 'object'
    );
  } catch {
    // Deliberately swallowed: the report is still useful without history.
    return [];
  }
}

/** Per-metric summary produced by `calculateHistoryStatistics`; numbers are pre-formatted with two decimals. */
interface HistoryMetricStats {
  avg: string;
  median: string;
  stdDev: string;
  min: string;
  max: string;
  range: string;
  trend: string;
  values: number[];
}

/**
 * Calculate statistics from history data.
 *
 * For each requested metric the finite numeric values recorded across the
 * history are collected in history order. Entries without a `metrics` object
 * and non-finite values are ignored. A missing `debtReductionHours` is
 * treated as 0 for backward compatibility with older evaluations. Metrics
 * with no usable value get no entry in the result.
 *
 * The standard deviation is the population form (divides by the number of
 * values). The trend compares the last value with the first and is reported
 * as stable when they differ by at most 0.1.
 *
 * @param history - Evaluation history entries, oldest first.
 * @param metrics - Metric keys to summarise.
 * @returns Map from metric key to its formatted statistics and raw values.
 */
function calculateHistoryStatistics(
  history: EvaluationHistoryEntry[],
  metrics: string[]
): Record<string, HistoryMetricStats> {
  const stats: Record<string, HistoryMetricStats> = {};

  for (const metric of metrics) {
    const values: number[] = [];
    for (const entry of history) {
      const recordedMetrics = entry.metrics as Record<string, unknown> | undefined;
      let recorded = recordedMetrics?.[metric];
      // Backward compatibility: default to 0 for debtReductionHours if not present in old evaluations
      if (metric === 'debtReductionHours' && recorded === undefined) {
        recorded = 0;
      }
      if (typeof recorded === 'number' && Number.isFinite(recorded)) {
        values.push(recorded);
      }
    }
    if (values.length === 0) {
      continue;
    }

    const count = values.length;
    const avg = values.reduce((sum, value) => sum + value, 0) / count;

    const sorted = [...values].sort((a, b) => a - b);
    const middle = Math.floor(count / 2);
    const median = count % 2 === 0 ? (sorted[middle - 1] + sorted[middle]) / 2 : sorted[middle];

    const variance = values.reduce((sum, value) => sum + Math.pow(value - avg, 2), 0) / count;
    const stdDev = Math.sqrt(variance);

    const min = sorted[0];
    const max = sorted[count - 1];

    // Calculate trend (last value - first value)
    const trend = values[count - 1] - values[0];
    const trendDir = trend > 0.1 ? '📈 Increasing' : trend < -0.1 ? '📉 Decreasing' : '→ Stable';

    stats[metric] = {
      avg: avg.toFixed(2),
      median: median.toFixed(2),
      stdDev: stdDev.toFixed(2),
      min: min.toFixed(2),
      max: max.toFixed(2),
      range: (max - min).toFixed(2),
      trend: trendDir,
      values,
    };
  }

  return stats;
}

/** * Generate history comparison HTML */ function generateHistoryHtml(history: EvaluationHistoryEntry[], modelInfo?: string): string { if (history.length === 0) { return '

No re-evaluation history available yet. This is the first evaluation.

'; } if (history.length === 1) { return '

Only one evaluation recorded. History comparison will appear after re-evaluations.

'; } // Build comparison tables - Evaluations as ROWS, Metrics as COLUMNS const allMetrics = SEVEN_PILLARS; const stats = calculateHistoryStatistics(history, [...allMetrics]); // Build evaluation rows (each row is one evaluation with all metrics as columns) const evaluationRows = history .map((h, evalIdx) => { const timestamp = new Date(h.timestamp).toLocaleString(); const sourceLabel = h.source === 'batch' ? '🔄 Batch' : '📝 Manual'; let rowHtml = ` Evaluation #${h.evaluationNumber}
${timestamp}
${sourceLabel} `; allMetrics.forEach((metric: string) => { // Backward compatibility: default to 0 for debtReductionHours if not present in old evaluations let val = (h.metrics as any)[metric]; if (metric === 'debtReductionHours' && val === undefined) { val = 0; } const displayVal = typeof val === 'number' ? val.toFixed(1) : 'N/A'; // Calculate change from previous evaluation let changeHtml = ''; if (evalIdx > 0) { let prevVal = (history[evalIdx - 1].metrics as any)[metric]; // Backward compatibility for previous eval as well if (metric === 'debtReductionHours' && prevVal === undefined) { prevVal = 0; } if (typeof val === 'number' && typeof prevVal === 'number') { const diff = val - prevVal; const direction = diff > 0.05 ? '↑' : diff < -0.05 ? '↓' : '→'; const colorClass = diff > 0.1 ? 'text-danger' : diff < -0.1 ? 'text-success' : 'text-muted'; changeHtml = `
${direction} ${Math.abs(diff).toFixed(2)}`; } } rowHtml += `${displayVal}${changeHtml}`; }); rowHtml += ``; return rowHtml; }) .join(''); // Build metric statistics rows - show final consensus values and history statistics const latestEntry = history[history.length - 1]; const statsRows = allMetrics .map((metric: string) => { const stat = stats[metric]; if (!stat) return ''; // Get final value from latest history entry (weighted consensus) const finalValue = latestEntry.metrics ? (latestEntry.metrics as any)[metric] : 'N/A'; const finalValueStr = typeof finalValue === 'number' ? finalValue.toFixed(2) : 'N/A'; return ` ${metric .replace(/([A-Z])/g, ' $1') .replace(/^./, (s) => s.toUpperCase()) .trim()} final ${finalValueStr} avg ${stat.avg} med ${stat.median} σ ${stat.stdDev} ${stat.min} ${stat.max} ${stat.trend} `; }) .join(''); // Build convergence summary const convergenceScores = history.map((h) => h.convergenceScore * 100); const avgConvergence = convergenceScores.reduce((a, b) => a + b, 0) / convergenceScores.length; const maxConvergence = Math.max(...convergenceScores); const minConvergence = Math.min(...convergenceScores); const convergenceTrend = convergenceScores[convergenceScores.length - 1] - convergenceScores[0]; const metricHeaders = allMetrics .map((m: string) => { const label = m .replace(/([A-Z])/g, ' $1') .replace(/^./, (s) => s.toUpperCase()) .trim(); return `${label}`; }) .join(''); return `
📊 Evaluation History & Statistical Analysis (${history.length} evaluations)
Track metric evolution, convergence trends, and statistical insights${modelInfo ? ' • ' + modelInfo : ''}
📈 Metrics by Evaluation Each row is one evaluation; arrows show change from previous
${metricHeaders} ${evaluationRows}
Evaluation
📊 Statistical Analysis Average, median, std deviation, trend across all evaluations
${statsRows}
Metric Final (Weighted) Average Median Std Dev (σ) Min Max Trend
💾 Token Usage & Cost API resource consumption tracking
${history .map( (h) => ` ` ) .join('')}
Evaluation Input Tokens Output Tokens Total Tokens Cost ($)
Eval #${h.evaluationNumber} ${new Date(h.timestamp).toLocaleString()} ${(h.tokens?.inputTokens || 0).toLocaleString()} ${(h.tokens?.outputTokens || 0).toLocaleString()} ${(h.tokens?.totalTokens || 0).toLocaleString()} $${((h.tokens?.totalCost as any) || 0).toFixed(4)}
Total ${history.reduce((sum, h) => sum + (h.tokens?.inputTokens || 0), 0).toLocaleString()} ${history.reduce((sum, h) => sum + (h.tokens?.outputTokens || 0), 0).toLocaleString()} ${history.reduce((sum, h) => sum + (h.tokens?.totalTokens || 0), 0).toLocaleString()} $${history.reduce((sum, h) => sum + ((h.tokens?.totalCost as any) || 0), 0).toFixed(4)}
🎯 Convergence Analysis Agent consensus metrics across evaluations
Average Convergence ${avgConvergence.toFixed(1)}% Overall agreement level
Highest ${maxConvergence.toFixed(1)}% Best consensus
Lowest ${minConvergence.toFixed(1)}% Most discussion
Trend ${convergenceTrend > 2 ? '📈' : convergenceTrend < -2 ? '📉' : '→'} ${Math.abs(convergenceTrend).toFixed(1)}% ${convergenceTrend > 2 ? 'improving' : convergenceTrend < -2 ? 'declining' : 'stable'}
${history .map((h) => { const score = h.convergenceScore * 100; const scoreClass = score >= 85 ? 'bg-success' : score >= 70 ? 'bg-info' : 'bg-warning'; const scoreLabel = score >= 85 ? 'Excellent' : score >= 70 ? 'Good' : 'Fair'; return `
Eval #${h.evaluationNumber} ${score.toFixed(0)}% ${scoreLabel}
`; }) .join('')}

📊 Interpretation: σ (Sigma) shows metric variability across evaluations. Lower values = more stable metrics. Trend shows direction: ↑ Increasing | ↓ Decreasing | → Stable. Convergence measures agent agreement: 85%+ = Excellent | 70-84% = Good | <70% = Needs more discussion

`; } /** * Generate enhanced HTML report with conversation timeline */ export function generateEnhancedHtmlReport( results: AgentResult[], outputPath: string, metadata?: { commitHash?: string; timestamp?: string; commitAuthor?: string; commitMessage?: string; commitDate?: string; developerOverview?: string; filesChanged?: number; insertions?: number; deletions?: number; } ) { const groupedResults = groupResultsByAgent(results); const metricEvolution = calculateMetricEvolution(groupedResults); const comprehensiveMetricsHtml = buildMetricsTable(groupedResults); // Load and generate evaluation history const outputDir = path.dirname(outputPath); const evaluationHistory = loadEvaluationHistory(outputDir); // Try to load model info from config let modelInfo: string | undefined; try { const configPath = path.join(process.cwd(), '.codewave.config.json'); if (fs.existsSync(configPath)) { const config = JSON.parse(fs.readFileSync(configPath, 'utf-8')); if (config.llm?.provider && config.llm?.model) { modelInfo = `🤖 ${config.llm.provider}/${config.llm.model}`; } } } catch { // Silently fail if config can't be read } const historyHtml = generateHistoryHtml(evaluationHistory, modelInfo); // Calculate consensus values (for comprehensive metrics table) const consensusValues = calculateConsensusValues(groupedResults); // Extract final pillar scores using consensus-based weighted averages // This ensures the 7-Pillar scores match the Final Agreed consensus values const finalPillarScores: Record = {}; // Use consensus values directly - these are already weighted averages consensusValues.forEach((data, metric) => { // Identify the top contributor for attribution const topContributor = data.contributors.reduce((max: any, current: any) => current.weight > max.weight ? 
current : max ); finalPillarScores[metric] = { value: data.value, agent: topContributor.name, }; }); // Generate 7-Pillar Summary Card // Calculate NET debt for display (technicalDebtHours - debtReductionHours) const technicalDebtValue = finalPillarScores['technicalDebtHours']?.value || 0; const debtReductionValue = finalPillarScores['debtReductionHours']?.value || 0; const netDebtValue = technicalDebtValue - debtReductionValue; // Filter out individual debt metrics and add NET debt instead const displayMetrics = Object.entries(finalPillarScores).filter( ([metric]) => metric !== 'technicalDebtHours' && metric !== 'debtReductionHours' ); // Add NET debt as a composite metric displayMetrics.push([ 'netDebt', { value: netDebtValue, agent: finalPillarScores['technicalDebtHours']?.agent || 'Team', }, ]); const pillarSummaryHtml = `
🎯 7-Pillar Evaluation Summary
${displayMetrics .map(([metric, data]) => { let label = metric; if (metric === 'netDebt') { label = 'Net Debt (−=improve)'; } else { label = metric .replace(/([A-Z])/g, ' $1') .replace(/^./, (str) => str.toUpperCase()) .trim(); } let badgeColor = 'secondary'; let icon = '📊'; // Handle null values if (data.value === null) { badgeColor = 'secondary'; icon = '➖'; } else { // Determine color and icon based on metric type if ( metric.includes('Quality') || metric.includes('Coverage') || metric.includes('Impact') ) { badgeColor = data.value >= 7 ? 'success' : data.value >= 4 ? 'warning' : 'danger'; icon = data.value >= 7 ? '✅' : data.value >= 4 ? '⚠️' : '❌'; } else if (metric.includes('Complexity')) { badgeColor = data.value <= 3 ? 'success' : data.value <= 6 ? 'warning' : 'danger'; icon = data.value <= 3 ? '✅' : data.value <= 6 ? '⚠️' : '❌'; } else if (metric === 'netDebt' || metric.includes('Debt')) { // For NET debt: positive = added debt (bad), negative = debt removed (good) badgeColor = data.value > 0 ? 'danger' : data.value < 0 ? 'success' : 'secondary'; icon = data.value > 0 ? '❌' : data.value < 0 ? '✅' : '➖'; } } const metadata = METRIC_METADATA[metric as keyof typeof METRIC_METADATA]; let formattedValue: string; if (data.value === null) { formattedValue = '-'; } else if (metric === 'netDebt') { formattedValue = `${data.value > 0 ? '+' : ''}${data.value.toFixed(1)}h`; } else { formattedValue = metadata ? metadata.format(data.value) : data.value.toFixed(1); } const scale = metadata ? metadata.scale : metric === 'netDebt' ? 'Positive = added debt, Negative = removed debt' : ''; const tooltip = metadata ? metadata.tooltip : metric === 'netDebt' ? 'Net technical debt: debt introduced minus debt removed' : ''; return `
${icon} ${label}
by ${data.agent}
${scale ? `
📍 ${scale}
` : ''}
${formattedValue}
`; }) .join('')}
`; // Generate Individual Agent Cards let agentCardsHtml = '
'; groupedResults.forEach((evaluations, agentName) => { const latestEval = evaluations[evaluations.length - 1]; const numRounds = evaluations.length; const hasMultipleRounds = numRounds > 1; const agentDescription = AGENT_DESCRIPTIONS[agentName] || ''; agentCardsHtml += `
${latestEval.icon} ${agentName} ${hasMultipleRounds ? `${numRounds} Rounds` : ''}
${agentDescription ? `${agentDescription}` : ''}
📊 Metrics
${ latestEval.metrics ? Object.entries(latestEval.metrics) .map(([key, value]) => { const label = key .replace(/([A-Z])/g, ' $1') .replace(/^./, (str) => str.toUpperCase()) .trim(); return `${label}: ${value}`; }) .join('') : 'No metrics' }
💭 Final Assessment

${latestEval.summary.substring(0, 200)}${latestEval.summary.length > 200 ? '...' : ''}

${ latestEval.concernsRaised.length > 0 ? `
⚠️ Concerns (Round ${latestEval.round})
    ${latestEval.concernsRaised.map((concern) => `
  • ${concern}
  • `).join('')}
` : '' }
`; }); agentCardsHtml += '
'; // Generate Conversation Timeline with round phases let timelineHtml = ''; const allEvaluations: AgentEvaluation[] = []; groupedResults.forEach((evals) => allEvaluations.push(...evals)); // Sort by round first, then by agent allEvaluations.sort((a, b) => { if (a.round !== b.round) return a.round - b.round; return a.agentName.localeCompare(b.agentName); }); const roundPhases = [ { title: 'Initial Analysis', description: 'Initial evaluation from all agents', emoji: '🔍' }, { title: 'Concerns & Questions', description: 'Agents discuss findings and address concerns', emoji: '❓', }, { title: 'Validation', description: 'Final consensus and validation', emoji: '✅' }, ]; let currentRound = -1; allEvaluations.forEach((evaluation) => { if (evaluation.round !== currentRound) { currentRound = evaluation.round; const roundIndex = currentRound - 1; // Convert to 0-based for phase lookup const phase = roundPhases[roundIndex] || { title: `Round ${currentRound} `, description: '', emoji: '🔄', }; timelineHtml += `
${phase.emoji}

Round ${currentRound}: ${phase.title}

${phase.description}

`; } // Simplified card with only essential information const concernsHtml = evaluation.concernsRaised.length > 0 ? `
Concerns:
    ${evaluation.concernsRaised.map((c) => `
  • ${c}
  • `).join('')}
` : ''; const referencesHtml = evaluation.referencesTo.length > 0 ? `
💬 References: ${evaluation.referencesTo.join(', ')}
` : ''; timelineHtml += `
${evaluation.icon} ${evaluation.agentName} Round ${evaluation.round}

${evaluation.summary}

${concernsHtml} ${referencesHtml}
`; }); timelineHtml = `
${timelineHtml}
`; // Determine min and max rounds from evaluations const allRounds = Array.from(groupedResults.values()).flatMap((evals) => evals.map((e) => e.round) ); const minRound = Math.min(...allRounds); const maxRound = Math.max(...allRounds); // Map metric keys to full names const metricNames: Record = { functionalImpact: 'Functional Impact', idealTimeHours: 'Ideal Time Estimate', testCoverage: 'Test Coverage', codeQuality: 'Code Quality', codeComplexity: 'Code Complexity', actualTimeHours: 'Actual Time Spent', technicalDebtHours: 'Technical Debt', debtReductionHours: 'Debt Reduction', netDebt: 'NET Debt (−=improve)', }; // Generate Metric Evolution Table - ROUNDS AS ROWS, METRICS AS COLUMNS const evolutionHtml = `
📈 Metric Evolution Across Rounds
${metricEvolution .map((evolution) => { const fullName = metricNames[evolution.metric] || evolution.metric.replace(/([A-Z])/g, ' $1').trim(); return ``; }) .join('')} ${Array.from({ length: maxRound - minRound + 1 }, (_, idx) => { const roundNum = minRound + idx; const roundIndex = roundNum - 1; // Convert to 0-based index for phase lookup const roundPhases = ['Initial Analysis', 'Concerns & Questions', 'Validation']; const phaseLabel = roundPhases[roundIndex] || `Round ${roundNum}`; return ` ${metricEvolution .map((evolution) => { const value = evolution.rounds.get(roundNum); const previousValue = roundNum > minRound ? evolution.rounds.get(roundNum - 1) : undefined; let cellContent = value !== undefined && value !== null ? value.toFixed(1) : '—'; let cellStyle = ''; // Add change indicator if ( value !== undefined && value !== null && previousValue !== undefined && previousValue !== null ) { const diff = value - previousValue; if (Math.abs(diff) > 0.05) { const arrow = diff > 0 ? '↑' : '↓'; const color = diff > 0 ? '#28a745' : '#dc3545'; cellContent = '' + arrow + ' ' + cellContent + ''; cellStyle = 'background-color: rgba(0,0,0,0.02);'; } } return ( '' ); }) .join('')} ${(() => { const techDebtMetric = metricEvolution.find( (e) => e.metric === 'technicalDebtHours' ); const debtReductionMetric = metricEvolution.find( (e) => e.metric === 'debtReductionHours' ); const techDebtValue = techDebtMetric?.rounds.get(roundNum) ?? 0; const debtReductionValue = debtReductionMetric?.rounds.get(roundNum) ?? 0; const netDebt = techDebtValue - debtReductionValue; const prevTechDebt = roundNum > minRound ? (techDebtMetric?.rounds.get(roundNum - 1) ?? 0) : undefined; const prevDebtReduction = roundNum > minRound ? (debtReductionMetric?.rounds.get(roundNum - 1) ?? 0) : undefined; const prevNetDebt = prevTechDebt !== undefined && prevDebtReduction !== undefined ? 
prevTechDebt - prevDebtReduction : undefined; let netDebtContent = netDebt.toFixed(1); let netDebtStyle = ''; let netDebtColor = netDebt > 0 ? '#dc3545' : netDebt < 0 ? '#28a745' : '#6c757d'; // Add change indicator if (prevNetDebt !== undefined) { const diff = netDebt - prevNetDebt; if (Math.abs(diff) > 0.05) { const arrow = diff > 0 ? '↑' : '↓'; const arrowColor = diff > 0 ? '#dc3545' : '#28a745'; netDebtContent = '' + arrow + ' ' + netDebtContent + ''; netDebtStyle = 'background-color: rgba(0,0,0,0.02);'; } } return ( '' ); })()} `; }).join('')}
Round${fullName}NET Debt (−=improve)
${roundIndex === 0 ? '🔍' : roundIndex === 1 ? '❓' : roundIndex === 2 ? '✅' : '🔄'} Round ${roundNum} ' + cellContent + '' + netDebtContent + '
📍 Legend: ↑ Increased | ↓ Decreased | — Not evaluated in this round
`; // Generate full HTML const html = ` Commit Evaluation Report - Conversation View
← Back to Index

🌊 CodeWave Analysis Report

AI-Powered Commit Intelligence

${metadata?.commitHash ? `
Commit: ${metadata.commitHash}
` : ''} ${metadata?.commitAuthor ? `
Author: ${metadata.commitAuthor}
` : ''} ${metadata?.commitMessage ? `
${metadata.commitMessage}
` : ''} Generated on ${metadata?.timestamp || new Date().toLocaleString()}
${ metadata?.commitHash || metadata?.commitAuthor ? `
📝 Commit Overview
${ metadata?.commitHash ? `
📌 Commit Hash:
${metadata.commitHash}
` : '' } ${ metadata?.commitAuthor ? `
👤 Author:
${metadata.commitAuthor}
` : '' } ${ metadata?.commitDate ? `
📅 Date:
${new Date(metadata.commitDate).toLocaleString()}
` : '' } ${ metadata?.commitMessage ? `
💬 Commit Message:
${metadata.commitMessage}
` : '' } ${ metadata?.filesChanged !== undefined || metadata?.insertions !== undefined || metadata?.deletions !== undefined ? `
📊 Commit Statistics:
${ metadata?.filesChanged !== undefined ? `
${metadata.filesChanged}
Files Changed
` : '' } ${ metadata?.insertions !== undefined ? `
+${metadata.insertions}
Insertions
` : '' } ${ metadata?.deletions !== undefined ? `
-${metadata.deletions}
Deletions
` : '' }
` : '' }
` : '' }
👨‍💻 Developer Overview
${ metadata?.developerOverview ? `
${metadata.developerOverview}
` : `
💡 Developer overview not yet generated. This section is populated when the Developer Author agent provides insights about implementation decisions, trade-offs, and actual time spent on the changes.
` }
🔄 3-Round Conversation Process

This commit was evaluated through a multi-agent conversation in 3 rounds:

  1. Round 1 - Initial Assessment: Each agent independently analyzes the commit and provides their initial evaluation.
  2. Round 2 - Raising Concerns: Agents review each other's assessments and raise questions or concerns to the responsible agent for specific areas.
  3. Round 3 - Validation & Agreement: Agents respond to concerns, refine their scores, and reach consensus on the final evaluation.

💡 The scores shown below represent the final agreed-upon values from Round 3, while agent results display the last refined assessment from each agent.

${pillarSummaryHtml}

👥 Individual Agent Assessments

${agentCardsHtml}

💬 Conversation Flow

Follow the discussion between agents across ${Math.max(...allEvaluations.map((e) => e.round))} rounds. Agents reference each other's concerns and build consensus.

${timelineHtml}

📊 Comprehensive Metrics Analysis

${comprehensiveMetricsHtml}

📈 Metric Evolution Across Rounds

${evolutionHtml}
${ results.some((r: AgentResult) => r.internalIterations !== undefined) ? `

🔄 Agent Refinement Journey

Each agent iteratively refines their analysis to reach confidence in their assessment. This tab shows the self-refinement process and clarity progression for each agent.

${Array.from(groupedResults.entries()) .map(([agentName, evals]) => { const latestEval = evals[evals.length - 1]; const result = results.find((r) => { const rAgentName = detectAgentName(r, 0); return rAgentName === agentName; }); const iterations = result?.internalIterations || 0; const clarity = result?.clarityScore || 0; return iterations > 0 ? `
${latestEval.icon} ${agentName} 🔄 ${iterations} iterations
Clarity Score:
${clarity}%

This agent refined their analysis through ${iterations} self-iteration cycles, progressively improving their confidence from internal gap analysis and question generation.

${ result?.refinementNotes && result.refinementNotes.length > 0 ? `
Refinement Notes:
    ${result.refinementNotes.map((note) => `
  • ${note}
  • `).join('')}
` : '' } ${ result?.missingInformation && result.missingInformation.length > 0 ? `
Final Gaps Identified:
    ${result.missingInformation .slice(0, 3) .map((gap) => `
  • ${gap}
  • `) .join('')}
` : '' }
` : ''; }) .join('')}
` : '' } ${ evaluationHistory.length > 0 ? `

📈 Evaluation History & Comparisons

Track how metrics and costs have changed across multiple evaluations of this commit. This helps identify consistency, model drift, and cost optimization opportunities.

${historyHtml}
` : '' }
Generated by CodeWave with LangGraph Multi-Agent System
`; fs.writeFileSync(outputPath, html, 'utf-8'); }