| Metric / Pillar | ${Array.from(agentMetrics.keys()) .map((agent) => `${agent} | `) .join('')}Final Agreed | ||
|---|---|---|---|---|
| ${metricLabels[idx]} | ${Array.from(agentMetrics.keys()) .map((agent) => { const value = agentMetrics.get(agent)?.get(metric); // Use agentRole for weight lookup const agentKey = agentRoleMap.get(agent) || agent; const weight = getAgentWeight(agentKey, metric as PillarName); // Ensure weight is a number before calling toFixed if (typeof weight !== 'number') { return `- | `; } const weightPercent = (weight * 100).toFixed(1); const isPrimary = weight >= 0.4; // Primary expertise threshold const badgeClass = isPrimary ? 'badge bg-warning text-dark' : 'badge bg-secondary'; // Ensure value is a valid number if (value !== undefined && typeof value === 'number' && isFinite(value)) { return `
${(Number(value) || 0).toFixed(2)}
${weightPercent}%
| `;
}
return `- | `; }) .join('')}
${final && typeof final.value === 'number' && isFinite(final.value) ? `${(Number(final.value) || 0).toFixed(2)} (weighted avg from ${final.contributors.length} agent${final.contributors.length > 1 ? 's' : ''})` : '- (no data)'} |
Σ(agent_score × agent_weight) / Σ(agent_weight)
No re-evaluation history available yet. This is the first evaluation.
'; } if (history.length === 1) { return 'Only one evaluation recorded. History comparison will appear after re-evaluations.
'; } // Build comparison tables - Evaluations as ROWS, Metrics as COLUMNS const allMetrics = SEVEN_PILLARS; const stats = calculateHistoryStatistics(history, [...allMetrics]); // Build evaluation rows (each row is one evaluation with all metrics as columns) const evaluationRows = history .map((h, evalIdx) => { const timestamp = new Date(h.timestamp).toLocaleString(); const sourceLabel = h.source === 'batch' ? '🔄 Batch' : '📝 Manual'; let rowHtml = `| Evaluation | ${metricHeaders}
|---|
| Metric | Final (Weighted) | Average | Median | Std Dev (σ) | Min | Max | Trend |
|---|
| Evaluation | Input Tokens | Output Tokens | Total Tokens | Cost ($) |
|---|---|---|---|---|
| Eval #${h.evaluationNumber} ${new Date(h.timestamp).toLocaleString()} | ${(h.tokens?.inputTokens || 0).toLocaleString()} | ${(h.tokens?.outputTokens || 0).toLocaleString()} | ${(h.tokens?.totalTokens || 0).toLocaleString()} | $${((h.tokens?.totalCost as any) || 0).toFixed(4)} |
| Total | ${history.reduce((sum, h) => sum + (h.tokens?.inputTokens || 0), 0).toLocaleString()} | ${history.reduce((sum, h) => sum + (h.tokens?.outputTokens || 0), 0).toLocaleString()} | ${history.reduce((sum, h) => sum + (h.tokens?.totalTokens || 0), 0).toLocaleString()} | $${history.reduce((sum, h) => sum + ((h.tokens?.totalCost as any) || 0), 0).toFixed(4)} |
📊 Interpretation: σ (Sigma) shows metric variability across evaluations. Lower values = more stable metrics. Trend shows direction: ↑ Increasing | ↓ Decreasing | → Stable. Convergence measures agent agreement: 85%+ = Excellent | 70-84% = Good | <70% = Needs more discussion
${latestEval.summary.substring(0, 200)}${latestEval.summary.length > 200 ? '...' : ''}
${ latestEval.concernsRaised.length > 0 ? `${phase.description}
${evaluation.summary}
${concernsHtml} ${referencesHtml}| Round | ${metricEvolution .map((evolution) => { const fullName = metricNames[evolution.metric] || evolution.metric.replace(/([A-Z])/g, ' $1').trim(); return `${fullName} | `; }) .join('')}NET Debt (−=improve) |
|---|---|---|
| ${roundIndex === 0 ? '🔍' : roundIndex === 1 ? '❓' : roundIndex === 2 ? '✅' : '🔄'} Round ${roundNum} | ${metricEvolution .map((evolution) => { const value = evolution.rounds.get(roundNum); const previousValue = roundNum > minRound ? evolution.rounds.get(roundNum - 1) : undefined; let cellContent = value !== undefined && value !== null ? value.toFixed(1) : '—'; let cellStyle = ''; // Add change indicator if ( value !== undefined && value !== null && previousValue !== undefined && previousValue !== null ) { const diff = value - previousValue; if (Math.abs(diff) > 0.05) { const arrow = diff > 0 ? '↑' : '↓'; const color = diff > 0 ? '#28a745' : '#dc3545'; cellContent = '' + arrow + ' ' + cellContent + ''; cellStyle = 'background-color: rgba(0,0,0,0.02);'; } } return ( '' + cellContent + ' | ' ); }) .join('')} ${(() => { const techDebtMetric = metricEvolution.find( (e) => e.metric === 'technicalDebtHours' ); const debtReductionMetric = metricEvolution.find( (e) => e.metric === 'debtReductionHours' ); const techDebtValue = techDebtMetric?.rounds.get(roundNum) ?? 0; const debtReductionValue = debtReductionMetric?.rounds.get(roundNum) ?? 0; const netDebt = techDebtValue - debtReductionValue; const prevTechDebt = roundNum > minRound ? (techDebtMetric?.rounds.get(roundNum - 1) ?? 0) : undefined; const prevDebtReduction = roundNum > minRound ? (debtReductionMetric?.rounds.get(roundNum - 1) ?? 0) : undefined; const prevNetDebt = prevTechDebt !== undefined && prevDebtReduction !== undefined ? prevTechDebt - prevDebtReduction : undefined; let netDebtContent = netDebt.toFixed(1); let netDebtStyle = ''; let netDebtColor = netDebt > 0 ? '#dc3545' : netDebt < 0 ? '#28a745' : '#6c757d'; // Add change indicator if (prevNetDebt !== undefined) { const diff = netDebt - prevNetDebt; if (Math.abs(diff) > 0.05) { const arrow = diff > 0 ? '↑' : '↓'; const arrowColor = diff > 0 ? '#dc3545' : '#28a745'; netDebtContent = '' + arrow + ' ' + netDebtContent + ''; netDebtStyle = 'background-color: rgba(0,0,0,0.02);'; } } return ( '' + netDebtContent + ' | ' ); })()}
AI-Powered Commit Intelligence
${metadata?.commitHash ? `${metadata.commitHash}
This commit was evaluated through a multi-agent conversation in 3 rounds:
💡 The scores shown below represent the final agreed-upon values from Round 3, while agent results display the last refined assessment from each agent.
Follow the discussion between agents across ${Math.max(...allEvaluations.map((e) => e.round))} rounds. Agents reference each other's concerns and build consensus.
${timelineHtml}Each agent iteratively refines their analysis to reach confidence in their assessment. This tab shows the self-refinement process and clarity progression for each agent.
This agent refined their analysis through ${iterations} self-iteration cycles, progressively improving their confidence from internal gap analysis and question generation.
${ result?.refinementNotes && result.refinementNotes.length > 0 ? `Track how metrics and costs have changed across multiple evaluations of this commit. This helps identify consistency, model drift, and cost optimization opportunities.
${historyHtml}