#!/usr/bin/env python3
"""Generate an HTML review page for eval results.

Creates an interactive HTML page showing eval outputs, grading results,
and benchmark data for human review.

Usage:
    python3 generate_review.py <workspace-dir>
    python3 generate_review.py <workspace-dir> --static output.html
    python3 generate_review.py <workspace-dir> --open

Options:
    --static <path>   Write the HTML to <path> (default: <workspace-dir>/review.html)
    --open            Open the generated HTML in the default browser
    --iteration <n>   Show specific iteration (default: latest)
"""

import argparse
import json
import os
import sys
import webbrowser
from pathlib import Path

# Put this script's own directory first on sys.path so that the `utils`
# import below resolves when the file is run directly as a script
# (presumably utils.py lives next to this file -- confirm).
sys.path.insert(0, str(Path(__file__).parent))
from utils import load_json, find_latest_iteration


def collect_eval_data(iteration_dir):
    """Collect all eval data from an iteration directory.

    Every sub-directory of ``iteration_dir`` whose name starts with ``eval-``
    is treated as one eval. For each of the ``with_skill`` / ``without_skill``
    configurations present inside it, the text of every file under
    ``outputs/`` (truncated to 5000 characters), ``grading.json`` and
    ``timing.json`` are gathered.

    Args:
        iteration_dir: Path (``str`` or ``Path``) of the iteration directory.

    Returns:
        A ``(evals, benchmark)`` tuple. ``evals`` is a list, sorted by
        directory name, of ``{"id": <name without the "eval-" prefix>,
        "configs": {<config>: {"outputs": {filename: text}, "grading": ...,
        "timing": ...}}}``. ``benchmark`` is the parsed ``benchmark.json`` of
        the iteration directory, or ``None`` when that file does not exist.
    """
    iteration_dir = Path(iteration_dir)
    prefix = "eval-"
    max_output_chars = 5000
    evals = []

    for eval_dir in sorted(iteration_dir.iterdir()):
        if not eval_dir.is_dir() or not eval_dir.name.startswith(prefix):
            continue

        # Strip only the leading prefix; str.replace() would also drop any
        # later "eval-" inside the name (e.g. "eval-a-eval-b" -> "a-b").
        eval_id = eval_dir.name[len(prefix):]
        eval_data = {"id": eval_id, "configs": {}}

        for config in ["with_skill", "without_skill"]:
            config_dir = eval_dir / config
            if not config_dir.is_dir():
                continue

            config_data = {"outputs": {}, "grading": None, "timing": None}

            # Read outputs. is_dir() rather than exists(): iterdir() raises
            # if "outputs" happens to be a regular file.
            outputs_dir = config_dir / "outputs"
            if outputs_dir.is_dir():
                for f in sorted(outputs_dir.iterdir()):
                    if not f.is_file():
                        continue
                    try:
                        # Decode explicitly as UTF-8 so the result does not
                        # depend on the platform's default encoding.
                        text = f.read_text(encoding="utf-8")
                    except (UnicodeDecodeError, PermissionError):
                        # Best effort: undecodable or unreadable files are
                        # shown as a placeholder instead of aborting.
                        config_data["outputs"][f.name] = "(binary file)"
                    else:
                        config_data["outputs"][f.name] = text[:max_output_chars]

            # Read grading
            grading_path = config_dir / "grading.json"
            if grading_path.exists():
                config_data["grading"] = load_json(grading_path)

            # Read timing
            timing_path = config_dir / "timing.json"
            if timing_path.exists():
                config_data["timing"] = load_json(timing_path)

            eval_data["configs"][config] = config_data

        evals.append(eval_data)

    # Read benchmark
    benchmark = None
    benchmark_path = iteration_dir / "benchmark.json"
    if benchmark_path.exists():
        benchmark = load_json(benchmark_path)

    return evals, benchmark


def _json_for_script(value):
    """Serialize ``value`` as JSON that is safe inside an inline <script>."""
    text = json.dumps(value, indent=2)
    # In JSON these characters can only occur inside string literals, so
    # swapping them for \uXXXX escapes leaves the data unchanged while making
    # it impossible for a value such as "</script>" or "<!--" to terminate or
    # alter the surrounding script element.
    return (
        text.replace("&", "\\u0026")
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
    )


def generate_html(evals, benchmark, iteration_num, workspace_dir):
    """Generate the HTML review page.

    The eval and benchmark data are embedded as JSON in an inline script and
    rendered client-side; every data-derived value is HTML-escaped before it
    is inserted into the DOM.

    Args:
        evals: List of per-eval dicts, each with an ``id`` and a ``configs``
            mapping, as built by ``collect_eval_data``.
        benchmark: Benchmark data, or a falsy value when there is none (the
            page then shows a "No benchmark data available" message).
        iteration_num: Iteration number shown in the title and header and
            recorded in the downloaded feedback JSON.
        workspace_dir: Workspace path shown in the page header.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally under its own name: `html` is used as a variable name
    # elsewhere in this file and would shadow the module.
    from html import escape

    # Escape JSON for embedding in an inline <script>
    evals_json = _json_for_script(evals)
    benchmark_json = _json_for_script(benchmark) if benchmark else "null"
    iteration_json = _json_for_script(iteration_num)

    # Escape values interpolated directly into HTML text
    iteration_text = escape(str(iteration_num))
    workspace_text = escape(str(workspace_dir))

    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Skill Eval Review - Iteration {iteration_text}</title>
    <style>
        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, monospace; background: #0d1117; color: #c9d1d9; }}
        .header {{ background: #161b22; border-bottom: 1px solid #30363d; padding: 16px 24px; display: flex; justify-content: space-between; align-items: center; }}
        .header h1 {{ font-size: 20px; color: #f0f6fc; }}
        .tabs {{ display: flex; gap: 0; border-bottom: 1px solid #30363d; background: #161b22; padding: 0 24px; }}
        .tab {{ padding: 12px 20px; cursor: pointer; border-bottom: 2px solid transparent; color: #8b949e; font-size: 14px; }}
        .tab:hover {{ color: #c9d1d9; }}
        .tab.active {{ color: #f0f6fc; border-bottom-color: #f78166; }}
        .content {{ padding: 24px; max-width: 1400px; margin: 0 auto; }}
        .eval-card {{ background: #161b22; border: 1px solid #30363d; border-radius: 6px; margin-bottom: 16px; overflow: hidden; }}
        .eval-header {{ padding: 12px 16px; background: #1c2128; border-bottom: 1px solid #30363d; display: flex; justify-content: space-between; align-items: center; }}
        .eval-header h3 {{ font-size: 16px; color: #f0f6fc; }}
        .badge {{ padding: 2px 8px; border-radius: 12px; font-size: 12px; font-weight: 600; }}
        .badge.pass {{ background: #1a3a2a; color: #3fb950; }}
        .badge.fail {{ background: #3a1a1a; color: #f85149; }}
        .badge.mixed {{ background: #3a2a1a; color: #d29922; }}
        .eval-body {{ padding: 16px; }}
        .config-columns {{ display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }}
        .config-col {{ background: #0d1117; border: 1px solid #30363d; border-radius: 6px; padding: 12px; }}
        .config-col h4 {{ font-size: 14px; margin-bottom: 8px; color: #8b949e; }}
        .assertion {{ padding: 8px; margin: 4px 0; border-radius: 4px; font-size: 13px; }}
        .assertion.pass {{ background: #1a3a2a; border-left: 3px solid #3fb950; }}
        .assertion.fail {{ background: #3a1a1a; border-left: 3px solid #f85149; }}
        .evidence {{ color: #8b949e; font-size: 12px; margin-top: 4px; }}
        .output-block {{ background: #0d1117; border: 1px solid #30363d; border-radius: 4px; padding: 12px; margin: 8px 0; max-height: 300px; overflow-y: auto; font-size: 13px; white-space: pre-wrap; }}
        .benchmark-grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 16px; }}
        .metric-card {{ background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 16px; text-align: center; }}
        .metric-value {{ font-size: 32px; font-weight: 700; color: #f0f6fc; margin: 8px 0; }}
        .metric-label {{ font-size: 13px; color: #8b949e; }}
        .metric-sub {{ font-size: 12px; color: #8b949e; margin-top: 4px; }}
        .feedback-section {{ margin-top: 16px; }}
        .feedback-section textarea {{ width: 100%; height: 80px; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; padding: 8px; color: #c9d1d9; font-family: inherit; font-size: 13px; resize: vertical; }}
        .feedback-section select {{ background: #0d1117; border: 1px solid #30363d; border-radius: 6px; padding: 6px 12px; color: #c9d1d9; font-size: 13px; margin-right: 8px; }}
        .btn {{ padding: 8px 16px; border-radius: 6px; border: none; cursor: pointer; font-size: 14px; font-weight: 600; }}
        .btn-primary {{ background: #238636; color: #fff; }}
        .btn-primary:hover {{ background: #2ea043; }}
        .submit-bar {{ position: fixed; bottom: 0; left: 0; right: 0; background: #161b22; border-top: 1px solid #30363d; padding: 12px 24px; display: flex; justify-content: flex-end; gap: 12px; }}
        .hidden {{ display: none; }}
        .bar-chart {{ display: flex; align-items: flex-end; gap: 8px; height: 120px; margin: 16px 0; }}
        .bar {{ flex: 1; border-radius: 4px 4px 0 0; min-width: 30px; position: relative; }}
        .bar.with {{ background: #238636; }}
        .bar.without {{ background: #6e7681; }}
        .bar-label {{ position: absolute; bottom: -20px; left: 50%; transform: translateX(-50%); font-size: 10px; white-space: nowrap; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>Skill Eval Review &mdash; Iteration {iteration_text}</h1>
        <span style="color: #8b949e; font-size: 13px;">{workspace_text}</span>
    </div>
    <div class="tabs">
        <div class="tab active" onclick="showTab('outputs', this)">Outputs</div>
        <div class="tab" onclick="showTab('benchmark', this)">Benchmark</div>
    </div>
    <div id="outputs-tab" class="content"></div>
    <div id="benchmark-tab" class="content hidden"></div>
    <div class="submit-bar">
        <select id="action-select">
            <option value="iterate">Iterate (improve &amp; rerun)</option>
            <option value="publish">Publish (submit to marketplace)</option>
            <option value="stop">Stop (done for now)</option>
        </select>
        <button class="btn btn-primary" onclick="submitFeedback()">Submit All Reviews</button>
    </div>

    <script>
    const evals = {evals_json};
    const benchmark = {benchmark_json};

    function showTab(name, tabEl) {{
        document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
        document.querySelectorAll('.content').forEach(c => c.classList.add('hidden'));
        tabEl.classList.add('active');
        document.getElementById(name + '-tab').classList.remove('hidden');
    }}

    function renderOutputs() {{
        const container = document.getElementById('outputs-tab');
        let html = '';

        evals.forEach(ev => {{
            const wsGrading = ev.configs.with_skill?.grading;
            const wsRate = (wsGrading && typeof wsGrading.pass_rate === 'number') ? wsGrading.pass_rate : null;
            const evalId = escapeHtml(ev.id);

            let badgeClass = 'mixed';
            let badgeText = 'N/A';
            if (wsRate !== null) {{
                if (wsRate === 1.0) {{ badgeClass = 'pass'; badgeText = 'ALL PASS'; }}
                else if (wsRate === 0) {{ badgeClass = 'fail'; badgeText = 'ALL FAIL'; }}
                else {{ badgeText = Math.round(wsRate * 100) + '% pass'; }}
            }}

            html += '<div class="eval-card">';
            html += '<div class="eval-header">';
            html += '<h3>eval-' + evalId + '</h3>';
            html += '<span class="badge ' + badgeClass + '">' + badgeText + '</span>';
            html += '</div>';
            html += '<div class="eval-body">';
            html += '<div class="config-columns">';

            ['with_skill', 'without_skill'].forEach(config => {{
                const data = ev.configs[config];
                html += '<div class="config-col">';
                html += '<h4>' + config.replace('_', ' ') + '</h4>';

                if (data && data.grading) {{
                    (data.grading.expectations || []).forEach(exp => {{
                        const verdict = String(exp.verdict == null ? '' : exp.verdict);
                        html += '<div class="assertion ' + escapeHtml(verdict.toLowerCase()) + '">';
                        html += '<strong>' + escapeHtml(verdict) + '</strong>: ' + escapeHtml(exp.assertion);
                        html += '<div class="evidence">' + escapeHtml(exp.evidence || '') + '</div>';
                        html += '</div>';
                    }});
                }}

                if (data && Object.keys(data.outputs).length > 0) {{
                    html += '<details><summary style="cursor:pointer;margin-top:8px;color:#8b949e">Show outputs</summary>';
                    Object.entries(data.outputs).forEach(([name, content]) => {{
                        html += '<div style="margin-top:4px;font-size:12px;color:#8b949e">' + escapeHtml(name) + '</div>';
                        html += '<div class="output-block">' + escapeHtml(content) + '</div>';
                    }});
                    html += '</details>';
                }}

                if (data && data.timing) {{
                    html += '<div style="margin-top:8px;font-size:12px;color:#8b949e">';
                    html += 'Tokens: ' + escapeHtml(data.timing.total_tokens) + ' | Time: ' + escapeHtml(data.timing.duration_ms) + 'ms';
                    html += '</div>';
                }}

                html += '</div>';
            }});

            html += '</div>';
            html += '<div class="feedback-section">';
            html += '<select data-eval="' + evalId + '" class="eval-rating">';
            html += '<option value="good">Good</option>';
            html += '<option value="needs-work">Needs Work</option>';
            html += '<option value="bad">Bad</option>';
            html += '</select>';
            html += '<textarea data-eval="' + evalId + '" class="eval-comment" placeholder="Feedback for this eval..."></textarea>';
            html += '</div>';
            html += '</div></div>';
        }});

        container.innerHTML = html;
    }}

    function renderBenchmark() {{
        const container = document.getElementById('benchmark-tab');
        if (!benchmark) {{
            container.innerHTML = '<p style="color:#8b949e">No benchmark data available. Run evals first.</p>';
            return;
        }}

        let html = '<div class="benchmark-grid">';

        const configs = benchmark.configs || {{}};
        ['with_skill', 'without_skill'].forEach(config => {{
            const data = configs[config];
            if (!data) return;

            html += '<div class="metric-card">';
            html += '<div class="metric-label">' + config.replace('_', ' ') + ' — Pass Rate</div>';
            html += '<div class="metric-value">' + Math.round(data.overall_pass_rate * 100) + '%</div>';
            html += '</div>';

            html += '<div class="metric-card">';
            html += '<div class="metric-label">' + config.replace('_', ' ') + ' — Avg Tokens</div>';
            html += '<div class="metric-value">' + Math.round(data.total_tokens_mean) + '</div>';
            if (data.total_tokens_stddev) html += '<div class="metric-sub">&plusmn; ' + Math.round(data.total_tokens_stddev) + '</div>';
            html += '</div>';

            html += '<div class="metric-card">';
            html += '<div class="metric-label">' + config.replace('_', ' ') + ' — Avg Time</div>';
            html += '<div class="metric-value">' + Math.round(data.duration_ms_mean / 1000 * 10) / 10 + 's</div>';
            if (data.duration_ms_stddev) html += '<div class="metric-sub">&plusmn; ' + Math.round(data.duration_ms_stddev) + 'ms</div>';
            html += '</div>';
        }});

        if (benchmark.comparison) {{
            const c = benchmark.comparison;
            html += '<div class="metric-card">';
            html += '<div class="metric-label">Pass Rate Delta</div>';
            html += '<div class="metric-value" style="color:' + (c.pass_rate_delta >= 0 ? '#3fb950' : '#f85149') + '">';
            html += (c.pass_rate_delta >= 0 ? '+' : '') + Math.round(c.pass_rate_delta * 100) + '%';
            html += '</div></div>';

            html += '<div class="metric-card">';
            html += '<div class="metric-label">Token Overhead</div>';
            html += '<div class="metric-value">' + Math.round(c.token_overhead_percent) + '%</div>';
            html += '</div>';
        }}

        html += '</div>';
        container.innerHTML = html;
    }}

    // Escapes text for use in both element content and quoted attributes.
    function escapeHtml(text) {{
        return String(text == null ? '' : text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }}

    function submitFeedback() {{
        const feedback = {{
            timestamp: new Date().toISOString(),
            iteration: {iteration_json},
            eval_feedback: [],
            action: document.getElementById('action-select').value
        }};

        document.querySelectorAll('.eval-rating').forEach(select => {{
            // The comment box is a sibling inside the same feedback section;
            // looking it up there avoids building a CSS selector from the id.
            const commentBox = select.parentElement.querySelector('.eval-comment');
            feedback.eval_feedback.push({{
                eval_id: select.dataset.eval,
                rating: select.value,
                comment: commentBox ? commentBox.value : ''
            }});
        }});

        // Save as downloadable file
        const blob = new Blob([JSON.stringify(feedback, null, 2)], {{ type: 'application/json' }});
        const a = document.createElement('a');
        a.href = URL.createObjectURL(blob);
        a.download = 'feedback.json';
        a.click();
        alert('Feedback saved! Place feedback.json in the workspace directory.');
    }}

    renderOutputs();
    renderBenchmark();
    </script>
</body>
</html>"""
    return page


def main():
    """Command-line entry point: build the review page and write it to disk.

    Parses the command line, resolves which ``iteration-<n>`` directory of
    the workspace to show, collects its eval data, renders the HTML and
    writes it to ``--static`` (or ``<workspace>/review.html`` when that
    option is absent). With ``--open`` the written file is opened in the
    default browser.

    Exits with status 1 (after printing a message to stderr) when the
    workspace, the iteration directory, or any eval data is missing.
    """
    parser = argparse.ArgumentParser(description="Generate HTML eval review page")
    parser.add_argument("workspace", help="Path to skill workspace directory")
    parser.add_argument("--static", help="Output path for standalone HTML file")
    parser.add_argument("--open", action="store_true", help="Open in browser")
    parser.add_argument("--iteration", type=int, help="Specific iteration to show")
    args = parser.parse_args()

    workspace = Path(args.workspace)
    if not workspace.exists():
        print(f"Error: {workspace} not found", file=sys.stderr)
        sys.exit(1)

    # Find iteration. Compare against None so that an explicit
    # `--iteration 0` is not silently replaced by the latest iteration; it
    # falls through to the "not found" check below instead.
    if args.iteration is not None:
        iteration_num = args.iteration
    else:
        iteration_num = find_latest_iteration(workspace)
        # The original check treated 0 as "no iterations"; any falsy result
        # is handled the same way here.
        if not iteration_num:
            print("Error: No iterations found in workspace", file=sys.stderr)
            sys.exit(1)

    iteration_dir = workspace / f"iteration-{iteration_num}"
    if not iteration_dir.exists():
        print(f"Error: {iteration_dir} not found", file=sys.stderr)
        sys.exit(1)

    # Collect data
    evals, benchmark = collect_eval_data(iteration_dir)
    if not evals:
        print("Error: No eval data found", file=sys.stderr)
        sys.exit(1)

    # Generate HTML
    page = generate_html(evals, benchmark, iteration_num, str(workspace))

    # Output. The page declares <meta charset="UTF-8"> and contains non-ASCII
    # characters, so it must be written as UTF-8 regardless of the platform
    # default encoding.
    output_path = args.static or str(workspace / "review.html")
    Path(output_path).write_text(page, encoding="utf-8")
    print(f"Generated: {output_path}")

    if args.open:
        # as_uri() yields a well-formed, percent-encoded file:// URI on every
        # platform (plain "file://" + path is invalid for Windows paths).
        webbrowser.open(Path(output_path).resolve().as_uri())


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
