#!/usr/bin/env tsx /** * AIWG Model Evaluation Suite — CLI entry point * * Usage: * npx tsx src/index.ts [options] * npx tsx src/index.ts hermes3:latest --backend ollama * npx tsx src/index.ts hermes3:latest --dimensions tool-use,coding * * @issue #433 */ import { Command } from 'commander'; import path from 'path'; import { promises as fs } from 'fs'; import { fileURLToPath } from 'url'; import { OllamaModel } from './models/ollama.js'; import { EvalRunner } from './runner.js'; import { generateMarkdownReport } from './reporters/markdown.js'; import { generateJsonReport } from './reporters/json.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const DATASETS_DIR = path.resolve(__dirname, '../datasets'); const program = new Command(); program .name('aiwg-eval') .description('AIWG model evaluation suite') .version('0.1.0') .argument('', 'Model identifier (e.g., hermes3:latest)') .option('--backend ', 'Backend: ollama or api', 'ollama') .option('--dimensions ', 'Comma-separated dimensions to evaluate') .option('--output ', 'Output format: json or markdown', 'markdown') .option('--ollama-url ', 'Ollama API URL', 'http://localhost:11434') .option('--verbose', 'Show detailed progress', false) .action(async (modelId: string, options: Record) => { console.log(`\nAIWG Model Evaluation Suite`); console.log(`Model: ${modelId}`); console.log(`Backend: ${options.backend}`); console.log(`Ollama URL: ${options['ollama-url'] || 'http://localhost:11434'}`); console.log(''); // Create model client const model = new OllamaModel(modelId, options['ollama-url'] as string); // Parse dimensions const dimensions = options.dimensions ? (options.dimensions as string).split(',').map((d: string) => d.trim()) : undefined; // Run evaluation const runner = new EvalRunner(model, DATASETS_DIR); const report = await runner.run({ dimensions, verbose: options.verbose as boolean, }); // Generate report const output = options.output === 'json' ? generateJsonReport(report) : generateMarkdownReport(report); console.log(output); // Save report const reportsDir = path.resolve(__dirname, '../reports'); await fs.mkdir(reportsDir, { recursive: true }); const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); const ext = options.output === 'json' ? 'json' : 'md'; const reportPath = path.join(reportsDir, `eval-${modelId.replace(/[:/]/g, '-')}-${timestamp}.${ext}`); await fs.writeFile(reportPath, output); console.log(`\nReport saved: ${reportPath}`); }); program.parse();