/** * Intelligent Analyzer * * Uses OpenRouter API to intelligently analyze codebases, docs, and tool configurations. * This is the core of the "forcefully intelligent" initialization. */ import fs from 'fs'; import path from 'path'; import { glob } from 'glob'; import { OpenRouterClient, createOpenRouterClient, hasOpenRouterKey } from '../embeddings/openrouter.js'; import { AI_TOOLS, AI_TOOL_FOLDERS, type AITool } from '../db/schema.js'; import { estimateTokens, chunkForEmbedding } from '../utils/chunking.js'; /** * Discovery result for a file */ export interface DiscoveredFile { path: string; relativePath: string; type: 'doc' | 'code' | 'config' | 'tool_config'; tool?: AITool; size: number; content?: string; } /** * Analysis result */ export interface AnalysisResult { summary: string; techStack: { languages: string[]; frameworks: string[]; tools: string[]; }; workflows: Array<{ name: string; description: string; entryPoint: string; steps: string[]; }>; architecture: { pattern: string; components: string[]; integrations: string[]; }; existingContext: { tools: AITool[]; files: DiscoveredFile[]; }; suggestions: { contextFiles: string[]; workflows: string[]; agents: string[]; commands: string[]; }; } /** * Default ignore patterns */ const DEFAULT_IGNORE = [ '**/node_modules/**', '**/dist/**', '**/build/**', '**/.git/**', '**/vendor/**', '**/__pycache__/**', '**/target/**', '**/bin/**', '**/obj/**', '**/.next/**', '**/.nuxt/**', '**/*.min.js', '**/*.map', '**/package-lock.json', '**/yarn.lock', '**/pnpm-lock.yaml' ]; /** * Intelligent analyzer using OpenRouter */ export class IntelligentAnalyzer { private client: OpenRouterClient | null = null; private projectRoot: string; constructor(projectRoot: string) { this.projectRoot = projectRoot; // Try to create client if API key available if (hasOpenRouterKey()) { try { this.client = createOpenRouterClient(); } catch (error) { console.warn('OpenRouter client initialization failed:', error); } } } /** * Check if intelligent analysis is available */ isIntelligentModeAvailable(): boolean { return this.client !== null; } /** * Discover all documentation files (.md files) */ async discoverDocs(): Promise { const files: DiscoveredFile[] = []; const mdFiles = await glob('**/*.md', { cwd: this.projectRoot, ignore: DEFAULT_IGNORE, absolute: true }); for (const filePath of mdFiles) { const stats = fs.statSync(filePath); files.push({ path: filePath, relativePath: path.relative(this.projectRoot, filePath), type: 'doc', size: stats.size }); } return files; } /** * Discover AI tool configurations */ async discoverToolConfigs(): Promise { const files: DiscoveredFile[] = []; for (const tool of AI_TOOLS) { const patterns = AI_TOOL_FOLDERS[tool]; for (const pattern of patterns) { const fullPath = path.join(this.projectRoot, pattern); if (fs.existsSync(fullPath)) { const stats = fs.statSync(fullPath); if (stats.isDirectory()) { // Scan directory contents const dirFiles = await glob('**/*', { cwd: fullPath, nodir: true, absolute: true }); for (const filePath of dirFiles) { const fileStats = fs.statSync(filePath); files.push({ path: filePath, relativePath: path.relative(this.projectRoot, filePath), type: 'tool_config', tool, size: fileStats.size }); } } else { files.push({ path: fullPath, relativePath: pattern, type: 'tool_config', tool, size: stats.size }); } } } } return files; } /** * Discover source code files */ async discoverCode(): Promise { const files: DiscoveredFile[] = []; const codePatterns = [ '**/*.js', '**/*.ts', '**/*.tsx', '**/*.jsx', '**/*.py', '**/*.go', '**/*.rs', '**/*.java', '**/*.cs', '**/*.rb', '**/*.php' ]; for (const pattern of codePatterns) { const codeFiles = await glob(pattern, { cwd: this.projectRoot, ignore: DEFAULT_IGNORE, absolute: true }); for (const filePath of codeFiles) { const stats = fs.statSync(filePath); files.push({ path: filePath, relativePath: path.relative(this.projectRoot, filePath), type: 'code', size: stats.size }); } } return files; } /** * Read file content safely */ private readFileContent(filePath: string, maxBytes: number = 50000): string | null { let fd: number | null = null; try { const stats = fs.statSync(filePath); if (stats.size > maxBytes) { // Read only first part of large files fd = fs.openSync(filePath, 'r'); const buffer = Buffer.alloc(maxBytes); fs.readSync(fd, buffer, 0, maxBytes, 0); return buffer.toString('utf-8') + '\n\n... [truncated]'; } return fs.readFileSync(filePath, 'utf-8'); } catch { return null; } finally { if (fd !== null) { try { fs.closeSync(fd); } catch { // Ignore errors when closing the file descriptor } } } } /** * Perform full intelligent analysis */ async analyze(): Promise { // Discover all files const [docs, toolConfigs, codeFiles] = await Promise.all([ this.discoverDocs(), this.discoverToolConfigs(), this.discoverCode() ]); // Determine which AI tools are already configured const configuredTools = new Set(); for (const config of toolConfigs) { if (config.tool) { configuredTools.add(config.tool); } } // Basic analysis (without AI) const basicResult: AnalysisResult = { summary: '', techStack: this.detectTechStackBasic(codeFiles), workflows: [], architecture: { pattern: 'unknown', components: [], integrations: [] }, existingContext: { tools: Array.from(configuredTools), files: [...docs, ...toolConfigs] }, suggestions: { contextFiles: [], workflows: [], agents: [], commands: [] } }; // If OpenRouter is available, perform intelligent analysis if (this.client) { try { const intelligentResult = await this.performIntelligentAnalysis(docs, toolConfigs, codeFiles); return { ...basicResult, ...intelligentResult, existingContext: basicResult.existingContext }; } catch (error) { console.warn('Intelligent analysis failed, falling back to basic:', error); } } // Generate basic summary basicResult.summary = this.generateBasicSummary(docs, toolConfigs, codeFiles, basicResult.techStack); return basicResult; } /** * Detect tech stack without AI */ private detectTechStackBasic(codeFiles: DiscoveredFile[]): AnalysisResult['techStack'] { const languages = new Set(); const frameworks = new Set(); const tools = new Set(); // Detect languages from file extensions for (const file of codeFiles) { const ext = path.extname(file.relativePath).toLowerCase(); switch (ext) { case '.js': case '.mjs': case '.cjs': languages.add('JavaScript'); break; case '.ts': case '.tsx': languages.add('TypeScript'); break; case '.py': languages.add('Python'); break; case '.go': languages.add('Go'); break; case '.rs': languages.add('Rust'); break; case '.java': languages.add('Java'); break; case '.cs': languages.add('C#'); break; case '.rb': languages.add('Ruby'); break; case '.php': languages.add('PHP'); break; } } // Check for common framework indicators const packageJsonPath = path.join(this.projectRoot, 'package.json'); if (fs.existsSync(packageJsonPath)) { try { const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8')); const deps = { ...packageJson.dependencies, ...packageJson.devDependencies }; if (deps.react) frameworks.add('React'); if (deps.vue) frameworks.add('Vue'); if (deps.angular || deps['@angular/core']) frameworks.add('Angular'); if (deps.express) frameworks.add('Express'); if (deps.fastify) frameworks.add('Fastify'); if (deps.next) frameworks.add('Next.js'); if (deps.nest || deps['@nestjs/core']) frameworks.add('NestJS'); tools.add('npm'); } catch { // Ignore parse errors } } // Check for Python indicators const requirementsTxtPath = path.join(this.projectRoot, 'requirements.txt'); const pyprojectPath = path.join(this.projectRoot, 'pyproject.toml'); if (fs.existsSync(requirementsTxtPath) || fs.existsSync(pyprojectPath)) { tools.add('pip'); if (fs.existsSync(pyprojectPath)) { tools.add('poetry'); } } return { languages: Array.from(languages), frameworks: Array.from(frameworks), tools: Array.from(tools) }; } /** * Perform intelligent analysis using OpenRouter */ private async performIntelligentAnalysis( docs: DiscoveredFile[], toolConfigs: DiscoveredFile[], codeFiles: DiscoveredFile[] ): Promise> { if (!this.client) { throw new Error('OpenRouter client not available'); } // Gather sample content for analysis const sampleContent: string[] = []; // Add README and main docs const importantDocs = docs.filter(d => d.relativePath.toLowerCase().includes('readme') || d.relativePath.toLowerCase().includes('claude') || d.relativePath.toLowerCase().includes('context') ).slice(0, 5); for (const doc of importantDocs) { const content = this.readFileContent(doc.path, 10000); if (content) { sampleContent.push(`## File: ${doc.relativePath}\n\n${content}`); } } // Add tool configs for (const config of toolConfigs.slice(0, 5)) { const content = this.readFileContent(config.path, 5000); if (content) { sampleContent.push(`## Tool Config (${config.tool}): ${config.relativePath}\n\n${content}`); } } // Add sample code files (entry points) const entryPoints = codeFiles.filter(f => f.relativePath.includes('index.') || f.relativePath.includes('main.') || f.relativePath.includes('app.') || f.relativePath.includes('server.') ).slice(0, 5); for (const code of entryPoints) { const content = this.readFileContent(code.path, 8000); if (content) { sampleContent.push(`## Code: ${code.relativePath}\n\n${content}`); } } // Create analysis prompt const analysisPrompt = ` Analyze this codebase content and provide a structured analysis: ${sampleContent.join('\n\n---\n\n')} Provide your analysis in the following JSON format: { "summary": "Brief description of what this project does", "techStack": { "languages": ["list of programming languages"], "frameworks": ["list of frameworks"], "tools": ["list of tools and libraries"] }, "workflows": [ { "name": "workflow name", "description": "what it does", "entryPoint": "file:line", "steps": ["step 1", "step 2"] } ], "architecture": { "pattern": "architectural pattern (e.g., MVC, microservices)", "components": ["main components"], "integrations": ["external integrations"] }, "suggestions": { "contextFiles": ["suggested context files to create"], "workflows": ["workflows to document"], "agents": ["agents that would be useful"], "commands": ["commands to implement"] } } Return ONLY valid JSON, no markdown formatting. `; let response: string = ''; try { response = await this.client.chat([ { role: 'system', content: 'You are an expert code analyzer and AI context engineer. Analyze codebases to understand their structure, workflows, and suggest optimal AI context configurations. Always return valid JSON.' }, { role: 'user', content: analysisPrompt } ], { temperature: 0.2, maxTokens: 4096 }); // First, try to parse the whole response as JSON. try { return JSON.parse(response); } catch { // If that fails, attempt to extract the JSON substring between the // first '{' and the last '}' and parse that. if (typeof response === 'string') { const start = response.indexOf('{'); const end = response.lastIndexOf('}'); if (start !== -1 && end !== -1 && end > start) { const jsonSubstring = response.slice(start, end + 1); return JSON.parse(jsonSubstring); } } } } catch (error) { console.warn('Failed to parse intelligent analysis response.'); // Log a preview of the raw response to aid debugging of malformed JSON. try { const preview = typeof response === 'string' ? response.slice(0, 1000) : JSON.stringify(response).slice(0, 1000); console.warn('Raw response preview (truncated to 1000 chars):', preview); } catch { // If preview logging fails for any reason, ignore and just log the error. } console.warn('Error details:', error); } return {}; } /** * Generate basic summary without AI */ private generateBasicSummary( docs: DiscoveredFile[], toolConfigs: DiscoveredFile[], codeFiles: DiscoveredFile[], techStack: AnalysisResult['techStack'] ): string { const parts: string[] = []; parts.push(`Project Analysis Summary`); parts.push(`========================`); parts.push(``); parts.push(`**Files Discovered:**`); parts.push(`- Documentation: ${docs.length} files`); parts.push(`- Source Code: ${codeFiles.length} files`); parts.push(`- AI Tool Configs: ${toolConfigs.length} files`); parts.push(``); parts.push(`**Tech Stack:**`); if (techStack.languages.length > 0) { parts.push(`- Languages: ${techStack.languages.join(', ')}`); } if (techStack.frameworks.length > 0) { parts.push(`- Frameworks: ${techStack.frameworks.join(', ')}`); } if (techStack.tools.length > 0) { parts.push(`- Tools: ${techStack.tools.join(', ')}`); } return parts.join('\n'); } /** * Generate embeddings for discovered content */ async generateEmbeddings(files: DiscoveredFile[]): Promise> { if (!this.client) { throw new Error('OpenRouter client not available for embeddings'); } const embeddings = new Map(); // Process in batches const batchSize = 10; for (let i = 0; i < files.length; i += batchSize) { const batch = files.slice(i, i + batchSize); const texts: string[] = []; const paths: string[] = []; for (const file of batch) { const content = this.readFileContent(file.path, 8000); if (content) { texts.push(`File: ${file.relativePath}\n\n${content}`); paths.push(file.relativePath); } } if (texts.length > 0) { const batchEmbeddings = await this.client.embedBatch(texts); for (let j = 0; j < paths.length; j++) { embeddings.set(paths[j], batchEmbeddings[j]); } } } return embeddings; } /** * Generate embedding for a single text string (e.g., search query) * * Automatically chunks large texts (>8K tokens) to fit within API limits. * For chunked texts, returns the average of all chunk embeddings. */ async embedText(text: string): Promise { if (!this.client) { throw new Error('OpenRouter client not available for embeddings'); } // Check if text needs chunking (8K token limit for OpenRouter) const tokenEstimate = estimateTokens(text); if (tokenEstimate <= 8000) { // Text is small enough, embed directly return this.client.embed(text); } // Text is too large, chunk it and embed each chunk const chunks = chunkForEmbedding(text); if (chunks.length === 1) { return this.client.embed(chunks[0]); } // Embed all chunks const embeddings: number[][] = []; for (const chunk of chunks) { const embedding = await this.client.embed(chunk); embeddings.push(embedding); } // Return the average embedding across all chunks return this.averageEmbeddings(embeddings); } /** * Average multiple embeddings into a single vector */ private averageEmbeddings(embeddings: number[][]): number[] { if (embeddings.length === 0) { throw new Error('Cannot average empty embeddings array'); } if (embeddings.length === 1) { return embeddings[0]; } const dimension = embeddings[0].length; const averaged = new Array(dimension).fill(0); for (const embedding of embeddings) { for (let i = 0; i < dimension; i++) { averaged[i] += embedding[i]; } } // Divide by count to get average for (let i = 0; i < dimension; i++) { averaged[i] /= embeddings.length; } return averaged; } /** * Set up file watcher for automatic re-indexing */ setupFileWatcher(onChange: (filePath: string) => Promise): () => void { // Check if chokidar is available let chokidar: unknown; try { chokidar = require('chokidar'); } catch { console.warn('chokidar not available. File watching disabled. Install with: npm install chokidar'); return () => {}; } const watchPaths = [ path.join(this.projectRoot, 'src'), path.join(this.projectRoot, 'lib'), path.join(this.projectRoot, '.claude'), path.join(this.projectRoot, 'CLAUDE.md'), path.join(this.projectRoot, 'README.md'), path.join(this.projectRoot, 'docs') ]; const watcher = (chokidar as any).watch(watchPaths, { ignored: /node_modules|\.git|dist/, persistent: true, ignoreInitial: true }); watcher.on('change', async (filePath: string) => { await onChange(filePath); }); return () => watcher.close(); } } /** * Create an intelligent analyzer */ export function createIntelligentAnalyzer(projectRoot: string): IntelligentAnalyzer { return new IntelligentAnalyzer(projectRoot); }