/**
 * Citation validation utilities
 * Check that all [@cite] references exist in .bib file
 */

import * as fs from 'fs';
import * as path from 'path';
import type { Citation, CitationValidation, CitationStats } from './types.js';

/** First `@key` token in a bracketed segment; a key ends at a comma or whitespace. */
const MARKED_KEY_PATTERN = /@([^,\s]+)/;

/** Leading token of a segment that carries no `@` marker. */
const BARE_KEY_PATTERN = /^[^,\s]+/;

/** BibTeX block types that do not define a citable entry. */
const NON_ENTRY_TYPES: ReadonlySet<string> = new Set(['comment', 'string', 'preamble']);

/**
 * Pull the citation key out of one `;`-separated segment of a bracketed citation.
 * Anything after the key (e.g. a locator such as `, p. 5`) is dropped.
 * @param segment - One segment, e.g. `@Key1` or ` @Key2, p. 5`
 * @returns The key, or undefined when the segment holds no key
 */
function citationKeyFromSegment(segment: string): string | undefined {
  const marked = MARKED_KEY_PATTERN.exec(segment);
  if (marked) {
    return marked[1];
  }
  // Tolerate a segment without '@' (the leading '@' has always been optional here).
  return BARE_KEY_PATTERN.exec(segment.trim())?.[0];
}

/**
 * Extract all citation keys from markdown text
 * Handles: [@Key], [@Key1; @Key2], @Key (inline)
 *
 * For each line, bracketed citations are reported first (in order of appearance),
 * followed by inline citations. Line numbers are 1-based.
 *
 * NOTE(review): part of this function (the inline pattern and the loop around the
 * bracketed scan) was not recoverable from the source and has been reconstructed.
 * Confirm the inline key syntax, and whether some lines (code fences, HTML
 * comments) were meant to be skipped, against the intended behavior.
 *
 * @param text - Markdown text to parse
 * @param file - Optional filename for context
 * @returns Array of citation objects
 */
export function extractCitations(text: string, file: string = ''): Citation[] {
  const citations: Citation[] = [];

  // Pattern for bracketed citations: [@Key] or [@Key1; @Key2]
  const bracketPattern = /\[@([^\]]+)\]/g;
  // Pattern for inline citations: @Key not preceded by a word character or '@',
  // so e-mail addresses (user@host) are not mistaken for citations. Punctuation
  // (':', '.', '-') is only part of a key when more key characters follow it.
  const inlinePattern = /(?<![A-Za-z0-9_@])@([A-Za-z0-9_]+(?:[:.-][A-Za-z0-9_]+)*)/g;

  for (const [index, line] of text.split('\n').entries()) {
    const lineNumber = index + 1;
    let match: RegExpExecArray | null;

    // Bracketed citations
    bracketPattern.lastIndex = 0;
    while ((match = bracketPattern.exec(line)) !== null) {
      // The pattern consumes the first '@'; put it back so every segment has the
      // same shape, then split the group on ';'.
      const segments = `@${match[1] ?? ''}`.split(';');
      for (const segment of segments) {
        const key = citationKeyFromSegment(segment);
        if (key) {
          citations.push({ key, line: lineNumber, file });
        }
      }
    }

    // Blank out bracketed groups so their keys are not reported a second time
    // by the inline scan below.
    const outsideBrackets = line.replace(bracketPattern, (group) => ' '.repeat(group.length));

    // Inline citations (reset lastIndex)
    inlinePattern.lastIndex = 0;
    while ((match = inlinePattern.exec(outsideBrackets)) !== null) {
      const citationKey = match[1];
      if (citationKey) {
        citations.push({ key: citationKey, line: lineNumber, file });
      }
    }
  }

  return citations;
}

/**
 * Parse .bib file and extract all entry keys
 *
 * A missing file yields an empty set rather than an error. `@comment`, `@string`
 * and `@preamble` blocks are ignored because they do not define entries.
 *
 * @param bibPath - Path to bibliography file
 * @returns Set of citation keys found in the bib file
 */
export function parseBibFile(bibPath: string): Set<string> {
  const keys = new Set<string>();

  if (!fs.existsSync(bibPath)) {
    return keys;
  }

  const content = fs.readFileSync(bibPath, 'utf-8');

  // Pattern for bib entries: @type{key,
  const entryPattern = /@(\w+)\s*\{\s*([^,\s]+)\s*,/g;

  let match: RegExpExecArray | null;
  while ((match = entryPattern.exec(content)) !== null) {
    const entryType = match[1];
    const key = match[2];
    if (key && entryType && !NON_ENTRY_TYPES.has(entryType.toLowerCase())) {
      keys.add(key);
    }
  }

  return keys;
}

/**
 * Validate citations against bib file
 *
 * Markdown files that do not exist are skipped. Each citation is recorded with
 * the base name of the file it came from.
 *
 * @param mdFiles - Markdown files to check
 * @param bibPath - Path to .bib file
 * @returns Validation result with valid, missing, unused, and duplicate citations
 */
export function validateCitations(mdFiles: string[], bibPath: string): CitationValidation {
  // Collect all citations from markdown
  const allCitations: Citation[] = [];
  for (const file of mdFiles) {
    if (!fs.existsSync(file)) continue;
    const text = fs.readFileSync(file, 'utf-8');
    allCitations.push(...extractCitations(text, path.basename(file)));
  }

  // Get bib keys
  const bibKeys = parseBibFile(bibPath);

  // Categorize
  const valid: Citation[] = [];
  const missing: Citation[] = [];
  const keyOccurrences = new Map<string, Citation[]>();

  for (const citation of allCitations) {
    // Track occurrences for duplicates
    const occurrences = keyOccurrences.get(citation.key);
    if (occurrences) {
      occurrences.push(citation);
    } else {
      keyOccurrences.set(citation.key, [citation]);
    }

    if (bibKeys.has(citation.key)) {
      valid.push(citation);
    } else {
      missing.push(citation);
    }
  }

  // Find unused bib entries (every cited key is a key of keyOccurrences)
  const unused = [...bibKeys].filter((key) => !keyOccurrences.has(key));

  // Find duplicate citations (same key cited multiple times - not an error, just info)
  const duplicates = [...keyOccurrences.entries()]
    .filter(([, occurrences]) => occurrences.length > 1)
    .map(([key, occurrences]) => ({
      key,
      count: occurrences.length,
      locations: occurrences,
    }));

  return { valid, missing, unused, duplicates };
}

/**
 * Get citation statistics
 * @param mdFiles - Markdown files to analyze
 * @param bibPath - Path to bibliography file
 * @returns Statistics object
 */
export function getCitationStats(mdFiles: string[], bibPath: string): CitationStats {
  const result = validateCitations(mdFiles, bibPath);

  const validKeys = new Set(result.valid.map((c) => c.key));
  const missingKeys = new Set(result.missing.map((c) => c.key));

  return {
    totalCitations: result.valid.length + result.missing.length,
    // A key is either in the bib file or not, so the two sets never overlap.
    uniqueCited: validKeys.size + missingKeys.size,
    valid: result.valid.length,
    missing: result.missing.length,
    missingKeys: [...missingKeys],
    // Every bib entry is either cited (a valid key) or unused, so the entry
    // count follows from the validation result without re-reading the file.
    bibEntries: validKeys.size + result.unused.length,
    unused: result.unused.length,
    unusedKeys: result.unused,
  };
}