/**
 * DOI validation and fetching utilities
 * Check DOIs in .bib files, fetch BibTeX from DOIs
 */

import * as fs from 'fs';
import type { BibEntry, DoiCheckResult, BibtexFetchResult, DoiLookupResult, BibCheckResult } from './types.js';
import { crossrefLimiter, dataciteLimiter, doiOrgLimiter } from './rate-limiter.js';
import { getCachedDoi, cacheDoi } from './doi-cache.js';

// Entry types that typically don't have DOIs
const NO_DOI_TYPES = new Set([
  'book',           // Books often don't have DOIs (chapters might)
  'inbook',         // Book chapters - variable
  'thesis',         // Theses rarely have DOIs
  'mastersthesis',
  'phdthesis',
  'misc',           // Catch-all, often no DOI
  'unpublished',    // By definition
  'manual',         // Software manuals
  'techreport',     // Some do, many don't
  'booklet',
]);

// Entry types that should have DOIs
const EXPECT_DOI_TYPES = new Set([
  'article',        // Journal articles should have DOIs
  'inproceedings',  // Conference papers usually do
  'proceedings',
  'incollection',   // Book chapters in collections
]);

/**
 * Parse .bib file and extract entries with DOI info
 */
export function parseBibEntries(bibPath: string): BibEntry[] {
  if (!fs.existsSync(bibPath)) {
    return [];
  }

  const content = fs.readFileSync(bibPath, 'utf-8');
  const entries: BibEntry[] = [];
  const lines = content.split('\n');

  // Pattern for bib entries: @type{key,
  const entryPattern = /@(\w+)\s*\{\s*([^,\s]+)\s*,/g;

  let match: RegExpExecArray | null;
  while ((match = entryPattern.exec(content)) !== null) {
    const type = match[1]!.toLowerCase();
    const key = match[2]!;
    const startPos = match.index;

    // Find the line number
    let line = 1;
    for (let i = 0; i < startPos; i++) {
      if (content[i] === '\n') line++;
    }

    // Find the end of this entry (matching closing brace)
    let braceCount = 0;
    let entryEnd = startPos;
    let inEntry = false;

    for (let i = startPos; i < content.length; i++) {
      if (content[i] === '{') {
        braceCount++;
        inEntry = true;
      } else if (content[i] === '}') {
        braceCount--;
        if (inEntry && braceCount === 0) {
          entryEnd = i + 1;
          break;
        }
      }
    }

    const entryContent = content.slice(startPos, entryEnd);

    // Extract DOI field
    const doiMatch = entryContent.match(/\bdoi\s*=\s*[{"]([^}"]+)[}"]/i);
    let doi = doiMatch ? doiMatch[1]!.trim() : null;

    // Clean DOI - remove URL prefix if present
    if (doi) {
      doi = doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, '');
    }

    // Extract title for display
    const titleMatch = entryContent.match(/\btitle\s*=\s*[{"]([^}"]+)[}"]/i);
    const title = titleMatch ? titleMatch[1]!.trim().slice(0, 60) : '';

    // Extract author for lookup
    const authorMatch = entryContent.match(/\bauthor\s*=\s*[{"]([^}"]+)[}"]/i);
    const authorRaw = authorMatch ? authorMatch[1]!.trim() : '';

    // Extract year
    const yearMatch = entryContent.match(/\byear\s*=\s*[{"]?(\d{4})[}""]?/i);
    const year = yearMatch ? parseInt(yearMatch[1]!) : null;

    // Extract journal
    const journalMatch = entryContent.match(/\bjournal\s*=\s*[{"]([^}"]+)[}"]/i);
    const journal = journalMatch ? journalMatch[1]!.trim() : '';

    // Check for skip marker: nodoi = {true} or nodoi = true
    const skipMatch = entryContent.match(/\bnodoi\s*=\s*[{"]?(true|yes|1)[}""]?/i);
    const skip = !!skipMatch;

    // Check for comment marker immediately before entry: % no-doi
    // Only look at the text between the last entry end (or start) and this entry
    const linesBefore = content.slice(Math.max(0, startPos - 200), startPos);
    // Find the last closing brace or start of file to avoid matching comments for previous entries
    const lastEntryEnd = linesBefore.lastIndexOf('}');
    const relevantBefore = lastEntryEnd >= 0 ? linesBefore.slice(lastEntryEnd + 1) : linesBefore;
    const commentSkip = /% *no-?doi/i.test(relevantBefore);

    entries.push({
      key,
      type,
      doi: doi || null,
      title,
      authorRaw,
      year,
      journal,
      skip: skip || commentSkip,
      expectDoi: EXPECT_DOI_TYPES.has(type),
      noDoi: NO_DOI_TYPES.has(type),
      line,
    });
  }

  return entries;
}

/**
 * Validate DOI format
 */
export function isValidDoiFormat(doi: string): boolean {
  if (!doi) return false;
  // DOI format: 10.prefix/suffix
  // Prefix is 4+ digits, suffix can contain most characters
  return /^10\.\d{4,}\/[^\s]+$/.test(doi);
}

/**
 * Check if DOI resolves via DataCite (for Zenodo, Figshare, etc.)
 */
async function checkDoiDataCite(doi: string): Promise<DoiCheckResult> {
  try {
    const response = await dataciteLimiter.fetchWithRetry(
      `https://api.datacite.org/dois/${encodeURIComponent(doi)}`,
      {
        headers: {
          'Accept': 'application/vnd.api+json',
          'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
        },
      }
    );

    if (response.status === 404) {
      return { valid: false, error: 'DOI not found in DataCite' };
    }

    if (!response.ok) {
      return { valid: false, error: `HTTP ${response.status}` };
    }

    const data = await response.json() as any;
    const attrs = data.data?.attributes;

    if (!attrs) {
      return { valid: false, error: 'Invalid DataCite response' };
    }

    return {
      valid: true,
      source: 'datacite',
      metadata: {
        title: attrs.titles?.[0]?.title || '',
        authors: attrs.creators?.map((c: any) => `${c.givenName || ''} ${c.familyName || ''}`.trim()) || [],
        year: attrs.publicationYear,
        journal: attrs.publisher || '',
        type: attrs.types?.resourceTypeGeneral || '',
      },
    };
  } catch (err) {
    return { valid: false, error: (err as Error).message };
  }
}

interface CheckDoiOptions {
  skipCache?: boolean;
}

/**
 * Check if DOI resolves (exists) - tries Crossref first, then DataCite
 * Results are cached for 7 days to reduce API calls.
 */
export async function checkDoi(doi: string, options: CheckDoiOptions = {}): Promise<DoiCheckResult & { cached?: boolean }> {
  if (!isValidDoiFormat(doi)) {
    return { valid: false, error: 'Invalid DOI format' };
  }

  // Check cache first (unless skipped)
  if (!options.skipCache) {
    const cached = getCachedDoi(doi);
    if (cached) {
      return { ...cached, cached: true } as DoiCheckResult & { cached?: boolean };
    }
  }

  // Zenodo DOIs start with 10.5281 - check DataCite first
  const isZenodo = doi.startsWith('10.5281/');
  const isFigshare = doi.startsWith('10.6084/');
  const isDataCiteLikely = isZenodo || isFigshare;

  if (isDataCiteLikely) {
    const dataciteResult = await checkDoiDataCite(doi);
    if (dataciteResult.valid) {
      cacheDoi(doi, dataciteResult);
      return dataciteResult;
    }
  }

  try {
    // Use Crossref API to check DOI
    const response = await crossrefLimiter.fetchWithRetry(
      `https://api.crossref.org/works/${encodeURIComponent(doi)}`,
      {
        headers: {
          'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
        },
      }
    );

    if (response.status === 404) {
      // Try DataCite as fallback (if not already tried)
      if (!isDataCiteLikely) {
        const dataciteResult = await checkDoiDataCite(doi);
        if (dataciteResult.valid) {
          cacheDoi(doi, dataciteResult);
          return dataciteResult;
        }
      }
      const result = { valid: false, error: 'DOI not found' };
      cacheDoi(doi, result);
      return result;
    }

    if (!response.ok) {
      // Don't cache transient errors
      return { valid: false, error: `HTTP ${response.status}` };
    }

    const data = await response.json() as any;
    const work = data.message;

    const result: DoiCheckResult = {
      valid: true,
      source: 'crossref',
      metadata: {
        title: work.title?.[0] || '',
        authors: work.author?.map((a: any) => `${a.given || ''} ${a.family || ''}`.trim()) || [],
        year: work.published?.['date-parts']?.[0]?.[0] || work.created?.['date-parts']?.[0]?.[0],
        journal: work['container-title']?.[0] || '',
        type: work.type,
      },
    };

    cacheDoi(doi, result);
    return result;
  } catch (err) {
    // Don't cache network errors
    return { valid: false, error: (err as Error).message };
  }
}

/**
 * Fetch BibTeX from DOI using content negotiation
 */
export async function fetchBibtex(doi: string): Promise<BibtexFetchResult> {
  // Clean DOI
  doi = doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, '');

  if (!isValidDoiFormat(doi)) {
    return { success: false, error: 'Invalid DOI format' };
  }

  try {
    const response = await doiOrgLimiter.fetchWithRetry(
      `https://doi.org/${encodeURIComponent(doi)}`,
      {
        headers: {
          'Accept': 'application/x-bibtex',
          'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
        },
        redirect: 'follow',
      }
    );

    if (!response.ok) {
      return { success: false, error: `HTTP ${response.status}` };
    }

    const bibtex = await response.text();

    if (!bibtex.includes('@')) {
      return { success: false, error: 'Invalid BibTeX response' };
    }

    return { success: true, bibtex: bibtex.trim() };
  } catch (err) {
    return { success: false, error: (err as Error).message };
  }
}

interface CheckBibDoisOptions {
  checkMissing?: boolean;
  parallel?: number;
}

/**
 * Check all DOIs in a .bib file
 */
export async function checkBibDois(bibPath: string, options: CheckBibDoisOptions = {}): Promise<BibCheckResult> {
  const { checkMissing = false, parallel = 5 } = options;

  const entries = parseBibEntries(bibPath);
  const results: Array<BibEntry & { status: string; message?: string; metadata?: object }> = [];

  let valid = 0;
  let invalid = 0;
  let missing = 0;
  let skipped = 0;

  // Process in batches to avoid rate limiting
  for (let i = 0; i < entries.length; i += parallel) {
    const batch = entries.slice(i, i + parallel);

    const batchResults = await Promise.all(
      batch.map(async (entry) => {
        // Skip if marked
        if (entry.skip) {
          skipped++;
          return { ...entry, status: 'skipped', message: 'Marked as no-doi' };
        }

        // No DOI field
        if (!entry.doi) {
          if (entry.noDoi) {
            // Expected - books, theses, etc.
            skipped++;
            return { ...entry, status: 'skipped', message: `${entry.type} typically has no DOI` };
          } else if (entry.expectDoi) {
            // Should have DOI but doesn't
            missing++;
            return { ...entry, status: 'missing', message: 'Expected DOI for article/proceedings' };
          } else {
            skipped++;
            return { ...entry, status: 'skipped', message: 'No DOI field' };
          }
        }

        // Validate DOI format first
        if (!isValidDoiFormat(entry.doi)) {
          invalid++;
          return { ...entry, status: 'invalid', message: 'Invalid DOI format' };
        }

        // Check if DOI resolves
        const check = await checkDoi(entry.doi);
        if (check.valid) {
          valid++;
          return { ...entry, status: 'valid', metadata: check.metadata };
        } else {
          invalid++;
          return { ...entry, status: 'invalid', message: check.error };
        }
      })
    );

    results.push(...batchResults);

    // Small delay between batches to be nice to the API
    if (i + parallel < entries.length) {
      await new Promise(r => setTimeout(r, 200));
    }
  }

  return { entries: results, valid, invalid, missing, skipped };
}

interface DataCiteItem {
  id: string;
  attributes: {
    titles?: Array<{ title: string }>;
    creators?: Array<{ givenName?: string; familyName?: string }>;
    publicationYear: number;
    publisher?: string;
  };
}

/**
 * Search DataCite API (for Zenodo, Figshare, etc.)
 */
async function searchDataCite(title: string, author: string = '', year: number | null = null): Promise<any[]> {
  try {
    // DataCite query syntax
    let query = `titles.title:${title.replace(/[{}]/g, '')}`;
    if (author) {
      query += ` AND creators.name:${author}`;
    }
    if (year) {
      query += ` AND publicationYear:${year}`;
    }

    const params = new URLSearchParams({
      query: query,
      'page[size]': '5',
    });

    const response = await dataciteLimiter.fetchWithRetry(
      `https://api.datacite.org/dois?${params}`,
      {
        headers: {
          'Accept': 'application/vnd.api+json',
          'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
        },
      }
    );

    if (!response.ok) return [];

    const data = await response.json() as { data?: DataCiteItem[] };
    const items = data.data || [];

    return items.map(item => {
      const attrs = item.attributes;
      return {
        DOI: item.id,
        title: [attrs.titles?.[0]?.title || ''],
        author: attrs.creators?.map(c => ({ family: c.familyName, given: c.givenName })) || [],
        'published-print': { 'date-parts': [[attrs.publicationYear]] },
        'container-title': [attrs.publisher || ''],
        score: 50, // Base score for DataCite results
        source: 'datacite',
      };
    });
  } catch {
    return [];
  }
}

/**
 * Normalize text for comparison (lowercase, remove special chars)
 */
function normalizeForMatching(text: string): string {
  return (text || '')
    .toLowerCase()
    .replace(/[{}\\]/g, '')  // Remove LaTeX braces
    .replace(/[^a-z0-9\s]/g, ' ')  // Replace special chars with space
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Check if DOI looks like a supplement, figure, or review (not the main paper)
 */
function isSupplementOrReview(doi: string, title: string = '', journal: string = ''): boolean {
  const doiLower = (doi || '').toLowerCase();
  const titleLower = (title || '').toLowerCase();
  const journalLower = (journal || '').toLowerCase();

  // Supplement/figure DOI patterns
  if (/\.suppl|\/suppl|\.figure|\/figure|\.s\d+$|_s\d+$/i.test(doiLower)) {
    return true;
  }

  // F1000/Faculty Opinions (post-publication reviews)
  if (/10\.3410\/f\./i.test(doiLower) || /faculty opinions/i.test(journalLower)) {
    return true;
  }

  // Title suggests it's supplementary material
  if (/^supplementary|^supporting information|^appendix/i.test(titleLower)) {
    return true;
  }

  return false;
}

interface CrossrefItem {
  DOI: string;
  title?: string[];
  author?: Array<{ given?: string; family?: string }>;
  'published-print'?: { 'date-parts': number[][] };
  'published-online'?: { 'date-parts': number[][] };
  'container-title'?: string[];
  score?: number;
  type?: string;
}

/**
 * Search for DOI by title and author using Crossref API (+ DataCite fallback)
 */
export async function lookupDoi(
  title: string,
  author: string = '',
  year: number | null = null,
  journal: string = ''
): Promise<DoiLookupResult> {
  if (!title || title.length < 10) {
    return { found: false, error: 'Title too short for reliable search' };
  }

  // Check for keywords that suggest Zenodo/DataCite sources
  const likelyZenodo = /\b(IPBES|zenodo|assessment report|secretariat)\b/i.test(title);

  try {
    // Build query - title is most important, add author and journal if available
    let query = title;
    if (author) {
      query = `${title} ${author}`;
    }
    // Add journal to query for better matching
    if (journal) {
      query = `${query} ${journal}`;
    }

    let items: CrossrefItem[] = [];

    // Try structured bibliographic query first (more accurate)
    const structuredParams = new URLSearchParams({
      rows: '10',
      select: 'DOI,title,author,published-print,published-online,container-title,score,type',
    });
    structuredParams.set('query.bibliographic', title);
    if (author) {
      structuredParams.set('query.author', author);
    }
    if (journal) {
      structuredParams.set('query.container-title', journal);
    }

    let response = await crossrefLimiter.fetchWithRetry(
      `https://api.crossref.org/works?${structuredParams}`,
      {
        headers: {
          'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
        },
      }
    );

    if (response.ok) {
      const data = await response.json() as { message?: { items?: CrossrefItem[] } };
      items = data.message?.items || [];
    }

    // If structured query found few results, also try query.title (often better for exact matches)
    if (items.length < 5) {
      const titleParams = new URLSearchParams({
        rows: '10',
        select: 'DOI,title,author,published-print,published-online,container-title,score,type',
      });
      titleParams.set('query.title', title);

      const response2 = await crossrefLimiter.fetchWithRetry(
        `https://api.crossref.org/works?${titleParams}`,
        {
          headers: {
            'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
          },
        }
      );

      if (response2.ok) {
        const data = await response2.json() as { message?: { items?: CrossrefItem[] } };
        const newItems = data.message?.items || [];
        // Merge results, avoiding duplicates
        const existingDois = new Set(items.map(i => i.DOI));
        for (const item of newItems) {
          if (!existingDois.has(item.DOI)) {
            items.push(item);
          }
        }
      }
    }

    // If still nothing, try basic query (most lenient)
    if (items.length === 0) {
      const basicParams = new URLSearchParams({
        query: query,
        rows: '10',
        select: 'DOI,title,author,published-print,published-online,container-title,score,type',
      });

      response = await crossrefLimiter.fetchWithRetry(
        `https://api.crossref.org/works?${basicParams}`,
        {
          headers: {
            'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
          },
        }
      );

      if (response.ok) {
        const data = await response.json() as { message?: { items?: CrossrefItem[] } };
        items = data.message?.items || [];
      }
    }

    // Also search DataCite for Zenodo/institutional repos
    if (likelyZenodo || items.length === 0) {
      const dataciteItems = await searchDataCite(title, author, year);
      items = [...items, ...dataciteItems];
    }

    if (items.length === 0) {
      return { found: false, error: 'No results found' };
    }

    const normalizedSearchTitle = normalizeForMatching(title);
    const normalizedJournal = normalizeForMatching(journal);

    // Score the results
    const scored = items.map(item => {
      let score = 0;
      const itemTitle = item.title?.[0] || '';
      const itemJournal = item['container-title']?.[0] || '';
      const normalizedItemTitle = normalizeForMatching(itemTitle);
      const normalizedItemJournal = normalizeForMatching(itemJournal);

      // === PENALTY: Supplement/figure/review DOIs ===
      if (isSupplementOrReview(item.DOI, itemTitle, itemJournal)) {
        score -= 100;  // Heavy penalty - almost never want these
      }

      // === Title similarity (most important) ===
      if (normalizedItemTitle === normalizedSearchTitle) {
        score += 100;  // Exact match
      } else if (normalizedItemTitle.includes(normalizedSearchTitle) ||
                 normalizedSearchTitle.includes(normalizedItemTitle)) {
        score += 50;
      } else {
        // Check word overlap
        const searchWords = normalizedSearchTitle.split(/\s+/).filter(w => w.length > 3);
        const itemWords = normalizedItemTitle.split(/\s+/).filter(w => w.length > 3);
        const overlap = searchWords.filter(w =>
          itemWords.some(iw => iw.includes(w) || w.includes(iw))
        );
        score += (overlap.length / Math.max(searchWords.length, 1)) * 40;
      }

      // === Author match ===
      if (author && item.author) {
        const authorLower = author.toLowerCase();
        const hasAuthor = item.author.some(a =>
          (a.family || '').toLowerCase().includes(authorLower) ||
          authorLower.includes((a.family || '').toLowerCase())
        );
        if (hasAuthor) score += 30;
      }

      // === Journal match (NEW) ===
      if (normalizedJournal && normalizedItemJournal) {
        // Check for journal name match (handles abbreviations)
        const journalWords = normalizedJournal.split(/\s+/).filter(w => w.length > 2);
        const itemJournalWords = normalizedItemJournal.split(/\s+/).filter(w => w.length > 2);

        // Count matching words
        const journalOverlap = journalWords.filter(w =>
          itemJournalWords.some(iw => iw.includes(w) || w.includes(iw))
        );

        if (journalOverlap.length >= Math.min(2, journalWords.length)) {
          score += 40;  // Good journal match
        } else if (journalOverlap.length >= 1) {
          score += 15;  // Partial match
        }

        // Bonus for exact journal match
        if (normalizedItemJournal === normalizedJournal) {
          score += 20;
        }
      }

      // === Year match - CRITICAL for accuracy ===
      const itemYear = item['published-print']?.['date-parts']?.[0]?.[0] ||
                       item['published-online']?.['date-parts']?.[0]?.[0];
      if (year && itemYear) {
        if (itemYear === year) {
          score += 50;  // Exact match - required for high confidence
        } else if (Math.abs(itemYear - year) === 1) {
          score += 20;  // Off by one (common for online-first)
        } else {
          score -= 50;  // Wrong year = likely wrong paper
        }
      } else if (year && !itemYear) {
        score -= 10;  // Can't verify year
      }

      // Crossref's own relevance score (capped)
      score += Math.min(item.score || 0, 10);

      return {
        doi: item.DOI,
        title: itemTitle,
        authors: item.author?.map(a => `${a.given || ''} ${a.family || ''}`.trim()) || [],
        year: itemYear,
        journal: itemJournal,
        score,
        crossrefScore: item.score,
        isSupplement: isSupplementOrReview(item.DOI, itemTitle, itemJournal),
      };
    });

    // Sort by our score
    scored.sort((a, b) => b.score - a.score);

    // Filter out supplements for the "best" pick (but keep in alternatives)
    const mainPapers = scored.filter(s => !s.isSupplement);
    const best = mainPapers.length > 0 ? mainPapers[0] : scored[0];

    if (!best) {
      return { found: false, error: 'No valid results found' };
    }

    // Confidence thresholds
    let confidence: 'low' | 'medium' | 'high' = 'low';
    if (best.score >= 120) confidence = 'high';
    else if (best.score >= 70) confidence = 'medium';

    // === NEW: Try DataCite if Crossref confidence is low ===
    if (confidence === 'low' && !likelyZenodo) {
      const dataciteItems = await searchDataCite(title, author, year);
      if (dataciteItems.length > 0) {
        // Score DataCite results with same logic
        for (const dcItem of dataciteItems) {
          const dcTitle = dcItem.title?.[0] || '';
          const normalizedDcTitle = normalizeForMatching(dcTitle);
          let dcScore = 0;

          // Title match
          if (normalizedDcTitle === normalizedSearchTitle) {
            dcScore += 100;
          } else if (normalizedDcTitle.includes(normalizedSearchTitle) ||
                     normalizedSearchTitle.includes(normalizedDcTitle)) {
            dcScore += 50;
          }

          // Year match
          const dcYear = dcItem['published-print']?.['date-parts']?.[0]?.[0];
          if (year && dcYear && dcYear === year) {
            dcScore += 50;
          }

          if (dcScore > best.score) {
            return {
              found: true,
              doi: dcItem.DOI,
              confidence: dcScore >= 120 ? 'high' : dcScore >= 70 ? 'medium' : 'low',
              score: dcScore,
              metadata: {
                title: dcTitle,
                authors: dcItem.author?.map((a: any) => `${a.given || ''} ${a.family || ''}`.trim()) || [],
                year: dcYear,
                journal: dcItem['container-title']?.[0] || '',
              },
              alternatives: scored.slice(0, 2),
            };
          }
        }
      }
    }

    return {
      found: true,
      doi: best.doi,
      confidence,
      score: best.score,
      metadata: {
        title: best.title,
        authors: best.authors,
        year: best.year || 0,
        journal: best.journal,
      },
      alternatives: scored.filter(s => s.doi !== best.doi).slice(0, 3),
    };
  } catch (err) {
    return { found: false, error: (err as Error).message };
  }
}

interface LookupMissingDoisOptions {
  parallel?: number;
  onProgress?: (current: number, total: number) => void;
}

interface LookupMissingDoiResult {
  key: string;
  title: string;
  type: string;
  journal: string;
  result: DoiLookupResult;
}

/**
 * Look up DOIs for all entries missing them in a .bib file
 */
export async function lookupMissingDois(
  bibPath: string,
  options: LookupMissingDoisOptions = {}
): Promise<LookupMissingDoiResult[]> {
  const { parallel = 3, onProgress } = options;

  const entries = parseBibEntries(bibPath);
  const missing = entries.filter(e =>
    !e.doi &&
    !e.skip &&
    !NO_DOI_TYPES.has(e.type)
  );

  const results: LookupMissingDoiResult[] = [];

  for (let i = 0; i < missing.length; i += parallel) {
    const batch = missing.slice(i, i + parallel);

    const batchResults = await Promise.all(
      batch.map(async (entry) => {
        // Extract first author's last name from the entry
        // This is tricky because BibTeX author format varies
        let author = '';
        if (entry.authorRaw) {
          // Try to get first author's last name
          const firstAuthor = entry.authorRaw.split(' and ')[0];
          if (firstAuthor) {
            const parts = firstAuthor.split(',');
            author = parts[0]?.trim() || '';
          }
        }

        const result = await lookupDoi(entry.title, author, entry.year, entry.journal);

        return {
          key: entry.key,
          title: entry.title,
          type: entry.type,
          journal: entry.journal,
          result,
        };
      })
    );

    results.push(...batchResults);

    if (onProgress) {
      onProgress(Math.min(i + parallel, missing.length), missing.length);
    }

    // Rate limiting
    if (i + parallel < missing.length) {
      await new Promise(r => setTimeout(r, 300));
    }
  }

  return results;
}

interface AddToBibResult {
  success: boolean;
  key?: string;
  error?: string;
}

/**
 * Add a BibTeX entry to a .bib file
 */
export function addToBib(bibPath: string, bibtex: string): AddToBibResult {
  // Extract key from BibTeX
  const keyMatch = bibtex.match(/@\w+\s*\{\s*([^,\s]+)/);
  if (!keyMatch) {
    return { success: false, error: 'Could not extract citation key from BibTeX' };
  }
  const key = keyMatch[1];

  // Check if key already exists
  const existing = fs.existsSync(bibPath) ? fs.readFileSync(bibPath, 'utf-8') : '';
  if (existing.includes(`{${key},`) || existing.includes(`{${key}\n`)) {
    return { success: false, error: `Key "${key}" already exists in ${bibPath}` };
  }

  // Append to file
  const newContent = existing.trim() + '\n\n' + bibtex + '\n';
  fs.writeFileSync(bibPath, newContent, 'utf-8');

  return { success: true, key };
}