/**
 * Compute section boundaries in a DOCX from its real heading paragraphs.
 *
 * Given the configured `sections.yaml` and the headings extracted via
 * `extractHeadings()`, return one boundary per section file with text
 * positions in the same coordinate system as `CommentAnchorData.docPosition`.
 *
 * Matching is by heading text (primary header + aliases, case-insensitive).
 * This replaces the older keyword-search-in-body-text approach which would
 * pick up section names that happen to appear inside prose ("results across
 * countries") or in structured-abstract labels where paragraph boundaries
 * are lost in concatenation.
 */

import type { DocxHeading } from '../word-extraction.js';
import type { SectionConfig } from '../types.js';

export interface SectionBoundary {
  file: string;
  start: number;
  end: number;
}

export function computeSectionBoundaries(
  sections: Record<string, SectionConfig>,
  headings: DocxHeading[],
): SectionBoundary[] {
  const matched: SectionBoundary[] = [];

  // Only consider top-level (Heading1-style) when level info is available;
  // when level==0 (unparseable style), fall back to all headings.
  const haveLevels = headings.some(h => h.level > 0);
  const candidates = haveLevels ? headings.filter(h => h.level === 1) : headings;

  for (const [file, cfg] of Object.entries(sections)) {
    const targets = [cfg.header, ...(cfg.aliases || [])]
      .filter(Boolean)
      .map(s => s.toLowerCase().trim());

    let firstMatch = -1;
    for (const h of candidates) {
      const text = h.text.toLowerCase().trim();
      if (targets.includes(text)) {
        firstMatch = h.docPosition;
        break;
      }
    }

    // Fallback: if no level-1 hit, allow any-level match (handles single-level docs)
    if (firstMatch < 0 && haveLevels) {
      for (const h of headings) {
        const text = h.text.toLowerCase().trim();
        if (targets.includes(text)) {
          firstMatch = h.docPosition;
          break;
        }
      }
    }

    if (firstMatch >= 0) {
      matched.push({ file, start: firstMatch, end: Number.MAX_SAFE_INTEGER });
    }
  }

  // Sort by start position and tighten each end to the next start
  matched.sort((a, b) => a.start - b.start);
  for (let i = 0; i < matched.length - 1; i++) {
    matched[i].end = matched[i + 1].start;
  }

  return matched;
}