/**
 * Markdown Parser
 * 
 * Wrapper around remark + remark-gfm for robust markdown parsing.
 * Caches the processor instance for performance.
 * 
 * NOTE: We disable setext-style headings (Text\n---) and only support
 * ATX-style headings (# Heading) for predictable streaming behavior.
 */

import { remark } from 'remark';
import remarkGfm from 'remark-gfm';
import type { Root, Content } from 'mdast';

// ============================================================================
// Cached Processor
// ============================================================================

/**
 * Shared remark processor with the remark-gfm plugin registered
 * (GitHub Flavored Markdown support).
 *
 * Built once when this module is evaluated and reused by every parse
 * function in this file, so the plugin chain is not rebuilt per call.
 */
const processor = remark().use(remarkGfm);

// ============================================================================
// Pre-processing: Disable Setext Headings
// ============================================================================

/**
 * Valid list marker characters.
 *
 * Only `-` can actually reach the list-marker check, because SETEXT_PATTERN
 * admits nothing but `-` and `=`; `*` and `+` are listed for completeness.
 */
const LIST_MARKERS = new Set(['-', '*', '+']);

/**
 * Matches a potential setext underline: optional indent, a run made up of
 * ONLY dashes or ONLY equals signs, then optional trailing spaces/tabs.
 *
 * Mixed runs such as `-=-` are intentionally not matched: they can never be
 * a setext underline, so there is nothing to escape.
 *
 * Groups: 1 = indent, 2 = run of `-` or `=`, 3 = trailing whitespace.
 */
const SETEXT_PATTERN = /^([ \t]*)(-+|=+)([ \t]*)$/;

/**
 * Splits text into lines while CAPTURING each terminator (CRLF, CR or LF),
 * so the original line endings can be written back unchanged.
 */
const LINE_ENDING_PATTERN = /(\r\n|\r|\n)/;

/** Opening code fence: 3+ backticks or tildes, followed by an optional info string. */
const FENCE_OPEN_PATTERN = /^[ \t]*(`{3,}|~{3,})([\s\S]*)$/;

/** Closing code fence: 3+ backticks or tildes with nothing but whitespace after. */
const FENCE_CLOSE_PATTERN = /^[ \t]*(`{3,}|~{3,})[ \t]*$/;

/** The fence that opened the code block currently being skipped. */
interface OpenFence {
  /** Fence character: a backtick or a tilde. */
  marker: string;
  /** Number of fence characters in the opening run. */
  length: number;
}

/**
 * Check if a line is a valid UNINDENTED list marker (single -, *, or + at start of line)
 * Indented single dashes could be setext underlines, so we only preserve unindented ones.
 */
function isUnindentedListMarker(indent: string, chars: string): boolean {
  return !indent && chars.length === 1 && LIST_MARKERS.has(chars);
}

/**
 * Check if a line is a horizontal rule (3+ unindented dashes after blank line).
 *
 * `chars` is expected to be a homogeneous run (as produced by SETEXT_PATTERN),
 * so inspecting its first character is enough to tell dashes from equals.
 */
function isHorizontalRule(
  indent: string,
  chars: string,
  prevLine: string
): boolean {
  const isUnindented = !indent;
  const isThreePlusDashes = chars.length >= 3 && chars.startsWith('-');
  const prevLineIsBlank = !prevLine.trim();
  return isUnindented && isThreePlusDashes && prevLineIsBlank;
}

/**
 * Return the fence described by `line` if it opens a fenced code block,
 * otherwise null.
 *
 * Any amount of leading whitespace is accepted because nesting depth (e.g. a
 * fence inside a list item) is not tracked here. This is a deliberate
 * approximation: fence-looking text inside an indented code block is treated
 * as a fence too.
 */
function matchFenceOpen(line: string): OpenFence | null {
  const match = FENCE_OPEN_PATTERN.exec(line);
  if (!match) return null;

  const [, run = '', info = ''] = match;
  const marker = run.charAt(0);

  // A backtick fence may not have backticks in its info string; such a line
  // is inline code (e.g. ```code``` text), not a fence.
  if (marker === '`' && info.includes('`')) return null;

  return { marker, length: run.length };
}

/**
 * Check whether `line` closes `fence`: same fence character, at least as many
 * of them as the opener, and no info string.
 */
function isFenceClose(line: string, fence: OpenFence): boolean {
  const match = FENCE_CLOSE_PATTERN.exec(line);
  if (!match) return false;

  const run = match[1] ?? '';
  return run.charAt(0) === fence.marker && run.length >= fence.length;
}

/**
 * Escape potential setext underlines before parsing.
 * 
 * Setext headings use underlines (--- or ===) on the line after text:
 *   Heading
 *   -------
 * 
 * We only support ATX-style headings (# Heading) because:
 * 1. They're unambiguous during streaming
 * 2. They don't conflict with list markers or horizontal rules
 * 
 * This function adds a zero-width space to break the setext pattern.
 * The zero-width space is invisible in rendered output.
 * 
 * Preserves:
 * - Unindented single list markers (-, *, +) — these start new list items
 * - Horizontal rules (--- after blank line)
 * - Everything inside fenced code blocks (``` or ~~~), so code content such
 *   as a YAML `---` separator is never modified; an unclosed fence protects
 *   the rest of the input
 * - The original line terminators (LF, CRLF or CR)
 * 
 * Escapes:
 * - Indented dashes/equals (could be setext underlines)
 * - Multiple dashes/equals (could be setext underlines)
 * 
 * @param markdown - Raw markdown string
 * @returns Markdown with setext underlines escaped
 */
export function escapeSetextUnderlines(markdown: string): string {
  // Splitting on a capturing pattern yields alternating entries:
  // even indices hold line content, odd indices hold the line terminators.
  const parts = markdown.split(LINE_ENDING_PATTERN);
  let openFence: OpenFence | null = null;

  for (let i = 0; i < parts.length; i += 2) {
    const line = parts[i] ?? '';

    // Inside a fenced code block: leave content alone, only look for the close.
    if (openFence) {
      if (isFenceClose(line, openFence)) openFence = null;
      continue;
    }

    const fence = matchFenceOpen(line);
    if (fence) {
      openFence = fence;
      continue;
    }

    const match = SETEXT_PATTERN.exec(line);
    if (!match) continue;

    const [, indent = '', chars = '', trailing = ''] = match;
    // The previous LINE is two entries back (the entry in between is its terminator).
    const prevLine = i >= 2 ? parts[i - 2] ?? '' : '';

    // Preserve unindented single list markers (-, *, +)
    if (isUnindentedListMarker(indent, chars)) continue;

    // Preserve horizontal rules (3+ unindented dashes after blank line)
    if (isHorizontalRule(indent, chars, prevLine)) continue;

    // Escape everything else by inserting zero-width space after indent
    parts[i] = `${indent}\u200B${chars}${trailing}`;
  }

  // Terminators were kept as their own entries, so a plain join restores them.
  return parts.join('');
}

// ============================================================================
// Parsing Functions
// ============================================================================

/**
 * Turn a full markdown document into an MDAST tree.
 *
 * Setext underlines are escaped first, then the shared processor parses the
 * result. If anything throws, a warning is logged and an empty root is
 * returned instead of propagating the error.
 *
 * @param markdown - Markdown content to parse
 * @returns MDAST root node (empty when parsing failed)
 */
export function parseMarkdown(markdown: string): Root {
  let tree: Root;

  try {
    tree = processor.parse(escapeSetextUnderlines(markdown));
  } catch (error) {
    console.warn('Remark parse error:', error);
    tree = { type: 'root', children: [] };
  }

  return tree;
}

/**
 * Parse one block of markdown in isolation and hand back its first
 * block-level node.
 *
 * Empty or whitespace-only input yields null. If parsing throws, a warning
 * is logged and the raw content is wrapped in a plain-text paragraph node.
 *
 * @param content - Block content to parse
 * @returns First MDAST block node, or null if the input is empty or parses to no nodes
 */
export function parseBlockContent(content: string): Content | null {
  const isBlank = !content || content.trim().length === 0;
  if (isBlank) return null;

  try {
    const { children } = processor.parse(escapeSetextUnderlines(content));
    return children[0] ?? null;
  } catch (error) {
    console.warn('Block parse error:', error);
    // Fall back to showing the raw text rather than dropping the block.
    return {
      type: 'paragraph',
      children: [{ type: 'text', value: content }],
    };
  }
}

/**
 * Parse markdown and expose only the root's direct children, i.e. the
 * top-level block nodes without the root wrapper.
 *
 * @param markdown - Markdown content to parse
 * @returns Array of MDAST block nodes
 */
export function parseBlocks(markdown: string): Content[] {
  const { children } = parseMarkdown(markdown);
  return children;
}

/**
 * Report whether the shared processor can parse the given markdown without
 * throwing. Intended for testing/validation.
 *
 * The input is passed to the processor as-is (no setext escaping).
 *
 * @param markdown - Markdown to validate
 * @returns True if parsing completes without an exception
 */
export function isValidMarkdown(markdown: string): boolean {
  let parsedCleanly = true;

  try {
    processor.parse(markdown);
  } catch {
    parsedCleanly = false;
  }

  return parsedCleanly;
}