"""Full-text search across indexed file contents."""

import fnmatch as _fnmatch
import re
import time
from typing import Optional

from ..storage import IndexStore, record_savings, estimate_savings, cost_avoided
from ._utils import resolve_repo

# Detect nested quantifiers that cause catastrophic backtracking in Python's re engine.
# Matches patterns like (a+)+, (a*)+, (a+)*, (?:a+){2,}, etc.
_NESTED_QUANTIFIER_RE = re.compile(
    r'[+*]\s*\)'    # quantifier before closing group
    r'\s*[+*{]'     # quantifier after closing group
)

_MAX_REGEX_LEN = 200


def search_text(
    repo: str,
    query: str,
    file_pattern: Optional[str] = None,
    max_results: int = 20,
    context_lines: int = 0,
    is_regex: bool = False,
    storage_path: Optional[str] = None,
) -> dict:
    """Search for text across all indexed files in a repository.

    Useful when symbol search misses — e.g., searching for string literals,
    comments, configuration values, or patterns not captured as symbols.

    Args:
        repo: Repository identifier (owner/repo or just repo name).
        query: Text to search for. Case-insensitive substring by default;
               set is_regex=True for full regex (e.g. 'estimateToken|tokenEstimat').
        file_pattern: Optional glob pattern to filter files.
        max_results: Maximum number of matching lines to return.
        context_lines: Lines of context before/after each match (like grep -C).
        is_regex: When True, treat query as a Python regex (re.search, IGNORECASE).
        storage_path: Custom storage path.

    Returns:
        Dict with matching lines grouped by file, plus _meta envelope.
    """
    _MAX_QUERY_LEN = 500
    if len(query) > _MAX_QUERY_LEN:
        return {"error": f"Query too long ({len(query)} chars, max {_MAX_QUERY_LEN})"}

    start = time.perf_counter()
    max_results = max(1, min(max_results, 100))
    context_lines = max(0, min(context_lines, 10))

    # For regex mode, compile the user pattern. For substring mode, use
    # Python's optimized `in` operator (faster than re.search per line).
    pattern = None
    query_lower = None
    if is_regex:
        if len(query) > _MAX_REGEX_LEN:
            return {"error": f"Regex too long ({len(query)} chars, max {_MAX_REGEX_LEN})"}
        if _NESTED_QUANTIFIER_RE.search(query):
            return {"error": "Regex rejected: nested quantifiers can cause catastrophic backtracking"}
        try:
            pattern = re.compile(query, re.IGNORECASE)
        except re.error as e:
            return {"error": f"Invalid regex: {e}"}
    else:
        query_lower = query.lower()

    try:
        owner, name = resolve_repo(repo, storage_path)
    except ValueError as e:
        return {"error": str(e)}

    store = IndexStore(base_path=storage_path)
    index = store.load_index(owner, name)

    if not index:
        return {"error": f"Repository not indexed: {owner}/{name}"}

    # Pre-compile file pattern into a single regex (avoids double fnmatch per file)
    files = index.source_files
    if file_pattern:
        pat_re = re.compile(
            _fnmatch.translate(file_pattern) + "|" + _fnmatch.translate(f"*/{file_pattern}")
        )
        files = [f for f in files if pat_re.match(f)]

    content_dir = store._content_dir(owner, name)
    results = []
    result_count = 0
    files_searched = 0
    truncated = False
    raw_bytes = 0
    response_bytes = 0

    for file_path in files:
        full_path = store._safe_content_path(content_dir, file_path)
        if not full_path:
            continue
        try:
            with open(full_path, "r", encoding="utf-8", errors="replace") as f:
                lines = f.readlines()
        except OSError:
            continue

        files_searched += 1
        file_matches = []
        for line_index, line in enumerate(lines):
            line = line.rstrip("\n")
            hit = pattern.search(line) if pattern else (query_lower in line.lower())
            if not hit:
                continue
            text = line.rstrip()[:200]
            match = {
                "line": line_index + 1,
                "text": text,
            }
            response_bytes += len(text) + 20  # key overhead estimate
            if context_lines > 0:
                before_start = max(0, line_index - context_lines)
                after_end = min(len(lines), line_index + context_lines + 1)
                before = [v.rstrip()[:200] for v in lines[before_start:line_index]]
                after = [v.rstrip()[:200] for v in lines[line_index + 1:after_end]]
                match["before"] = before
                match["after"] = after
                response_bytes += sum(len(v) for v in before) + sum(len(v) for v in after)
            file_matches.append(match)
            result_count += 1
            if result_count >= max_results:
                truncated = True
                break

        if file_matches:
            results.append({"file": file_path, "matches": file_matches})
            response_bytes += len(file_path) + 20
            raw_bytes += index.file_sizes.get(file_path, 0)

        if truncated:
            break

    elapsed = (time.perf_counter() - start) * 1000

    tokens_saved = estimate_savings(raw_bytes, response_bytes)
    total_saved = record_savings(tokens_saved, tool_name="search_text", repo_key=f"{owner}/{name}")

    return {
        "result_count": result_count,
        "results": results,
        "_meta": {
            "timing_ms": round(elapsed, 1),
            "files_searched": files_searched,
            "truncated": truncated,
            "tokens_saved": tokens_saved,
            "total_tokens_saved": total_saved,
            **cost_avoided(tokens_saved, total_saved),
        },
    }