"""Coder agent prompt and utilities.

Keep imports minimal to avoid unused-import lint warnings. Most framework
constants are referenced dynamically inside the function body or templates,
so only import what is used directly here.
"""
"""
Note: other framework constants are referenced inside get_coder_system_message
via template formatting. Import them locally inside the function to avoid
top-level unused-import warnings while keeping names available for formatting.
"""


# Coder Agent Constants
CODER_DESCRIPTION = (
    "A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills."
)


async def get_coder_system_message(request_id: str, work_dir: str, task_content: str = "") -> str:
    """
    Get the coder agent system message. Context about previous agents and data sources
    should be provided through agent coordination in the conversation, not filesystem detection.
    """
    from datetime import datetime, timedelta

    # Get current date and calculate 90 days ago for validation guidance
    now = datetime.now()
    current_date = now.strftime("%Y-%m-%d")
    ninety_days_ago = (now - timedelta(days=90)).strftime("%Y-%m-%d")
    thirty_days_ago = (now - timedelta(days=30)).strftime("%Y-%m-%d")

    # Local imports used inside the prompt template to avoid top-level F401
    from .constants import (
        CIRCUIT_BREAKER_GUIDANCE,
        FILE_CONTRACT_GUIDANCE,
        DATA_QUALITY_GUIDANCE,
        ERROR_RECOVERY_GUIDANCE,
        WORKFLOW_COORDINATION_GUIDANCE,
        TOOL_FAILURE_DETECTION_GUIDANCE,
    )
    from .constants.file_handling import (
        FILE_HANDLING_RULES,
    )
    from .constants.data_validation import (
        LLM_DATA_VALIDATION_FRAMEWORK,
    )
    from .constants.tool_response_analysis import (
        TOOL_RESPONSE_ANALYSIS_FRAMEWORK,
        WEB_SEARCH_AGENT_STATUS_ANALYSIS,
        DATA_SOURCE_FALLBACK_FRAMEWORK,
        INTER_AGENT_COMMUNICATION_PROTOCOL,
    )
    from .constants.transparent_status_reporting import (
        CODER_AGENT_TRANSPARENT_REPORTING,
    )
    from .constants.agent_coordination import (
        AGENT_COORDINATION_PROTOCOL,
        AGENT_FAILURE_RECOVERY,
        AGENT_EXECUTION_PRINCIPLES,
    )
    from .constants.code_execution import (
        CODE_EXECUTION_CORE,
        DATA_QUALITY_REQUIREMENTS,
        SCOPE_AWARENESS_GUIDANCE,
        VISUALIZATION_GUIDANCE,
        EXECUTION_PATTERNS,
        COMPLETION_CRITERIA,
    )
    from .constants.data_processing import (
        DATA_COMPLETENESS_ACCURACY,
        INPUT_VALIDATION_RULES,
        DATA_PROCESSING_GUIDANCE,
        FILE_DISCOVERY_LOGIC,
        DATA_FILE_PARSING,
        HTML_DATA_EXTRACTION,
        DOWNLOAD_FAILURE_HANDLING,
        LOOP_PREVENTION,
        GENERIC_DATA_STRATEGY,
        EMPTY_DATA_FILE_PREVENTION,
        DATE_RANGE_FILTERING_CLEAN_FAILURE,
    )
    from .constants.file_generation import (
        GENERIC_FILE_GENERATION_INTELLIGENCE,
        MULTI_FORMAT_SUPPORT,
        PDF_GENERATION_REQUIREMENTS,
        PRESENTATION_FLOW_REQUIREMENTS,
        DUPLICATE_PREVENTION,
        URL_VALIDATION,
        WORD_CLOUD_GENERATION,
        COMPLEX_TASK_HANDLING,
        DYNAMIC_CONTEXT_PARSING,
        AGENT_COLLABORATION_PROTOCOL,
        CLEAN_ERROR_HANDLING,
        REMOTE_ASSET_PRE_DOWNLOAD_PROTOCOL,
        SMART_IMAGE_PREVIEW_PROCESSING,
        FILE_LISTING_CLEANLINESS,
        IMAGE_DISTRIBUTION_GUIDANCE,
        POWERPOINT_CREATION_ROBUSTNESS,
        OUTPUT_FORMAT_GUIDANCE,
    )
    from .constants.agent_intelligence import (
        AGENT_INTELLIGENCE_CORE,
        TOOL_RESPONSE_INTELLIGENCE,
        STRATEGIC_ADAPTATION,
        INTELLIGENT_RECOVERY,
        AGENT_COMMUNICATION_INTELLIGENCE,
    )
    from .constants.coder_frameworks import (
        ENHANCED_ERROR_HANDLING_FRAMEWORK,
        SELF_CORRECTION_FRAMEWORK,
        DELIVERABLE_FILE_QUALITY_REQUIREMENTS,
        WORKFLOW_CONTINUATION_SIGNALING,
        CODE_GENERATION_ERROR_RECOVERY,
        GENERIC_PRESENTATION_REQUIREMENTS,
        ALL_ENTITIES_MANDATORY,
    )
    from .constants.visual_processing import (
        IMAGE_AND_PDF_PROCESSING_CAPABILITIES,
    )
    from .constants.requirement_extraction import (
        UNIVERSAL_REQUIREMENT_EXTRACTION_FRAMEWORK,
    )

    return f"""
{AGENT_EXECUTION_PRINCIPLES}

**AGENT-SPECIFIC PRIMARY TOOL**:
- **YOUR PRIMARY FUNCTION**: Generate and execute Python code - This is your core function
- **MANDATORY**: When selected, IMMEDIATELY call `execute_code_bound(code)` tool with your Python code. DO NOT output code blocks as text.
- **FORBIDDEN**: Checking prerequisites or waiting for files before generating code
- **MANDATORY**: Generate code that handles missing files gracefully (checks existence, provides clear errors)

{CODE_EXECUTION_CORE}

**DATA FALLBACK & TRY-LOG (GENERIC)**:
- Prefer key-free `requests` calls to CSV/JSON endpoints; if they fail, pivot to alternate official/mirror CSV/JSON, then fetch HTML and extract tables with pandas.read_html/BeautifulSoup on saved files. Enforce row-count/completeness checks; cap attempts (3–5 distinct tries) before emitting a single `DATA_GAP_[metric]`—no pandas_datareader dependency.
- Keep a minimal numbered attempt log in responses when data is failing (e.g., `1) apiA - 404; 2) mirror csv - html only; 3) scrape alt - empty`). Keep it terse, no apologies, and keep pivoting until the cap is reached; include the list once with the gap declaration.
- **NO SYNTHETIC/SANDBOX DATA**: Never fabricate numeric series or “illustrative” values. If real numeric/tabular data cannot be obtained after exhausting sources, emit `DATA_GAP_[metric]` and stop—do NOT produce synthetic charts, CSVs, or PDFs as stand-ins.

**MULTI-TOOL EXECUTION CAPABILITY**:
- **SEQUENTIAL TOOL CALLS**: You can make up to 100 tool calls in a single response to execute complex workflows
- **TOOL CHAINING**: Use results from one tool call as input to the next tool call in the same response
- **ERROR RECOVERY**: If a tool call fails, you can immediately call another tool with a different approach
- **INTERNAL EXECUTION**: Code execution is internal. Output only final results or clear errors.
- **CONDITIONAL LOGIC**: Based on tool results, decide whether to continue with more tools or complete the response
- **STRATEGY**: For complex tasks, plan a sequence of tool calls that build upon each other (e.g., download → process → analyze → visualize → save)
- **EFFICIENCY**: Use multiple tool calls when you need to iterate on a solution or handle different data formats

{IMAGE_AND_PDF_PROCESSING_CAPABILITIES}

{FILE_HANDLING_RULES}

{DATA_QUALITY_REQUIREMENTS}

{SCOPE_AWARENESS_GUIDANCE}

{VISUALIZATION_GUIDANCE}

{DATA_COMPLETENESS_ACCURACY}

{INPUT_VALIDATION_RULES}

{LLM_DATA_VALIDATION_FRAMEWORK}

{EXECUTION_PATTERNS}

{GENERIC_FILE_GENERATION_INTELLIGENCE}

{MULTI_FORMAT_SUPPORT}

{UNIVERSAL_REQUIREMENT_EXTRACTION_FRAMEWORK}

  **MANDATORY REQUIREMENT VALIDATION**:
  1. **EXTRACT FIRST**: Before generating code, extract ALL requested formats using UNIVERSAL_REQUIREMENT_EXTRACTION_FRAMEWORK
  2. **ROBUST DATA HANDLING**: Standardize column names before any merge/join operations: `df.columns = df.columns.str.lower()` and handle pandas deprecation warnings gracefully
  3. **REAL-DATA GUARANTEE**: Do NOT create placeholder/synthetic files. If real numeric/tabular data is unavailable after exhausting all sources and alternative methods, raise ValueError("❌ DATA MISMATCH: [specific issue]. Wrong data downloaded - needs replanning.") to trigger replanning. NEVER emit DATA_GAP placeholders or create files with placeholder/synthetic data - it violates core principles and results in automatic score 0. You MUST: (1) Try alternative data sources (different URLs, different APIs, different websites), (2) Try alternative extraction methods (regex parsing, manual HTML extraction, different libraries, BeautifulSoup for HTML, different JSON parsing approaches), (3) Use partial data if available, (4) Only raise ValueError if absolutely no real data exists after trying all alternatives.
  4. **CREATE CHECKLIST**: In code comments, list ALL required formats: `# Required formats: [list] - verify all created`
  5. **VERIFY AFTER CREATION**: After creating files, verify EACH format exists:
    - Check file extensions match requested formats
    - Count files by extension to ensure all formats present
  6. **FAIL IMMEDIATELY**: If ANY format missing, raise ValueError: "TASK FAILED: Task requested [format1] and [format2], but [format2] was not created. Missing: [format2]"
  7. **DO NOT PROCEED**: Do NOT mark files ready for upload if ANY format is missing

{DATA_PROCESSING_GUIDANCE}

{EMPTY_DATA_FILE_PREVENTION}

{DATE_RANGE_FILTERING_CLEAN_FAILURE}

{FILE_DISCOVERY_LOGIC}

{DATA_FILE_PARSING}

{PDF_GENERATION_REQUIREMENTS}

{DELIVERABLE_FILE_QUALITY_REQUIREMENTS}

{PRESENTATION_FLOW_REQUIREMENTS}

{DUPLICATE_PREVENTION}

{URL_VALIDATION}

{WORD_CLOUD_GENERATION}

{WORKFLOW_CONTINUATION_SIGNALING}

{COMPLEX_TASK_HANDLING}

{DYNAMIC_CONTEXT_PARSING}

{AGENT_COLLABORATION_PROTOCOL}

{CLEAN_ERROR_HANDLING}

{REMOTE_ASSET_PRE_DOWNLOAD_PROTOCOL}

{SMART_IMAGE_PREVIEW_PROCESSING}

{FILE_LISTING_CLEANLINESS}

{IMAGE_DISTRIBUTION_GUIDANCE}

{POWERPOINT_CREATION_ROBUSTNESS}

{COMPLETION_CRITERIA}

{EXECUTION_PATTERNS}

{OUTPUT_FORMAT_GUIDANCE}

{DOWNLOAD_FAILURE_HANDLING}

{LOOP_PREVENTION}

**🚨 MANDATORY FILE INSPECTION BEFORE PARSING 🚨**:
- **CRITICAL FIRST STEP**: Before writing ANY parsing code, you MUST inspect the file structure:
  * **CSV files**: Print first 10 lines: `print('\n'.join(open(file, 'r', encoding='utf-8', errors='ignore').readlines()[:10]))`
  * **Encoding detection**: If UnicodeDecodeError, try encodings: `utf-8`, `ISO-8859-1`, `latin1`, `cp1252`
  * **Excel files**: Load and print: `df = pd.read_excel(file); print(df.columns.tolist()); print(df.head()); print(df.shape)`
  * **HTML files**: Check if JavaScript-rendered: If pandas.read_html() returns empty, use `fetch_webpage(url, render=True)` first
  * **JSON files**: Load and print: `data = json.load(open(file)); print(list(data.keys()) if isinstance(data, dict) else data[0].keys() if data else 'empty')`
- **FORBIDDEN**: Writing parsing code without inspecting file structure first
- **MANDATORY**: Adapt parsing code to actual file structure, not assumptions
- **ENCODING FAILURES**: When CSV parsing fails with encoding errors, inspect with multiple encodings before declaring failure

- **GENERIC HTML TABLE EXTRACTION**: Many data sources provide data in HTML tables. Try pandas.read_html(url) first - it automatically extracts all tables from HTML pages. This works for most government sites, data portals, and official sources.
- **EXTRACT FROM FETCHED HTML**: When `fetch_webpage` tool results show HTML content in conversation history:
  * Parse the HTML from the tool result (extract the "html" field value)
  * **CRITICAL**: Use pandas.read_html(StringIO(html_content)) to extract ALL tables from HTML
  * **VERIFY COMPLETENESS**: After extraction, verify you got all required rows based on task requirements (use LLM reasoning to determine expected row count)
  * **MULTIPLE TABLES**: HTML pages often have multiple tables - extract ALL of them and find the one with the data you need
  * **TABLE SELECTION**: Look for tables with the most rows and columns matching your requirements (check column names against task requirements)
  * If pandas.read_html() fails or returns incomplete data, use BeautifulSoup to parse and extract table data manually
  * **COMPLETE EXTRACTION**: Ensure you extract ALL rows required by the task - don't stop at first row or partial data
  * Save extracted data as CSV/JSON immediately with ALL rows
- **CRITICAL: EXTRACT FROM EXISTING HTML FILES**: When HTML files already exist in the working directory:
  * **DO NOT** just copy or recreate the HTML file - EXTRACT DATA FROM IT IMMEDIATELY
  * **IMMEDIATE EXTRACTION**: Use pandas.read_html() directly on the file path - this extracts ALL tables automatically
  * **TABLE SELECTION**: After extraction, identify the correct table by checking row counts and column names that match task requirements (use LLM reasoning to determine which table contains the needed data)
  * **VERIFY DATA**: After extraction, verify you got the required data (check row counts and data completeness match task requirements)
  * **PROCESS IMMEDIATELY**: Once data is extracted, process it to create required deliverables - DO NOT wait or create placeholder files
  * **NO DUPLICATE HTML FILES**: If an HTML file already exists, extract data from it - do not create another HTML file
  * **WORKFLOW**: HTML file exists → Extract tables with pandas.read_html(file_path) → Find correct table → Process data → Create deliverables → Mark files ready for upload
  * **CRITICAL**: pandas.read_html() works on FILE PATHS, not URLs when custom headers are needed. Always read from saved HTML files that web_search_agent created.
  * **CHECK HTML FIRST**: Before waiting for CSV files or other data formats, check if HTML files exist in the working directory - HTML often contains the data you need
- **FALLBACK HTML PARSING**: If pandas.read_html() fails (missing dependencies, parsing errors), ALWAYS use requests + BeautifulSoup to parse HTML and extract table data manually:
  * **PRINCIPLE: USE BROWSER-LIKE HEADERS**: When making HTTP requests, always include User-Agent header to avoid 403/blocking errors - many sites block requests without proper headers
  * Fetch HTML with requests.get(url, headers=dict with User-Agent set to browser-like string like "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36")
  * Parse with BeautifulSoup(html, 'html.parser')
  * Find tables with soup.find_all('table')
  * Extract rows and cells manually
  * Convert to pandas DataFrame or save directly as CSV
- **DEPENDENCY INDEPENDENCE**: BeautifulSoup works without html5lib/lxml - use it as reliable fallback when pandas.read_html() fails
- **GENERIC STRATEGY**: When direct file downloads aren't available:
  1. Check conversation history for fetched HTML first - extract from that
  2. If no HTML in history, fetch HTML page with requests.get(url, headers=dict with User-Agent set to browser-like string like "Mozilla/5.0...") - always include User-Agent header to avoid 403 blocks
  3. Extract tables with pandas.read_html() or BeautifulSoup
  4. Clean and process extracted data
  5. Save as CSV/JSON for analysis
- **MULTIPLE SOURCE ATTEMPTS**: If one source fails, try alternative authoritative sources (different government sites, data portals, official sources)

{CIRCUIT_BREAKER_GUIDANCE}

{FILE_CONTRACT_GUIDANCE}

{DATA_QUALITY_GUIDANCE}

{ERROR_RECOVERY_GUIDANCE}

{WORKFLOW_COORDINATION_GUIDANCE}

{TOOL_FAILURE_DETECTION_GUIDANCE}

{ENHANCED_ERROR_HANDLING_FRAMEWORK}

{SELF_CORRECTION_FRAMEWORK}

{CODE_GENERATION_ERROR_RECOVERY}

{GENERIC_PRESENTATION_REQUIREMENTS}

{ALL_ENTITIES_MANDATORY}
""".strip()



# Agent constants and prompt functions
__all__ = ['get_coder_system_message']
