#!/bin/bash
#
# Nexus Memory - Enhanced Auto Recall Hook (GraphRAG v2)
# Automatically recalls relevant context from Nexus GraphRAG on every user prompt.
# Uses enhanced retrieval with entity extraction, knowledge graph, and episodic memory.
#
# This hook is triggered on UserPromptSubmit to enrich Claude's context with
# relevant memories, entities, facts, and suggestions from past sessions.
#
# Usage (automatic via settings.json):
#   Triggered on every user prompt submission
#
# Environment Variables:
#   NEXUS_API_KEY        - API key for authentication (REQUIRED; the hook exits silently without recall when unset)
#   NEXUS_API_URL        - API endpoint (default: https://api.adverant.ai)
#   NEXUS_COMPANY_ID     - Company identifier (default: adverant)
#   NEXUS_APP_ID         - Application identifier (default: claude-code)
#   NEXUS_VERBOSE        - Set to 1 for debug output
#   NEXUS_RECALL_LIMIT   - Number of memories to recall (default: 5)
#   NEXUS_FRESHNESS_DAYS - Age in days after which ingested files are flagged as stale (default: 30)
#
# GraphRAG Enhancement Options:
#   NEXUS_INCLUDE_ENTITIES   - Include entity information in output (default: true)
#   NEXUS_INCLUDE_FACTS      - Include extracted facts in output (default: true)
#   NEXUS_INCLUDE_FOLLOWUPS  - Include suggested follow-ups (default: true)
#   NEXUS_GRAPH_DEPTH        - Multi-hop graph traversal depth (default: 2)
#   NEXUS_MAX_TOKENS         - Token budget for recall (default: 3000)
#
# Output:
#   Returns relevant memories, entities, and facts as context for Claude
#

set -o pipefail

#######################################
# Print $1 when it is a plain non-negative integer (1-9 digits), otherwise
# print the fallback $2. Leading zeros are stripped (base-10 forced) so the
# result is safe for both `jq --argjson` and bash arithmetic, where a value
# such as "08" would otherwise be rejected as invalid octal.
# Arguments: $1 - candidate value, $2 - fallback
# Outputs:   the sanitized integer on stdout
#######################################
_nexus_uint_or_default() {
  local uint_re='^[0-9]{1,9}$'
  if [[ "$1" =~ $uint_re ]]; then
    printf '%s\n' "$((10#$1))"
  else
    printf '%s\n' "$2"
  fi
}

#######################################
# Normalize a boolean-ish value to the literal JSON tokens "true"/"false".
# Accepts true/1/yes/on and false/0/no/off in any letter case; anything
# else (including an empty value) yields the fallback $2.
# Arguments: $1 - candidate value, $2 - fallback ("true" or "false")
# Outputs:   "true" or "false" (or the fallback) on stdout
#######################################
_nexus_bool_or_default() {
  case "$1" in
    [Tt][Rr][Uu][Ee] | 1 | [Yy][Ee][Ss] | [Oo][Nn]) printf 'true\n' ;;
    [Ff][Aa][Ll][Ss][Ee] | 0 | [Nn][Oo] | [Oo][Ff][Ff]) printf 'false\n' ;;
    *) printf '%s\n' "$2" ;;
  esac
}

# Configuration with environment variable overrides
NEXUS_API_KEY="${NEXUS_API_KEY:-}"
NEXUS_API_URL="${NEXUS_API_URL:-https://api.adverant.ai}"
COMPANY_ID="${NEXUS_COMPANY_ID:-adverant}"
APP_ID="${NEXUS_APP_ID:-claude-code}"
VERBOSE="${NEXUS_VERBOSE:-0}"
# Numeric overrides are sanitized because they are later handed to
# `jq --argjson` and to $(( )) arithmetic; an invalid value silently falls
# back to the documented default instead of breaking the request payload.
RECALL_LIMIT="$(_nexus_uint_or_default "${NEXUS_RECALL_LIMIT:-}" 5)"

# GraphRAG Enhancement Configuration
# Boolean overrides are normalized to exactly "true"/"false": they are passed
# to `jq --argjson` and compared against the string "true" further down.
INCLUDE_ENTITIES="$(_nexus_bool_or_default "${NEXUS_INCLUDE_ENTITIES:-}" true)"
INCLUDE_FACTS="$(_nexus_bool_or_default "${NEXUS_INCLUDE_FACTS:-}" true)"
INCLUDE_FOLLOWUPS="$(_nexus_bool_or_default "${NEXUS_INCLUDE_FOLLOWUPS:-}" true)"
GRAPH_DEPTH="$(_nexus_uint_or_default "${NEXUS_GRAPH_DEPTH:-}" 2)"
MAX_TOKENS="$(_nexus_uint_or_default "${NEXUS_MAX_TOKENS:-}" 3000)"

# Debug logger: writes "[auto-recall] <message>" to stderr, but only when
# VERBOSE is exactly "1". In every other case it is a silent no-op that
# still returns success.
log() {
  [[ "$VERBOSE" == "1" ]] || return 0
  printf '%s\n' "[auto-recall] $1" >&2
}

# Error logger: always writes "[auto-recall] ERROR: <message>" to stderr,
# regardless of the VERBOSE setting.
log_error() {
  local message="$1"
  printf '[auto-recall] ERROR: %s\n' "$message" >&2
}

# Preconditions. Every failure here ends the hook with exit status 0 so that
# a missing key or tool never blocks the conversation; the reason is only
# reported through log (i.e. in verbose mode).

# 1) An API key must be configured.
[[ -n "$NEXUS_API_KEY" ]] || {
  log "NEXUS_API_KEY not set, skipping auto-recall"
  exit 0
}

# 2) jq and curl must both be on PATH (checked in that order).
for required_tool in jq curl; do
  command -v "$required_tool" > /dev/null 2>&1 && continue
  log "$required_tool not installed, skipping auto-recall"
  exit 0
done

# Slurp the hook payload from stdin; it is parsed as JSON below.
INPUT="$(cat)"

[[ -n "$INPUT" ]] || {
  log "No input provided, skipping"
  exit 0
}

log "Received prompt: ${INPUT:0:100}..."

# The query text is the first usable value among .prompt, .content and
# .tool_input.command; jq parse errors are discarded and leave QUERY empty.
QUERY="$(jq -r '.prompt // .content // .tool_input.command // empty' 2> /dev/null <<< "$INPUT")"

# Nothing to search for: leave quietly.
if [[ -z "$QUERY" || "$QUERY" == "null" ]]; then
  log "No query content, skipping"
  exit 0
fi

# Cap the recall query at 500 characters.
if (( ${#QUERY} > 500 )); then
  QUERY="${QUERY:0:500}"
  log "Query truncated to 500 characters for recall"
fi

# Get current project for context: the working directory and its last
# path component are sent along with the query.
PROJECT_DIR=$(pwd)
PROJECT_NAME=$(basename "$PROJECT_DIR")

log "Query: ${QUERY:0:100}..."
log "Project: $PROJECT_NAME"
log "Limit: $RECALL_LIMIT"

# Build the enhanced payload for GraphRAG retrieval.
# The --argjson values must be valid JSON (numbers / true / false). If any of
# them is not, jq exits non-zero; in that case recall is skipped instead of
# POSTing an empty body. jq's stderr is captured into PAYLOAD so the reason
# can be shown in verbose mode only.
if ! PAYLOAD=$(jq -n \
  --arg query "$QUERY" \
  --argjson limit "$RECALL_LIMIT" \
  --arg project "$PROJECT_NAME" \
  --arg projectDir "$PROJECT_DIR" \
  --argjson includeEntities "$INCLUDE_ENTITIES" \
  --argjson includeFacts "$INCLUDE_FACTS" \
  --argjson includeFollowups "$INCLUDE_FOLLOWUPS" \
  --argjson graphDepth "$GRAPH_DEPTH" \
  --argjson maxTokens "$MAX_TOKENS" \
  '{
    query: $query,
    limit: $limit,
    context: {
      project: $project,
      projectDir: $projectDir
    },
    filters: {
      project: $project
    },
    include_episodic: true,
    include_documents: true,
    include_entities: $includeEntities,
    include_facts: $includeFacts,
    include_followups: $includeFollowups,
    enable_graph_traversal: true,
    graph_depth: $graphDepth,
    max_tokens: $maxTokens,
    hybrid_search: true,
    rerank: true
  }' 2>&1); then
  log "Could not build recall payload, skipping auto-recall: ${PAYLOAD}"
  exit 0  # Don't block conversation on a configuration problem
fi

# Recall goes through the unified /api/memory endpoint.
ENDPOINT="${NEXUS_API_URL}/api/memory"
log "Recalling from unified endpoint: $ENDPOINT"

# Request options, collected in an array so each header stays one word.
# -w appends the HTTP status on its own final line; --max-time 5 keeps the
# hook from stalling the conversation.
curl_opts=(
  -s
  -w "\n%{http_code}"
  -X POST
  -H "Content-Type: application/json"
  -H "Authorization: Bearer $NEXUS_API_KEY"
  -H "X-Company-ID: $COMPANY_ID"
  -H "X-App-ID: $APP_ID"
  -H "X-User-ID: ${USER:-unknown}"
  -d "$PAYLOAD"
  --max-time 5
)
RESPONSE=$(curl "${curl_opts[@]}" "$ENDPOINT" 2>&1)

# Split the response: the last line is the status code, the rest is the body.
HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
BODY=$(sed '$d' <<< "$RESPONSE")

# Any status other than 200 ends the hook successfully but without context;
# details are printed only in verbose mode.
if [[ "$HTTP_CODE" != "200" ]]; then
  if [[ "$VERBOSE" == "1" ]]; then
    printf '%s\n' "⚠️  Auto-recall failed (HTTP $HTTP_CODE), continuing without context" >&2

    case "$HTTP_CODE" in
      401) printf '%s\n' "   Authentication failed - check NEXUS_API_KEY" >&2 ;;
    esac
  fi
  exit 0  # Don't block conversation on recall failure
fi

# Validate JSON response
if ! echo "$BODY" | jq . &>/dev/null; then
  log "Invalid JSON response, continuing without context"
  exit 0
fi

#######################################
# Print, as compact JSON, the array stored under key $2 (or, when that is
# null/false/missing, key $3) of the JSON document $1.
# A document that is not an object, a field that is not an array, or any jq
# failure all yield "[]", so callers always receive a JSON array.
# Arguments: $1 - JSON text, $2 - primary key, $3 - optional fallback key
#######################################
_recall_array() {
  local doc="$1" primary="$2" fallback="${3:-}" out
  out=$(printf '%s\n' "$doc" | jq -c --arg primary "$primary" --arg fallback "$fallback" '
    (if type == "object" then (.[$primary] // .[$fallback] // []) else [] end)
    | if type == "array" then . else [] end
  ' 2>/dev/null) || out=""
  printf '%s\n' "${out:-[]}"
}

#######################################
# Print the length of the JSON value $1 as a plain integer; prints 0 when jq
# fails or returns anything that is not a single non-negative integer.
#######################################
_recall_count() {
  local n uint_re='^[0-9]+$'
  n=$(printf '%s\n' "$1" | jq 'length' 2>/dev/null) || n=0
  [[ "$n" =~ $uint_re ]] || n=0
  printf '%s\n' "$n"
}

# Extract data from response (handles both enhanced and basic response structures)
# Handle Gateway wrapper (.data.results) and direct response. The lookup is
# wrapped in try so a non-object body falls through to "." instead of
# aborting jq and leaving every later variable empty.
RESULT=$(printf '%s\n' "$BODY" | jq -c '
  (try .data.results catch null) as $wrapped
  | if $wrapped then $wrapped else . end
' 2>/dev/null)
MEMORIES=$(_recall_array "$RESULT" unified_memories memories)
ENTITIES=$(_recall_array "$RESULT" entities_mentioned entities)
FACTS=$(_recall_array "$RESULT" relevant_facts facts)
EPISODIC=$(_recall_array "$RESULT" episodic_context)
FOLLOWUPS=$(_recall_array "$RESULT" suggested_followups)

# Counts are always integers, so the "== 0" / "-gt 0" checks below and in
# the output sections behave predictably.
MEMORY_COUNT=$(_recall_count "$MEMORIES")
ENTITY_COUNT=$(_recall_count "$ENTITIES")
FACT_COUNT=$(_recall_count "$FACTS")
EPISODIC_COUNT=$(_recall_count "$EPISODIC")

# Log success in verbose mode
if [[ "$VERBOSE" == "1" ]]; then
  if [[ "$MEMORY_COUNT" != "0" ]] || [[ "$ENTITY_COUNT" != "0" ]]; then
    echo "✅ Auto-recall: $MEMORY_COUNT memories, $ENTITY_COUNT entities injected" >&2
  fi
fi

log "Found $MEMORY_COUNT memories, $ENTITY_COUNT entities, $FACT_COUNT facts, $EPISODIC_COUNT episodic"

# Skip if nothing to show
if [[ "$MEMORY_COUNT" == "0" ]] && [[ "$ENTITY_COUNT" == "0" ]] && [[ "$FACT_COUNT" == "0" ]]; then
  log "No relevant context found"
  exit 0
fi

# Token budget management.
# MAX_TOKENS (default 3000) is the overall ceiling and is split by fixed
# percentages; elsewhere a token is estimated as roughly 4 characters.
TOKENS_USED=0

# Integer share of MAX_TOKENS for the given percentage ($1).
_budget_share() {
  printf '%s\n' "$(( MAX_TOKENS * $1 / 100 ))"
}

MAX_MEMORY_TOKENS=$(_budget_share 60)    # memories
MAX_ENTITY_TOKENS=$(_budget_share 15)    # entities
MAX_FACT_TOKENS=$(_budget_share 15)      # facts
MAX_EPISODIC_TOKENS=$(_budget_share 10)  # episodic

log "Token budget: memories=$MAX_MEMORY_TOKENS, entities=$MAX_ENTITY_TOKENS, facts=$MAX_FACT_TOKENS"

# Open the context wrapper: blank line, opening tag, blank line.
printf '\n%s\n\n' "<nexus-memory-context>"

# Output memories (with token budget)
# Each memory becomes exactly one "- [type] text" line:
#   * non-object array elements are skipped and a non-object .metadata is
#     treated as empty, so one odd entry cannot abort the whole listing;
#   * line breaks inside the content are folded into spaces, so the
#     8-entry cap (head -n 8) and the per-line token estimate below operate
#     on whole memories rather than on fragments of one memory;
#   * text longer than 250 characters is cut and suffixed with "...".
if [[ "$MEMORY_COUNT" -gt 0 ]] && [[ "$MEMORY_COUNT" != "null" ]]; then
  echo "## Relevant Memories"
  MEMORY_TOKENS=0

  while IFS= read -r line; do
    # ~4 chars per token; stop at the first line that would exceed the
    # memory share of the budget.
    LINE_TOKENS=$(( ${#line} / 4 ))
    if (( MEMORY_TOKENS + LINE_TOKENS > MAX_MEMORY_TOKENS )); then
      break
    fi
    printf '%s\n' "$line"
    MEMORY_TOKENS=$((MEMORY_TOKENS + LINE_TOKENS))
  done < <(printf '%s\n' "$MEMORIES" | jq -r '.[] | select(type == "object") |
    (.metadata | if type == "object" then . else {} end) as $meta |
    (.content
      | if type == "string" then . else tostring end
      | split("\n") | map(rtrimstr("\r")) | join(" ")) as $text |
    "- [\($meta.eventType // $meta.contentClass // "context")] \($text | if length > 250 then .[0:250] + "..." else . end)"
  ' 2>/dev/null | head -n 8)
  echo ""
  log "Memory tokens used: $MEMORY_TOKENS"
fi

# Entities section: printed only when at least one entity came back and
# INCLUDE_ENTITIES is "true". At most the first five entries are listed;
# object entries show name/type (plus related count when present), any
# other entry is printed as-is.
if [[ "$ENTITY_COUNT" -gt 0 && "$ENTITY_COUNT" != "null" && "$INCLUDE_ENTITIES" == "true" ]]; then
  printf '%s\n' "## Entities Mentioned"
  jq -r '.[:5] | .[] |
    if type == "object" then
      "- \(.name // .entity) (\(.type // "unknown"))\(if .related_count then " - \(.related_count) related memories" else "" end)"
    else
      "- \(.)"
    end
  ' 2>/dev/null <<< "$ENTITIES"
  printf '\n'
fi

# Facts section: printed only when at least one fact came back and
# INCLUDE_FACTS is "true". At most the first five entries are listed;
# object entries show .fact (or .content), any other entry is printed as-is.
if [[ "$FACT_COUNT" -gt 0 && "$FACT_COUNT" != "null" && "$INCLUDE_FACTS" == "true" ]]; then
  printf '%s\n' "## Key Facts"
  jq -r '.[:5] | .[] |
    if type == "object" then
      "- \(.fact // .content)"
    else
      "- \(.)"
    end
  ' 2>/dev/null <<< "$FACTS"
  printf '\n'
fi

# Episodic section: lists at most the first three entries, using .summary
# (or .content) cut to 200 characters with a trailing "..." when longer.
if [[ "$EPISODIC_COUNT" -gt 0 && "$EPISODIC_COUNT" != "null" ]]; then
  printf '%s\n' "## Recent Session Context"
  jq -r '.[:3] | .[] |
    "- \(.summary // .content | if length > 200 then .[0:200] + "..." else . end)"
  ' 2>/dev/null <<< "$EPISODIC"
  printf '\n'
fi

# Beads sections (only when the `bd` CLI is available).
# Resolve the command: prefer the executable at ~/.local/bin/bd, otherwise
# fall back to a `bd` found on PATH; BD_CMD stays empty when neither exists.
BD_BIN="${HOME}/.local/bin/bd"
BD_CMD=""
if [[ -x "$BD_BIN" ]]; then
  BD_CMD="$BD_BIN"
elif command -v bd > /dev/null 2>&1; then
  BD_CMD="bd"
fi

# Proceed when a .beads directory exists here or `bd list` succeeds.
if [[ -n "$BD_CMD" ]] && { [[ -d ".beads" ]] || "$BD_CMD" list > /dev/null 2>&1; }; then
  # Beads whose status is "open" or "in_progress"; "[]" on any failure.
  ACTIVE_BEADS=$("$BD_CMD" list --json 2>/dev/null | jq -c '[.[] | select(.status == "open" or .status == "in_progress")]' 2>/dev/null || printf '[]\n')
  ACTIVE_COUNT=$(jq 'length' 2>/dev/null <<< "$ACTIVE_BEADS" || printf '0\n')

  if [[ "$ACTIVE_COUNT" -gt 0 && "$ACTIVE_COUNT" != "0" ]]; then
    printf '%s\n' "## Active Beads"
    jq -r '.[:5] | .[] |
      "- [\(.id // .ID)] \(.title // .Title) (\(.status // .Status))"
    ' 2>/dev/null <<< "$ACTIVE_BEADS"
    printf '\n'
  fi

  # Output of `bd ready --json` (first three shown); "[]" when it fails.
  READY_BEADS=$("$BD_CMD" ready --json 2>/dev/null || printf '[]\n')
  READY_COUNT=$(jq 'length' 2>/dev/null <<< "$READY_BEADS" || printf '0\n')

  if [[ "$READY_COUNT" -gt 0 && "$READY_COUNT" != "0" ]]; then
    printf '%s\n' "## Ready Work (No Blockers)"
    jq -r '.[:3] | .[] |
      "- [\(.id // .ID)] \(.title // .Title)"
    ' 2>/dev/null <<< "$READY_BEADS"
    printf '\n'
  fi
fi

# Follow-up suggestions: printed only when at least one exists and
# INCLUDE_FOLLOWUPS is "true". At most the first three are listed; object
# entries show .suggestion (or .query), any other entry is printed as-is.
FOLLOWUP_COUNT=$(jq 'length' 2>/dev/null <<< "$FOLLOWUPS" || printf '0\n')
if [[ "$FOLLOWUP_COUNT" -gt 0 && "$FOLLOWUP_COUNT" != "null" && "$INCLUDE_FOLLOWUPS" == "true" ]]; then
  printf '%s\n' "## Suggested Context to Explore"
  jq -r '.[:3] | .[] |
    if type == "object" then
      "- \(.suggestion // .query)"
    else
      "- \(.)"
    end
  ' 2>/dev/null <<< "$FOLLOWUPS"
  printf '\n'
fi

# === AUTO-INGEST STATUS SECTION ===
# Directory holding the auto-ingest state files read by the sections below.
AUTO_INGEST_STATE="${HOME}/.claude/session-env/auto-ingest"

# Recently finished ingestions: each entry is shown once, then the file is
# reset to an empty JSON array.
completions_file="${AUTO_INGEST_STATE}/recent_completions.json"
if [[ -f "$completions_file" ]]; then
  COMPLETIONS=$(cat "$completions_file" 2>/dev/null || printf '[]\n')
  COMPLETION_COUNT=$(jq 'length' 2>/dev/null <<< "$COMPLETIONS" || printf '0\n')

  if [[ "$COMPLETION_COUNT" -gt 0 && "$COMPLETION_COUNT" != "0" ]]; then
    printf '%s\n' "## Recently Ingested Files (Ready for Recall)"
    # One line per entry, labelled with the file name (last path component).
    jq -r '.[] |
      if .status == "completed" then
        "- ✅ \(.path | split("/") | .[-1]) - now searchable\(if .entityCount > 0 then " (\(.entityCount) entities)" else "" end)"
      else
        "- ❌ \(.path | split("/") | .[-1]) - ingestion failed"
      end
    ' 2>/dev/null <<< "$COMPLETIONS"
    printf '\n'
    # Clear after display
    printf '[]\n' > "$completions_file"
  fi
fi

# Pending ingestions: one line per entry of pending_jobs.json, labelled with
# the file name (last path component of .path).
pending_file="${AUTO_INGEST_STATE}/pending_jobs.json"
if [[ -f "$pending_file" ]]; then
  PENDING=$(cat "$pending_file" 2>/dev/null || printf '[]\n')
  PENDING_COUNT=$(jq 'length' 2>/dev/null <<< "$PENDING" || printf '0\n')

  if [[ "$PENDING_COUNT" -gt 0 && "$PENDING_COUNT" != "0" ]]; then
    printf '%s\n' "## Files Currently Being Processed"
    jq -r '.[] | "- ⏳ \(.path | split("/") | .[-1]) - processing..."' 2>/dev/null <<< "$PENDING"
    printf '\n'
  fi
fi

# Document freshness alerts (files ingested more than FRESHNESS_DAYS ago,
# default 30).
FRESHNESS_DAYS="${NEXUS_FRESHNESS_DAYS:-30}"
# The value feeds bash arithmetic and `jq --argjson`, so anything that is
# not a plain non-negative integer falls back to 30; base 10 is forced so a
# value like "08" is not rejected as octal.
freshness_re='^[0-9]{1,5}$'
if ! [[ "$FRESHNESS_DAYS" =~ $freshness_re ]]; then
  FRESHNESS_DAYS=30
fi
FRESHNESS_DAYS=$((10#$FRESHNESS_DAYS))

if [[ -f "${AUTO_INGEST_STATE}/ingested_files.json" ]]; then
  STALE_THRESHOLD=$((FRESHNESS_DAYS * 24 * 60 * 60))  # Convert days to seconds
  NOW=$(date +%s)

  # Find stale documents (completed but old); at most three are reported.
  # ingestedAt handling:
  #   * string  - the date part before "T" is parsed as %Y-%m-%d; a string
  #               that does not parse only skips that entry (try/catch)
  #               instead of aborting the whole filter;
  #   * number  - taken as epoch seconds rather than as "epoch 0" (which
  #               flagged every such entry as stale).
  #               NOTE(review): values above 1e11 are assumed to be epoch
  #               milliseconds - confirm against the writer of this file;
  #   * other   - entry is skipped.
  STALE_DOCS=$(jq --argjson now "$NOW" --argjson thresh "$STALE_THRESHOLD" '
    to_entries | map(
      select(.value | type == "object")
      | select((.value.status == "completed") and (.value.ingestedAt != null))
      | (.value.ingestedAt
          | if type == "string" then
              (try (split("T")[0] | strptime("%Y-%m-%d") | mktime) catch null)
            elif type == "number" then
              (if . > 100000000000 then (. / 1000 | floor) else . end)
            else
              null
            end) as $ts
      | select(($ts != null) and (($now - $ts) > $thresh))
    ) | .[0:3]
  ' "${AUTO_INGEST_STATE}/ingested_files.json" 2>/dev/null || echo "[]")

  STALE_COUNT=$(echo "$STALE_DOCS" | jq 'length' 2>/dev/null || echo "0")

  if [[ "$STALE_COUNT" -gt 0 ]] && [[ "$STALE_COUNT" != "0" ]]; then
    echo "## Document Freshness Alerts"
    # Each entry key is shown by its last path component.
    echo "$STALE_DOCS" | jq -r --argjson days "$FRESHNESS_DAYS" '.[] |
      "- ⚠️ \(.key | split("/") | .[-1]) - ingested >\($days) days ago, may be outdated"
    ' 2>/dev/null
    echo ""
  fi
fi

# Close the context wrapper and always finish with a success status.
printf '%s\n' "</nexus-memory-context>"

exit 0