#!/bin/bash
#
# Nexus Memory - Auto Ingest Hook
# Automatically detects files from Read tool usage or prompt mentions and queues
# them for background ingestion into GraphRAG.
#
# Triggers:
#   - PostToolUse: When Claude uses the Read tool to access a file
#   - UserPromptSubmit: When user mentions file paths in their prompt
#
# Features:
#   - Automatic file path detection from tool input and prompts
#   - Deduplication via mtime-based freshness tracking
#   - Background ingestion (non-blocking)
#   - Excluded directory filtering (node_modules, .git, etc.)
#   - Context injection for immediate user feedback
#
# Environment Variables:
#   NEXUS_API_KEY           - API key for authentication (REQUIRED)
#   NEXUS_AUTO_INGEST       - Enable/disable auto-ingestion (default: 1)
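#   NEXUS_INGEST_VERBOSE    - Enable debug output (default: 0; falls back to
#                             NEXUS_VERBOSE when unset)
#   NEXUS_FRESHNESS_DAYS    - Days before re-ingesting a file (default: 30)
#                             NOTE: read into FRESHNESS_DAYS but not consulted
#                             by the checks in this script.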
#

# Make a pipeline fail when any of its stages fails, not only the last one
set -o pipefail

# Configuration: every setting is taken from the environment, with a default
NEXUS_API_KEY="${NEXUS_API_KEY:-}"
AUTO_INGEST="${NEXUS_AUTO_INGEST:-1}"
# Verbosity: NEXUS_INGEST_VERBOSE takes precedence, then NEXUS_VERBOSE, then 0
VERBOSE="${NEXUS_VERBOSE:-0}"
VERBOSE="${NEXUS_INGEST_VERBOSE:-$VERBOSE}"
# NOTE(review): FRESHNESS_DAYS is not referenced anywhere else in the visible
# part of this script.
FRESHNESS_DAYS="${NEXUS_FRESHNESS_DAYS:-30}"

# State directory and the bookkeeping files stored in it
STATE_DIR="$HOME/.claude/session-env/auto-ingest"
INGESTED_FILES="$STATE_DIR/ingested_files.json"
PENDING_JOBS="$STATE_DIR/pending_jobs.json"
# NOTE(review): NOTIFIER_PID is likewise unused in the visible part of the script.
NOTIFIER_PID="$STATE_DIR/notifier.pid"

# Directory names whose contents are never ingested. Order is significant only
# for which name is reported in the debug log when several would match.
EXCLUDED_DIRS=(
  "node_modules" ".git" "__pycache__" "dist" "build"
  "vendor" ".venv" "venv" ".next" ".nuxt"
  "coverage" ".cache" ".npm" ".yarn" "bower_components"
  "target" "out" ".gradle" ".idea" ".vscode"
)

# Logging functions

# Debug logger: writes "[auto-ingest] <message>" to stderr, but only when
# VERBOSE is exactly "1". In every other case it is a silent no-op that
# still returns success.
#   $1 - message text
log() {
  [[ "$VERBOSE" == "1" ]] || return 0
  printf '%s\n' "[auto-ingest] $1" >&2
}

# Error logger: always writes "[auto-ingest] ERROR: <message>" to stderr,
# independent of the VERBOSE setting.
#   $1 - message text
log_error() {
  printf '%s\n' "[auto-ingest] ERROR: $1" >&2
}

# Startup guards. Each one ends the hook with status 0 so that a missing
# prerequisite never blocks the conversation; the reason is only visible in
# verbose mode.

# Auto-ingest switched off
[[ "$AUTO_INGEST" == "1" ]] || {
  log "Auto-ingest disabled (NEXUS_AUTO_INGEST=$AUTO_INGEST)"
  exit 0
}

# No API key configured
[[ -n "$NEXUS_API_KEY" ]] || {
  log "NEXUS_API_KEY not set, skipping auto-ingest"
  exit 0
}

# jq is needed to read the hook payload and the state files
command -v jq > /dev/null 2>&1 || {
  log "jq not installed, skipping auto-ingest"
  exit 0
}

# Create the state directory and seed the two JSON state files.
# Globals: STATE_DIR, INGESTED_FILES, PENDING_JOBS (read)
# Existing files are left untouched; a missing ingested-files index is created
# as an empty object and a missing pending-jobs list as an empty array.
init_state() {
  mkdir -p "$STATE_DIR"

  if [[ ! -f "$INGESTED_FILES" ]]; then
    printf '%s\n' "{}" > "$INGESTED_FILES"
  fi

  if [[ ! -f "$PENDING_JOBS" ]]; then
    printf '%s\n' "[]" > "$PENDING_JOBS"
  fi
}

# Check if file is in an excluded directory.
# Globals:   EXCLUDED_DIRS (read)
# Arguments: $1 - file path to test
# Returns:   0 when one of the EXCLUDED_DIRS names appears in the path as a
#            whole component (".../name/...", a path ending in "/name", or a
#            relative path starting with "name/"); 1 otherwise.
is_excluded() {
  local file_path="$1"
  local dir  # declared local so the loop does not clobber a global "dir"

  for dir in "${EXCLUDED_DIRS[@]}"; do
    # The surrounding slashes keep "dist" from matching "distribution".
    if [[ "$file_path" == *"/$dir/"* \
       || "$file_path" == *"/$dir" \
       || "$file_path" == "$dir/"* ]]; then
      log "Skipping excluded directory: $dir"
      return 0
    fi
  done

  return 1
}

# Get file modification time (cross-platform).
# Arguments: $1 - path of the file to inspect
# Outputs:   the modification time as seconds since the epoch, on stdout
# Returns:   0 on success; 1 (with no output) when the time cannot be read
#
# The stat flavour is detected by trying it rather than by looking at uname:
# the GNU syntax (-c %Y) is attempted first and the BSD syntax (-f %m) second.
# This also covers BSDs other than Darwin and macOS machines that have GNU
# coreutils ahead of the system stat in PATH.
get_mtime() {
  local file_path="$1"
  local mtime

  # GNU coreutils / busybox
  if mtime=$(stat -c %Y -- "$file_path" 2>/dev/null) && [[ "$mtime" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$mtime"
    return 0
  fi

  # BSD / macOS. The numeric check rejects the file-system report that GNU
  # stat would print if it ever reached this branch ("-f" means something
  # different there).
  if mtime=$(stat -f %m -- "$file_path" 2>/dev/null) && [[ "$mtime" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$mtime"
    return 0
  fi

  return 1
}

# Decide whether a file should be (re-)ingested.
# Globals:   PENDING_JOBS, INGESTED_FILES (read)
# Arguments: $1 - path of the candidate file
# Returns:   0 when the file should be ingested: it has no entry in the
#            ingested-files index, its mtime differs from the recorded one, or
#            the recorded status is "failed".
#            1 when it should be skipped: it is not a regular file, it is
#            already listed in the pending jobs, its mtime cannot be read, or
#            it is unchanged since a non-failed ingestion.
needs_ingestion() {
  local file_path="$1"

  # Only existing regular files qualify
  [[ -f "$file_path" ]] || {
    log "File not found: $file_path"
    return 1
  }

  # Already queued? Count pending entries with this path ("0" if jq fails).
  local pending_hits
  pending_hits=$(jq -r --arg p "$file_path" '[.[] | select(.path == $p)] | length' "$PENDING_JOBS" 2>/dev/null || echo "0")
  if [[ "$pending_hits" -gt 0 ]]; then
    log "File already pending: $file_path"
    return 1
  fi

  # Modification time as it is right now
  local mtime_now
  mtime_now=$(get_mtime "$file_path")
  [[ -n "$mtime_now" ]] || {
    log "Cannot get mtime for: $file_path"
    return 1
  }

  # Entry recorded for this path in the ingested-files index, if any
  local entry
  entry=$(jq -r --arg p "$file_path" '.[$p] // empty' "$INGESTED_FILES" 2>/dev/null)
  [[ -n "$entry" ]] || {
    log "File never ingested: $file_path"
    return 0
  }

  local entry_mtime entry_status
  entry_mtime=$(echo "$entry" | jq -r '.mtime // 0')
  entry_status=$(echo "$entry" | jq -r '.status // "unknown"')

  # A changed mtime means the content may have changed since the last ingest
  if [[ "$mtime_now" != "$entry_mtime" ]]; then
    log "File modified since last ingest: $file_path (old: $entry_mtime, new: $mtime_now)"
    return 0
  fi

  # Same mtime, but the last attempt did not succeed: try again
  if [[ "$entry_status" == "failed" ]]; then
    log "Previous ingestion failed, retrying: $file_path"
    return 0
  fi

  log "File unchanged and already ingested: $file_path"
  return 1
}

# Pull the file path out of a Read-tool hook payload.
# Arguments: $1 - hook payload (JSON text)
# Outputs:   .tool_input.file_path on stdout, but only when .tool_name is
#            "Read" and the path is neither empty nor the literal "null".
#            Prints nothing for any other payload, including unparsable input.
extract_read_tool_path() {
  local input="$1"
  local tool target

  # Anything other than a Read tool event is ignored
  tool=$(jq -r '.tool_name // empty' 2>/dev/null <<< "$input")
  [[ "$tool" == "Read" ]] || return 0

  target=$(jq -r '.tool_input.file_path // empty' 2>/dev/null <<< "$input")
  if [[ -z "$target" || "$target" == "null" ]]; then
    return 0
  fi

  echo "$target"
}

# Extract file paths from prompt text using a regex.
# Arguments: $1 - prompt text
# Outputs:   one path per line on stdout, sorted and de-duplicated, limited to
#            candidates that name an existing regular file
#
# Recognised forms (a file extension is required to reduce false positives):
#   absolute   /foo/bar/file.ext
#   home       ~/project/file.ext   ("~" is replaced by $HOME)
#   relative   ./foo/bar.ts, ../src/index.js   (resolved against the current
#              working directory)
extract_prompt_paths() {
  local prompt="$1"
  local candidate base_dir

  # The prompt is fed through a here-string rather than echo so that a prompt
  # starting with "-n" or "-e" is still scanned as plain text.
  grep -oE '(~|\.{0,2})?/[A-Za-z0-9_./+-]+\.[A-Za-z0-9]+' 2>/dev/null <<< "$prompt" |
    while IFS= read -r candidate; do
      # Expand a leading ~ by concatenation. A pattern substitution would
      # treat "&" inside $HOME specially on bash 5.2+ (patsub_replacement).
      if [[ "$candidate" == "~"* ]]; then
        candidate="${HOME}${candidate:1}"
      fi

      # Resolve relative paths. When the directory part does not exist the
      # candidate is dropped; without that check the empty prefix would turn
      # "./missing/x.txt" into "/x.txt" at the filesystem root.
      if [[ "$candidate" == ./* || "$candidate" == ../* ]]; then
        base_dir=$(cd "$(dirname "$candidate")" 2>/dev/null && pwd) || continue
        candidate="${base_dir%/}/$(basename "$candidate")"
      fi

      # Only report files that actually exist
      if [[ -f "$candidate" ]]; then
        printf '%s\n' "$candidate"
      fi
    done | sort -u
}

# Queue a file for background ingestion.
# Globals:   HOME (read)
# Arguments: $1 - path of the file to ingest
# Returns:   0 once the upload script has been started in the background;
#            1 when ${HOME}/.claude/hooks/upload-document.sh is missing or not
#            executable (an error is logged).
# The upload script is invoked as: upload-document.sh <file> --background
queue_for_ingestion() {
  local file_path="$1"
  local upload_script="${HOME}/.claude/hooks/upload-document.sh"

  log "Queueing for ingestion: $file_path"

  if [[ ! -x "$upload_script" ]]; then
    log_error "upload-document.sh not found or not executable"
    return 1
  fi

  # Fire-and-forget. All three standard streams are detached from the hook:
  # a background job that keeps the hook's stdout or stderr open makes whoever
  # captures the hook output wait for the upload to finish, and its stdout
  # would end up mixed into the context this hook emits.
  "$upload_script" "$file_path" --background < /dev/null > /dev/null 2>&1 &
  disown 2>/dev/null || true

  return 0
}

# Print the feedback block that tells the user which files were queued.
# Arguments: $1 - newline-separated list of queued file paths
# Outputs:   a <nexus-auto-ingest> section on stdout listing the base name of
#            every non-empty line; nothing at all when the list is empty
emit_context() {
  local files_queued="$1"

  [[ -n "$files_queued" ]] || return 0

  local line_total
  line_total=$(echo "$files_queued" | wc -l | tr -d ' ')
  [[ "$line_total" -gt 0 ]] || return 0

  # Header (preceded by a blank line)
  printf '%s\n' "" "<nexus-auto-ingest>" "## Files Queued for Memory Ingestion"

  # One bullet per queued file, showing only the file name
  local entry
  echo "$files_queued" | while read -r entry; do
    [[ -z "$entry" ]] && continue
    echo "- $(basename "$entry") - processing in background..."
  done

  # Footer
  printf '%s\n' "" "Files will be searchable via recall after processing completes." "</nexus-auto-ingest>"
}

# Main execution: read the hook payload from stdin, work out which files it
# refers to, queue the ones that need ingestion, and print the feedback block.
# Globals: INPUT, FILES_TO_INGEST, QUEUED_FILES (written)
# Always terminates the script with exit status 0.
main() {
  local nl=$'\n'
  local read_path prompt candidate queued_path

  init_state

  # The hook payload arrives on stdin
  INPUT=$(cat)
  [[ -n "$INPUT" ]] || {
    log "No input provided"
    exit 0
  }
  log "Received input: ${INPUT:0:200}..."

  FILES_TO_INGEST=""

  # 1. PostToolUse: a file that was just opened with the Read tool
  read_path=$(extract_read_tool_path "$INPUT")
  if [[ -n "$read_path" ]]; then
    log "Detected Read tool file: $read_path"
    if ! is_excluded "$read_path" && needs_ingestion "$read_path"; then
      FILES_TO_INGEST="$read_path"
    fi
  fi

  # 2. UserPromptSubmit: file paths mentioned in the prompt text
  prompt=$(echo "$INPUT" | jq -r '.prompt // .content // empty' 2>/dev/null)
  if [[ -n "$prompt" && "$prompt" != "null" ]]; then
    log "Scanning prompt for file paths..."
    while IFS= read -r candidate; do
      [[ -n "$candidate" ]] || continue
      log "Detected prompt file: $candidate"
      is_excluded "$candidate" && continue
      needs_ingestion "$candidate" || continue
      # Append, inserting a newline only when the list is already non-empty
      FILES_TO_INGEST="${FILES_TO_INGEST:+${FILES_TO_INGEST}${nl}}${candidate}"
    done < <(extract_prompt_paths "$prompt")
  fi

  # De-duplicate, then hand every file to the uploader; only files that were
  # actually queued are reported back.
  QUEUED_FILES=""
  if [[ -n "$FILES_TO_INGEST" ]]; then
    FILES_TO_INGEST=$(echo "$FILES_TO_INGEST" | sort -u)
    while IFS= read -r queued_path; do
      [[ -n "$queued_path" ]] || continue
      queue_for_ingestion "$queued_path" || continue
      QUEUED_FILES="${QUEUED_FILES:+${QUEUED_FILES}${nl}}${queued_path}"
    done <<< "$FILES_TO_INGEST"
  fi

  # Feedback for the user
  emit_context "$QUEUED_FILES"

  exit 0
}

# Entry point. The hook payload is read from stdin inside main; positional
# arguments are forwarded but main does not use them.
main "$@"
