#!/bin/bash
#
# Nexus Memory - Optimized Episode Summary Hook (GraphRAG v2)
# Captures significant tool uses and generates periodic episode summaries.
#
# PERFORMANCE OPTIMIZED:
# - Smart tool filtering (skip routine operations)
# - Significance scoring for tools
# - Configurable episode threshold (NEXUS_EPISODE_THRESHOLD)
# - Compressed episode summaries for efficiency
# - Async storage for non-blocking operation
#
# Episodes capture:
# - Significant tool uses only
# - Project context and decisions made
# - Causal relationships between actions
# - Extracted entities for knowledge graph
#
# Usage:
#   echo '{"tool_name": "Bash", "tool_input": {...}, "tool_output": "..."}' | episode-summary.sh
#
# Environment Variables:
#   NEXUS_API_KEY        - API key for authentication (REQUIRED)
#   NEXUS_API_URL        - API endpoint (default: https://api.adverant.ai)
#   NEXUS_COMPANY_ID     - Company identifier (default: adverant)
#   NEXUS_APP_ID         - Application identifier (default: claude-code)
#   NEXUS_VERBOSE        - Set to 1 for debug output (the episode upload then runs synchronously)
#   NEXUS_EPISODE_THRESHOLD - Tool count threshold for episode summary (default: 10)
#
# GraphRAG Enhancement Options:
#   NEXUS_EXTRACT_ENTITIES  - Enable entity extraction (default: true)
#   NEXUS_CREATE_RELATIONS  - Create knowledge graph relationships (default: true)
#   NEXUS_MIN_SIGNIFICANCE  - Minimum significance to store tool use (default: 40)
#

set -o pipefail

# Normalize a boolean-like setting to the JSON literal "true" or "false".
# The result is handed to `jq --argjson`, which aborts on anything that is not
# valid JSON, so unrecognized values fall back to the supplied default.
#   $1 - raw value (may be empty)
#   $2 - default to use when $1 is not recognized ("true" or "false")
nexus_bool() {
  case "$1" in
    true|TRUE|True|1|yes|YES|on|ON) echo "true" ;;
    false|FALSE|False|0|no|NO|off|OFF) echo "false" ;;
    *) echo "$2" ;;
  esac
}

# Normalize a non-negative integer setting.
#   $1 - raw value (may be empty)
#   $2 - default to use when $1 is not a plain run of decimal digits
nexus_uint() {
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so inputs such as "08" are not parsed as octal.
    echo "$((10#$1))"
  else
    echo "$2"
  fi
}

# Configuration with environment variable overrides
NEXUS_API_KEY="${NEXUS_API_KEY:-}"
NEXUS_API_URL="${NEXUS_API_URL:-https://api.adverant.ai}"
COMPANY_ID="${NEXUS_COMPANY_ID:-adverant}"
APP_ID="${NEXUS_APP_ID:-claude-code}"
VERBOSE="${NEXUS_VERBOSE:-0}"
EPISODE_THRESHOLD=$(nexus_uint "${NEXUS_EPISODE_THRESHOLD:-}" 10)

# GraphRAG Enhancement Configuration
EXTRACT_ENTITIES=$(nexus_bool "${NEXUS_EXTRACT_ENTITIES:-}" true)
CREATE_RELATIONS=$(nexus_bool "${NEXUS_CREATE_RELATIONS:-}" true)
MIN_SIGNIFICANCE=$(nexus_uint "${NEXUS_MIN_SIGNIFICANCE:-}" 40)

# State files used to carry counters and IDs between hook invocations
STATE_DIR="${HOME}/.claude/session-env"
COUNTER_FILE="${STATE_DIR}/episode_counter"
LAST_TOOL_ID_FILE="${STATE_DIR}/last_tool_id"
SESSION_TOOLS_FILE="${STATE_DIR}/session_tools"
LAST_EPISODE_FILE="${STATE_DIR}/last_episode_id"

# Debug logger: writes the message in $1 to stderr, but only when VERBOSE
# is exactly "1". Otherwise it does nothing and returns success.
log() {
  [[ "$VERBOSE" == "1" ]] || return 0
  printf '%s\n' "[episode-summary] $1" >&2
}

# Error logger: always writes the message in $1 to stderr, regardless of
# the VERBOSE setting.
log_error() {
  printf '%s\n' "[episode-summary] ERROR: $1" >&2
}

# =========================================================
# FAST PATH: Early exit checks
# =========================================================

# Without an API key nothing can be stored, so leave quietly.
[[ -n "$NEXUS_API_KEY" ]] || exit 0

# jq and curl are both required; if either is missing, exit without noise.
for required_tool in jq curl; do
  type "$required_tool" &>/dev/null || exit 0
done

# Create the state directory if it is not there yet (errors are discarded).
mkdir -p "$STATE_DIR" 2>/dev/null

# Read the whole event from stdin; an empty event means there is nothing to do.
INPUT=$(cat)
[[ -n "$INPUT" ]] || exit 0

log "Received input: ${INPUT:0:100}..."

# =========================================================
# INPUT PARSING
# =========================================================

# Print tool_name, force and session_end from the JSON event in $1 as one line
# of three fields separated by the ASCII unit separator (0x1F).
#
# A non-whitespace separator is required: tab is IFS whitespace, so `read`
# collapses leading/consecutive tabs and an empty tool_name would shift the
# remaining fields one position to the left (e.g. {"force": true} would be
# read as TOOL_NAME="true", FORCE_SUMMARY="false").
#
# Newlines and separator characters inside a value are replaced by spaces so
# that each value stays within its own field. If jq cannot parse the event,
# the defaults ("", "false", "false") are printed instead.
extract_event_fields() {
  jq -r '
    def flat: tostring | split("\n") | join(" ") | split("\u001f") | join(" ");
    [
      (.tool_name // ""),
      (.force // "false"),
      (.session_end // "false")
    ] | map(flat) | join("\u001f")' <<<"$1" 2>/dev/null \
    || printf '\037false\037false\n'
}

# Extract the three control fields with a single jq call
IFS=$'\037' read -r TOOL_NAME FORCE_SUMMARY SESSION_END \
  < <(extract_event_fields "$INPUT")

# Larger fields are pulled individually; each ends up empty if jq fails.
TOOL_INPUT=$(jq -c '.tool_input // {}' <<<"$INPUT" 2>/dev/null)
TOOL_OUTPUT=$(jq -r '.tool_output // ""' <<<"$INPUT" 2>/dev/null)
CONTENT=$(jq -r '.content // .conversation_summary // .prompt // ""' <<<"$INPUT" 2>/dev/null)

# Get project context (the project is identified by the working directory)
PROJECT_DIR=$(pwd)
PROJECT_NAME=$(basename "$PROJECT_DIR")
TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)
# Project name + epoch seconds + PID of this hook process
INTERACTION_ID="${PROJECT_NAME}-$(date +%s)-$$"

# =========================================================
# TOOL SIGNIFICANCE SCORING
# =========================================================

#######################################
# Score how significant a tool use is, on a 0-100 scale.
#
# The tool name selects a base score; for Bash the command line is inspected
# to tell routine read-only commands from state-changing ones. The output text
# can then raise the score: +20 if it mentions an error/failure, +15 if it
# mentions a success/completion (both may apply).
#
# Arguments:
#   $1 - tool name (e.g. Bash, Write, Read)
#   $2 - tool input as JSON; only `.command` is read, and only for Bash
#   $3 - tool output text (may be empty)
# Outputs:
#   The integer score on stdout.
#
# Text is fed to grep through here-strings rather than `echo ... | grep -q`:
# with `set -o pipefail`, grep -q exiting on an early match can kill the echo
# with SIGPIPE on large outputs, which makes the whole pipeline look like
# "no match".
#######################################
get_tool_significance() {
  local tool="$1"
  local input="$2"
  local output="$3"
  local score=50  # Base score
  local cmd

  case "$tool" in
    Write|Edit|NotebookEdit)
      score=80  # File modifications are significant
      ;;
    Bash)
      # An unparsable input leaves cmd empty, which lands in the default arm.
      cmd=$(jq -r '.command // ""' <<<"$input" 2>/dev/null)

      # Routine read-only commands
      if grep -qE '^(ls|pwd|cat|head|tail|wc|find|grep|which|echo|date|whoami|type|file)([[:space:]]|$)' <<<"$cmd"; then
        score=10
      # Routine git read commands
      elif grep -qE '^git[[:space:]]+(status|log|diff|branch|show|remote|fetch)([[:space:]]|$)' <<<"$cmd"; then
        score=15
      # State-changing git commands
      elif grep -qE '^git[[:space:]]+(commit|push|merge|rebase|checkout|reset)([[:space:]]|$)' <<<"$cmd"; then
        score=85
      # Package management
      elif grep -qE '^(npm|yarn|pnpm)[[:space:]]+(install|add|remove|run build|run test)' <<<"$cmd"; then
        score=70
      # Docker commands
      elif grep -qE '^docker[[:space:]]+(build|run|push|pull|compose)' <<<"$cmd"; then
        score=75
      # kubectl commands
      elif grep -qE '^(kubectl|k3s kubectl)[[:space:]]+(apply|delete|create|rollout)' <<<"$cmd"; then
        score=80
      else
        score=40  # Unknown commands get medium score
      fi
      ;;
    TodoWrite)
      score=60  # Task management
      ;;
    WebFetch|WebSearch)
      score=30  # Research/lookup
      ;;
    Read|Glob|Grep)
      score=10  # Read-only operations
      ;;
    Skill)
      score=75  # Skill invocations are significant
      ;;
    *)
      score=50  # Unknown tools get base score
      ;;
  esac

  # Output-based adjustments
  if [[ -n "$output" ]]; then
    if grep -qiE '(error|exception|failed|failure)' <<<"$output"; then
      score=$((score + 20))
    fi
    if grep -qiE '(success|completed|created|fixed|resolved)' <<<"$output"; then
      score=$((score + 15))
    fi
  fi

  # Clamp to 0-100
  if (( score < 0 )); then score=0; fi
  if (( score > 100 )); then score=100; fi

  echo "$score"
}

# =========================================================
# TOOL PROCESSING
# =========================================================

# Every invocation counts toward the episode threshold, with or without a
# tool. The counter file is only trusted when it holds a plain decimal number;
# anything else (missing, empty, corrupt) restarts the count from 0.
CURRENT_COUNT=0
if [[ -f "$COUNTER_FILE" ]]; then
  CURRENT_COUNT=$(cat "$COUNTER_FILE" 2>/dev/null)
  [[ "$CURRENT_COUNT" =~ ^[0-9]+$ ]] || CURRENT_COUNT=0
fi
# 10# forces base 10 so a value such as "08" is not rejected as invalid octal.
NEW_COUNT=$((10#$CURRENT_COUNT + 1))
echo "$NEW_COUNT" > "$COUNTER_FILE"

if [[ -n "$TOOL_NAME" ]] && [[ "$TOOL_NAME" != "null" ]]; then
  # Calculate tool significance
  SIGNIFICANCE=$(get_tool_significance "$TOOL_NAME" "$TOOL_INPUT" "$TOOL_OUTPUT")
  log "Tool: $TOOL_NAME, Significance: $SIGNIFICANCE (min: $MIN_SIGNIFICANCE)"

  # Skip storing low-significance tools
  if [[ "$SIGNIFICANCE" -lt "$MIN_SIGNIFICANCE" ]]; then
    log "Tool below significance threshold, skipping storage (count: $NEW_COUNT)"
  else
    log "Storing significant tool use: $TOOL_NAME"

    # Truncate long outputs
    if [[ ${#TOOL_OUTPUT} -gt 800 ]]; then
      TOOL_OUTPUT="${TOOL_OUTPUT:0:800}... [truncated]"
    fi

    # Get previous tool ID for causal chaining
    PREV_TOOL_ID=""
    if [[ -f "$LAST_TOOL_ID_FILE" ]]; then
      PREV_TOOL_ID=$(cat "$LAST_TOOL_ID_FILE" 2>/dev/null)
    fi

    # Determine domain
    DOMAIN="code"
    case "$TOOL_NAME" in
      WebFetch|WebSearch) DOMAIN="web" ;;
      TodoWrite) DOMAIN="planning" ;;
    esac

    # Build compact tool content (input is capped at 400 characters)
    TOOL_CONTENT="[Tool] $TOOL_NAME (sig: $SIGNIFICANCE)
Project: $PROJECT_NAME
Input: ${TOOL_INPUT:0:400}
Output: $TOOL_OUTPUT"

    # Build payload. The --argjson values must be valid JSON; if any is not,
    # jq fails and TOOL_PAYLOAD is left empty.
    TOOL_PAYLOAD=$(jq -n \
      --arg content "$TOOL_CONTENT" \
      --arg tool "$TOOL_NAME" \
      --arg project "$PROJECT_NAME" \
      --arg timestamp "$TIMESTAMP" \
      --arg interactionId "$INTERACTION_ID" \
      --arg prevToolId "$PREV_TOOL_ID" \
      --argjson significance "$SIGNIFICANCE" \
      --arg domain "$DOMAIN" \
      --argjson extractEntities "$EXTRACT_ENTITIES" \
      --argjson createRelations "$CREATE_RELATIONS" \
      '{
        content: $content,
        tags: ["claude-code", "type:tool-use", "tool:\($tool)", "project:\($project)", "sig:\($significance)"],
        metadata: {
          eventType: "tool-use",
          toolName: $tool,
          projectName: $project,
          timestamp: $timestamp,
          interactionId: $interactionId,
          significance: $significance
        },
        extract_entities: $extractEntities,
        entity_types: ["code_pattern", "error", "file", "function", "command"],
        domain: $domain,
        create_relationships: $createRelations,
        episodic: {
          type: "tool_use",
          interaction_id: $interactionId,
          causal_context: (if $prevToolId != "" then $prevToolId else null end)
        }
      }' 2>/dev/null)

    if [[ -z "$TOOL_PAYLOAD" ]]; then
      log "Could not build tool payload, skipping remote store"
    fi

    # Store tool use asynchronously so the hook returns without waiting for
    # the API call.
    (
      # Never POST an empty body: only call the API when a payload was built.
      if [[ -n "$TOOL_PAYLOAD" ]]; then
        RESPONSE=$(curl -s -X POST "$NEXUS_API_URL/api/memory/store" \
          -H "Content-Type: application/json" \
          -H "Authorization: Bearer $NEXUS_API_KEY" \
          -H "X-Company-ID: $COMPANY_ID" \
          -H "X-App-ID: $APP_ID" \
          -H "X-User-ID: ${USER:-unknown}" \
          -H "Connection: keep-alive" \
          -d "$TOOL_PAYLOAD" \
          --connect-timeout 2 \
          --max-time 5 2>/dev/null)

        # Save this tool's memory ID for causal chaining
        MEMORY_ID=$(jq -r '.data.memoryId // .memoryId // empty' <<<"$RESPONSE" 2>/dev/null)
        if [[ -n "$MEMORY_ID" ]]; then
          echo "$MEMORY_ID" > "$LAST_TOOL_ID_FILE"
        fi
      fi

      # Append to session tools log (compact format: name:significance:HHMMSS)
      echo "$TOOL_NAME:$SIGNIFICANCE:$(date +%H%M%S)" >> "$SESSION_TOOLS_FILE"
    ) &

    if [[ -n "$TOOL_PAYLOAD" ]]; then
      log "Tool use stored"
    fi
  fi
fi

# =========================================================
# EPISODE SUMMARY GENERATION
# =========================================================

# Decide whether this invocation closes an episode. An explicit force flag is
# checked first, then the session-end flag, then whether the interaction
# counter has reached the threshold (in which case the counter file is reset
# right away). If none applies, the hook is done for now and exits.
SHOULD_SUMMARIZE=true

if [[ "$FORCE_SUMMARY" == "true" ]]; then
  log "Forced summary requested"
elif [[ "$SESSION_END" == "true" ]]; then
  log "Session end summary"
elif [[ "$NEW_COUNT" -ge "$EPISODE_THRESHOLD" ]]; then
  log "Threshold reached ($NEW_COUNT >= $EPISODE_THRESHOLD)"
  echo "0" > "$COUNTER_FILE"
else
  SHOULD_SUMMARIZE=false
  log "Not generating episode yet (count: $NEW_COUNT/$EPISODE_THRESHOLD)"
  exit 0
fi

# =========================================================
# BUILD EPISODE SUMMARY
# =========================================================

# Episode/session identifier derived from the local wall-clock time
SESSION_ID=$(date +%Y%m%d-%H%M%S)

# Read the most recent entries of the session tools log. Each line has the
# form name:significance:HHMMSS.
SESSION_TOOLS=""
SIGNIFICANT_TOOLS=""
if [[ -f "$SESSION_TOOLS_FILE" ]]; then
  SESSION_TOOLS=$(tail -n 30 "$SESSION_TOOLS_FILE" 2>/dev/null)
  # Top 5 tool names among high-significance entries (sig >= 60), as
  # "count name" lines sorted by descending count
  SIGNIFICANT_TOOLS=$(awk -F: '$2 >= 60 {print $1}' <<<"$SESSION_TOOLS" \
    | sort | uniq -c | sort -rn | head -n 5)
  # Truncate the log so the next episode starts from an empty file
  : > "$SESSION_TOOLS_FILE"
fi

# Build content from session activity if not provided
if [[ -z "$CONTENT" || "$CONTENT" == "null" ]]; then
  if [[ -n "$SESSION_TOOLS" ]]; then
    CONTENT="Session activity in $PROJECT_NAME"
  else
    CONTENT="Conversation segment in $PROJECT_NAME project"
  fi
fi

# Truncate very long content
if [[ ${#CONTENT} -gt 3000 ]]; then
  CONTENT="${CONTENT:0:3000}... [truncated]"
fi

# Count tools by significance.
# grep -c already prints "0" when nothing matches but exits 1, so the fallback
# must not print a second "0"; `|| true` only neutralizes the exit status.
TOTAL_TOOLS=$(grep -c ':' <<<"$SESSION_TOOLS" || true)
HIGH_SIG_COUNT=$(awk -F: '$2 >= 60 {n++} END {print n + 0}' <<<"$SESSION_TOOLS")

# Render the top-tools list; the NF guard skips blank input so an empty list
# produces no bullet at all instead of a malformed "-  (x)" line.
TOP_TOOLS_LIST=$(awk 'NF {print "- " $2 " (" $1 "x)"}' <<<"$SIGNIFICANT_TOOLS")

# Generate compact episode summary
EPISODE_CONTENT=$(cat <<EOF
[Episode] $PROJECT_NAME
Time: $TIMESTAMP
Tools: $TOTAL_TOOLS total, $HIGH_SIG_COUNT significant

Activity Summary:
$CONTENT

Top Tools:
$TOP_TOOLS_LIST

---
Auto-generated after $NEW_COUNT interactions.
EOF
)

log "Generating episode summary"

# Look up the ID saved by the previous episode upload (if any) so the new
# episode can reference it through episodic.causal_context.
PREV_EPISODE_ID=""
[[ -f "$LAST_EPISODE_FILE" ]] && PREV_EPISODE_ID=$(cat "$LAST_EPISODE_FILE" 2>/dev/null)

# Variable bindings for the jq template below. --arg passes a string;
# --argjson passes a value that must already be valid JSON (numbers, booleans).
episode_jq_vars=(
  --arg content "$EPISODE_CONTENT"
  --arg session "$SESSION_ID"
  --arg project "$PROJECT_NAME"
  --arg timestamp "$TIMESTAMP"
  --argjson toolCount "$NEW_COUNT"
  --argjson sigCount "$HIGH_SIG_COUNT"
  --arg prevEpisodeId "$PREV_EPISODE_ID"
  --argjson extractEntities "$EXTRACT_ENTITIES"
  --argjson createRelations "$CREATE_RELATIONS"
)

# Assemble the episode request body
PAYLOAD=$(jq -n "${episode_jq_vars[@]}" \
  '{
    content: $content,
    tags: ["claude-code", "type:episode", "project:\($project)", "session:\($session)"],
    metadata: {
      sessionId: $session,
      eventType: "episode",
      projectName: $project,
      timestamp: $timestamp,
      toolCount: $toolCount,
      significantToolCount: $sigCount,
      isEpisodeSummary: true
    },
    extract_entities: $extractEntities,
    entity_types: ["code_pattern", "decision", "learning", "error", "fix"],
    domain: "code",
    create_relationships: $createRelations,
    episodic: {
      type: "episode",
      interaction_id: $session,
      causal_context: (if $prevEpisodeId != "" then $prevEpisodeId else null end),
      trigger: "tool_threshold",
      tool_count: $toolCount
    }
  }')

log "Storing episode to $NEXUS_API_URL/api/memory/store"

# curl options shared by both delivery modes below
episode_request=(
  -s -X POST "$NEXUS_API_URL/api/memory/store"
  -H "Content-Type: application/json"
  -H "Authorization: Bearer $NEXUS_API_KEY"
  -H "X-Company-ID: $COMPANY_ID"
  -H "X-App-ID: $APP_ID"
  -H "X-User-ID: ${USER:-unknown}"
  -H "Connection: keep-alive"
  -d "$PAYLOAD"
  --connect-timeout 2
)

if [[ "$VERBOSE" != "1" ]]; then
  # Background mode: fire the request from a subshell and do not wait for it.
  # The returned ID, if any, is saved for chaining the next episode.
  (
    RESPONSE=$(curl "${episode_request[@]}" --max-time 5 2>/dev/null)

    EPISODE_ID=$(jq -r '.data.memoryId // .memoryId // empty' <<<"$RESPONSE" 2>/dev/null)
    [[ -n "$EPISODE_ID" ]] && echo "$EPISODE_ID" > "$LAST_EPISODE_FILE"
  ) &
else
  # Verbose mode: run in the foreground and have curl append the HTTP status
  # on its own final line, so the outcome can be reported.
  RESPONSE=$(curl "${episode_request[@]}" -w "\n%{http_code}" --max-time 8 2>&1)

  # Last line is the status code; everything before it is the response body.
  HTTP_CODE=$(tail -n1 <<<"$RESPONSE")
  BODY=$(sed '$d' <<<"$RESPONSE")

  log "Response code: $HTTP_CODE"

  case "$HTTP_CODE" in
    200|201)
      EPISODE_ID=$(jq -r '.data.memoryId // .memoryId // empty' <<<"$BODY" 2>/dev/null)
      if [[ -n "$EPISODE_ID" ]]; then
        echo "$EPISODE_ID" > "$LAST_EPISODE_FILE"
        log "Saved episode ID: $EPISODE_ID"
      fi
      log "Episode stored successfully"
      ;;
    *)
      log_error "Failed to store episode (HTTP $HTTP_CODE)"
      ;;
  esac
fi

# Start a new counting window. This runs whether or not the upload succeeded
# (in background mode the result is not known at this point).
echo "0" > "$COUNTER_FILE"

log "Episode summary complete"
exit 0
