#!/bin/bash
#
# Nexus Memory - Upload Document Hook (v2.2.1)
# Uploads documents to FileProcessAgent for intelligent processing with
# FULL KNOWLEDGE EXTRACTION enabled by default.
#
# Auto-Discovery Features (enabled automatically):
# - Smart file type detection via magic bytes
# - Intelligent routing: MageAgent (docs), VideoAgent (video), CyberAgent (binaries)
# - 3-tier OCR cascade: Tesseract → GPT-4o Vision → Claude Opus (auto-escalates)
# - Layout analysis: 99.2% accuracy (Docling-level)
# - Table extraction: 97.9% accuracy
# - Document DNA: Triple-layer storage (semantic + structural + original)
# - Entity extraction → Neo4j knowledge graph
# - Vector embeddings → Qdrant for semantic search
# - Content findable via recall-memory.sh
#
# Supports ALL file types including:
# - Documents: PDF, DOCX, TXT, MD, HTML, etc.
# - Images: JPEG, PNG, GIF, TIFF, WebP
# - Videos: MP4, MOV, AVI, MKV, WebM
# - Archives: ZIP, RAR, 7z, TAR, GZIP
# - Geospatial: GeoJSON, Shapefile, GeoTIFF, KML (via intelligent routing)
# - Point Cloud: LAS, LAZ, PLY, PCD, E57 (via intelligent routing)
# - Code repositories: Automatically detected and processed
# - Any other binary format (routed to appropriate processor)
#
# Usage:
#   upload-document.sh <file_path> [options]
#   upload-document.sh <file1> <file2> ... --batch [options]
#
# Arguments:
#   file_path         Path to the file(s) to upload (required)
#   --wait            Wait for processing to complete and return results
#   --poll-interval=N Poll interval in seconds (default: 5)
#   --batch           Process multiple files (list files before this flag)
#   --tags=a,b,c      Add custom tags for recall (comma-separated)
#   --no-entities     Skip entity extraction to knowledge graph
#   --prefer-speed    Use faster OCR (may reduce accuracy for scanned docs)
#   --background      Fire-and-forget mode for auto-ingestion (non-blocking)
#
# Environment Variables:
#   NEXUS_API_KEY     - API key for authentication (REQUIRED)
#   NEXUS_API_URL     - API endpoint (default: https://api.adverant.ai)
#   NEXUS_COMPANY_ID  - Company identifier (default: adverant)
#   NEXUS_APP_ID      - Application identifier (default: claude-code)
#   NEXUS_VERBOSE     - Set to 1 for debug output
#
# Examples:
#   upload-document.sh ./document.pdf
#   upload-document.sh ./book.pdf --wait
#   upload-document.sh ./data.csv --wait --tags=dataset,sales
#   upload-document.sh book1.pdf book2.pdf book3.pdf --batch --wait
#   upload-document.sh ./video.mp4 --wait --poll-interval=10
#

set -o pipefail

# Source the API key helper for interactive prompting.
# The helper is optional: sourcing errors are silenced on purpose and a
# minimal stand-in for require_api_key is installed below when the helper
# did not provide one.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/api-key-helper.sh" 2>/dev/null || true

# Fallback if helper not found: the script later calls
# `require_api_key --interactive`, so that name must always exist. Without
# this definition a missing helper made every run die with
# "require_api_key: command not found", even when NEXUS_API_KEY was set.
if ! declare -F require_api_key >/dev/null; then
  # Non-interactive stand-in; arguments (e.g. --interactive) are ignored.
  # Returns 0 when NEXUS_API_KEY is non-empty, otherwise prints an error on
  # stderr and returns 1.
  require_api_key() {
    if [[ -z "${NEXUS_API_KEY:-}" ]]; then
      echo "[upload-document] ERROR: NEXUS_API_KEY is required but not set." >&2
      echo "  Get your API key from: https://dashboard.adverant.ai/dashboard/api-keys" >&2
      return 1
    fi
    return 0
  }
fi

# Configuration with environment variable overrides
NEXUS_API_KEY="${NEXUS_API_KEY:-}"
NEXUS_API_URL="${NEXUS_API_URL:-https://api.adverant.ai}"
COMPANY_ID="${NEXUS_COMPANY_ID:-adverant}"
APP_ID="${NEXUS_APP_ID:-claude-code}"
VERBOSE="${NEXUS_VERBOSE:-0}"

# FileProcessAgent endpoint (via Istio VirtualService)
FILEPROCESS_URL="${NEXUS_API_URL}/fileprocess/api/process"
JOBS_URL="${NEXUS_API_URL}/fileprocess/api/jobs"

# Logging functions
# Debug logger: writes "[upload-document] <message>" to stderr, but only
# when VERBOSE is "1"; otherwise it does nothing and succeeds.
#   $1 - message text
log() {
  [[ "$VERBOSE" == "1" ]] || return 0
  printf '%s\n' "[upload-document] $1" >&2
}

# Error logger: always writes "[upload-document] ERROR: <message>" to stderr.
#   $1 - message text
log_error() {
  printf '[upload-document] ERROR: %s\n' "$1" >&2
}

# Informational logger: always writes "[upload-document] <message>" to
# stderr, regardless of VERBOSE.
#   $1 - message text
log_info() {
  local message="[upload-document] $1"
  printf '%s\n' "$message" >&2
}

# Print the command-line help text to stdout.
# Takes no arguments. The text is emitted from a quoted here-document so
# nothing inside it is expanded by the shell. Lists every option the
# argument parser accepts, including --background.
print_usage() {
  cat <<'EOF'
Usage: upload-document.sh <file_path> [options]
       upload-document.sh <file1> <file2> ... --batch [options]

Arguments:
  file_path         Path to the file(s) to upload (required)
  --wait            Wait for processing to complete
  --poll-interval=N Poll interval in seconds (default: 5)
  --batch           Process multiple files (list files before this flag)
  --tags=a,b,c      Add custom tags for recall (comma-separated)
  --no-entities     Skip entity extraction to knowledge graph
  --prefer-speed    Use faster OCR (may reduce accuracy)
  --background      Fire-and-forget mode for auto-ingestion (non-blocking)

Auto-Discovery Features (enabled by default):
  • Smart file type detection via magic bytes
  • Intelligent routing: MageAgent, VideoAgent, CyberAgent
  • 3-tier OCR cascade (auto-escalates for quality)
  • Layout analysis (99.2% accuracy)
  • Table extraction (97.9% accuracy)
  • Entity extraction → Knowledge graph
  • Vector embeddings → Semantic search
  • Content findable via recall-memory.sh

Supported file types:
  Documents:  PDF, DOCX, DOC, TXT, MD, HTML, CSV, XML, JSON
  Images:     JPEG, PNG, GIF, TIFF, WebP
  Videos:     MP4, MOV, AVI, MKV, WebM, FLV
  Archives:   ZIP, RAR, 7z, TAR, TAR.GZ, TAR.BZ2
  Geospatial: GeoJSON, Shapefile, GeoTIFF, KML
  Point Cloud: LAS, LAZ, PLY, PCD, E57
  Code:       Any programming language
  Any other binary format

Maximum file size: 5GB

Examples:
  upload-document.sh ./document.pdf
  upload-document.sh ./book.pdf --wait
  upload-document.sh ./data.csv --wait --tags=dataset,sales
  upload-document.sh book1.pdf book2.pdf book3.pdf --batch --wait
  upload-document.sh ./video.mp4 --wait --poll-interval=10
EOF
}

# An API key is mandatory. This script is invoked directly by a user, so
# the key check is allowed to prompt interactively when no key is set.
require_api_key --interactive || exit 1

# External tools the rest of the script relies on: curl performs the HTTP
# requests and jq handles the JSON. Abort early if either is missing.
command -v curl >/dev/null 2>&1 || {
  log_error "curl is required but not installed."
  exit 1
}

command -v jq >/dev/null 2>&1 || {
  log_error "jq is required but not installed. Install with: brew install jq"
  exit 1
}

# Parse arguments
# Option defaults; parse_cli_args overwrites them from the command line.
FILES=()
WAIT_FOR_COMPLETION=0
POLL_INTERVAL=5
BATCH_MODE=0
CUSTOM_TAGS=""
EXTRACT_ENTITIES=1
PREFER_SPEED=0
BACKGROUND_MODE=0

# Parse the command line into the option globals above.
# Arguments: the script's positional parameters ("$@").
# Globals written: FILES, WAIT_FOR_COMPLETION, POLL_INTERVAL, BATCH_MODE,
#                  CUSTOM_TAGS, EXTRACT_ENTITIES, PREFER_SPEED, BACKGROUND_MODE.
# Exits 0 after printing help for --help/-h; exits 1 on an unknown option or
# an invalid --poll-interval value. Anything not starting with "-" is
# collected as a file path.
parse_cli_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --wait)
        WAIT_FOR_COMPLETION=1
        ;;
      --poll-interval=*)
        POLL_INTERVAL="${1#*=}"
        # The interval feeds both `sleep` and integer arithmetic in the
        # polling loop. Anything other than a positive decimal integer
        # (e.g. "abc", "0", "0.5", "08") would stop the elapsed-time counter
        # from advancing and make the loop spin forever, so reject it here.
        if [[ ! "$POLL_INTERVAL" =~ ^[1-9][0-9]*$ ]]; then
          log_error "Invalid --poll-interval value: '$POLL_INTERVAL' (expected a positive integer)"
          exit 1
        fi
        ;;
      --batch)
        BATCH_MODE=1
        ;;
      --tags=*)
        CUSTOM_TAGS="${1#*=}"
        ;;
      --no-entities)
        EXTRACT_ENTITIES=0
        ;;
      --prefer-speed)
        PREFER_SPEED=1
        ;;
      --background)
        BACKGROUND_MODE=1
        ;;
      --help|-h)
        print_usage
        exit 0
        ;;
      -*)
        log_error "Unknown option: $1"
        print_usage
        exit 1
        ;;
      *)
        # Collect file paths
        FILES+=("$1")
        ;;
    esac
    shift
  done
}

parse_cli_args "$@"

# Validate files
# At least one path must have been supplied on the command line.
if (( ${#FILES[@]} == 0 )); then
  log_error "At least one file path is required"
  print_usage
  exit 1
fi

# More than one path is only accepted together with --batch.
if [[ "$BATCH_MODE" == "0" && ${#FILES[@]} -gt 1 ]]; then
  log_error "Multiple files require --batch flag"
  log_error "Usage: upload-document.sh file1.pdf file2.pdf --batch"
  exit 1
fi

# Every path must name an existing regular file; stop at the first one
# that does not.
for candidate in "${FILES[@]}"; do
  [[ -f "$candidate" ]] && continue
  log_error "File not found: $candidate"
  exit 1
done

# Build processing metadata JSON with hints for aggressive extraction
# Build the processing-metadata JSON document sent along with an upload.
# Arguments: $1 - file name (accepted for interface compatibility; the
#                 generated document does not depend on it).
# Globals read: CUSTOM_TAGS (comma-separated), PREFER_SPEED, EXTRACT_ENTITIES,
#               USER.
# Outputs: one JSON object on stdout.
# Returns: jq's exit status.
#
# The document is assembled by jq rather than by string interpolation so
# that every value is escaped correctly: a user name or tag containing a
# double quote or backslash can no longer produce invalid JSON. Empty tags
# (e.g. from a trailing comma in --tags) are dropped.
build_metadata() {
  # --prefer-speed turns the accuracy preference off.
  local prefer_accuracy="true"
  if [[ "$PREFER_SPEED" == "1" ]]; then
    prefer_accuracy="false"
  fi

  # --no-entities disables both entity extraction and graph storage.
  local extract_entities="true"
  if [[ "$EXTRACT_ENTITIES" == "0" ]]; then
    extract_entities="false"
  fi

  jq -n \
    --arg tags "$CUSTOM_TAGS" \
    --arg user "${USER:-unknown}" \
    --arg now "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
    --argjson accuracy "$prefer_accuracy" \
    --argjson entities "$extract_entities" \
    '{
      source: "nexus-memory-skill",
      version: "2.2.1",
      preferAccuracy: $accuracy,
      forceEntityExtraction: $entities,
      storeInKnowledgeGraph: $entities,
      enableDocumentDNA: true,
      tags: ($tags | split(",") | map(select(length > 0))),
      uploadedBy: $user,
      uploadedAt: $now
    }'
}

# Function to upload a single file
# Upload a single file to the FileProcessAgent endpoint.
# Arguments: $1 - path of the file to upload.
# Globals read: FILEPROCESS_URL, NEXUS_API_KEY, COMPANY_ID, APP_ID, USER.
# Outputs: on success prints "Job ID: <id>" followed by the bare job id as
#          the last stdout line (callers parse that line); progress and
#          errors go to stderr via the log helpers; an error response body
#          is echoed to stdout.
# Returns: 0 when the server accepted the upload and returned a job id,
#          1 when the file is unreadable or too large, the HTTP status is
#          not 200/201/202, or no job id was returned.
upload_file() {
  local FILE_PATH="$1"
  local FILE_NAME FILE_SIZE FILE_SIZE_MB
  FILE_NAME=$(basename -- "$FILE_PATH")

  # An unreadable file would leave FILE_SIZE empty and break the arithmetic
  # below, so fail with a clear message instead.
  FILE_SIZE=$({ wc -c < "$FILE_PATH"; } 2>/dev/null | tr -d ' ')
  if [[ ! "$FILE_SIZE" =~ ^[0-9]+$ ]]; then
    log_error "Cannot read file: $FILE_PATH"
    return 1
  fi
  FILE_SIZE_MB=$((FILE_SIZE / 1024 / 1024))

  # Check file size (max 5GB)
  local MAX_SIZE=$((5 * 1024 * 1024 * 1024))
  if [[ "$FILE_SIZE" -gt "$MAX_SIZE" ]]; then
    log_error "File too large: $FILE_NAME (${FILE_SIZE_MB}MB). Maximum size is 5GB."
    return 1
  fi

  log "File: $FILE_PATH"
  log "Size: $FILE_SIZE bytes (${FILE_SIZE_MB}MB)"

  # Display upload info
  if [[ "$FILE_SIZE_MB" -gt 100 ]]; then
    log_info "Uploading large file: $FILE_NAME (${FILE_SIZE_MB}MB) - this may take a while..."
  else
    log_info "Uploading: $FILE_NAME (${FILE_SIZE_MB}MB)"
  fi

  # Build metadata with processing hints
  local METADATA
  METADATA=$(build_metadata "$FILE_NAME")
  log "Metadata: $METADATA"

  # Upload file via multipart form with metadata hints
  log "Uploading to $FILEPROCESS_URL"

  # curl's -F syntax treats ',' and ';' in a file name as separators unless
  # the name is wrapped in double quotes, and inside those quotes '\' and
  # '"' must be backslash-escaped.
  local form_path
  form_path=${FILE_PATH//\\/\\\\}
  form_path=${form_path//\"/\\\"}

  # --form-string sends userId/metadata literally; with -F a value that
  # contains ";type=" or starts with '@' or '<' would be interpreted by curl.
  local RESPONSE
  RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "$FILEPROCESS_URL" \
    -H "Authorization: Bearer $NEXUS_API_KEY" \
    -H "X-Company-ID: $COMPANY_ID" \
    -H "X-App-ID: $APP_ID" \
    -H "X-User-ID: ${USER:-unknown}" \
    -F "file=@\"${form_path}\"" \
    --form-string "userId=${USER:-unknown}" \
    --form-string "metadata=${METADATA}" \
    --max-time 600 2>&1)

  # Parse response: -w appended the status code as the final line, the
  # rest is the response body.
  local HTTP_CODE BODY
  HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
  BODY=$(echo "$RESPONSE" | sed '$d')

  log "Response code: $HTTP_CODE"

  # Check for upload errors (200, 201, 202 are all success codes)
  if [[ "$HTTP_CODE" != "200" ]] && [[ "$HTTP_CODE" != "201" ]] && [[ "$HTTP_CODE" != "202" ]]; then
    log_error "Failed to upload document (HTTP $HTTP_CODE)"
    if [[ -n "$BODY" ]]; then
      # Pretty-print when the body is JSON, otherwise show it verbatim.
      echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
    fi
    return 1
  fi

  # Parse job ID from response
  local JOB_ID
  JOB_ID=$(echo "$BODY" | jq -r '.jobId // empty')

  if [[ -z "$JOB_ID" ]]; then
    log_error "No job ID returned from upload"
    echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
    return 1
  fi

  log_info "Document queued for processing"
  echo "Job ID: $JOB_ID"

  # Return job ID for tracking
  echo "$JOB_ID"
}

# Function to display detailed results
# Print a human-readable summary of a completed processing job to stdout.
# Arguments: $1 - raw JSON body of the job-status response
#            $2 - file name shown in the banner
# Globals read: VERBOSE ("1" additionally dumps the full response).
# The response is expected to look like { success, job: {...} }; when there
# is no .job key the top-level object itself is treated as the job. Missing
# fields fall back to placeholders ("N/A", "unknown", "auto", 0).
display_results() {
  local STATUS_RESPONSE="$1"
  local FILE_NAME="$2"

  echo ""
  echo "╔══════════════════════════════════════════════════════════════╗"
  echo "║              PROCESSING COMPLETE: $FILE_NAME"
  echo "╚══════════════════════════════════════════════════════════════╝"
  echo ""

  # Extract key metrics from response
  # API response structure: { success: true, job: {...}, documentDna?: {...} }
  local JOB
  JOB=$(jq '.job // .' <<<"$STATUS_RESPONSE")

  # Get job-level fields
  local OCR_TIER CONFIDENCE PROCESSING_TIME MIME_TYPE
  OCR_TIER=$(jq -r '.ocrTierUsed // .ocrTier // "auto"' <<<"$JOB" 2>/dev/null)
  CONFIDENCE=$(jq -r '.confidence // "N/A"' <<<"$JOB" 2>/dev/null)
  PROCESSING_TIME=$(jq -r '.processingTimeMs // "N/A"' <<<"$JOB" 2>/dev/null)
  MIME_TYPE=$(jq -r '.mimeType // "unknown"' <<<"$JOB" 2>/dev/null)

  # Get metadata fields (where entities, tables, etc. are stored)
  local METADATA ENTITY_COUNT TABLE_COUNT PAGE_COUNT WORD_COUNT DOC_TYPE
  METADATA=$(jq '.metadata // {}' <<<"$JOB")
  ENTITY_COUNT=$(jq -r '.entities // [] | length' <<<"$METADATA" 2>/dev/null)
  TABLE_COUNT=$(jq -r '.tables // [] | length' <<<"$METADATA" 2>/dev/null)
  PAGE_COUNT=$(jq -r '.pageCount // "N/A"' <<<"$METADATA" 2>/dev/null)
  WORD_COUNT=$(jq -r '.wordCount // "N/A"' <<<"$METADATA" 2>/dev/null)
  DOC_TYPE=$(jq -r '.documentType // .type // "unknown"' <<<"$METADATA" 2>/dev/null)

  # Check if stored in GraphRAG (Document DNA exists): a non-null
  # documentDnaId on the job is taken as proof of storage.
  local DOC_DNA_ID
  DOC_DNA_ID=$(jq -r '.documentDnaId // "null"' <<<"$JOB" 2>/dev/null)
  local GRAPHRAG_STORED="false"
  if [[ "$DOC_DNA_ID" != "null" ]] && [[ -n "$DOC_DNA_ID" ]]; then
    GRAPHRAG_STORED="true"
  fi

  echo "📄 Document Type:     $DOC_TYPE ($MIME_TYPE)"
  echo "📑 Pages:             $PAGE_COUNT"
  echo "📝 Words:             $WORD_COUNT"
  echo "⏱️  Processing Time:   ${PROCESSING_TIME}ms"
  echo "🎯 Confidence:        $CONFIDENCE"
  echo ""
  echo "🔍 Auto-Discovery Results:"
  echo "   • OCR Tier Used:   $OCR_TIER"
  echo "   • Tables Found:    $TABLE_COUNT"
  echo "   • Entities:        $ENTITY_COUNT"
  echo "   • GraphRAG:        $GRAPHRAG_STORED"
  if [[ "$GRAPHRAG_STORED" == "true" ]]; then
    echo "   • Document DNA:    $DOC_DNA_ID"
  fi
  echo ""

  # Show extracted entities if any (at most the first ten, then a count of
  # the remainder).
  if [[ "$ENTITY_COUNT" != "0" ]] && [[ "$ENTITY_COUNT" != "null" ]] && [[ -n "$ENTITY_COUNT" ]]; then
    echo "🏷️  Extracted Entities:"
    jq -r '.entities // [] | .[:10][] | "   • \(.name // .text // .value) (\(.type // "entity"))"' <<<"$METADATA" 2>/dev/null
    if [[ "$ENTITY_COUNT" -gt 10 ]]; then
      echo "   ... and $((ENTITY_COUNT - 10)) more"
    fi
    echo ""
  fi

  # Show recall command
  echo "💡 To recall this content:"
  echo "   echo '{\"query\": \"<your search>\"}' | recall-memory.sh"
  echo ""

  # Show full JSON if verbose
  if [[ "$VERBOSE" == "1" ]]; then
    echo "=== FULL RESPONSE ==="
    jq . <<<"$STATUS_RESPONSE"
  fi
}

# Function to wait for job completion
# Poll the job-status endpoint until the job finishes, fails or times out.
# Arguments: $1 - job id returned by the upload
#            $2 - file name used in log/result output
# Globals read: POLL_INTERVAL, JOBS_URL, NEXUS_API_KEY, COMPANY_ID, APP_ID,
#               USER, VERBOSE.
# Returns: 0 after display_results ran for a completed job; 1 when the job
#          reported failure or roughly one hour of polling elapsed.
wait_for_job() {
  local JOB_ID="$1"
  local FILE_NAME="$2"

  # The interval is passed to `sleep` and added to the elapsed counter with
  # integer arithmetic. A value that is not a positive decimal integer
  # would make sleep fail and add 0 (or raise an arithmetic error), so the
  # loop would never reach its timeout and would hammer the API. Fall back
  # to the documented default in that case.
  local interval="$POLL_INTERVAL"
  if [[ ! "$interval" =~ ^[1-9][0-9]*$ ]]; then
    log_info "Invalid poll interval '${interval}'; using 5s instead"
    interval=5
  fi

  log_info "Waiting for processing to complete (polling every ${interval}s)..."

  local MAX_WAIT=3600  # 1 hour max wait
  local WAITED=0
  local STATUS_RESPONSE JOB_STATE ERROR_MSG ERROR_CODE PROGRESS STAGE

  while [[ "$WAITED" -lt "$MAX_WAIT" ]]; do
    # Sleep first: the job was only just queued when this function starts.
    sleep "$interval"
    WAITED=$((WAITED + interval))

    # Check job status
    STATUS_RESPONSE=$(curl -s "$JOBS_URL/$JOB_ID" \
      -H "Authorization: Bearer $NEXUS_API_KEY" \
      -H "X-Company-ID: $COMPANY_ID" \
      -H "X-App-ID: $APP_ID" \
      -H "X-User-ID: ${USER:-unknown}" \
      --max-time 30 2>/dev/null)

    # An empty body (e.g. a transient network failure) is treated as
    # "no news yet" rather than as an error.
    if [[ -z "$STATUS_RESPONSE" ]]; then
      log "Waiting... (${WAITED}s elapsed)"
      continue
    fi

    # API response structure: { success: true, job: { status: "completed", ... } }
    # Status is nested under .job.status, NOT at root level
    JOB_STATE=$(echo "$STATUS_RESPONSE" | jq -r '.job.status // .status // .state // empty')

    case "$JOB_STATE" in
      "completed"|"finished"|"success")
        display_results "$STATUS_RESPONSE" "$FILE_NAME"
        return 0
        ;;
      "failed"|"error"|"cancelled")
        log_error "Processing failed for: $FILE_NAME"
        echo ""
        echo "=== ERROR DETAILS ==="
        ERROR_MSG=$(echo "$STATUS_RESPONSE" | jq -r '.job.errorMessage // .errorMessage // "Unknown error"')
        ERROR_CODE=$(echo "$STATUS_RESPONSE" | jq -r '.job.errorCode // .errorCode // "UNKNOWN"')
        echo "Error Code: $ERROR_CODE"
        echo "Error Message: $ERROR_MSG"
        echo ""
        if [[ "$VERBOSE" == "1" ]]; then
          echo "$STATUS_RESPONSE" | jq .
        fi
        return 1
        ;;
      "queued"|"waiting"|"pending")
        log "[$FILE_NAME] Queued... (${WAITED}s elapsed)"
        ;;
      "processing"|"active")
        # Try to get progress from job metadata
        PROGRESS=$(echo "$STATUS_RESPONSE" | jq -r '.job.metadata.progress // .progress // empty')
        STAGE=$(echo "$STATUS_RESPONSE" | jq -r '.job.metadata.stage // .stage // empty')
        if [[ -n "$PROGRESS" ]] && [[ "$PROGRESS" != "null" ]] && [[ -n "$STAGE" ]] && [[ "$STAGE" != "null" ]]; then
          log "[$FILE_NAME] ${STAGE}: ${PROGRESS}% (${WAITED}s elapsed)"
        elif [[ -n "$PROGRESS" ]] && [[ "$PROGRESS" != "null" ]]; then
          log "[$FILE_NAME] Processing... ${PROGRESS}% (${WAITED}s elapsed)"
        else
          log "[$FILE_NAME] Processing... (${WAITED}s elapsed)"
        fi
        ;;
      ""|"null")
        # Empty status might mean job not found or API issue
        log "[$FILE_NAME] Waiting for status... (${WAITED}s elapsed)"
        ;;
      *)
        log "[$FILE_NAME] Status: $JOB_STATE (${WAITED}s elapsed)"
        ;;
    esac
  done

  log_error "Timeout waiting for processing (${MAX_WAIT}s)"
  echo "Job may still be processing. Check status manually:"
  echo "curl -s \"$JOBS_URL/$JOB_ID\" | jq ."
  return 1
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Background mode: Fire-and-forget for auto-ingestion
# When --background was given, the upload and all bookkeeping run in a
# detached subshell and the script itself exits 0 right away, so the
# caller never learns whether the upload succeeded. Bookkeeping consists of
# two JSON state files under ~/.claude/session-env/auto-ingest and an
# optional notifier script.
if [[ "$BACKGROUND_MODE" == "1" ]]; then
  # Only process first file in background mode
  # (any further paths collected in FILES are ignored without a message)
  FILE_PATH="${FILES[0]}"
  FILE_NAME=$(basename "$FILE_PATH")

  log "Background mode: uploading $FILE_NAME"

  # Run upload in background subshell
  (
    # Suppress output in background mode
    # (stdout and stderr of upload_file are captured, not shown)
    UPLOAD_OUTPUT=$(upload_file "$FILE_PATH" 2>&1)
    UPLOAD_EXIT_CODE=$?

    if [[ $UPLOAD_EXIT_CODE -eq 0 ]]; then
      # Extract job ID from output
      # First try a line made up only of hex digits and dashes; if none
      # matches, fall back to the "Job ID: <id>" line upload_file prints.
      JOB_ID=$(echo "$UPLOAD_OUTPUT" | grep -E '^[a-f0-9-]+$' | tail -1)
      if [[ -z "$JOB_ID" ]]; then
        JOB_ID=$(echo "$UPLOAD_OUTPUT" | grep "Job ID:" | sed 's/Job ID: //' | tr -d ' ')
      fi

      if [[ -n "$JOB_ID" ]]; then
        # Update state files for auto-ingest tracking
        STATE_DIR="${HOME}/.claude/session-env/auto-ingest"
        PENDING_FILE="${STATE_DIR}/pending_jobs.json"
        INGESTED_FILE="${STATE_DIR}/ingested_files.json"

        # Create the directory and seed both files on first use:
        # pending jobs is a JSON array, ingested files a JSON object.
        mkdir -p "$STATE_DIR"
        [[ -f "$PENDING_FILE" ]] || echo "[]" > "$PENDING_FILE"
        [[ -f "$INGESTED_FILE" ]] || echo "{}" > "$INGESTED_FILE"

        # Add to pending jobs
        # Written to a .tmp file first and moved into place only when jq
        # succeeded, so a jq failure leaves the existing file untouched.
        NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
        jq --arg j "$JOB_ID" --arg p "$FILE_PATH" --arg t "$NOW" \
          '. + [{jobId: $j, path: $p, startTime: $t}]' "$PENDING_FILE" > "${PENDING_FILE}.tmp" 2>/dev/null \
          && mv "${PENDING_FILE}.tmp" "$PENDING_FILE"

        # Mark as processing in ingested files
        # The modification time is read with the stat flags for the current
        # platform (-f %m on Darwin, -c %Y otherwise); "0" is used when
        # stat fails.
        if [[ "$(uname)" == "Darwin" ]]; then
          MTIME=$(stat -f %m "$FILE_PATH" 2>/dev/null || echo "0")
        else
          MTIME=$(stat -c %Y "$FILE_PATH" 2>/dev/null || echo "0")
        fi

        # The entry is keyed by the file path exactly as it was given on
        # the command line; same write-then-move pattern as above.
        jq --arg p "$FILE_PATH" --arg j "$JOB_ID" --arg m "$MTIME" --arg t "$NOW" \
          '.[$p] = {jobId: $j, status: "processing", mtime: ($m | tonumber), ingestedAt: $t}' \
          "$INGESTED_FILE" > "${INGESTED_FILE}.tmp" 2>/dev/null \
          && mv "${INGESTED_FILE}.tmp" "$INGESTED_FILE"

        # Start notifier daemon if not running
        # "Not running" means: no PID file, or the PID stored in it does
        # not answer to `kill -0`.
        # NOTE(review): nothing here writes PID_FILE; presumably the
        # notifier script maintains it itself - confirm.
        NOTIFIER_SCRIPT="${HOME}/.claude/hooks/ingest-notify.sh"
        PID_FILE="${STATE_DIR}/notifier.pid"

        if [[ -x "$NOTIFIER_SCRIPT" ]]; then
          if [[ ! -f "$PID_FILE" ]] || ! kill -0 "$(cat "$PID_FILE" 2>/dev/null)" 2>/dev/null; then
            "$NOTIFIER_SCRIPT" &
          fi
        fi

        log "Background upload queued: $FILE_NAME (job: $JOB_ID)"
      fi
    else
      # Failures are only reported through the verbose log.
      log "Background upload failed: $FILE_NAME"
    fi
  ) &
  # Detach the subshell from job control; errors from disown are ignored.
  disown 2>/dev/null || true

  # Exit immediately (non-blocking)
  exit 0
fi

# Track all job IDs for batch mode
# JOB_IDS and FILE_NAMES are parallel arrays: entry i of each describes
# the same successfully queued upload.
JOB_IDS=()
FILE_NAMES=()
FAILED_UPLOADS=0

# Upload every path in FILES and record the resulting jobs.
# Globals read:    FILES, BATCH_MODE.
# Globals written: JOB_IDS, FILE_NAMES (reset, then one entry per queued
#                  upload), FAILED_UPLOADS (reset, then counted).
# Outputs: the captured output of each upload_file call on stdout, plus
#          blank separator lines in batch mode.
#
# A file name is recorded only together with its job id. Recording a name
# for every file, including failed uploads, made the two arrays drift
# apart after the first failure, so later status hints and result banners
# were labelled with the wrong file.
upload_all_files() {
  local path name output status job_id

  JOB_IDS=()
  FILE_NAMES=()
  FAILED_UPLOADS=0

  # Display batch info
  if [[ "$BATCH_MODE" == "1" ]]; then
    log_info "Batch mode: uploading ${#FILES[@]} files"
    echo ""
  fi

  # Upload all files
  for path in "${FILES[@]}"; do
    name=$(basename "$path")

    # Upload file and capture its combined stdout/stderr
    output=$(upload_file "$path" 2>&1)
    status=$?

    if [[ $status -eq 0 ]]; then
      # Extract job ID from output: prefer the last line consisting only of
      # hex digits and dashes, otherwise fall back to the "Job ID: xxx" line.
      job_id=$(grep -E '^[a-f0-9-]+$' <<<"$output" | tail -1)
      if [[ -z "$job_id" ]]; then
        job_id=$(grep "Job ID:" <<<"$output" | sed 's/Job ID: //' | tr -d ' ')
      fi
      if [[ -n "$job_id" ]]; then
        JOB_IDS+=("$job_id")
        FILE_NAMES+=("$name")
      fi
      echo "$output"
    else
      log_error "Failed to upload: $name"
      echo "$output"
      FAILED_UPLOADS=$((FAILED_UPLOADS + 1))
    fi

    # Add spacing between files in batch mode
    if [[ "$BATCH_MODE" == "1" ]]; then
      echo ""
    fi
  done
}

upload_all_files

# Summary for batch mode
if [[ "$BATCH_MODE" == "1" ]]; then
  echo "╔══════════════════════════════════════════════════════════════╗"
  echo "║                    UPLOAD SUMMARY                            ║"
  echo "╚══════════════════════════════════════════════════════════════╝"
  echo "Total files:     ${#FILES[@]}"
  echo "Uploaded:        ${#JOB_IDS[@]}"
  echo "Failed:          $FAILED_UPLOADS"
  echo ""
fi

# Turn a failure count into a usable process exit status.
# Arguments: $1 - number of failures (defaults to 0)
# Outputs:   the count, capped at 255, on stdout.
# Exit statuses are taken modulo 256, so passing the raw count to `exit`
# would report 256 failed uploads as success (0).
failure_exit_code() {
  local count="${1:-0}"
  if (( count > 255 )); then
    count=255
  fi
  echo "$count"
}

# If not waiting, exit here
# The exit status is the number of failed uploads (capped at 255), so it is
# 0 only when every upload was accepted.
if [[ "$WAIT_FOR_COMPLETION" == "0" ]]; then
  if [[ ${#JOB_IDS[@]} -gt 0 ]]; then
    echo "To check status:"
    for i in "${!JOB_IDS[@]}"; do
      echo "  curl -s \"$JOBS_URL/${JOB_IDS[$i]}\" | jq .  # ${FILE_NAMES[$i]}"
    done
  fi
  exit "$(failure_exit_code "$FAILED_UPLOADS")"
fi

# Wait for all jobs to complete
# Jobs are awaited one after another in upload order; every job whose wait
# does not end successfully counts as one failure.
FAILED_JOBS=0
for ((i = 0; i < ${#JOB_IDS[@]}; i++)); do
  JOB_ID="${JOB_IDS[$i]}"
  FILE_NAME="${FILE_NAMES[$i]}"
  wait_for_job "$JOB_ID" "$FILE_NAME" || FAILED_JOBS=$((FAILED_JOBS + 1))
done

# Final exit code
# Upload failures and processing failures are added up; any failure at all
# yields exit status 1, otherwise 0.
TOTAL_FAILURES=$((FAILED_UPLOADS + FAILED_JOBS))
if (( TOTAL_FAILURES == 0 )); then
  exit 0
fi

log_error "$TOTAL_FAILURES file(s) failed to process"
exit 1