/**
 * File Reference Registry
 *
 * Central registry for managing file references in on-demand processing mode.
 * Files are registered with lightweight metadata and previews. Full content
 * is processed on-demand when the LLM requests it via tools.
 *
 * This module is the core of the file reference architecture, replacing
 * the previous "load everything upfront" pattern for files that exceed
 * the tiny/small size tiers.
 *
 * @module files/fileReferenceRegistry
 */
import type { FileSource, FileExtractionParams, FileExtractionResult, FileReadResult, FileReference, FileRegistrationOptions, FileRegistryOptions, FileSearchResult, SizeTier } from "../types/index.js";
/**
 * Registry for managing file references with on-demand processing.
 *
 * Design decisions:
 * - One instance per NeuroLink SDK instance (not global singleton)
 * - File buffers persisted to temp dir for later streaming access
 * - LRU eviction when maxFiles exceeded
 * - Thread-safe via sequential async operations (Node.js single-threaded)
 *
 * @example
 * ```typescript
 * const registry = new FileReferenceRegistry();
 * const ref = await registry.register(buffer, 'buffer', {
 *   filename: 'report.xlsx',
 * });
 * console.log(ref.sizeTier);      // 'medium'
 * console.log(ref.preview);       // First 2000 chars of processed content
 * console.log(ref.estimatedTokens); // Type-aware estimate
 *
 * // Later, LLM requests specific section
 * const section = await registry.readSection(ref.id, 1, 50, 5000);
 * ```
 */
export declare class FileReferenceRegistry {
    /**
     * Store of registered file references.
     * NOTE(review): the container type is erased in this declaration —
     * presumably keyed by reference ID; confirm against the implementation.
     */
    private files;
    /** Temp directory that file buffers are persisted to (see persistToTemp). */
    private tempDir;
    /**
     * Limit on the number of registered files. Per the class-level design
     * notes, exceeding it triggers LRU eviction (see evictLRU).
     */
    private maxFiles;
    /** NOTE(review): presumably the cap on total bytes persisted to the temp directory — confirm. */
    private maxTempBytes;
    /** NOTE(review): presumably the default preview length, in characters — confirm. */
    private defaultPreviewChars;
    /** NOTE(review): presumably a running total of bytes currently persisted to the temp directory — confirm. */
    private currentTempBytes;
    /** NOTE(review): presumably tracks whether the temp directory has been created yet — confirm. */
    private tempDirCreated;
    /**
     * Create a registry instance.
     *
     * @param options - Optional registry configuration. The defaults applied
     *   when it is omitted are defined by the implementation and are not
     *   visible in this declaration.
     */
    constructor(options?: FileRegistryOptions);
    /**
     * Register a file from a Buffer.
     *
     * This is the primary registration method. It performs lightweight analysis:
     * 1. Detect file type from magic bytes (first 1KB)
     * 2. Determine size tier
     * 3. Extract preview (first N chars of text, or metadata for binary)
     * 4. Persist buffer to temp directory for later streaming access
     *
     * Total time: ~1-5ms for most files (no full processing).
     *
     * Note the argument order: `source` is the second positional argument and
     * `options` the third. Both are optional; their defaults are set by the
     * implementation.
     *
     * @param buffer - File content as Buffer
     * @param source - How the file was provided ('buffer', 'url', 'path', 'datauri')
     * @param options - Registration options
     * @returns Promise resolving to a FileReference with metadata and preview
     */
    register(buffer: Buffer, source?: FileSource, options?: FileRegistrationOptions): Promise<FileReference>;
    /**
     * Register a file from a file path on disk.
     *
     * Does NOT read the entire file — only reads the first 1KB for type detection
     * and preview. The file path is stored for later streaming access.
     *
     * @param filePath - Absolute path to the file
     * @param options - Registration options
     * @returns Promise resolving to a FileReference with metadata and preview
     */
    registerFromPath(filePath: string, options?: FileRegistrationOptions): Promise<FileReference>;
    /**
     * Get a file reference by ID.
     * Updates lastAccessedAt for LRU tracking.
     *
     * @param id - File reference ID
     * @returns The file reference, or undefined if no reference has that ID
     */
    get(id: string): FileReference | undefined;
    /**
     * Get a file reference by ID or filename.
     * Tries ID lookup first, then falls back to filename match.
     * This handles the common case where an LLM uses the filename
     * instead of the UUID when calling file tools.
     *
     * NOTE(review): how the filename match treats duplicates or letter case
     * is not visible in this declaration — confirm before relying on it.
     *
     * @param idOrName - UUID or filename to search for
     * @returns File reference if found, undefined otherwise
     */
    getByIdOrFilename(idOrName: string): FileReference | undefined;
    /**
     * Ensure a file has been processed (binary content extracted to text).
     *
     * For text files this is a no-op. For binary files (PDF, XLSX, video, etc.)
     * this triggers on-demand processing if it hasn't happened yet. After this
     * call, ref.processedContent and ref.preview contain extracted text.
     *
     * Used by file tools (get_file_preview) to ensure the preview contains
     * real content instead of placeholder metadata strings.
     *
     * NOTE(review): behaviour for an unknown fileId (reject vs. resolve
     * silently) is not visible in this declaration — confirm.
     *
     * @param fileId - File reference ID
     * @returns Promise that resolves once processing has completed
     */
    ensureProcessed(fileId: string): Promise<void>;
    /**
     * Extract targeted content from a registered file.
     *
     * This is the core dispatch method for the `extract_file_content` tool.
     * Routes extraction to the appropriate processor based on file type and
     * the parameters provided.
     *
     * @param params - Extraction parameters (file_id + type-specific options)
     * @returns Promise resolving to an extraction result with text and/or images
     */
    extractContent(params: FileExtractionParams): Promise<FileExtractionResult>;
    /*
     * Type-specific targeted extractors (video, PDF, Excel, PPTX, archive,
     * audio, text).
     * NOTE(review): presumably the per-type handlers that extractContent()
     * routes to; their signatures are erased in this declaration — confirm
     * against the implementation.
     */
    private extractVideoTargeted;
    private extractPdfTargeted;
    private extractExcelTargeted;
    private extractPptxTargeted;
    private extractArchiveTargeted;
    private extractAudioTargeted;
    private extractTextTargeted;
    /**
     * List all registered files.
     *
     * Returns the FileReference objects themselves (see the return type); use
     * listFormatted() for a string rendering intended for the LLM.
     *
     * @returns Array of registered file references
     */
    list(): FileReference[];
    /**
     * Generate a formatted table of all registered files for the LLM.
     *
     * @returns The table as a single string
     */
    listFormatted(): string;
    /**
     * Read a section of a registered file.
     *
     * Uses StreamingReader for memory-efficient access.
     *
     * All parameters after `fileId` are optional; the defaults used when they
     * are omitted are defined by the implementation and are not visible here.
     *
     * @param fileId - File reference ID
     * @param startLine - Starting line (1-indexed)
     * @param endLine - Ending line (1-indexed)
     * @param tokenBudget - Maximum tokens to return
     * @param provider - Provider name for token estimation
     * @returns Promise resolving to a FileReadResult
     */
    readSection(fileId: string, startLine?: number, endLine?: number, tokenBudget?: number, provider?: string): Promise<FileReadResult>;
    /**
     * Search within a registered file.
     *
     * @param fileId - File reference ID
     * @param pattern - Search pattern, always passed as a string.
     *   NOTE(review): whether it is matched literally or compiled as a
     *   regular expression is decided by the implementation — confirm.
     * @param maxMatches - Maximum matches to return
     * @returns Promise resolving to a FileSearchResult
     */
    search(fileId: string, pattern: string, maxMatches?: number): Promise<FileSearchResult>;
    /**
     * Search within in-memory content (for tiny files without temp paths).
     */
    private static searchInMemory;
    /**
     * Store a summary for a file reference.
     *
     * NOTE(review): behaviour when `fileId` is not registered is not visible
     * in this declaration — confirm.
     *
     * @param fileId - File reference ID
     * @param summary - Summary text to attach to the reference
     */
    setSummary(fileId: string, summary: string): void;
    /**
     * Remove a file reference and clean up its temp file.
     *
     * @param fileId - File reference ID
     * @returns Promise resolving to a boolean.
     *   NOTE(review): presumably true when a reference was found and removed
     *   and false otherwise — confirm against the implementation.
     */
    remove(fileId: string): Promise<boolean>;
    /**
     * Clear all file references and clean up temp directory.
     *
     * @returns Promise that resolves once cleanup has completed
     */
    clear(): Promise<void>;
    /**
     * Get the number of registered files.
     */
    get size(): number;
    /**
     * Generate the preview text for the initial prompt.
     *
     * Returns a compact summary of all registered files that uses ~50-100 tokens
     * per file instead of full content. The LLM can use file tools to access
     * more content as needed.
     *
     * @returns Promise resolving to the formatted string for prompt injection
     */
    generatePromptPreview(): Promise<string>;
    /**
     * Get type-specific extraction hints for the LLM prompt.
     * Tells the LLM what parameters it can use with extract_file_content.
     *
     * @param type - File type to produce a hint for
     * @param sizeStr - Size string to include in the hint.
     *   NOTE(review): presumably already human-readable (cf. formatSize) — confirm.
     * @returns The hint text, or null.
     *   NOTE(review): presumably null means no hint applies to the type — confirm.
     */
    static getExtractionHint(type: string, sizeStr: string): string | null;
    /**
     * Classify a file into a size tier based on byte size.
     *
     * @param sizeBytes - File size in bytes
     * @returns The size tier for that size; the tier thresholds are defined
     *   by the implementation and are not visible in this declaration
     */
    static classifySizeTier(sizeBytes: number): SizeTier;
    /**
     * Process a binary file on-demand, extracting text content via the
     * appropriate processor. This bridges the gap between the lazy registration
     * path (which stores raw binary) and the LLM read tools (which need text).
     *
     * Called lazily on first readSection() or search() for non-text files.
     * ensureProcessed() is also documented as triggering on-demand processing.
     * Results are cached in ref.processedContent for subsequent reads.
     */
    private processFileOnDemand;
    /**
     * Extract text from a PDF buffer using pdf-parse v2 (pdfjs-dist under the hood).
     *
     * Handles compressed streams (FlateDecode), CMap-encoded text, modern PDFs,
     * and most text-based PDF formats. For scanned/image-only PDFs where no text
     * can be extracted, falls back to a descriptive message.
     */
    private extractPdfText;
    /**
     * Extract text content from an Excel file using ExcelProcessor.
     */
    private extractExcelText;
    /**
     * Extract text content from a Word document using WordProcessor.
     */
    private extractWordText;
    /**
     * Extract text from a PowerPoint file using PptxProcessor.
     */
    private extractPptxText;
    /**
     * Extract metadata and content from a video file using VideoProcessor.
     */
    private extractVideoContent;
    /**
     * Extract metadata and content from an audio file using AudioProcessor.
     */
    private extractAudioContent;
    /**
     * Extract file listing from an archive using ArchiveProcessor.
     */
    private extractArchiveContent;
    /**
     * Extract a preview from a buffer.
     * For text: first N characters.
     * For binary: type-specific metadata.
     */
    private extractPreview;
    /**
     * Detect file type from buffer magic bytes and extension.
     */
    private detectType;
    /**
     * Detect file type from extension alone.
     */
    private detectTypeFromExtension;
    /**
     * Whether a file type contains readable text content.
     * For "unknown" types, optionally checks the buffer for valid UTF-8 text.
     */
    private isTextType;
    /**
     * Heuristic check: does a buffer look like valid text content?
     * Checks the first 512 bytes for mostly printable ASCII/UTF-8 characters.
     * Returns true if >90% of bytes are printable (ASCII 0x20-0x7E, tab, newline, CR).
     */
    private static looksLikeText;
    /**
     * Guess MIME type from file type and extension.
     */
    private guessMimeType;
    /**
     * Guess file extension from magic bytes.
     */
    private guessExtension;
    /**
     * Persist a buffer to the temp directory.
     */
    private persistToTemp;
    /**
     * Evict the least recently used file reference.
     * Per the class-level design notes this runs when maxFiles is exceeded.
     */
    private evictLRU;
    /**
     * Format byte size as human-readable string.
     */
    private formatSize;
}