/**
 * ┌─────────────────────────────────────────────────────────────────────────┐
 * │  TEXT POSITION UTILITIES - Advanced PDF Text Extraction & Positioning   │
 * ├─────────────────────────────────────────────────────────────────────────┤
 * │  Filename: text-position-utils.ts                                       │
 * │  Language: TypeScript                                                   │
 * │  MCP Server: PDF Operations Server                                      │
 * │                                                                         │
 * │  Purpose:                                                               │
 * │    Provides advanced text extraction with precise positioning using     │
 * │    PDF.js. Extracts text items with bounding boxes, font information,   │
 * │    and transformation matrices for accurate watermark placement.        │
 * │                                                                         │
 * │  Why this file exists:                                                  │
 * │    - Enables precise text position detection in PDFs                    │
 * │    - Provides foundation for intelligent watermark placement            │
 * │    - Extracts font and styling information for each text element        │
 * │    - Uses PDF.js for highly accurate text coordinate extraction         │
 * │                                                                         │
 * │  Key Functions:                                                         │
 * │    - extractTextPositions: Get all text items with positions            │
 * │    - calculateTextBounds: Compute precise bounding boxes                │
 * │    - findTextByContent: Search for specific text and get position       │
 * │    - analyzeTextLayout: Get text distribution and density               │
 * │                                                                         │
 * │  Dependencies:                                                          │
 * │    - pdfjs-dist: Mozilla PDF.js for text extraction                     │
 * │    - canvas: Required for PDF.js in Node.js environment                 │
 * │    - ../types: Type definitions for text position results               │
 * │                                                                         │
 * │  Algorithm Details:                                                     │
 * │    Uses PDF.js getTextContent() API which provides:                     │
 * │    - Exact text positioning via transformation matrices                 │
 * │    - Font information (name, size, styling)                             │
 * │    - Character-level precision for bounding box calculation             │
 * │    - Handles rotated, scaled, and transformed text correctly            │
 * │                                                                         │
 * │  Security Considerations:                                               │
 * │    - Validates page numbers before extraction                           │
 * │    - Handles malformed text content gracefully                          │
 * │    - Limits text extraction to prevent memory exhaustion                │
 * │    - Sanitizes search queries to prevent injection                      │
 * │                                                                         │
 * │  Author: PDF MCP Team                                                   │
 * │  Created: 2025-10-30                                                    │
 * │  Version: 1.0.0                                                         │
 * └─────────────────────────────────────────────────────────────────────────┘
 */
import type { TextItem, TextPositionResult, PageDimensions, BoundingBox } from '../types.js';

/**
 * Extracts all text items with their positions from a PDF page
 *
 * NOTE(review): the generic argument of the returned Promise was missing in
 * this declaration file (it read `Promise>`), which does not parse. It has
 * been restored as `TextPositionResult`; the stray `>` hints that the
 * implementation may wrap that type (e.g. `Omit<TextPositionResult, ...>`).
 * Confirm against the implementation or regenerate the declarations.
 *
 * @param pdfPath - Path to the PDF file
 * @param pageNumber - Page number to analyze (1-indexed)
 * @param searchQuery - Optional search query to filter results
 * @returns Complete text position analysis result
 */
export declare function extractTextPositions(
  pdfPath: string,
  pageNumber: number,
  searchQuery?: string
): Promise<TextPositionResult>;

/**
 * Finds text items matching a specific query and returns their positions
 *
 * @param pdfPath - Path to the PDF file
 * @param pageNumber - Page number to search (1-indexed)
 * @param query - Text to search for
 * @param caseSensitive - Whether search should be case-sensitive (optional)
 * @returns Array of matching text items with positions
 */
export declare function findTextByContent(
  pdfPath: string,
  pageNumber: number,
  query: string,
  caseSensitive?: boolean
): Promise<TextItem[]>;

/**
 * Analyzes text distribution across a page
 *
 * @param textItems - Array of text items with positions
 * @param pageDimensions - Page dimensions for context
 * @returns Text layout analysis: the four page margins, a text density
 *   figure, the average font size, and the bounding box of the content
 */
export declare function analyzeTextLayout(
  textItems: TextItem[],
  pageDimensions: PageDimensions
): {
  topMargin: number;
  bottomMargin: number;
  leftMargin: number;
  rightMargin: number;
  textDensity: number;
  averageFontSize: number;
  contentBounds: BoundingBox;
};

/**
 * Finds optimal watermark positions that avoid overlapping with text
 *
 * @param textItems - Array of text items with positions
 * @param pageDimensions - Page dimensions
 * @param watermarkSize - Approximate size of watermark {width, height}
 * @returns Array of suggested positions sorted by suitability; each entry
 *   carries its coordinates, a numeric score, and a textual reason
 */
export declare function findOptimalWatermarkPositions(
  textItems: TextItem[],
  pageDimensions: PageDimensions,
  watermarkSize: {
    width: number;
    height: number;
  }
): Array<{
  x: number;
  y: number;
  score: number;
  reason: string;
}>;
//# sourceMappingURL=text-position-utils.d.ts.map