/**
 * ┌─────────────────────────────────────────────────────────────────────────┐
 * │ TEXT REPLACEMENT UTILITIES - Advanced PDF Text Replacement              │
 * ├─────────────────────────────────────────────────────────────────────────┤
 * │ Filename: text-replacement-utils.ts                                     │
 * │ Language: TypeScript                                                    │
 * │ MCP Server: PDF Operations Server                                       │
 * │                                                                         │
 * │ Purpose:                                                                │
 * │   Provides utilities for replacing text in PDF documents using pdf-lib. │
 * │   Handles covering old text with rectangles and drawing new text at     │
 * │   precise positions with matching fonts and styles.                     │
 * │                                                                         │
 * │ Why this file exists:                                                   │
 * │   - Enables precise text replacement in PDFs                            │
 * │   - Handles coordinate system conversion between PDF.js and pdf-lib     │
 * │   - Manages font embedding and matching                                 │
 * │   - Provides foundation for text replacement tool                       │
 * │                                                                         │
 * │ Key Functions:                                                          │
 * │   - replaceTextInPDF: Main function to replace text                     │
 * │   - coverTextWithRectangle: Covers old text with background             │
 * │   - drawReplacementText: Draws new text at position                     │
 * │   - embedFontForReplacement: Embeds and manages fonts                   │
 * │   - convertCoordinates: Converts between coordinate systems             │
 * │                                                                         │
 * │ Dependencies:                                                           │
 * │   - pdf-lib: PDF manipulation library                                   │
 * │   - fs/promises: File system operations                                 │
 * │   - ../types: Type definitions for replacements                         │
 * │                                                                         │
 * │ Algorithm Details:                                                      │
 * │   Text replacement in PDFs requires:                                    │
 * │   1. Loading the PDF document                                           │
 * │   2. For each replacement:                                              │
 * │      a. Draw a rectangle over old text (background color)               │
 * │      b. Draw new text at the same position                              │
 * │   3. Save the modified PDF                                              │
 * │                                                                         │
 * │ Coordinate System Notes:                                                │
 * │   - PDF.js: Y-axis increases downward from top                          │
 * │   - pdf-lib: Y-axis increases upward from bottom                        │
 * │   - Conversion required: y_pdflib = pageHeight - y_pdfjs - textHeight   │
 * │                                                                         │
 * │ Security Considerations:                                                │
 * │   - Validates file paths before operations                              │
 * │   - Validates page numbers within document bounds                       │
 * │   - Sanitizes text content to prevent injection                         │
 * │   - Handles font embedding errors gracefully                            │
 * │                                                                         │
 * │ Author: PDF MCP Team                                                    │
 * │ Created: 2025-10-31                                                     │
 * │ Version: 1.0.0                                                          │
 * └─────────────────────────────────────────────────────────────────────────┘
 */
import type { PDFFont } from 'pdf-lib';
import type {
  TextReplacementSpec,
  TextReplacementConfig,
  TextReplacementResult,
  TextContext,
  ContextAwareReplacementPreview,
} from '../types.js';

/**
 * ═══════════════════════════════════════════════════════════════════════════
 * CONTEXT DETECTION FUNCTIONS - Word Boundary & Context-Aware Replacement
 * ═══════════════════════════════════════════════════════════════════════════
 */

/**
 * Detects the context type of a text occurrence
 *
 * ENHANCED VERSION - Following "Precision Protocol" requirements:
 *
 * This function performs DEEP SEMANTIC ANALYSIS to identify the complete
 * semantic unit containing the search term. This is CRITICAL for preventing
 * data loss during replacements.
 *
 * SEMANTIC UNIT DEFINITION:
 * A semantic unit is the COMPLETE, MEANINGFUL component containing the match.
 * Examples:
 * - "Ramya Lakhani" is ONE semantic unit (a full name)
 * - "lakhani.ramya.u@gmail.com" is ONE semantic unit (an email address)
 * - "The ramya book" contains "ramya" as a separate semantic unit
 *
 * CONTEXT TYPES (Priority Order):
 *
 * 1. EMAIL: Part of an email address
 *    Pattern: [local-part]@[domain].[extension]
 *    Detection: @ symbol with valid email structure
 *    Components: local part BEFORE @, domain AFTER @ (NEVER touch domain!)
 *    Examples:
 *    - "lakhani.ramya.u@gmail.com"
 *    - "ramya@company.com"
 *    Critical: All dots, underscores, and other chars in local part are preserved
 *
 * 2. NAME: Part of a full name (person, place, etc.)
 *    Pattern: Capitalized_Word + Space + Capitalized_Word
 *    Detection: Adjacent capitalized words
 *    Components: Each word separated by space (PRESERVE ALL SPACES)
 *    Examples:
 *    - "Ramya Lakhani" → ["Ramya", " ", "Lakhani"]
 *    - "John Smith" → ["John", " ", "Smith"]
 *    Critical: Spaces between words are considered components
 *
 * 3. IDENTIFIER: Code identifier (variable, function name, etc.)
 *    Pattern: camelCase, snake_case, PascalCase, etc.
 *    Detection: Adjacent alphanumeric/underscore without spaces
 *    Components: The full identifier as one unit
 *    Examples:
 *    - "userId", "user_id", "getUserName"
 *    Critical: Underscores and mixed case preserved
 *
 * 4. PLAIN_TEXT: Standalone word isolated by word boundaries
 *    Pattern: Whitespace or punctuation on both sides
 *    Detection: Word boundaries before and after
 *    Components: The word itself
 *    Examples:
 *    - "The ramya book" → "ramya" is isolated
 *    Critical: MUST NOT match "ram" in "ramayan" (word boundary check)
 *
 * COMPONENT EXTRACTION RULES:
 * - For EMAIL: Extract local part + @ + domain as separate components
 * - For NAME: Extract each word + the spaces between them
 * - For IDENTIFIER: Extract as single component (don't split)
 * - For PLAIN_TEXT: Extract the word only
 *
 * @param fullText - The complete text containing the search term
 * @param searchText - The search term to analyze
 * @param startIndex - Starting index of search term in fullText
 * @param endIndex - Ending index of search term in fullText
 * @param caseSensitive - Whether matching was case-sensitive
 * @returns Context information with COMPLETE semantic unit and ALL components
 */
export declare function detectTextContext(
  fullText: string,
  searchText: string,
  startIndex: number,
  endIndex: number,
  caseSensitive?: boolean,
): TextContext;

/**
 * Checks if a position in text is a word boundary
 *
 * Word boundaries are:
 * - Start or end of string
 * - Whitespace characters
 * - Punctuation marks
 * - Special characters
 *
 * @param text - Text to check
 * @param index - Position to check
 * @returns True if position is a word boundary
 */
export declare function isWordBoundary(text: string, index: number): boolean;

/**
 * Validates a text replacement in its context
 *
 * ENHANCED VERSION - Following "Precision Protocol" requirements:
 *
 * This function determines if a replacement will preserve document structure
 * and data integrity. It performs DEEP VALIDATION to ensure:
 * 1. No data is accidentally deleted
 * 2. Semantic meaning is preserved
 * 3. Format/structure remains valid
 * 4. All non-target components are kept intact
 *
 * VALIDATION RULES BY CONTEXT TYPE:
 *
 * NAME CONTEXT:
 * - MUST preserve all other name components
 * - Example: "Ramya Lakhani" → "Raj Lakhani" ✓ (preserved "Lakhani")
 * - Example: "Ramya Lakhani" → "Raj" ✗ (lost "Lakhani" - UNSAFE!)
 * - Replacement must not be empty/blank
 *
 * EMAIL CONTEXT:
 * - MUST preserve domain (everything after @)
 * - MUST preserve dots, underscores in local part
 * - MUST maintain valid email format
 * - Example: "lakhani.ramya.u@gmail.com" → "lakhani.raj.u@gmail.com" ✓
 * - Example: "ramya@gmail.com" → "@gmail.com" ✗ (empty local part - UNSAFE!)
 *
 * IDENTIFIER CONTEXT:
 * - MUST maintain valid identifier format
 * - Example: "userId" → "userId" ✓
 * - Example: "user_id" → "user id" ✗ (space makes it invalid - UNSAFE!)
 *
 * PLAIN_TEXT CONTEXT:
 * - Always safe (isolated word)
 * - Example: "The ramya book" → "The raj book" ✓
 *
 * @param context - Detected context information with FULL semantic unit
 * @param searchText - Original search term
 * @param replacementText - Text to replace with
 * @param caseSensitive - NOTE(review): not described in the original docs;
 *   presumably whether searchText is matched case-sensitively within the
 *   context — confirm against the implementation
 * @returns Validation result with detailed safety assessment and explanation
 */
export declare function validateContextAwareReplacement(
  context: TextContext,
  searchText: string,
  replacementText: string,
  caseSensitive?: boolean,
): ContextAwareReplacementPreview;

/**
 * ═══════════════════════════════════════════════════════════════════════════
 * END OF CONTEXT DETECTION FUNCTIONS
 * ═══════════════════════════════════════════════════════════════════════════
 */

/**
 * Calculates the partial bounding box for a substring within a text item
 *
 * When replacing text like "Ramya" in "Ramya Lakhani", we need to calculate:
 * - The exact position where "Ramya" starts
 * - The width of only "Ramya" (not the entire text)
 *
 * ENHANCED VERSION with improved accuracy:
 * - Uses character-by-character width measurement for precision
 * - Handles word boundaries correctly to avoid partial word matches
 * - Accounts for kerning and spacing differences between fonts
 *
 * @param fullText - The complete text string (e.g., "Ramya Lakhani")
 * @param searchText - The substring to find (e.g., "Ramya")
 * @param fullBounds - The bounding box of the full text
 * @param font - The font used for text measurement
 * @param fontSize - The font size in points
 * @param caseSensitive - Whether search is case-sensitive
 * @param matchWholeWords - Only match complete words (default: false)
 * @returns Array of partial bounds for each occurrence of searchText, or null if not found
 */
export declare function calculatePartialTextBounds(
  fullText: string,
  searchText: string,
  fullBounds: {
    x: number;
    y: number;
    width: number;
    height: number;
  },
  font: PDFFont,
  fontSize: number,
  caseSensitive?: boolean,
  matchWholeWords?: boolean,
): Array<{
  bounds: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  startIndex: number;
  endIndex: number;
}> | null;

/**
 * Builds the replacement text by replacing a substring within the original text
 *
 * For example:
 * - fullText: "Ramya Lakhani", replacementText: "Raj", startIndex: 0, endIndex: 5
 * - Result: "Raj Lakhani"
 *
 * @param fullText - The complete original text
 * @param replacementText - The text to replace with
 * @param startIndex - Starting index of the substring to replace
 * @param endIndex - Ending index of the substring to replace
 * @returns The full text with the substring replaced
 */
export declare function buildReplacementText(
  fullText: string,
  replacementText: string,
  startIndex: number,
  endIndex: number,
): string;

/**
 * Performs text replacement in a PDF document
 *
 * Main function that orchestrates the text replacement process:
 * 1. Loads the PDF document
 * 2. For each replacement specification:
 *    - Covers old text with a rectangle
 *    - Draws new text at the same position
 * 3. Saves the modified PDF
 *
 * @param filePath - Path to the input PDF file
 * @param config - Text replacement configuration
 * @returns Promise resolving to the text replacement result with details
 * @throws Error if file cannot be processed or replacements fail
 */
export declare function replaceTextInPDF(
  filePath: string,
  config: TextReplacementConfig,
): Promise<TextReplacementResult>;

/**
 * Helper function to create a simple text replacement specification
 *
 * Useful for quick replacements when you already have the text position data
 * from detect-text-position tool.
 *
 * @param pageNumber - Page number (1-indexed)
 * @param originalText - Text to replace
 * @param replacementText - New text
 * @param bounds - Bounding box from detect-text-position
 * @param fontSize - Font size in points
 * @returns Text replacement specification
 */
export declare function createReplacementSpec(
  pageNumber: number,
  originalText: string,
  replacementText: string,
  bounds: {
    x: number;
    y: number;
    width: number;
    height: number;
  },
  fontSize?: number,
): TextReplacementSpec;
//# sourceMappingURL=text-replacement-utils.d.ts.map