/**
 * OCRAgent - Optical Character Recognition agent
 *
 * Python parity with praisonaiagents/agent/ocr_agent.py
 * Extracts text from documents and images using AI models.
 */

/**
 * Configuration for OCR settings.
 * Python parity with OCRConfig dataclass.
 */
export interface OCRConfig {
  /** Include base64 images in response */
  includeImageBase64?: boolean;
  /** Specific pages to extract (for PDFs) */
  pages?: number[];
  /** Maximum images per page */
  imageLimit?: number;
  /** Timeout in seconds */
  timeout?: number;
  /** Custom API endpoint URL */
  apiBase?: string;
  /** API key for the provider */
  apiKey?: string;
}

/**
 * Page result from OCR extraction.
 */
export interface OCRPage {
  /** Page index (0-based) */
  index: number;
  /** Extracted text as markdown */
  markdown: string;
  /** Base64 images if requested */
  images?: string[];
}

/**
 * Result of OCR extraction.
 */
export interface OCRResult {
  /** Combined extracted text */
  text: string;
  /** Per-page results */
  pages: OCRPage[];
  /**
   * Additional metadata.
   *
   * Values are typed as `unknown`; narrow them before use.
   */
  metadata?: Record<string, unknown>;
}

/**
 * Configuration for creating an OCRAgent.
 */
export interface OCRAgentConfig {
  /** Agent name */
  name?: string;
  /** Optional instructions */
  instructions?: string;
  /** LLM model (e.g., "mistral/mistral-ocr-latest") */
  llm?: string;
  /** Alias for llm parameter */
  model?: string;
  /** Custom API endpoint URL */
  baseUrl?: string;
  /** API key for the provider */
  apiKey?: string;
  /** OCR configuration */
  ocr?: boolean | OCRConfig;
  /** Verbosity level for output */
  verbose?: boolean | number;
}

/**
 * A specialized agent for OCR (Optical Character Recognition).
 *
 * Extracts text from documents (PDFs) and images using AI models.
 *
 * Supported Providers:
 * - Mistral: `mistral/mistral-ocr-latest`
 *
 * @example
 * ```typescript
 * import { OCRAgent } from 'praisonai';
 *
 * const agent = new OCRAgent({ llm: 'mistral/mistral-ocr-latest' });
 *
 * // Extract from PDF URL
 * const result = await agent.extract('https://example.com/document.pdf');
 * console.log(result.text);
 *
 * // Extract from image URL
 * const result2 = await agent.extract('https://example.com/image.png');
 * for (const page of result2.pages) {
 *   console.log(page.markdown);
 * }
 * ```
 */
export declare class OCRAgent {
  static readonly DEFAULT_MODEL = "mistral/mistral-ocr-latest";
  readonly name: string;
  private readonly instructions?;
  private readonly llm;
  private readonly baseUrl?;
  private readonly apiKey?;
  private readonly ocrConfig;
  private readonly verbose;
  constructor(config: OCRAgentConfig);
  private resolveOCRConfig;
  private buildDocument;
  private log;
  /**
   * Extract text from a document or image.
   *
   * @param source - URL or path to document/image
   * @param options - Override options for this extraction
   * @returns OCRResult with pages, markdown content, and metadata
   */
  extract(
    source: string,
    options?: {
      includeImageBase64?: boolean;
      pages?: number[];
      imageLimit?: number;
      model?: string;
    },
  ): Promise<OCRResult>;
  /**
   * Async version of extract() - same implementation since extract is already async.
   *
   * @param source - URL or path to document/image
   * @param options - Override options for this extraction
   * @returns OCRResult with pages, markdown content, and metadata
   */
  aextract(
    source: string,
    options?: {
      includeImageBase64?: boolean;
      pages?: number[];
      imageLimit?: number;
      model?: string;
    },
  ): Promise<OCRResult>;
  /**
   * Quick OCR - extract and return markdown text.
   *
   * @param source - URL or path to document/image
   * @returns Extracted text as markdown string
   */
  read(source: string): Promise<string>;
  /**
   * Async version of read().
   *
   * @param source - URL or path to document/image
   * @returns Extracted text as markdown string
   */
  aread(source: string): Promise<string>;
}

/**
 * Create an OCRAgent instance.
 *
 * @param config - OCRAgent configuration
 * @returns OCRAgent instance
 */
export declare function createOCRAgent(config: OCRAgentConfig): OCRAgent;