import { FileReader, Document } from '@llamaindex/core/schema';
import { ParserLanguages, ParsingMode, FailPageMode } from '../../api/dist/index.js';

/** Alias for the parser language identifiers exported by the API package. */
type Language = ParserLanguages;
/** Output formats that can be requested for a parsing job. */
type ResultType = "text" | "markdown" | "json";
/** How the delay between job-status polls evolves (see `getJobResult`). */
type BackoffPattern = "constant" | "linear" | "exponential";
/** Minimal sink with a `write(text)` method; used for the `stdout` option. */
type WriteStream = {
    write: (text: string) => void;
};

/**
 * Represents a reader for parsing files using the LlamaParse API.
 * See https://github.com/run-llama/llama_parse
 *
 * NOTE(review): the generic type arguments in this declaration
 * (`Partial<Omit<...>>`, `Promise<Document[]>`, `Record<string, any>`) were
 * missing from the text this file was recovered from and have been
 * reconstructed from the surrounding signatures and doc comments.
 * Confirm against the build output of the implementation.
 */
declare class LlamaParseReader extends FileReader {
    #private;
    project_id?: string | undefined;
    organization_id?: string | undefined;
    apiKey: string;
    baseUrl: string;
    resultType: ResultType;
    checkInterval: number;
    maxTimeout: number;
    verbose: boolean;
    language: ParserLanguages[];
    // The next three options drive the polling loop documented on `getJobResult`.
    backoffPattern: BackoffPattern;
    maxCheckInterval: number;
    maxErrorCount: number;
    parsingInstruction?: string | undefined;
    skipDiagonalText?: boolean | undefined;
    invalidateCache?: boolean | undefined;
    doNotCache?: boolean | undefined;
    fastMode?: boolean | undefined;
    doNotUnrollColumns?: boolean | undefined;
    pageSeparator?: string | undefined;
    pagePrefix?: string | undefined;
    pageSuffix?: string | undefined;
    gpt4oMode: boolean;
    gpt4oApiKey?: string | undefined;
    boundingBox?: string | undefined;
    targetPages?: string | undefined;
    ignoreErrors: boolean;
    splitByPage: boolean;
    useVendorMultimodalModel: boolean;
    vendorMultimodalModelName?: string | undefined;
    vendorMultimodalApiKey?: string | undefined;
    webhookUrl?: string | undefined;
    premiumMode?: boolean | undefined;
    takeScreenshot?: boolean | undefined;
    disableOcr?: boolean | undefined;
    disableReconstruction?: boolean | undefined;
    inputS3Path?: string | undefined;
    outputS3PathPrefix?: string | undefined;
    continuousMode?: boolean | undefined;
    isFormattingInstruction?: boolean | undefined;
    annotateLinks?: boolean | undefined;
    azureOpenaiDeploymentName?: string | undefined;
    azureOpenaiEndpoint?: string | undefined;
    azureOpenaiApiVersion?: string | undefined;
    azureOpenaiKey?: string | undefined;
    auto_mode?: boolean | undefined;
    auto_mode_trigger_on_image_in_page?: boolean | undefined;
    auto_mode_trigger_on_table_in_page?: boolean | undefined;
    auto_mode_trigger_on_text_in_page?: string | undefined;
    auto_mode_trigger_on_regexp_in_page?: string | undefined;
    bbox_bottom?: number | undefined;
    bbox_left?: number | undefined;
    bbox_right?: number | undefined;
    bbox_top?: number | undefined;
    disable_image_extraction?: boolean | undefined;
    extract_charts?: boolean | undefined;
    guess_xlsx_sheet_name?: boolean | undefined;
    html_make_all_elements_visible?: boolean | undefined;
    html_remove_fixed_elements?: boolean | undefined;
    html_remove_navigation_elements?: boolean | undefined;
    http_proxy?: string | undefined;
    input_url?: string | undefined;
    max_pages?: number | undefined;
    output_pdf_of_document?: boolean | undefined;
    structured_output?: boolean | undefined;
    structured_output_json_schema?: string | undefined;
    structured_output_json_schema_name?: string | undefined;
    extract_layout?: boolean | undefined;
    stdout?: WriteStream | undefined;
    output_tables_as_HTML: boolean;
    input_s3_region?: string | undefined;
    output_s3_region?: string | undefined;
    preserve_layout_alignment_across_pages?: boolean | undefined;
    spreadsheet_extract_sub_tables?: boolean | undefined;
    formatting_instruction?: string | undefined;
    parse_mode?: ParsingMode | undefined;
    system_prompt?: string | undefined;
    system_prompt_append?: string | undefined;
    user_prompt?: string | undefined;
    job_timeout_in_seconds?: number | undefined;
    job_timeout_extra_time_per_page_in_seconds?: number | undefined;
    strict_mode_image_extraction?: boolean | undefined;
    strict_mode_image_ocr?: boolean | undefined;
    strict_mode_reconstruction?: boolean | undefined;
    strict_mode_buggy_font?: boolean | undefined;
    ignore_document_elements_for_layout_detection?: boolean | undefined;
    complemental_formatting_instruction?: string | undefined;
    content_guideline_instruction?: string | undefined;
    adaptive_long_table?: boolean | undefined;
    model?: string | undefined;
    auto_mode_configuration_json?: string | undefined;
    compact_markdown_table?: boolean | undefined;
    markdown_table_multiline_header_separator?: string | undefined;
    page_error_tolerance?: number | undefined;
    replace_failed_page_mode?: FailPageMode | undefined;
    replace_failed_page_with_error_message_prefix?: string | undefined;
    replace_failed_page_with_error_message_suffix?: string | undefined;
    save_images?: boolean | undefined;
    preset?: string | undefined;
    high_res_ocr?: boolean | undefined;
    outlined_table_extraction?: boolean | undefined;
    hide_headers?: boolean | undefined;
    hide_footers?: boolean | undefined;
    page_header_prefix?: string | undefined;
    page_header_suffix?: string | undefined;
    page_footer_prefix?: string | undefined;
    page_footer_suffix?: string | undefined;
    merge_tables_across_pages_in_markdown?: boolean | undefined;
    /**
     * Creates a reader from an optional bag of settings.
     *
     * Every public member may be supplied. `language` additionally accepts a
     * single `ParserLanguages` value as well as an array, and `apiKey` may be
     * omitted here even though the instance property is a required string.
     *
     * @param params - Optional overrides for the reader's public properties.
     */
    constructor(params?: Partial<Omit<LlamaParseReader, "language" | "apiKey">> & {
        language?: ParserLanguages | ParserLanguages[] | undefined;
        apiKey?: string | undefined;
    });
    /**
     * Retrieves the result of a parsing job.
     *
     * Uses a polling loop with retry logic. Each API call is retried
     * up to maxErrorCount times if it fails with a 5XX or socket error.
     * The delay between polls increases according to the specified
     * backoffPattern ("constant", "linear", or "exponential"),
     * capped by maxCheckInterval.
     *
     * @param jobId - The job ID.
     * @param resultType - The type of result to fetch ("text", "json", or "markdown").
     * @returns A Promise resolving to the job result.
     */
    private getJobResult;
    /**
     * Loads data for the given file path.
     *
     * NOTE(review): the resolved type was lost in the recovered text; it is
     * declared as `Document[]` to match `loadDataAsContent` — confirm against
     * the `FileReader` base class.
     *
     * @param filePath - Optional path of the file to load.
     * @returns A Promise that resolves to an array of Document objects.
     */
    loadData(filePath?: string): Promise<Document[]>;
    /**
     * Loads data from a file and returns an array of Document objects.
     * To be used with resultType "text" or "markdown".
     *
     * @param fileContent - The content of the file, as a Uint8Array or a string.
     * @param filename - Optional filename for the file.
     * @returns A Promise that resolves to an array of Document objects.
     */
    loadDataAsContent(fileContent: Uint8Array | string, filename?: string): Promise<Document[]>;
    /**
     * Loads data from a file and returns an array of JSON objects.
     * To be used with resultType "json".
     *
     * NOTE(review): the value type of the returned records is reconstructed
     * as `any` so existing callers that index into the objects keep compiling.
     *
     * @param filePathOrContent - The file path or the file content as a Uint8Array.
     * @returns A Promise that resolves to an array of JSON objects.
     */
    loadJson(filePathOrContent: string | Uint8Array): Promise<Record<string, any>[]>;
    /**
     * Downloads and saves images from a given JSON result to a specified download path.
     * Currently only supports resultType "json".
     *
     * @param jsonResult - The JSON result containing image information.
     * @param downloadPath - The path where the downloaded images will be saved.
     * @returns A Promise that resolves to an array of image objects.
     */
    getImages(jsonResult: Record<string, any>[], downloadPath: string): Promise<Record<string, any>[]>;
    /**
     * Constructs the file path for an image.
     *
     * @param downloadPath - The base download directory.
     * @param jobId - The job ID.
     * @param imageName - The image name.
     * @returns A Promise that resolves to the full image path.
     */
    private getImagePath;
    /**
     * Fetches an image from the API and saves it to the specified path.
     *
     * @param imageName - The name of the image.
     * @param imagePath - The local path to save the image.
     * @param jobId - The associated job ID.
     */
    private fetchAndSaveImage;
    /**
     * Filters out invalid values (null, undefined, empty string) for specific parameters.
     *
     * @param params - The parameters object.
     * @param keysToCheck - The keys to check for valid values.
     * @returns A new object with filtered parameters.
     */
    private filterSpecificParams;
    /**
     * Splits text into Document objects using the page separator.
     *
     * @param text - The text to be split.
     * @returns An array of Document objects.
     */
    private splitTextBySeparator;
}

export { LlamaParseReader };
export type { BackoffPattern, Language, ResultType };