import type * as Extend from "../index";

/**
 * The parsed content of the file. This field will only contain data after the file has been
 * parsed via a parse run, extract run, classify run, split run, edit run, or workflow run.
 *
 * **Availability:** Only present and populated on `GET /files/{id}` when the file has been
 * previously parsed and the corresponding query parameters are set to true. Will be `null` on
 * `POST /files/upload` and for files that haven't been parsed. The structure varies based on
 * file type.
 *
 * @deprecated Use the `POST /parse_runs` endpoint instead to parse and retrieve file contents.
 * The parse runs endpoint provides more control over parsing configuration and better
 * performance.
 */
export interface FileContents {
    /**
     * The raw text content of the file. Available when the `rawText` query parameter is set to
     * true, for all file types except EXCEL.
     *
     * - **PDF/IMG**: Concatenated raw text from all pages
     * - **DOCX**: The document's raw text
     * - **CSV**: Concatenated chunks or CSV text
     * - **EXCEL**: Not included (use `sheets` instead)
     * - **TXT/XML/HTML**: The file's text content
     */
    rawText?: string;

    /**
     * Page-level content for document file types.
     *
     * - **PDF/IMG**: Contains `pageNumber`, `pageHeight`, `pageWidth`, and `markdown`
     *   (if `markdown` query param is true)
     * - **DOCX**: Contains `pageNumber` and `html` (if `html` query param is true)
     * - **Other file types**: Empty array
     */
    pages?: Extend.FileContentsPagesItem[];

    /**
     * Section-level content for documents that support section-based chunking.
     * Available for PDF and IMG file types.
     */
    sections?: Extend.FileContentsSectionsItem[];

    /**
     * Sheet-level content for spreadsheet file types. Available for EXCEL files.
     */
    sheets?: Extend.FileContentsSheetsItem[];
}