import type { BoxItem, Orientation, Rectangle, TextItem, TextUnit } from './ocr-engine';

/**
 * Optional settings accepted by the {@link OcrWorker} constructor.
 *
 * Every field is an optional string; judging by the names they are the
 * locations of the worker script and of the primary / fallback WebAssembly
 * binaries (TODO confirm against the implementation).
 */
export declare type OcrWorkerInit = {
  workerUrl?: string;
  wasmUrl?: string;
  wasmFallbackUrl?: string;
};

/**
 * Asynchronous contract implemented by {@link OcrWorker}.
 *
 * NOTE(review): the `Promise` payload types below were restored from the
 * types imported from `./ocr-engine` and from the method documentation on
 * {@link OcrWorker}; confirm them against the implementation.
 */
export interface IOcrWorker {
  loadModel(model: string | ArrayBuffer): Promise<void>;
  loadImage(image: ImageBitmap | ImageData, rectangle?: Rectangle | undefined): Promise<void>;
  clearImage(): Promise<void>;
  getBoundingBoxes(unit: TextUnit): Promise<BoxItem[]>;
  getTextBoxes(unit: TextUnit): Promise<TextItem[]>;
  getText(): Promise<string>;
  getVariable(name: string): Promise<string>;
  setVariable(name: string, value: string): Promise<void>;
  getOrientation(): Promise<Orientation>;
}

/**
 * High-level async API for performing document image layout analysis and
 * OCR.
 */
export declare class OcrWorker implements IOcrWorker {
  private _id;
  private _worker;
  private _ocrEngine;

  /**
   * Initialize an OCR engine.
   *
   * This will start a Worker in which the OCR operations will actually be
   * performed.
   *
   * @param init - Optional settings; the whole argument may be omitted.
   */
  constructor({ workerUrl, wasmUrl, wasmFallbackUrl, }?: OcrWorkerInit);

  /** Identifier of this worker instance, exposed as a string. */
  get id(): string;

  /**
   * Shut down the web worker backing this instance.
   *
   * As noted on {@link clearImage}, this is the way to release the memory
   * held by the OCR engine.
   */
  destroy(): Promise<void>;

  /**
   * Load a trained model for a specific language. This can be specified either
   * as a URL to fetch or a buffer containing an already-loaded model.
   *
   * @param model - URL of the model, or a buffer holding its contents.
   */
  loadModel(model: string | ArrayBuffer): Promise<void>;

  /**
   * Load an image into the OCR engine for processing.
   *
   * @param image - The image to process.
   * @param rectangle - Optional rectangle; presumably restricts processing to
   *   that region of the image (TODO confirm).
   */
  loadImage(image: ImageBitmap | ImageData, rectangle?: Rectangle): Promise<void>;

  /**
   * Clear the current image and text recognition results.
   *
   * This will clear the loaded image data internally, but keep the text
   * recognition model loaded.
   *
   * At present there is no way to shrink WebAssembly memory, so this will not
   * return the memory used by the image to the OS/browser. To release memory,
   * the web worker needs to be shut down via {@link destroy}.
   */
  clearImage(): Promise<void>;

  /**
   * Perform layout analysis on the current image, if not already done, and
   * return bounding boxes for a given unit of text.
   *
   * This operation is relatively cheap compared to text recognition, so can
   * provide much faster results if only the location of lines/words etc. on
   * the page is required, not the text content.
   *
   * @param unit - The unit of text to return boxes for.
   */
  getBoundingBoxes(unit: TextUnit): Promise<BoxItem[]>;

  /**
   * Perform layout analysis and text recognition on the current image, if
   * not already done, and return bounding boxes and text content for a given
   * unit of text.
   *
   * @param unit - The unit of text to return boxes and text for.
   */
  getTextBoxes(unit: TextUnit): Promise<TextItem[]>;

  /**
   * Perform layout analysis and text recognition on the current image, if
   * not already done, and return the image's text as a string.
   */
  getText(): Promise<string>;

  /**
   * Get the value, represented as a string, of a Tesseract configuration variable.
   *
   * See {@link setVariable} for available variables.
   *
   * @param name - Name of the configuration variable.
   */
  getVariable(name: string): Promise<string>;

  /**
   * Set the value of a Tesseract configuration variable.
   *
   * For a list of configuration variables, see
   * https://github.com/tesseract-ocr/tesseract/blob/677f5822f247ccb12b4e026265e88b959059fb59/src/ccmain/tesseractclass.cpp#L53
   *
   * If you have Tesseract installed locally, executing `tesseract --print-parameters`
   * will also display a list of configuration variables.
   *
   * @param name - Name of the configuration variable.
   * @param value - New value, given as a string.
   */
  setVariable(name: string, value: string): Promise<void>;

  /**
   * Attempt to determine the orientation of the image.
   *
   * This currently uses a simplistic algorithm [1] which is designed for
   * non-uppercase Latin text. It will likely perform badly for other scripts or
   * if the text is all uppercase.
   *
   * [1] See http://www.leptonica.org/papers/skew-measurement.pdf
   */
  getOrientation(): Promise<Orientation>;
}