import { Locator, Page } from "playwright-core";
import { JsonSchema, ObjectSchema } from "./jsonSchema";
/**
 * Extracts structured data from a file (image, PDF, spreadsheet or document).
 *
 * @param file the file you want to extract the data from.
 * @param options.label a label for this extraction process, used for billing and monitoring.
 * @param options.dataSchema the json schema of the data you're trying to extract.
 * @param options.prompt optional, a prompt to guide the extraction process and provide more context.
 * @param options.strategy optional, the strategy to use for extraction. use `IMAGE` if the info you're trying to extract is visual and cannot be converted to markdown. Defaults to `MARKDOWN` strategy with `gpt4-turbo` model.
 * @returns a promise that resolves to the extracted structured data.
 * @example
 * ```typescript extractStructuredDataFromFile
 * import { extractStructuredDataFromFile } from "@intuned/sdk/ai-extractors";
 *
 * const movie = await extractStructuredDataFromFile({
 *   source: {
 *     type: "url",
 *     data: "<file url>"
 *   },
 *   type: "pdf",
 *   // pages array is optional, do not pass it if you want to include all pages in the process
 *   pages: [1, 2]
 * }, {
 *   label: "extract_movie",
 *   dataSchema: {
 *     type: "object",
 *     properties: {
 *       "name": {
 *         type: "string",
 *         description: "movie name"
 *       },
 *       revenue: {
 *         type: "string",
 *         description: "movie revenue"
 *       }
 *     }
 *   }
 * })
 *
 * ```
 */
export declare function extractStructuredDataFromFile(
  file: ImageFile | PdfFile | SpreadsheetFile | DocumentFile,
  options: {
    label: string;
    dataSchema: JsonSchema;
    prompt?: string;
    strategy?: MarkdownFileStrategy | ImageFileStrategy;
  }
): Promise<any>;

/**
 * Represents a file whose contents are supplied as an in-memory buffer.
 *
 * @interface
 * @property type - The type of the file source, which is always "buffer".
 * @property data - The buffer holding the file data.
 */
interface FileBufferSource {
  type: "buffer";
  data: Buffer;
}

/**
 * Represents a file whose contents are referenced by a URL.
 *
 * @interface
 * @property type - The type of the file source, which is always "url".
 * @property data - The URL of the file.
 */
interface FileUrlSource {
  type: "url";
  data: string;
}

/**
 * Represents a file whose contents are supplied as a base64-encoded string.
 *
 * @interface
 * @property type - The type of the file source, which is always "base64".
 * @property data - The base64 string of the file data.
 */
interface FileBase64Source {
  type: "base64";
  data: string;
}

/**
 * Represents an image file source.
 *
 * @interface
 * @property type - The type of the file, which is always "image".
 * @property source - The source of the file data: a buffer, a URL, or a base64 string.
 */
interface ImageFile {
  type: "image";
  source: FileBufferSource | FileUrlSource | FileBase64Source;
}

/**
 * Represents a PDF file source.
 *
 * @interface
 * @property type - The type of the file, which is always "pdf".
 * @property [pages] - Optional. The specific pages of the PDF to extract data from. If not provided, all pages will be included.
 * @property source - The source of the file data: a buffer, a URL, or a base64 string.
 */
export interface PdfFile {
  type: "pdf";
  pages?: number[];
  source: FileBufferSource | FileUrlSource | FileBase64Source;
}

/**
 * Represents a spreadsheet file source. For now, only .xlsx Excel spreadsheets are supported.
 *
 * @interface
 * @property type - The type of the file, which is always "spreadsheet".
 * @property sheetName - The name of the sheet to extract data from.
 * @property source - The source of the file data: a buffer, a URL, or a base64 string.
 */
export interface SpreadsheetFile {
  type: "spreadsheet";
  sheetName: string;
  source: FileBufferSource | FileUrlSource | FileBase64Source;
}

/**
 * Represents a document file source. For now, only .docx Word files are supported.
 *
 * @interface
 * @property type - The type of the file, which is always "document".
 * @property [pages] - Optional. The specific pages of the document to extract data from. If not provided, all pages will be included.
 * @property source - The source of the file data: a buffer, a URL, or a base64 string.
 */
export interface DocumentFile {
  type: "document";
  pages?: number[];
  source: FileBufferSource | FileUrlSource | FileBase64Source;
}

/**
 * Extracts tables from a file (ImageFile, PdfFile, SpreadsheetFile or DocumentFile).
 *
 * @param file - The file you want to extract the tables from.
 * @param options.label - A label for this extraction process, used for billing and monitoring.
 * @returns A promise that resolves to an array of extracted tables.
 *
 * @example
 * ```typescript extractTablesFromFile
 * import { extractTablesFromFile } from "@intuned/sdk/ai-extractors";
 *
 * const tables = await extractTablesFromFile({
 *   source: {
 *     type: "url",
 *     data: "<file url>"
 *   },
 *   type: "pdf",
 *   // pages array is optional, do not pass it if you want to include all pages in the process
 *   pages: [1, 2]
 * }, {
 *   label: "extract_tables"
 * });
 *
 * console.log(tables);
 * ```
 */
export declare function extractTablesFromFile(
  file: ImageFile | PdfFile | SpreadsheetFile | DocumentFile,
  options: {
    label: string;
  }
): Promise<Array<ExtractedTable>>;

/**
 * A single table extracted from a file.
 *
 * @interface
 * @property title - The title of the table if one was found (typed as nullable).
 * @property content - A two-dimensional array containing the table values; each value is a string or `null`.
 */
interface ExtractedTable {
  title: string | null;
  content: Array<Array<string | null>>;
}

/**
 * Converts a file to markdown (ImageFile, PdfFile, SpreadsheetFile or DocumentFile).
 *
 * @param file - The file you want to extract the markdown content from.
 * @param options.label - A label for this extraction process, used for billing and monitoring.
 * @returns A promise that resolves to the extracted markdown content as a string.
 *
 * @example
 * ```typescript extractMarkdownFromFile
 * import { extractMarkdownFromFile } from "@intuned/sdk/ai-extractors";
 *
 * const markdown = await extractMarkdownFromFile({
 *   source: {
 *     type: "url",
 *     data: "<file url>"
 *   },
 *   type: "pdf",
 *   // pages array is optional, do not pass it if you want to include all pages in the process
 *   pages: [1, 2]
 * }, {
 *   label: "extract_markdown"
 * });
 *
 * console.log(markdown);
 * ```
 */
export declare function extractMarkdownFromFile(
  file: ImageFile | PdfFile | SpreadsheetFile | DocumentFile,
  options: {
    label: string;
  }
): Promise<string>;

/**
 * Extracts structured data from a web page.
 *
 * @param page - The Playwright Page from which to extract the structured data.
 * @param options
 * @param options.label - A label for this extraction process, used for billing and monitoring.
 * @param options.dataSchema - The JSON schema of the data you're trying to extract.
 * @param options.prompt - Optional. A prompt to guide the extraction process and provide more context.
 * @param options.strategy - Optional. The strategy to use for extraction, either `IMAGE` or `HTML`. Use the `IMAGE` strategy if the info you're trying to extract is visual and does not exist in the html of the page.
 * @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
 * @returns A promise that resolves to the extracted structured data.
 *
 * @example
 * ```typescript extractStructuredDataFromPage
 * import { extractStructuredDataFromPage } from "@intuned/sdk/ai-extractors";
 *
 * await page.goto('https://example.com');
 *
 * const options = {
 *   label: "extract_page_data",
 *   dataSchema: {
 *     type: "object",
 *     properties: {
 *       title: { type: "string", description: "The title of the page" },
 *       date: { type: "string", description: "The date of the content" }
 *     }
 *   },
 * };
 *
 * const data = await extractStructuredDataFromPage(page, options);
 * console.log(data);
 *
 * ```
 */
export declare function extractStructuredDataFromPage(
  page: Page,
  options: {
    label: string;
    dataSchema: JsonSchema;
    prompt?: string;
    strategy?: ImageStrategy | HtmlStrategy;
    apiKey?: string;
  }
): Promise<any>;

/**
 * Extracts structured data from a specific locator within a web page.
 *
 * @param locator - The Playwright locator from which to extract the structured data.
 * @param options
 * @param options.label - A label for this extraction process, used for billing and monitoring.
 * @param options.dataSchema - The JSON schema of the data you're trying to extract.
 * @param options.prompt - Optional. A prompt to guide the extraction process and provide more context.
 * @param options.strategy - Optional. The strategy to use for extraction, either `IMAGE` or `HTML`. Use the `IMAGE` strategy if the info you're trying to extract is visual and does not exist in the html of the locator.
 * @returns A promise that resolves to the extracted structured data.
 *
 * @example
 * ```typescript extractStructuredDataFromLocator
 * import { extractStructuredDataFromLocator } from "@intuned/sdk/ai-extractors";
 *
 * await page.goto('https://example.com');
 *
 * const options = {
 *   label: "extract_locator_data",
 *   dataSchema: {
 *     type: "object",
 *     properties: {
 *       title: { type: "string", description: "The title of the page" },
 *       date: { type: "string", description: "The date of the content" }
 *     }
 *   },
 * };
 *
 * const data = await extractStructuredDataFromLocator(page.locator(".section"), options);
 * console.log(data);
 *
 * ```
 */

export declare function extractStructuredDataFromLocator(
  locator: Locator,
  options: {
    label: string;
    dataSchema: JsonSchema;
    prompt?: string;
    strategy?: ImageStrategy | HtmlStrategy;
  }
): Promise<any>;

/**
 * This strategy uses a screenshot of the page/locator, with some processing, to extract the needed data.
 * It should be used when the information you're trying to extract is not present in the DOM as text but can be identified visually.
 *
 * @interface
 * @property model - The model to use in the extraction process.
 * @property type - The type of the strategy, which is always "IMAGE".
 */
export interface ImageStrategy {
  model:
    | "claude-3-haiku"
    | "claude-3-haiku-20240307"
    | "claude-3-opus"
    | "claude-3-opus-20240229"
    | "claude-3-sonnet"
    | "claude-3-sonnet-20240229"
    | "claude-3.5-sonnet"
    | "claude-3-5-sonnet-20240620"
    | "claude-3-5-sonnet-20241022"
    | "gpt4-turbo"
    | "gpt-4-turbo-2024-04-09"
    | "gpt-4o"
    | "gpt-4o-2024-05-13"
    | "gpt-4o-mini"
    | "gpt-4o-mini-2024-07-18"
    | "gemini-1.5-pro"
    | "gemini-1.5-pro-002"
    | "gemini-1.5-flash-8b"
    | "gemini-1.5-flash-8b-002"
    | "gemini-1.5-flash"
    | "gemini-1.5-flash-002"
    | "gemini-2.0-flash-exp";
  type: "IMAGE";
}

/**
 * This strategy uses the html of the page/locator to extract the needed data. Some of the attributes are filtered out to reduce context.
 * The only attributes included are: `aria-label` `data-name` `name` `type` `placeholder` `value` `role` `title` `href` `id` `alt`.
 *
 * @interface
 * @property model - The model to use in the extraction process.
 * @property type - The type of the strategy, which is always "HTML".
 */
export interface HtmlStrategy {
  model:
    | "claude-3-haiku"
    | "claude-3-haiku-20240307"
    | "claude-3-5-haiku"
    | "claude-3-5-haiku-20241022"
    | "claude-3-opus"
    | "claude-3-opus-20240229"
    | "claude-3-sonnet"
    | "claude-3-sonnet-20240229"
    | "claude-3.5-sonnet"
    | "claude-3-5-sonnet-20240620"
    | "claude-3-5-sonnet-20241022"
    | "gpt4-turbo"
    | "gpt-4-turbo-2024-04-09"
    | "gpt3.5-turbo"
    | "gpt-3.5-turbo-0125"
    | "gpt-4o"
    | "gpt-4o-2024-05-13"
    | "gpt-4o-mini"
    | "gpt-4o-mini-2024-07-18"
    | "gemini-1.5-pro"
    | "gemini-1.5-pro-002"
    | "gemini-1.5-flash-8b"
    | "gemini-1.5-flash-8b-002"
    | "gemini-1.5-flash"
    | "gemini-1.5-flash-002"
    | "gemini-2.0-flash-exp";
  type: "HTML";
}

/**
 * This strategy extracts markdown content from the file, then runs data extraction on it.
 *
 * @interface
 * @property model - The model to use in the extraction process.
 * @property type - The type of the strategy, which is always "MARKDOWN".
 */
export interface MarkdownFileStrategy {
  model:
    | "claude-3-haiku"
    | "claude-3-haiku-20240307"
    | "claude-3-5-haiku"
    | "claude-3-5-haiku-20241022"
    | "claude-3-opus"
    | "claude-3-opus-20240229"
    | "claude-3-sonnet"
    | "claude-3-sonnet-20240229"
    | "claude-3.5-sonnet"
    | "claude-3-5-sonnet-20240620"
    | "claude-3-5-sonnet-20241022"
    | "gpt4-turbo"
    | "gpt-4-turbo-2024-04-09"
    | "gpt3.5-turbo"
    | "gpt-3.5-turbo-0125"
    | "gpt-4o"
    | "gpt-4o-2024-05-13"
    | "gpt-4o-mini"
    | "gpt-4o-mini-2024-07-18"
    | "gemini-1.5-pro"
    | "gemini-1.5-pro-002"
    | "gemini-1.5-flash-8b"
    | "gemini-1.5-flash-8b-002"
    | "gemini-1.5-flash"
    | "gemini-1.5-flash-002"
    | "gemini-2.0-flash-exp";
  type: "MARKDOWN";
}

/**
 * This strategy uses the image content of the file to extract the needed data.
 * It should be used when the information you're trying to extract cannot be converted to markdown. For example, a checkbox in a pdf file.
 *
 * @interface
 * @property model - The model to use in the extraction process.
 * @property type - The type of the strategy, which is always "IMAGE".
 */
export interface ImageFileStrategy {
  model:
    | "claude-3-haiku"
    | "claude-3-haiku-20240307"
    | "claude-3-opus"
    | "claude-3-opus-20240229"
    | "claude-3-sonnet"
    | "claude-3-sonnet-20240229"
    | "claude-3.5-sonnet"
    | "claude-3-5-sonnet-20240620"
    | "claude-3-5-sonnet-20241022"
    | "gpt4-turbo"
    | "gpt-4-turbo-2024-04-09"
    | "gpt-4o"
    | "gpt-4o-2024-05-13"
    | "gpt-4o-mini"
    | "gpt-4o-mini-2024-07-18"
    | "gemini-1.5-pro"
    | "gemini-1.5-pro-002"
    | "gemini-1.5-flash-8b"
    | "gemini-1.5-flash-8b-002"
    | "gemini-1.5-flash"
    | "gemini-1.5-flash-002"
    | "gemini-2.0-flash-exp";
  type: "IMAGE";
}

/**
 * Extracts structured data from content items (text or images).
 *
 * @param content - A single content item, or an array of content items (text, image buffer, or image URL), from which to extract the structured data.
 * @param options.label - A label for this extraction process, used for billing and monitoring.
 * @param options.dataSchema - The JSON schema of the data you're trying to extract.
 * @param options.prompt - Optional. A prompt to guide the extraction process.
 * @param options.model - The model to use for extraction.
 * @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
 * @returns A promise that resolves to the extracted structured data.
 *
 * @example
 * ```typescript extractStructuredDataFromContent
 * import { extractStructuredDataFromContent } from "@intuned/sdk/ai-extractors";
 *
 * // The arguments are passed inline so that literal values such as
 * // "text", "image-url" and "gpt4-turbo" keep their literal types.
 * const data = await extractStructuredDataFromContent(
 *   [
 *     { type: "text", data: "Sample text data" },
 *     {
 *       type: "image-url",
 *       image_type: "jpeg",
 *       data: "https://example.com/image.jpg"
 *     }
 *   ],
 *   {
 *     label: "extract_contact_info",
 *     dataSchema: {
 *       type: "object",
 *       properties: {
 *         name: { type: "string", description: "contact name" },
 *         phone: { type: "string", description: "contact info" }
 *       }
 *     },
 *     model: "gpt4-turbo"
 *   }
 * );
 * console.log(data);
 * ```
 */
export declare function extractStructuredDataFromContent(
  content:
    | (TextContentItem | ImageBufferContentItem | ImageUrlContentItem)[]
    | TextContentItem
    | ImageBufferContentItem
    | ImageUrlContentItem,
  options: {
    label: string;
    dataSchema: ObjectSchema;
    prompt?: string;
    model:
      | "claude-3-haiku"
      | "claude-3-haiku-20240307"
      | "claude-3-5-haiku"
      | "claude-3-5-haiku-20241022"
      | "claude-3-opus"
      | "claude-3-opus-20240229"
      | "claude-3-sonnet"
      | "claude-3-sonnet-20240229"
      | "claude-3.5-sonnet"
      | "claude-3-5-sonnet-20240620"
      | "claude-3-5-sonnet-20241022"
      | "gpt4-turbo"
      | "gpt-4-turbo-2024-04-09"
      | "gpt3.5-turbo"
      | "gpt-3.5-turbo-0125"
      | "gpt-4o"
      | "gpt-4o-2024-05-13"
      | "gpt-4o-mini"
      | "gpt-4o-mini-2024-07-18"
      | "gemini-1.5-pro"
      | "gemini-1.5-pro-002"
      | "gemini-1.5-flash-8b"
      | "gemini-1.5-flash-8b-002"
      | "gemini-1.5-flash"
      | "gemini-1.5-flash-002"
      | "gemini-2.0-flash-exp";
    apiKey?: string;
  }
): Promise<any>;

/**
 * A text content item accepted by `extractStructuredDataFromContent`.
 *
 * @interface
 * @property type - The type of the content item, which is always "text".
 * @property data - The text data.
 */
export interface TextContentItem {
  type: "text";
  data: string;
}

/**
 * An image content item, supplied as a buffer, accepted by `extractStructuredDataFromContent`.
 *
 * @interface
 * @property type - The type of the content item, which is always "image-buffer".
 * @property image_type - The image format; one of "png", "jpeg", "gif" or "webp".
 * @property data - The buffer containing the image data.
 */
export interface ImageBufferContentItem {
  type: "image-buffer";
  image_type: "png" | "jpeg" | "gif" | "webp";
  data: Buffer;
}

/**
 * An image content item, referenced by URL, accepted by `extractStructuredDataFromContent`.
 *
 * @interface
 * @property type - The type of the content item, which is always "image-url".
 * @property image_type - The image format; one of "png", "jpeg", "gif" or "webp".
 * @property data - The URL of the image.
 */
export interface ImageUrlContentItem {
  type: "image-url";
  image_type: "png" | "jpeg" | "gif" | "webp";
  data: string;
}

/**
 * Extracts markdown content from a web page.
 *
 * @param page - The Playwright Page object from which to extract the markdown content.
 * @returns A promise that resolves to the extracted markdown content.
 *
 * @example
 * ```typescript extractMarkdownFromPage
 * import { extractMarkdownFromPage } from "@intuned/sdk/ai-extractors";
 *
 * await page.goto('https://example.com');
 * const markdown = await extractMarkdownFromPage(page);
 * console.log(markdown);
 *
 * ```
 */
export declare function extractMarkdownFromPage(page: Page): Promise<string>;

/**
 * Extracts markdown content from a specific locator within a web page.
 *
 * @param locator - The Playwright Locator object from which to extract the markdown content.
 * @returns A promise that resolves to the extracted markdown content.
 *
 * @example
 * ```typescript extractMarkdownFromLocator
 * import { extractMarkdownFromLocator } from "@intuned/sdk/ai-extractors";
 *
 * await page.goto('https://example.com');
 * const locator = page.locator('.article');
 * const markdown = await extractMarkdownFromLocator(locator);
 * console.log(markdown);
 *
 * ```
 */
export declare function extractMarkdownFromLocator(
  locator: Locator
): Promise<string>;