import type * as VoyageAI from "../../index.js"; /** * @example * { * inputs: [{}], * model: "model" * } */ export interface MultimodalEmbedRequest { /** * A list of multimodal inputs to be vectorized. * A single input in the list is a dictionary containing a single key "content", whose value represents a sequence of text and images. *
"content" is a list of dictionaries, each representing a single piece of text or image. The dictionaries have four possible keys:
* - type: Specifies the type of the piece of content. Allowed values are text, image_url, or image_base64.
* - text: Only present when type is text. The value should be a text string.
* - image_base64: Only present when type is image_base64. The value should be a Base64-encoded image in the data URL format data:[<mediatype>];base64,<data>. Currently supported mediatypes are: image/png, image/jpeg, image/webp, and image/gif.
* - image_url: Only present when type is image_url. The value should be a URL linking to the image. We support PNG, JPEG, WEBP, and GIF images.
*
* Note: Only one of the keys, image_base64 or image_url, should be present in each dictionary for image data. Consistency is required within a request, meaning each request should use either image_base64 or image_url exclusively for images, not both.
*
* Example payload where inputs contains an image as a URL. The inputs list contains a single input, which consists of a piece of text and an image (which is provided via a URL).
*
* {
* "inputs": [
* {
* "content": [
* {
* "type": "text",
* "text": "This is a banana."
* },
* {
* "type": "image_url",
* "image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"
* }
* ]
* }
* ],
* "model": "voyage-multimodal-3"
* }
*
* Example payload where inputs contains a Base64 image:
* {
* "inputs": [
* {
* "content": [
* {
* "type": "text",
* "text": "This is a banana."
* },
* {
* "type": "image_base64",
* "image_base64": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAA..."
* }
* ]
* }
* ],
* "model": "voyage-multimodal-3"
* }
*
* When input_type is null, the embedding model directly converts your input data into numerical vectors. For retrieval/search purposes—where an input (called a "query") is used to search for relevant pieces of information (referred to as "documents")—we recommend specifying whether your inputs are intended as queries or documents by setting input_type to query or document, respectively. In these cases, Voyage prepends a prompt to your input before vectorizing it, helping the model create more effective vectors tailored for retrieval/search tasks. Since inputs can be multimodal, queries and documents can be text, images, or an interleaving of both modalities. Embeddings generated with and without the input_type argument are compatible.
* - For query, the prompt is "Represent the query for retrieving supporting documents: ".
* - For document, the prompt is "Represent the document for retrieval: ".