{"version":3,"file":"cache_backed.cjs","names":["Embeddings","EncoderBackedStore"],"sources":["../../src/embeddings/cache_backed.ts"],"sourcesContent":["import { sha256 } from \"@langchain/core/utils/hash\";\nimport {\n  type EmbeddingsInterface,\n  Embeddings,\n} from \"@langchain/core/embeddings\";\nimport { BaseStore } from \"@langchain/core/stores\";\n\nimport { AsyncCallerParams } from \"@langchain/core/utils/async_caller\";\nimport { EncoderBackedStore } from \"../storage/encoder_backed.js\";\n\n/**\n * Interface for the fields required to initialize an instance of the\n * CacheBackedEmbeddings class.\n */\nexport interface CacheBackedEmbeddingsFields extends AsyncCallerParams {\n  underlyingEmbeddings: EmbeddingsInterface;\n  documentEmbeddingStore: BaseStore<string, number[]>;\n}\n\n/**\n * Interface for caching results from embedding models.\n *\n * The interface allows works with any store that implements\n * the abstract store interface accepting keys of type str and values of list of\n * floats.\n *\n * If need be, the interface can be extended to accept other implementations\n * of the value serializer and deserializer, as well as the key encoder.\n * @example\n * ```typescript\n * const underlyingEmbeddings = new OpenAIEmbeddings();\n *\n * const cacheBackedEmbeddings = CacheBackedEmbeddings.fromBytesStore(\n *   underlyingEmbeddings,\n *   new ConvexKVStore({ ctx }),\n *   {\n *     namespace: underlyingEmbeddings.modelName,\n *   },\n * );\n *\n * const loader = new TextLoader(\"./state_of_the_union.txt\");\n * const rawDocuments = await loader.load();\n * const splitter = new RecursiveCharacterTextSplitter({\n *   chunkSize: 1000,\n *   chunkOverlap: 0,\n * });\n * const documents = await splitter.splitDocuments(rawDocuments);\n *\n * let time = Date.now();\n * const vectorstore = await ConvexVectorStore.fromDocuments(\n *   documents,\n *   cacheBackedEmbeddings,\n *   { ctx },\n * );\n * console.log(`Initial creation time: ${Date.now() - time}ms`);\n *\n * time = Date.now();\n * const vectorstore2 = await ConvexVectorStore.fromDocuments(\n *   documents,\n *   cacheBackedEmbeddings,\n *   { ctx },\n * );\n * console.log(`Cached creation time: ${Date.now() - time}ms`);\n *\n * ```\n */\nexport class CacheBackedEmbeddings extends Embeddings {\n  protected underlyingEmbeddings: EmbeddingsInterface;\n\n  protected documentEmbeddingStore: BaseStore<string, number[]>;\n\n  constructor(fields: CacheBackedEmbeddingsFields) {\n    super(fields);\n    this.underlyingEmbeddings = fields.underlyingEmbeddings;\n    this.documentEmbeddingStore = fields.documentEmbeddingStore;\n  }\n\n  /**\n   * Embed query text.\n   *\n   * This method does not support caching at the moment.\n   *\n   * Support for caching queries is easy to implement, but might make\n   * sense to hold off to see the most common patterns.\n   *\n   * If the cache has an eviction policy, we may need to be a bit more careful\n   * about sharing the cache between documents and queries. Generally,\n   * one is OK evicting query caches, but document caches should be kept.\n   *\n   * @param document The text to embed.\n   * @returns The embedding for the given text.\n   */\n  async embedQuery(document: string): Promise<number[]> {\n    return this.underlyingEmbeddings.embedQuery(document);\n  }\n\n  /**\n   * Embed a list of texts.\n   *\n   * The method first checks the cache for the embeddings.\n   * If the embeddings are not found, the method uses the underlying embedder\n   * to embed the documents and stores the results in the cache.\n   *\n   * @param documents\n   * @returns A list of embeddings for the given texts.\n   */\n  async embedDocuments(documents: string[]): Promise<number[][]> {\n    const vectors = await this.documentEmbeddingStore.mget(documents);\n    const missingIndicies = [];\n    const missingDocuments = [];\n    for (let i = 0; i < vectors.length; i += 1) {\n      if (vectors[i] === undefined) {\n        missingIndicies.push(i);\n        missingDocuments.push(documents[i]);\n      }\n    }\n    if (missingDocuments.length) {\n      const missingVectors =\n        await this.underlyingEmbeddings.embedDocuments(missingDocuments);\n      const keyValuePairs: [string, number[]][] = missingDocuments.map(\n        (document, i) => [document, missingVectors[i]]\n      );\n      await this.documentEmbeddingStore.mset(keyValuePairs);\n      for (let i = 0; i < missingIndicies.length; i += 1) {\n        vectors[missingIndicies[i]] = missingVectors[i];\n      }\n    }\n    return vectors as number[][];\n  }\n\n  /**\n   * Create a new CacheBackedEmbeddings instance from another embeddings instance\n   * and a storage instance.\n   * @param underlyingEmbeddings Embeddings used to populate the cache for new documents.\n   * @param documentEmbeddingStore Stores raw document embedding values. Keys are hashes of the document content.\n   * @param options.namespace Optional namespace for store keys.\n   * @returns A new CacheBackedEmbeddings instance.\n   */\n  static fromBytesStore(\n    underlyingEmbeddings: EmbeddingsInterface,\n    documentEmbeddingStore: BaseStore<string, Uint8Array>,\n    options?: {\n      namespace?: string;\n    }\n  ) {\n    const encoder = new TextEncoder();\n    const decoder = new TextDecoder();\n    const encoderBackedStore = new EncoderBackedStore<\n      string,\n      number[],\n      Uint8Array\n    >({\n      store: documentEmbeddingStore,\n      keyEncoder: (key) => (options?.namespace ?? \"\") + sha256(key),\n      valueSerializer: (value) => encoder.encode(JSON.stringify(value)),\n      valueDeserializer: (serializedValue) =>\n        JSON.parse(decoder.decode(serializedValue)),\n    });\n    return new this({\n      underlyingEmbeddings,\n      documentEmbeddingStore: encoderBackedStore,\n    });\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkEA,IAAa,wBAAb,cAA2CA,2BAAAA,WAAW;CACpD;CAEA;CAEA,YAAY,QAAqC;AAC/C,QAAM,OAAO;AACb,OAAK,uBAAuB,OAAO;AACnC,OAAK,yBAAyB,OAAO;;;;;;;;;;;;;;;;;CAkBvC,MAAM,WAAW,UAAqC;AACpD,SAAO,KAAK,qBAAqB,WAAW,SAAS;;;;;;;;;;;;CAavD,MAAM,eAAe,WAA0C;EAC7D,MAAM,UAAU,MAAM,KAAK,uBAAuB,KAAK,UAAU;EACjE,MAAM,kBAAkB,EAAE;EAC1B,MAAM,mBAAmB,EAAE;AAC3B,OAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK,EACvC,KAAI,QAAQ,OAAO,KAAA,GAAW;AAC5B,mBAAgB,KAAK,EAAE;AACvB,oBAAiB,KAAK,UAAU,GAAG;;AAGvC,MAAI,iBAAiB,QAAQ;GAC3B,MAAM,iBACJ,MAAM,KAAK,qBAAqB,eAAe,iBAAiB;GAClE,MAAM,gBAAsC,iBAAiB,KAC1D,UAAU,MAAM,CAAC,UAAU,eAAe,GAAG,CAC/C;AACD,SAAM,KAAK,uBAAuB,KAAK,cAAc;AACrD,QAAK,IAAI,IAAI,GAAG,IAAI,gBAAgB,QAAQ,KAAK,EAC/C,SAAQ,gBAAgB,MAAM,eAAe;;AAGjD,SAAO;;;;;;;;;;CAWT,OAAO,eACL,sBACA,wBACA,SAGA;EACA,MAAM,UAAU,IAAI,aAAa;EACjC,MAAM,UAAU,IAAI,aAAa;EACjC,MAAM,qBAAqB,IAAIC,+BAAAA,mBAI7B;GACA,OAAO;GACP,aAAa,SAAS,SAAS,aAAa,OAAA,GAAA,2BAAA,QAAa,IAAI;GAC7D,kBAAkB,UAAU,QAAQ,OAAO,KAAK,UAAU,MAAM,CAAC;GACjE,oBAAoB,oBAClB,KAAK,MAAM,QAAQ,OAAO,gBAAgB,CAAC;GAC9C,CAAC;AACF,SAAO,IAAI,KAAK;GACd;GACA,wBAAwB;GACzB,CAAC"}