import { similarity as ml_distance_similarity } from "ml-distance";
import { VectorStore } from "./base.js";
import { Embeddings } from "../embeddings/base.js";
import { Document } from "../document.js";

/**
 * A single entry held by {@link MemoryVectorStore}: the text of a document,
 * its embedding vector, and the metadata copied from the source document.
 */
interface MemoryVector {
  content: string;
  embedding: number[];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  metadata: Record<string, any>;
}

/**
 * Options accepted by the {@link MemoryVectorStore} constructor and factories.
 */
export interface MemoryVectorStoreArgs {
  /**
   * Function used to score a query vector against a stored vector.
   * Defaults to `ml_distance_similarity.cosine` when omitted.
   */
  similarity?: typeof ml_distance_similarity.cosine;
}

/**
 * Vector store that keeps every vector in a plain in-process array and
 * answers similarity queries by scoring all stored vectors.
 */
export class MemoryVectorStore extends VectorStore {
  /** Filters are predicates evaluated against a reconstructed `Document`. */
  declare FilterType: (doc: Document) => boolean;

  /** All vectors added so far, in insertion order. */
  memoryVectors: MemoryVector[] = [];

  /** Scoring function applied as `similarity(query, storedEmbedding)`. */
  similarity: typeof ml_distance_similarity.cosine;

  _vectorstoreType(): string {
    return "memory";
  }

  /**
   * @param embeddings Embeddings implementation forwarded to the base class.
   * @param args Optional settings; `similarity` is kept on this instance and
   *   any remaining properties are forwarded to the base class constructor.
   */
  constructor(
    embeddings: Embeddings,
    { similarity, ...rest }: MemoryVectorStoreArgs = {}
  ) {
    super(embeddings, rest);
    this.similarity = similarity ?? ml_distance_similarity.cosine;
  }

  /**
   * Embeds the page content of each document and stores the results.
   *
   * @param documents Documents to embed and add to the store.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const texts = documents.map(({ pageContent }) => pageContent);
    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents
    );
  }

  /**
   * Stores precomputed vectors alongside their documents.
   *
   * @param vectors Embeddings; `vectors[i]` is paired with `documents[i]`.
   * @param documents Documents supplying the content and metadata.
   */
  async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
    const memoryVectors = vectors.map((embedding, idx) => ({
      content: documents[idx].pageContent,
      embedding,
      metadata: documents[idx].metadata,
    }));

    this.memoryVectors = this.memoryVectors.concat(memoryVectors);
  }

  /**
   * Scores every stored vector (after optional filtering) against `query`
   * and returns the `k` highest-scoring documents, best first.
   *
   * @param query Query embedding.
   * @param k Maximum number of results to return.
   * @param filter Optional predicate; only vectors whose reconstructed
   *   document passes it are scored.
   * @returns `[document, score]` pairs ordered by descending score.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ): Promise<[Document, number][]> {
    const filterFunction = (memoryVector: MemoryVector) => {
      if (!filter) {
        return true;
      }

      const doc = new Document({
        metadata: memoryVector.metadata,
        pageContent: memoryVector.content,
      });
      return filter(doc);
    };
    const filteredMemoryVectors = this.memoryVectors.filter(filterFunction);

    const searches = filteredMemoryVectors
      .map((vector, index) => ({
        similarity: this.similarity(query, vector.embedding),
        index,
      }))
      // The comparator must return a positive value when `a` ranks after `b`;
      // returning only -1 or 0 is inconsistent and leaves the resulting order
      // up to the engine's sort implementation.
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, k);

    const result: [Document, number][] = searches.map((search) => [
      new Document({
        metadata: filteredMemoryVectors[search.index].metadata,
        pageContent: filteredMemoryVectors[search.index].content,
      }),
      search.similarity,
    ]);

    return result;
  }

  /**
   * Builds a store from raw texts.
   *
   * @param texts Texts to embed; one document is created per entry.
   * @param metadatas Either one metadata object per text (array, matched by
   *   index) or a single object shared by every document.
   * @param embeddings Embeddings implementation used to embed the texts.
   * @param dbConfig Optional store settings.
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: Embeddings,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<MemoryVectorStore> {
    const docs: Document[] = [];
    for (let i = 0; i < texts.length; i += 1) {
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
      const newDoc = new Document({
        pageContent: texts[i],
        metadata,
      });
      docs.push(newDoc);
    }
    return MemoryVectorStore.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Builds a store and adds the given documents to it.
   *
   * @param docs Documents to embed and store.
   * @param embeddings Embeddings implementation used to embed the documents.
   * @param dbConfig Optional store settings.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: Embeddings,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<MemoryVectorStore> {
    const instance = new this(embeddings, dbConfig);
    await instance.addDocuments(docs);
    return instance;
  }

  /**
   * Creates a store without adding any documents; nothing is loaded, so the
   * returned instance starts with an empty `memoryVectors` array.
   *
   * @param embeddings Embeddings implementation for later additions/queries.
   * @param dbConfig Optional store settings.
   */
  static async fromExistingIndex(
    embeddings: Embeddings,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<MemoryVectorStore> {
    const instance = new this(embeddings, dbConfig);
    return instance;
  }
}