{"version":3,"file":"parent_document.cjs","names":["MultiVectorRetriever","uuid","Document"],"sources":["../../src/retrievers/parent_document.ts"],"sourcesContent":["import * as uuid from \"@langchain/core/utils/uuid\";\n\nimport {\n  type VectorStoreInterface,\n  type VectorStoreRetrieverInterface,\n} from \"@langchain/core/vectorstores\";\nimport { Document } from \"@langchain/core/documents\";\nimport {\n  TextSplitter,\n  TextSplitterChunkHeaderOptions,\n} from \"@langchain/textsplitters\";\nimport type { BaseDocumentCompressor } from \"./document_compressors/index.js\";\nimport {\n  MultiVectorRetriever,\n  type MultiVectorRetrieverInput,\n} from \"./multi_vector.js\";\n\n// oxlint-disable-next-line @typescript-eslint/no-explicit-any\nexport type SubDocs = Document<Record<string, any>>[];\n\n/**\n * Interface for the fields required to initialize a\n * ParentDocumentRetriever instance.\n */\nexport type ParentDocumentRetrieverFields = MultiVectorRetrieverInput & {\n  childSplitter: TextSplitter;\n  parentSplitter?: TextSplitter;\n  /**\n   * A custom retriever to use when retrieving instead of\n   * the `.similaritySearch` method of the vectorstore.\n   */\n  childDocumentRetriever?: VectorStoreRetrieverInterface<VectorStoreInterface>;\n  documentCompressor?: BaseDocumentCompressor | undefined;\n  documentCompressorFilteringFn?: (docs: SubDocs) => SubDocs;\n};\n\n/**\n * A type of document retriever that splits input documents into smaller chunks\n * while separately storing and preserving the original documents.\n * The small chunks are embedded, then on retrieval, the original\n * \"parent\" documents are retrieved.\n *\n * This strikes a balance between better targeted retrieval with small documents\n * and the more context-rich larger documents.\n * @example\n * ```typescript\n * const retriever = new ParentDocumentRetriever({\n *   vectorstore: new MemoryVectorStore(new OpenAIEmbeddings()),\n *   byteStore: new InMemoryStore<Uint8Array>(),\n *   parentSplitter: new RecursiveCharacterTextSplitter({\n *     chunkOverlap: 0,\n *     chunkSize: 500,\n *   }),\n *   childSplitter: new RecursiveCharacterTextSplitter({\n *     chunkOverlap: 0,\n *     chunkSize: 50,\n *   }),\n *   childK: 20,\n *   parentK: 5,\n * });\n *\n * const parentDocuments = await getDocuments();\n * await retriever.addDocuments(parentDocuments);\n * const retrievedDocs = await retriever.invoke(\"justice breyer\");\n * ```\n */\nexport class ParentDocumentRetriever extends MultiVectorRetriever {\n  static lc_name() {\n    return \"ParentDocumentRetriever\";\n  }\n\n  lc_namespace = [\"langchain\", \"retrievers\", \"parent_document\"];\n\n  vectorstore: VectorStoreInterface;\n\n  protected childSplitter: TextSplitter;\n\n  protected parentSplitter?: TextSplitter;\n\n  protected idKey = \"doc_id\";\n\n  protected childK?: number;\n\n  protected parentK?: number;\n\n  childDocumentRetriever:\n    | VectorStoreRetrieverInterface<VectorStoreInterface>\n    | undefined;\n\n  documentCompressor: BaseDocumentCompressor | undefined;\n\n  documentCompressorFilteringFn?: ParentDocumentRetrieverFields[\"documentCompressorFilteringFn\"];\n\n  constructor(fields: ParentDocumentRetrieverFields) {\n    super(fields);\n    this.vectorstore = fields.vectorstore;\n    this.childSplitter = fields.childSplitter;\n    this.parentSplitter = fields.parentSplitter;\n    this.idKey = fields.idKey ?? this.idKey;\n    this.childK = fields.childK;\n    this.parentK = fields.parentK;\n    this.childDocumentRetriever = fields.childDocumentRetriever;\n    this.documentCompressor = fields.documentCompressor;\n    this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;\n  }\n\n  async _getRelevantDocuments(query: string): Promise<Document[]> {\n    let subDocs: SubDocs = [];\n    if (this.childDocumentRetriever) {\n      subDocs = await this.childDocumentRetriever.invoke(query);\n    } else {\n      subDocs = await this.vectorstore.similaritySearch(query, this.childK);\n    }\n\n    if (this.documentCompressor && subDocs.length) {\n      subDocs = await this.documentCompressor.compressDocuments(subDocs, query);\n      if (this.documentCompressorFilteringFn) {\n        subDocs = this.documentCompressorFilteringFn(subDocs);\n      }\n    }\n\n    // Maintain order\n    const parentDocIds: string[] = [];\n    for (const doc of subDocs) {\n      if (!parentDocIds.includes(doc.metadata[this.idKey])) {\n        parentDocIds.push(doc.metadata[this.idKey]);\n      }\n    }\n    const parentDocs: Document[] = [];\n    const storedParentDocs = await this.docstore.mget(parentDocIds);\n    const retrievedDocs: Document[] = storedParentDocs.filter(\n      (doc?: Document): doc is Document => doc !== undefined\n    );\n    parentDocs.push(...retrievedDocs);\n    return parentDocs.slice(0, this.parentK);\n  }\n\n  async _storeDocuments(\n    parentDoc: Record<string, Document>,\n    childDocs: Document[],\n    addToDocstore: boolean\n  ) {\n    if (this.childDocumentRetriever) {\n      await this.childDocumentRetriever.addDocuments(childDocs);\n    } else {\n      await this.vectorstore.addDocuments(childDocs);\n    }\n    if (addToDocstore) {\n      await this.docstore.mset(Object.entries(parentDoc));\n    }\n  }\n\n  /**\n   * Adds documents to the docstore and vectorstores.\n   * If a retriever is provided, it will be used to add documents instead of the vectorstore.\n   * @param docs The documents to add\n   * @param config.ids Optional list of ids for documents. If provided should be the same\n   *   length as the list of documents. Can provided if parent documents\n   *   are already in the document store and you don't want to re-add\n   *   to the docstore. If not provided, random UUIDs will be used as ids.\n   * @param config.addToDocstore Boolean of whether to add documents to docstore.\n   * This can be false if and only if `ids` are provided. You may want\n   *   to set this to False if the documents are already in the docstore\n   *   and you don't want to re-add them.\n   * @param config.chunkHeaderOptions Object with options for adding Contextual chunk headers\n   */\n  async addDocuments(\n    docs: Document[],\n    config?: {\n      ids?: string[];\n      addToDocstore?: boolean;\n      childDocChunkHeaderOptions?: TextSplitterChunkHeaderOptions;\n    }\n  ): Promise<void> {\n    const {\n      ids,\n      addToDocstore = true,\n      childDocChunkHeaderOptions = {},\n    } = config ?? {};\n    const parentDocs = this.parentSplitter\n      ? await this.parentSplitter.splitDocuments(docs)\n      : docs;\n    let parentDocIds;\n    if (ids === undefined) {\n      if (!addToDocstore) {\n        throw new Error(\n          `If ids are not passed in, \"config.addToDocstore\" MUST be true`\n        );\n      }\n      parentDocIds = parentDocs.map((_doc: Document) => uuid.v4());\n    } else {\n      parentDocIds = ids;\n    }\n    if (parentDocs.length !== parentDocIds.length) {\n      throw new Error(\n        `Got uneven list of documents and ids.\\nIf \"ids\" is provided, should be same length as \"documents\".`\n      );\n    }\n    for (let i = 0; i < parentDocs.length; i += 1) {\n      const parentDoc = parentDocs[i];\n      const parentDocId = parentDocIds[i];\n      const subDocs = await this.childSplitter.splitDocuments(\n        [parentDoc],\n        childDocChunkHeaderOptions\n      );\n      const taggedSubDocs = subDocs.map(\n        (subDoc: Document) =>\n          new Document({\n            pageContent: subDoc.pageContent,\n            metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },\n          })\n      );\n      await this._storeDocuments(\n        { [parentDocId]: parentDoc },\n        taggedSubDocs,\n        addToDocstore\n      );\n    }\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkEA,IAAa,0BAAb,cAA6CA,gCAAAA,qBAAqB;CAChE,OAAO,UAAU;AACf,SAAO;;CAGT,eAAe;EAAC;EAAa;EAAc;EAAkB;CAE7D;CAEA;CAEA;CAEA,QAAkB;CAElB;CAEA;CAEA;CAIA;CAEA;CAEA,YAAY,QAAuC;AACjD,QAAM,OAAO;AACb,OAAK,cAAc,OAAO;AAC1B,OAAK,gBAAgB,OAAO;AAC5B,OAAK,iBAAiB,OAAO;AAC7B,OAAK,QAAQ,OAAO,SAAS,KAAK;AAClC,OAAK,SAAS,OAAO;AACrB,OAAK,UAAU,OAAO;AACtB,OAAK,yBAAyB,OAAO;AACrC,OAAK,qBAAqB,OAAO;AACjC,OAAK,gCAAgC,OAAO;;CAG9C,MAAM,sBAAsB,OAAoC;EAC9D,IAAI,UAAmB,EAAE;AACzB,MAAI,KAAK,uBACP,WAAU,MAAM,KAAK,uBAAuB,OAAO,MAAM;MAEzD,WAAU,MAAM,KAAK,YAAY,iBAAiB,OAAO,KAAK,OAAO;AAGvE,MAAI,KAAK,sBAAsB,QAAQ,QAAQ;AAC7C,aAAU,MAAM,KAAK,mBAAmB,kBAAkB,SAAS,MAAM;AACzE,OAAI,KAAK,8BACP,WAAU,KAAK,8BAA8B,QAAQ;;EAKzD,MAAM,eAAyB,EAAE;AACjC,OAAK,MAAM,OAAO,QAChB,KAAI,CAAC,aAAa,SAAS,IAAI,SAAS,KAAK,OAAO,CAClD,cAAa,KAAK,IAAI,SAAS,KAAK,OAAO;EAG/C,MAAM,aAAyB,EAAE;EAEjC,MAAM,iBADmB,MAAM,KAAK,SAAS,KAAK,aAAa,EACZ,QAChD,QAAoC,QAAQ,KAAA,EAC9C;AACD,aAAW,KAAK,GAAG,cAAc;AACjC,SAAO,WAAW,MAAM,GAAG,KAAK,QAAQ;;CAG1C,MAAM,gBACJ,WACA,WACA,eACA;AACA,MAAI,KAAK,uBACP,OAAM,KAAK,uBAAuB,aAAa,UAAU;MAEzD,OAAM,KAAK,YAAY,aAAa,UAAU;AAEhD,MAAI,cACF,OAAM,KAAK,SAAS,KAAK,OAAO,QAAQ,UAAU,CAAC;;;;;;;;;;;;;;;;CAkBvD,MAAM,aACJ,MACA,QAKe;EACf,MAAM,EACJ,KACA,gBAAgB,MAChB,6BAA6B,EAAE,KAC7B,UAAU,EAAE;EAChB,MAAM,aAAa,KAAK,iBACpB,MAAM,KAAK,eAAe,eAAe,KAAK,GAC9C;EACJ,IAAI;AACJ,MAAI,QAAQ,KAAA,GAAW;AACrB,OAAI,CAAC,cACH,OAAM,IAAI,MACR,gEACD;AAEH,kBAAe,WAAW,KAAK,SAAmBC,2BAAK,IAAI,CAAC;QAE5D,gBAAe;AAEjB,MAAI,WAAW,WAAW,aAAa,OACrC,OAAM,IAAI,MACR,qGACD;AAEH,OAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK,GAAG;GAC7C,MAAM,YAAY,WAAW;GAC7B,MAAM,cAAc,aAAa;GAKjC,MAAM,iBAJU,MAAM,KAAK,cAAc,eACvC,CAAC,UAAU,EACX,2BACD,EAC6B,KAC3B,WACC,IAAIC,0BAAAA,SAAS;IACX,aAAa,OAAO;IACpB,UAAU;KAAE,GAAG,OAAO;MAAW,KAAK,QAAQ;KAAa;IAC5D,CAAC,CACL;AACD,SAAM,KAAK,gBACT,GAAG,cAAc,WAAW,EAC5B,eACA,cACD"}