{"version":3,"sources":["../../../src/vectorsearch/vector_search/bigquery.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport type { BigQuery, QueryRowsResponse } from '@google-cloud/bigquery';\nimport { z } from 'genkit';\nimport { logger } from 'genkit/logging';\nimport { Document, DocumentDataSchema } from 'genkit/retriever';\nimport type { DocumentIndexer, DocumentRetriever, Neighbor } from './types';\n\n/**\n * Creates a BigQuery Document Retriever.\n *\n * This function returns a DocumentRetriever function that retrieves documents\n * from a BigQuery table based on the provided neighbors.\n *\n * @param {BigQuery} bq - The BigQuery instance.\n * @param {string} tableId - The ID of the BigQuery table.\n * @param {string} datasetId - The ID of the BigQuery dataset.\n * @returns {DocumentRetriever} - The DocumentRetriever function.\n */\nexport const getBigQueryDocumentRetriever = (\n  bq: BigQuery,\n  tableId: string,\n  datasetId: string\n): DocumentRetriever => {\n  const bigQueryRetriever: DocumentRetriever = async (\n    neighbors: Neighbor[]\n  ): Promise<Document[]> => {\n    const ids: string[] = neighbors\n      .map((neighbor) => neighbor.datapoint?.datapointId)\n      .filter(Boolean) as string[];\n\n    const query = `\n      SELECT * FROM \\`${datasetId}.${tableId}\\`\n      WHERE id IN UNNEST(@ids)\n    `;\n\n    const options = {\n      query,\n      params: { ids },\n    };\n\n    let rows: QueryRowsResponse[0];\n\n    try {\n      [rows] = await bq.query(options);\n    } catch (queryError) {\n      logger.error('Failed to execute BigQuery query:', queryError);\n      return [];\n    }\n\n    const documents: Document[] = [];\n\n    for (const row of rows) {\n      try {\n        const docData: { content: any; metadata?: any } = {\n          content: JSON.parse(row.content),\n        };\n\n        if (row.metadata) {\n          docData.metadata = JSON.parse(row.metadata);\n        }\n\n        const parsedDocData = DocumentDataSchema.parse(docData);\n        documents.push(new Document(parsedDocData));\n      } catch (error) {\n        const id = row.id;\n        const errorPrefix = `Failed to parse document data for document with ID ${id}:`;\n\n        if (error instanceof z.ZodError || error instanceof Error) {\n          logger.warn(`${errorPrefix} ${error.message}`);\n        } else {\n          logger.warn(errorPrefix);\n        }\n      }\n    }\n\n    return documents;\n  };\n\n  return bigQueryRetriever;\n};\n\n/**\n * Creates a BigQuery Document Indexer.\n *\n * This function returns a DocumentIndexer function that indexes documents\n * into a BigQuery table. Note this indexer does not handle duplicate\n * documents.\n *\n * @param {BigQuery} bq - The BigQuery instance.\n * @param {string} tableId - The ID of the BigQuery table.\n * @param {string} datasetId - The ID of the BigQuery dataset.\n * @returns {DocumentIndexer} - The DocumentIndexer function.\n */\nexport const getBigQueryDocumentIndexer = (\n  bq: BigQuery,\n  tableId: string,\n  datasetId: string\n): DocumentIndexer => {\n  const bigQueryIndexer: DocumentIndexer = async (\n    docs: Document[]\n  ): Promise<string[]> => {\n    const ids: string[] = [];\n    const rows = docs.map((doc) => {\n      const id = Math.random().toString(36).substring(7);\n      ids.push(id);\n      return {\n        id,\n        content: JSON.stringify(doc.content),\n        metadata: JSON.stringify(doc.metadata),\n      };\n    });\n    await bq.dataset(datasetId).table(tableId).insert(rows);\n    return ids;\n  };\n  return bigQueryIndexer;\n};\n"],"mappings":"AAiBA,SAAS,SAAS;AAClB,SAAS,cAAc;AACvB,SAAS,UAAU,0BAA0B;AActC,MAAM,+BAA+B,CAC1C,IACA,SACA,cACsB;AACtB,QAAM,oBAAuC,OAC3C,cACwB;AACxB,UAAM,MAAgB,UACnB,IAAI,CAAC,aAAa,SAAS,WAAW,WAAW,EACjD,OAAO,OAAO;AAEjB,UAAM,QAAQ;AAAA,wBACM,SAAS,IAAI,OAAO;AAAA;AAAA;AAIxC,UAAM,UAAU;AAAA,MACd;AAAA,MACA,QAAQ,EAAE,IAAI;AAAA,IAChB;AAEA,QAAI;AAEJ,QAAI;AACF,OAAC,IAAI,IAAI,MAAM,GAAG,MAAM,OAAO;AAAA,IACjC,SAAS,YAAY;AACnB,aAAO,MAAM,qCAAqC,UAAU;AAC5D,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,YAAwB,CAAC;AAE/B,eAAW,OAAO,MAAM;AACtB,UAAI;AACF,cAAM,UAA4C;AAAA,UAChD,SAAS,KAAK,MAAM,IAAI,OAAO;AAAA,QACjC;AAEA,YAAI,IAAI,UAAU;AAChB,kBAAQ,WAAW,KAAK,MAAM,IAAI,QAAQ;AAAA,QAC5C;AAEA,cAAM,gBAAgB,mBAAmB,MAAM,OAAO;AACtD,kBAAU,KAAK,IAAI,SAAS,aAAa,CAAC;AAAA,MAC5C,SAAS,OAAO;AACd,cAAM,KAAK,IAAI;AACf,cAAM,cAAc,sDAAsD,EAAE;AAE5E,YAAI,iBAAiB,EAAE,YAAY,iBAAiB,OAAO;AACzD,iBAAO,KAAK,GAAG,WAAW,IAAI,MAAM,OAAO,EAAE;AAAA,QAC/C,OAAO;AACL,iBAAO,KAAK,WAAW;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAcO,MAAM,6BAA6B,CACxC,IACA,SACA,cACoB;AACpB,QAAM,kBAAmC,OACvC,SACsB;AACtB,UAAM,MAAgB,CAAC;AACvB,UAAM,OAAO,KAAK,IAAI,CAAC,QAAQ;AAC7B,YAAM,KAAK,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,UAAU,CAAC;AACjD,UAAI,KAAK,EAAE;AACX,aAAO;AAAA,QACL;AAAA,QACA,SAAS,KAAK,UAAU,IAAI,OAAO;AAAA,QACnC,UAAU,KAAK,UAAU,IAAI,QAAQ;AAAA,MACvC;AAAA,IACF,CAAC;AACD,UAAM,GAAG,QAAQ,SAAS,EAAE,MAAM,OAAO,EAAE,OAAO,IAAI;AACtD,WAAO;AAAA,EACT;AACA,SAAO;AACT;","names":[]}