import { TextLoader } from "./text.js";

/**
 * Options accepted by {@link CSVLoader}.
 */
type CSVLoaderOptions = {
  /**
   * Name of the column whose value is used as each document's pageContent.
   * When omitted, every column of the row is rendered as `key: value` lines.
   */
  column?: string;
  /** Field delimiter. `parse` falls back to "," when this is not set. */
  separator?: string;
};

/**
 * Loads a CSV file into a list of documents.
 * Each document represents one row of the CSV file.
 *
 * When `column` is not specified, each row is converted into a key/value pair
 * with each key/value pair outputted to a new line in the document's
 * pageContent.
 *
 * When `column` is specified, one document is created for each row, and the
 * value of the specified column is used as the document's pageContent.
 *
 * The second constructor argument may be either an options object or, as a
 * shorthand, a plain string that is treated as the `column` name.
 *
 * @example
 * // CSV file:
 * // id,html
 * // 1,Corruption discovered at the core of the Banking Clan!
 * // 2,Corruption discovered at the core of the Banking Clan!
 *
 * const loader = new CSVLoader("path/to/file.csv");
 * const docs = await loader.load();
 *
 * // docs[0].pageContent:
 * // id: 1
 * // html: Corruption discovered at the core of the Banking Clan!
 *
 * @example
 * // CSV file:
 * // id,html
 * // 1,Corruption discovered at the core of the Banking Clan!
 * // 2,Corruption discovered at the core of the Banking Clan!
 *
 * const loader = new CSVLoader("path/to/file.csv", "html");
 * const docs = await loader.load();
 *
 * // docs[0].pageContent:
 * // Corruption discovered at the core of the Banking Clan!
 */
export class CSVLoader extends TextLoader {
  protected options: CSVLoaderOptions = {};

  /**
   * @param filePathOrBlob - Path of the CSV file, or a Blob holding its
   *   contents; forwarded unchanged to the TextLoader constructor.
   * @param options - Either a {@link CSVLoaderOptions} object, or a string
   *   that is interpreted as the `column` option.
   */
  constructor(
    filePathOrBlob: string | Blob,
    options?: CSVLoaderOptions | string
  ) {
    super(filePathOrBlob);
    if (typeof options === "string") {
      // String shorthand: the value is the column name.
      this.options = { column: options };
    } else {
      // Keep the default (empty) options when none were supplied.
      this.options = options ?? this.options;
    }
  }

  /**
   * Converts raw CSV text into one string per data row.
   *
   * @param raw - Full text of the CSV file. Leading and trailing whitespace
   *   is trimmed before parsing.
   * @returns One string per row: the value of `column` when that option is
   *   set, otherwise the row's `key: value` pairs joined with newlines.
   * @throws Error if `column` is set but is not one of the parsed columns,
   *   or if the `d3-dsv` package cannot be imported.
   */
  protected async parse(raw: string): Promise<string[]> {
    const { column, separator = "," } = this.options;

    const { dsvFormat } = await CSVLoaderImports();
    const psv = dsvFormat(separator);
    const parsed = psv.parse(raw.trim());

    if (column !== undefined) {
      if (!parsed.columns.includes(column)) {
        throw new Error(`Column ${column} not found in CSV file.`);
      }

      // Note TextLoader will raise an exception if the value is null.
      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
      return parsed.map((row) => row[column]!);
    }

    // No column selected: render every field of the row, one per line, with
    // surrounding whitespace removed from both the key and the value.
    return parsed.map((row) =>
      Object.keys(row)
        .map((key) => `${key.trim()}: ${row[key]?.trim()}`)
        .join("\n")
    );
  }
}

/**
 * Dynamically imports `d3-dsv` so the dependency is only needed when a CSV
 * file is actually parsed.
 *
 * @returns An object exposing the `dsvFormat` factory from `d3-dsv`.
 * @throws Error with an installation hint when the import fails; the
 *   underlying error is logged to the console first.
 */
async function CSVLoaderImports() {
  try {
    const { dsvFormat } = await import("d3-dsv");
    return { dsvFormat };
  } catch (e) {
    console.error(e);
    throw new Error(
      "Please install d3-dsv as a dependency with, e.g. `yarn add d3-dsv@2`"
    );
  }
}