#!/usr/bin/env ts-node
import fs from 'fs';
import yargs from 'yargs';
import { config as setupEnv } from 'dotenv-flow';
import axios from 'axios';
import papa, { ParseStepResult } from 'papaparse';
import { isURL } from 'class-validator';
import logger from '@/logger';

setupEnv({ silent: true });

/**
 * One parsed CSV row, keyed by header name. A column that is absent from the
 * file simply yields `undefined` when looked up.
 */
type CsvRow = Record<string, string | undefined>;

/** Label used for the `console.time` / `console.timeEnd` pair. */
const scriptName = 'Ingest data from csv';

/** Buffered records are flushed to the API once the buffer exceeds this size. */
const BATCH_SIZE = 100;

/**
 * CSV columns that may hold a retailer url. The order is significant: it is
 * the order in which urls appear in `RetailerProduct.retailerUrls`.
 */
const RETAILER_URL_COLUMNS: readonly string[] = [
  'url',
  'url1',
  'url2',
  'url3',
  'url4',
  'url5',
  'url6',
  'AllModern Link',
  'AM url',
  'BL url',
  'CB url',
  'JM url',
  'KC url',
  'LG url',
  'O url',
  'PB url',
  'W url',
  'WE url',
  'WM url',
  'UO url',
  'Amazon Link',
  'amazon',
  'AMZN url',
  'Alt Premier Link',
  'Alt url (P)',
  'Alt url (T)',
  'Alt url',
];

/**
 * CSV columns that may hold a google shopping url, in priority order. The
 * first valid one becomes the primary record for the row.
 */
const GOOGLE_SHOPPING_COLUMNS: readonly string[] = [
  'Google Shopping ID',
  'GS ID',
  'GS 2 ID',
  'gs1',
  'gs2',
  'gs3',
];

/**
 * Prefixes `https://` to a url that does not already start with `http`.
 *
 * @param url - Raw cell content.
 * @returns The url unchanged if it starts with `http`, otherwise prefixed.
 */
const makeValidUrl = (url: string): string =>
  /^http/.test(url) ? url : 'https://' + url;

const args = yargs(process.argv.slice(2))
  .options({
    f: {
      type: 'string',
      alias: 'file',
      demand: true,
      description: 'The path of the csv',
    },
    u: {
      type: 'string',
      alias: 'url',
      demand: false,
      description: 'The base url of api endpoint',
    },
  })
  .parse();

/**
 * Collects every retailer url present on a row.
 *
 * @param data - Parsed CSV row.
 * @returns Normalised urls from the known retailer columns, in column order,
 *   keeping only those accepted by `isURL`.
 */
const getRetailerUrls = (data: CsvRow): string[] => {
  const result: string[] = [];
  for (const column of RETAILER_URL_COLUMNS) {
    const value = data?.[column];
    // Empty / missing cells are skipped before normalisation.
    if (value) {
      result.push(makeValidUrl(value));
    }
  }
  return result.filter((url) => typeof url === 'string' && isURL(url));
};

export type RetailerProduct = {
  googleShoppingUrl: string;
  retailerUrls: string[];
  ingestionId?: string;
};

/**
 * POSTs a batch of records to `${baseUrl}/v3/aggregate-products/batch` using
 * basic auth taken from `ROOT_API_KEY_CLIENT_ID` / `ROOT_API_KEY_CLIENT_SECRET`.
 *
 * Request failures are logged and swallowed (best effort), so one failing
 * batch does not abort the rest of the run.
 *
 * @param data - Records to send.
 * @param baseUrl - API base url; defaults to the local dev server.
 */
const ingest = async (
  data: RetailerProduct[],
  baseUrl = 'http://localhost:8000'
): Promise<void> => {
  logger.info(`parsing complete ${data.length} records. Ingesting starting.`);
  try {
    await axios.post(`${baseUrl}/v3/aggregate-products/batch`, data, {
      headers: {
        'Content-Type': 'application/json',
      },
      auth: {
        // Fall back to '' so the credentials always satisfy axios' string type.
        username: process.env.ROOT_API_KEY_CLIENT_ID ?? '',
        password: process.env.ROOT_API_KEY_CLIENT_SECRET ?? '',
      },
    });
    logger.info('Success');
  } catch (error) {
    logger.error(error);
  }
};

/**
 * Streams the CSV given by `-f`, converts each row into one or more
 * `RetailerProduct` records and sends them to the API in batches.
 *
 * @returns A promise that resolves once parsing has completed and the final
 *   batch has been flushed, and rejects if the parser reports an error.
 */
const main = (): Promise<void> => {
  const { f: path, u: url } = args;
  const file = fs.createReadStream(path);
  let data: RetailerProduct[] = [];

  // papa.parse is callback based; wrap it so the caller can actually wait for
  // the whole run instead of returning as soon as parsing has been kicked off.
  return new Promise<void>((resolve, reject) => {
    papa.parse(file, {
      worker: true,
      header: true,
      chunkSize: 100,
      step: async (result: ParseStepResult<CsvRow>, parser) => {
        const row = result.data;

        // Drop empty cells first so they are never turned into 'https://...'.
        const googleShoppingUrls: string[] = GOOGLE_SHOPPING_COLUMNS.map(
          (column) => row[column]
        )
          .filter(
            (value): value is string =>
              typeof value === 'string' && value !== ''
          )
          .map(makeValidUrl)
          .filter((candidate) => isURL(candidate));

        const googleShoppingUrl = googleShoppingUrls[0];
        if (!googleShoppingUrl) {
          // NOTE(review): meta.cursor looks like a character offset rather
          // than a line number — confirm against the Papa Parse docs.
          logger.warn(
            `Could not find a google shopping url on line ${result.meta.cursor}`
          );
          return;
        }

        // `||` (not `??`) on purpose: an empty 'GID' cell falls back to 'gid'.
        const ingestionId = (row['GID'] || row['gid'])?.toLowerCase();

        data.push({
          googleShoppingUrl: googleShoppingUrl,
          retailerUrls: getRetailerUrls(row),
          ingestionId,
        });
        // Additional google shopping urls become records without retailer urls.
        googleShoppingUrls.slice(1).forEach((extraUrl) => {
          data.push({
            googleShoppingUrl: extraUrl,
            ingestionId,
            retailerUrls: [],
          });
        });

        if (data.length > BATCH_SIZE) {
          parser.pause();
          // Detach the batch before awaiting so the shared buffer is never
          // cleared after new rows could have been appended to it.
          const batch = data;
          data = [];
          try {
            await ingest(batch, url);
          } finally {
            parser.resume();
          }
        }
      },
      complete: async () => {
        try {
          // Skip the request entirely when nothing is left to send.
          if (data.length > 0) {
            await ingest(data, url);
          }
          resolve();
        } catch (error) {
          reject(error);
        }
      },
      error: (error) => {
        logger.error(error);
        reject(error);
      },
    });
  });
};

console.time(scriptName);
main()
  .then(() => {
    console.timeEnd(scriptName);
  })
  .catch((error: unknown) => {
    // Surface the failure instead of exiting silently.
    logger.error(error);
    process.exit(1);
  });