import { URL } from 'url';

import { config as setupEnv } from 'dotenv-flow';

import connect from '@/config/db';
import { Page } from '@/entities';
import logger from '@/logger';
import {
  makeValidURL,
  queryObjToQueryString,
  transformURL,
} from '@/repositories/PageRepository/utils';

setupEnv();

/** Number of changed pages buffered before they are written in one save call. */
const BATCH_SIZE = 100;

/** Prefix stripped from the raw row keys before a `Page` entity is built. */
const COLUMN_PREFIX = 'Page_';

/**
 * Streams every `Page` row, recomputes `url` and `urlQuery` from
 * `urlOriginal`, and saves the pages whose values changed in batches of
 * `BATCH_SIZE`.
 *
 * The returned promise resolves only after the final batch has been saved and
 * rejects if the stream errors or any save fails.
 */
const main = async () => {
  await connect();

  let unupdatedPages: Page[] = [];
  let updatedPageCount = 0;

  /**
   * Saves the currently buffered pages and logs the running total.
   *
   * The buffer is detached before the save is awaited, so anything pushed
   * while the save is in flight lands in the next batch instead of being
   * dropped when the buffer is reset.
   */
  const updatePages = async () => {
    // uncomment to ensure too many pages don't get updated
    // if (updatedPageCount > 10000) {
    //   stream.pause();
    //   stream.destroy();
    //   throw new Error('too many pages indexed');
    // }
    const batch = unupdatedPages;
    unupdatedPages = [];

    console.log(
      'unupdatedPages:',
      batch.map(({ id }) => id)
    );

    // Nothing to write for an empty batch; the total is still logged below.
    if (batch.length > 0) {
      await Page.save(batch, { reload: false });
    }

    updatedPageCount += batch.length;
    logger.info(`updated ${updatedPageCount} pages`);
  };

  const stream = await Page.createQueryBuilder('Page').stream();

  await new Promise<void>((resolve, reject) => {
    // Without an 'error' listener a stream failure (or `destroy(error)` below)
    // would surface as an uncaught exception instead of rejecting `main`.
    stream.on('error', reject);

    stream.on('data', (pgPage: Record<string, unknown>) => {
      // Raw rows carry `Page_`-prefixed keys; strip the prefix to get entity fields.
      const page = Page.create(
        Object.entries(pgPage).reduce((prev, [key, value]) => {
          return {
            ...prev,
            [key.startsWith(COLUMN_PREFIX)
              ? key.slice(COLUMN_PREFIX.length)
              : key]: value,
          };
        }, {})
      );

      if (!page.urlOriginal) {
        return;
      }

      const { hostname, pathname, search, searchParams } = new URL(
        makeValidURL(transformURL(page.urlOriginal))
      );

      const updatedUrl = `${hostname}${pathname}`;
      const urlQuery = search
        ? queryObjToQueryString(
            Array.from(searchParams.entries()).reduce((prev, [key, value]) => {
              return { ...prev, [key]: value };
            }, {})
          )
        : undefined;

      // Only pages whose derived values differ from the stored ones are saved.
      if (updatedUrl !== page.url || page.urlQuery !== urlQuery) {
        page.url = updatedUrl;
        page.urlQuery = urlQuery;
        unupdatedPages.push(page);
      }

      if (unupdatedPages.length >= BATCH_SIZE) {
        // Pause the stream while the batch is written, then resume reading.
        stream.pause();
        updatePages()
          .then(() => {
            stream.resume();
          })
          .catch((error) => {
            stream.destroy(error);
            reject(error);
          });
      }
    });

    stream.on('end', () => {
      // Flush whatever is left and settle only once that save has finished,
      // so a failure in the final batch rejects `main`.
      updatePages().then(() => resolve(), reject);
    });
  });
};

console.time('main');

main()
  .then(() => {
    console.timeEnd('main');
  })
  .catch((error) => {
    console.error(error);
    process.exit(1);
  });