import assert from 'assert';
import { createHash } from 'crypto';
import { pick } from 'lodash';
import { RetailerKind } from '@/types';
import logger from '@/logger';

export type ValidIdentifier = string;

/**
 * Requires at least one identifier.
 */
export type ValidIdentifiers = [ValidIdentifier, ...ValidIdentifier[]];

/**
 * Character set accepted by {@link isValidIdentifier}: word characters, space
 * and `- , : % + /`. Deliberately has no `g` flag so `.test()` is stateless.
 */
const VALID_IDENTIFIER_PATTERN = /^[\w \-,:%+/]+$/;

/**
 * HACK: Quick, fairly unique, but short id that makes it so we can store many
 * identifiers without running out of storage.
 *
 * @returns the first 10 characters of the base64-encoded SHA-256 of `p`.
 */
const hash = (p: string): string =>
  createHash('sha256').update(p).digest('base64').slice(0, 10);

/**
 * Ensures `url` can be handed to `new URL(...)` by prefixing `https://` when
 * it does not already carry an http(s) scheme.
 *
 * The check requires the `http:` / `https:` scheme (case-insensitive) rather
 * than just an `http` prefix, so scheme-less hosts that merely start with
 * "http" (e.g. `httpbin.org/x`) still get a scheme added.
 */
const makeValidUrl = (url: string): string => {
  const trimmed = url.trim();
  return /^https?:/i.test(trimmed) ? trimmed : `https://${trimmed}`;
};

/**
 * Tells whether one of the dot-separated labels of `hostname` (other than the
 * last one) is exactly `name`, e.g. `www.walmart.com` and `walmart.com` both
 * match `walmart`.
 */
const hostMatchesRetailer = (hostname: string, name: string): boolean =>
  hostname.startsWith(`${name}.`) || hostname.includes(`.${name}.`);

/**
 * Extracts the id following `dp/` in an Amazon path.
 *
 * @param pathname - pathname of the parsed url.
 * @param url - the url as given by the caller; only used in the error message.
 * @throws Error when the path contains no `dp/<id>` segment.
 */
const getAmazonIdentifier = (pathname: string, url: string): string => {
  const id = pathname.match(/dp\/(\w+)/)?.[1];
  if (!id) {
    throw new Error(`Could not get identifier from amazon url: ${url}`);
  }
  return `amazonid:${id.toUpperCase().trim()}`;
};

/**
 * Last-resort product identifier: `<retailer>:<hash>` where the hash covers
 * the pathname plus the query string when there are query params. Logs a
 * warning every time it is used.
 *
 * @param search - the query string to show in the warning.
 * @param searchParams - the params that get hashed; the caller is expected to
 *   have sorted them already so the hash does not depend on parameter order.
 */
const getHashedFallbackIdentifiers = ({
  retailer,
  pathname,
  search,
  searchParams,
}: {
  retailer: string | RetailerKind;
  pathname: string;
  search: string;
  searchParams: URLSearchParams;
}): ValidIdentifiers => {
  const hashedPathname =
    [...searchParams.values()].length > 0
      ? hash(`${pathname}?${searchParams.toString()}`)
      : hash(pathname);
  logger.warn(
    `Could not get product identifiers for ${retailer} product data, so using default (retailer + hash(pathname[?[...queryParams]])), ${retailer}:${hashedPathname} for pathname ${pathname} and query params ${search}.`
  );
  return [`${retailer}:${hashedPathname}`];
};

/**
 * Derives the product-level identifier(s) for a product url.
 *
 * Each known retailer has its own extraction rule; whenever no rule applies
 * (or the rule's pattern does not match the url) the result falls back to
 * `<retailer>:<hash of pathname and sorted query>` and a warning is logged.
 *
 * @param url - product url, with or without scheme.
 * @param retailer - retailer the url belongs to.
 * @returns a non-empty list of identifiers.
 * @throws Error for Amazon urls without a `dp/<id>` segment; `new URL` also
 *   throws when the url cannot be parsed.
 */
export const getProductIdentifiersFromData = ({
  url,
  retailer,
}: {
  url: string;
  retailer: string | RetailerKind;
}): ValidIdentifiers => {
  // `search` is captured before sorting, so the warning shows the query as it
  // was given while the hash is computed over the sorted params.
  const { pathname, searchParams, search, hostname } = new URL(
    makeValidUrl(url)
  );
  searchParams.sort();
  const params = Object.fromEntries(searchParams);

  switch (retailer) {
    case RetailerKind.AMAZON: {
      return [getAmazonIdentifier(pathname, url)];
    }
    case RetailerKind.WORLD_MARKET: {
      const productSlug = pathname
        .match(/^\/product\/([A-Za-z+-]+)/)?.[1]
        ?.toLowerCase();
      if (productSlug) {
        return [`wm:${productSlug}`];
      }
      break;
    }
    case RetailerKind.WAYFAIR:
    case RetailerKind.PERIGOLD:
    case RetailerKind.JOSS_AND_MAIN:
    case RetailerKind.BIRCH_LANE:
    case RetailerKind.ALL_MODERN: {
      /**
       * Use the last id from the url:
       * @example
       * https://www.wayfair.com/furniture/pdp/mercury-row-osteen-40-h-x-36-w-steel-etagere-bookcase-w005504125.html
       * @returns `w005504125`
       */
      const skuPath = pathname
        .match(/([A-Za-z]+\d+).html$/)?.[1]
        ?.toLowerCase();
      // Without a sku in the path, use the hashed fallback instead of emitting
      // an identifier that ends in the literal string "undefined".
      if (skuPath) {
        return [`${retailer.toLowerCase().slice(0, 10)}-sku:${skuPath}`];
      }
      break;
    }
    case RetailerKind.LULU_AND_GEORGIA: {
      const productSlug = pathname
        .match(/^\/products\/([A-Za-z-]+)/)?.[1]
        ?.toLowerCase();
      if (productSlug) {
        return [`lag:${productSlug}`];
      }
      break;
    }
    case RetailerKind.PIER1_IMPORTS: {
      // Only urls carrying a `variant` query param use the path as identifier.
      if (params.variant) {
        const pattern = pathname.match(/\/products\/[^?]+/);
        if (pattern && pattern[0]) {
          return [pattern[0]];
        }
      }
      break;
    }
    case RetailerKind.CRATE_AND_BARREL: {
      // NOTE(review): `pattern[0]` is the whole match, so the identifier
      // includes the leading `/`. Kept as-is; confirm before changing since
      // stored identifiers may depend on it.
      const pattern = pathname.match(/\/([0-9A-Za-z-]+)/);
      if (pattern && pattern[0]) {
        return [`cb:${pattern[0]}`];
      }
      break;
    }
    case RetailerKind.URBAN_OUTFITTERS: {
      // NOTE(review): whole match again, i.e. the identifier keeps `/shop/`.
      const pattern = pathname.match(/\/shop\/([^?]+)$/);
      if (pattern && pattern[0]) {
        return [`uo:${pattern[0]}`];
      }
      break;
    }
    case RetailerKind.WEST_ELM: {
      const productSlug = pathname.match(/^\/products\/([0-9A-Za-z-]+)/)?.[1];
      if (productSlug) {
        return [`westelm:${productSlug}`];
      }
      break;
    }
    case RetailerKind.POTTERY_BARN: {
      const productSlug = pathname.match(/^\/products\/([0-9A-Za-z-]+)/)?.[1];
      if (productSlug) {
        return [`pb:${productSlug}`];
      }
      break;
    }
    default: {
      // Retailers without a dedicated case are recognised by host name; both
      // use the trailing digits of the path as product id.
      if (hostMatchesRetailer(hostname, 'homedepot')) {
        const productId = pathname.match(/(\d+)$/)?.[0];
        if (productId) {
          return [`homedepot:${productId}`];
        }
      }
      if (hostMatchesRetailer(hostname, 'walmart')) {
        const productId = pathname.match(/(\d+)$/)?.[0];
        if (productId) {
          return [`walmart:${productId}`];
        }
      }
      break;
    }
  }

  return getHashedFallbackIdentifiers({
    retailer,
    pathname,
    search,
    searchParams,
  });
};

/**
 * Decodes a percent-encoded value, returning the input unchanged when it is
 * not valid percent-encoding (where `decodeURIComponent` would throw).
 */
const safeDecodeURIComponent = (value: string): string => {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
};

/**
 * Converts search params into a plain object whose keys are
 * `encodeURIComponent`-encoded names and whose values are decoded values.
 * Params with an empty value are skipped; for repeated names the last value
 * wins.
 *
 * Note: `searchParams` is sorted in place.
 */
export const getSearchObjectFromParams = (
  searchParams: URLSearchParams
): Record<string, string> => {
  // sort for stability
  searchParams.sort();
  const result: Record<string, string> = {};
  for (const [key, value] of searchParams.entries()) {
    if (value && value.length > 0) {
      // Values coming out of URLSearchParams are already decoded once, so a
      // literal `%` in a value must not make this second decode throw.
      result[encodeURIComponent(key)] = safeDecodeURIComponent(value);
    }
  }
  return result;
};

/**
 * Serialises a query object as `key:value,key:value` with keys sorted and
 * values `encodeURIComponent`-encoded.
 */
// To make sure the order is the same as the backend
const queryObjToVariantId = (queryObj: Record<string, string>): string => {
  return Object.keys(queryObj)
    .sort()
    .map((key) => key + ':' + encodeURIComponent(queryObj[key]))
    .join(',');
};

/**
 * Derives the variant-level identifier(s) for a product url, mostly from its
 * query params.
 *
 * @param url - product url, with or without scheme.
 * @param retailer - retailer the url belongs to.
 * @returns the identifiers found, or an empty list (after logging a warning)
 *   when the url carries no recognisable variant information.
 * @throws Error for Amazon urls without a `dp/<id>` segment; `new URL` also
 *   throws when the url cannot be parsed.
 */
export const getVariantIdentifiersFromData = ({
  url,
  retailer,
}: {
  url: string;
  retailer: RetailerKind;
}): string[] => {
  const { pathname, searchParams, hostname } = new URL(makeValidUrl(url));
  searchParams.sort();
  const params = Object.fromEntries(searchParams);

  switch (retailer) {
    case RetailerKind.AMAZON: {
      return [getAmazonIdentifier(pathname, url)];
    }
    case RetailerKind.URBAN_OUTFITTERS: {
      const query = getSearchObjectFromParams(searchParams);
      // All three options must be present to pin down a variant.
      if (query.color && query.type && query.size) {
        return [queryObjToVariantId(pick(query, ['color', 'size', 'type']))];
      }
      break;
    }
    case RetailerKind.DOT_AND_BO: {
      const query = getSearchObjectFromParams(searchParams);
      if (query.color || query.primarycolor) {
        return [queryObjToVariantId(pick(query, ['color', 'primarycolor']))];
      }
      break;
    }
    case RetailerKind.WAYFAIR:
    case RetailerKind.PERIGOLD:
    case RetailerKind.JOSS_AND_MAIN:
    case RetailerKind.BIRCH_LANE:
    case RetailerKind.ALL_MODERN: {
      /**
       * The variant is the comma-separated id list of the `piid` query param,
       * with the ids sorted so their order in the url does not matter.
       *
       * Parsed query names are already percent-decoded, so the bracketed form
       * arrives as `PiID[]` (possibly repeated); the still-encoded
       * `PiID%5B%5D` key only shows up for double-encoded urls and is kept
       * for backward compatibility.
       */
      const piid =
        params.piid ||
        params['PiID%5B%5D'] ||
        searchParams.getAll('PiID[]').filter(Boolean).join(',');
      if (piid) {
        return [`piid:${piid.split(',').sort().join(',')}`];
      }
      break;
    }
    case RetailerKind.BURKE_DECOR:
    case RetailerKind.LULU_AND_GEORGIA: {
      if (params.variant) {
        return [`variant:${params.variant}`];
      }
      break;
    }
    case RetailerKind.PIER1_IMPORTS: {
      if (params.variant) {
        return [`variant:${params.variant}`];
      }
      break;
    }
    case RetailerKind.CRATE_AND_BARREL: {
      // NOTE(review): `pattern[0]` is the whole match (`/<first>/<second>`),
      // not just the captured second segment. Kept as-is; confirm intent.
      const pattern = pathname.match(/\/[0-9A-Za-z-]+\/([0-9A-Za-z-]+)/);
      if (pattern && pattern[0]) {
        return [`cb-variant:${pattern[0]}`];
      }
      break;
    }
    case RetailerKind.POTTERY_BARN: {
      if (params.sku) {
        return [`pbsku:${params.sku}`];
      }
      break;
    }
    default:
      if (hostMatchesRetailer(hostname, 'bedbathandbeyond') && params.skuId) {
        return [`bedbathandbeyond-skuId:${params.skuId}`];
      }
      if (hostMatchesRetailer(hostname, 'worldmarket') && params.option) {
        return [`worldmarket-option:${params.option}`];
      }
      if (hostMatchesRetailer(hostname, 'houzz')) {
        // NOTE(review): index 0 keeps the leading `~`, a character that
        // `isValidIdentifier` rejects. Kept as-is; confirm intent.
        const productId = pathname.match(/~(\d+)$/)?.[0];
        if (productId) {
          return [`houzz:${productId}`];
        }
      }
      // Generic query params, tried in order of preference.
      if (params.variant) {
        return [`variant:${params.variant}`];
      }
      if (params.sku) {
        return [`sku:${params.sku}`];
      }
      if (params.skuId) {
        return [`sku:${params.skuId}`];
      }
  }

  logger.warn(`Could not get identifiers for ${retailer} product data.`, {
    params,
    pathname,
  });
  return [];
};

/**
 * Checks that `identifier` is non-empty and only contains word characters,
 * spaces and `- , : % + /`.
 */
export const isValidIdentifier = (
  identifier: string
): identifier is ValidIdentifier => {
  return VALID_IDENTIFIER_PATTERN.test(identifier);
};

/**
 * Checks that there is at least one identifier and that every one of them
 * passes {@link isValidIdentifier}.
 */
export const areValidIdentifiers = (
  identifiers: string[]
): identifiers is ValidIdentifiers => {
  return (
    identifiers.length > 0 && identifiers.every((id) => isValidIdentifier(id))
  );
};

/**
 * Assertion counterpart of {@link isValidIdentifier}.
 *
 * @throws AssertionError when `identifier` is not valid.
 */
export function assertIsValidIdentifier(
  identifier: string
): asserts identifier is ValidIdentifier {
  assert(
    isValidIdentifier(identifier),
    `${identifier} is not a valid identifier`
  );
}

/**
 * Computes both the product and the variant identifiers for a url.
 *
 * @returns `product` from {@link getProductIdentifiersFromData} and `variant`
 *   from {@link getVariantIdentifiersFromData}.
 */
export const getIdentifiersFromData = ({
  url,
  retailer,
}: {
  url: string;
  retailer: RetailerKind;
}): { product: ValidIdentifiers; variant: string[] } => {
  return {
    product: getProductIdentifiersFromData({ url, retailer }),
    variant: getVariantIdentifiersFromData({ url, retailer }),
  };
};

/**
 * Assertion counterpart of {@link areValidIdentifiers}.
 *
 * @throws AssertionError when the list is empty or contains an invalid
 *   identifier.
 */
export function assertAreValidIdentifiers(
  identifiers: string[]
): asserts identifiers is ValidIdentifiers {
  assert(identifiers.length > 0, 'Must provide at least one identifier');
  identifiers.forEach((id) => assertIsValidIdentifier(id));
}