import { Logger } from 'besonders-logger' import { isBefore } from 'date-fns' import { partial, pick } from 'lodash-es' import { isEqual } from 'lodash-es' import stringify from 'safe-stable-stringify' import type { Applog, ApplogForInsert, ApplogValue, DatalogQueryPattern, DatalogQueryResultEntry, ResultContext, SearchContext, ValueOrMatcher, } from './datom-types.ts' const { WARN, LOG, DEBUG, VERBOSE, ERROR } = Logger.setup(Logger.INFO) // eslint-disable-line no-unused-vars export const isoDateStrCompare = (strA: string, strB: string, dir: 'asc' | 'desc' = 'asc') => dir === 'asc' ? strA.localeCompare(strB, 'en-US') : strB.localeCompare(strA, 'en-US') export const objEqualByKeys = (keys: string[], objA: object, objB: object) => { return isEqual(pick(objA, keys), pick(objB, keys)) } /** * Deep equality for an `ApplogValue`. `vl` can now hold JSON objects/arrays, so a bare * `===` would treat structurally-identical objects as different. `isEqual` short-circuits * on referential identity internally, so primitives stay on the cheap path. */ export const valueEq = (a: ApplogValue, b: ApplogValue): boolean => isEqual(a, b) /** Canonical, hashable Map key for an `ApplogValue` (see {@link valueKey}). */ export type ValueKey = string | number | boolean | null /** * Canonical key for using an `ApplogValue` as a Map key. Primitives pass through unchanged * (so primitive-keyed lookups are identity-stable); objects/arrays are stably stringified * (key-sorted) behind a `\0obj:` sentinel so an object value can't collide with a string * value that happens to equal its serialization. */ export const valueKey = (vl: ApplogValue): ValueKey => { switch (typeof vl) { case 'string': case 'number': case 'boolean': return vl default: // null | object | array return vl === null ? null : '\0obj:' + stringify(vl) } } /** Transitive total-order comparator over (ts, cid). Safe for `Array.sort`. */ export const compareApplogsByTs = (logA: Applog, logB: Applog, dir: 'asc' | 'desc' = 'asc') => { const tsCmp = isoDateStrCompare(logA.ts, logB.ts, dir) if (tsCmp !== 0) return tsCmp return dir === 'asc' ? logA.cid.localeCompare(logB.cid) : logB.cid.localeCompare(logA.cid) } export const compareApplogsByEnAt = partial(objEqualByKeys, ['en', 'at']) /** * Pairwise: is `a` strictly later than `b` per (ts, pv-chain, cid)? * * NOT TRANSITIVE. Use only for two-log decisions (e.g. lastWriteWins replacement * check). Do NOT pass to `Array.sort` — it can produce cycles at 3+ chain links * and break the engine's sort. */ export function isLaterByTsAndPv(a: Applog, b: Applog): boolean { const tsCmp = isoDateStrCompare(a.ts, b.ts) if (tsCmp !== 0) return tsCmp > 0 if (a.en === b.en && a.at === b.at) { if (a.pv === b.cid) return true if (b.pv === a.cid) return false } return a.cid.localeCompare(b.cid) > 0 } /** * Sort applogs in chain-aware order (modifies array, also returns for chaining). * * Two-phase: * 1. Transitive `(ts, cid)` `Array.sort` — fast, total-order, common case. * 2. For each contiguous same-ts cluster (rare), chain-stabilize via `pv` * so chain-tail logs land last (asc) / first (desc) within their (en, at) * sub-group. Cross-group order within a cluster stays cid-lex. * * No-tie inputs pay only one linear scan past `Array.sort`. */ export function sortApplogsByTs(appLogArray: Applog[], dir: 'asc' | 'desc' = 'asc') { appLogArray.sort((a, b) => compareApplogsByTs(a, b, dir)) let i = 0 while (i < appLogArray.length) { let j = i + 1 while (j < appLogArray.length && appLogArray[j].ts === appLogArray[i].ts) j++ if (j - i > 1) chainStabilizeCluster(appLogArray, i, j, dir) i = j } return appLogArray } /** * Re-order applogs[from..to-1] (all sharing `ts`) so each (en, at) sub-group * appears in pv-chain order. Cross-group positions in the cluster are preserved * — only logs within the same (en, at) sub-group get reshuffled. */ function chainStabilizeCluster(applogs: Applog[], from: number, to: number, dir: 'asc' | 'desc') { const groups = new Map() for (let k = from; k < to; k++) { const log = applogs[k] const key = log.en + '|' + log.at const existing = groups.get(key) if (existing) existing.push(log) else groups.set(key, [log]) } // All groups singleton — nothing to do (the common case) if (groups.size === to - from) return const orderedByGroup = new Map() for (const [key, logs] of groups) { orderedByGroup.set(key, logs.length === 1 ? logs : topoSortByPv(logs, dir)) } const cursors = new Map() for (let k = from; k < to; k++) { const key = applogs[k].en + '|' + applogs[k].at const cursor = cursors.get(key) ?? 0 applogs[k] = orderedByGroup.get(key)![cursor] cursors.set(key, cursor + 1) } } /** * Topologically order `logs` (all sharing en, at, ts) by pv-chain. * asc → root-first; desc → tail-first. Forks / multi-roots: tie-break by cid. */ function topoSortByPv(logs: Applog[], dir: 'asc' | 'desc'): Applog[] { const cidSet = new Set(logs.map(l => l.cid)) const childrenByPv = new Map() const roots: Applog[] = [] for (const log of logs) { if (log.pv && cidSet.has(log.pv)) { const children = childrenByPv.get(log.pv) if (children) children.push(log) else childrenByPv.set(log.pv, [log]) } else { roots.push(log) } } const lexCmp = (a: Applog, b: Applog) => a.cid.localeCompare(b.cid) roots.sort(lexCmp) for (const children of childrenByPv.values()) children.sort(lexCmp) const result: Applog[] = [] const visited = new Set() const stack = [...roots].reverse() while (stack.length > 0) { const log = stack.pop()! if (visited.has(log.cid)) continue visited.add(log.cid) result.push(log) const children = childrenByPv.get(log.cid) if (children) for (let i = children.length - 1; i >= 0; i--) stack.push(children[i]) } // Defensive: cycle (shouldn't happen with content-addressed pv) — append unvisited if (result.length < logs.length) { for (const log of logs) if (!visited.has(log.cid)) result.push(log) } return dir === 'desc' ? result.reverse() : result } export const isTsBefore = (log: Applog, logToCompare: Applog) => isBefore(new Date(log.ts), new Date(logToCompare.ts)) export const uniqueEnFromAppLogs = (appLogArray: Applog[]) => [...new Set(appLogArray.map(eachLog => eachLog.en))] export const areApplogsEqual = (logA: Applog, logB: Applog) => isEqual(logA, logB) export type RemoveDuplicateAppLogsMode = 'safety' | 'cleanup' const warnMissingRemoveDuplicateMode = () => { WARN(`[removeDuplicateAppLogs] mode not set; pass 'safety' or 'cleanup' for optimal behavior`) } const removeDuplicateAppLogsCleanup = (appLogArray: Applog[]) => { const logMap = new Map() const verboseEnabled = VERBOSE.isEnabled for (const eachLog of appLogArray) { if (!eachLog) { ERROR(`falsy entry in applogs`, appLogArray) throw new Error(`falsy entry in applogs`) } if (!eachLog.cid) { ERROR(`applog with missing CID`, eachLog) throw new Error(`applog with missing CID`) } const key = eachLog.cid const existing = logMap.get(key) if (existing) { if (verboseEnabled) VERBOSE(`Skipping duplicate applog:`, [existing, eachLog]) } else { logMap.set(key, eachLog) } } return Array.from(logMap.values()) } const removeDuplicateAppLogsSafety = (appLogArray: Applog[]) => { const seen = new Set() const verboseEnabled = VERBOSE.isEnabled const existingByCid = verboseEnabled ? new Map() : null let result: Applog[] | null = null let index = 0 for (const eachLog of appLogArray) { if (!eachLog) { ERROR(`falsy entry in applogs`, appLogArray) throw new Error(`falsy entry in applogs`) } if (!eachLog.cid) { ERROR(`applog with missing CID`, eachLog) throw new Error(`applog with missing CID`) } const key = eachLog.cid if (seen.has(key)) { if (!result) { result = appLogArray.slice(0, index) } if (verboseEnabled) VERBOSE(`Skipping duplicate applog:`, [existingByCid?.get(key), eachLog]) } else { seen.add(key) if (existingByCid) existingByCid.set(key, eachLog) if (result) result.push(eachLog) } index++ } return result ?? appLogArray } /** * Deduplicate applogs by CID. * - safety: fast duplicate check; returns original array if no duplicates found. * - cleanup: optimized for merged arrays with likely duplicates. */ export const removeDuplicateAppLogs = (appLogArray: Applog[], mode?: RemoveDuplicateAppLogsMode) => { if (!mode) { warnMissingRemoveDuplicateMode() return removeDuplicateAppLogsCleanup(appLogArray) } return mode === 'safety' ? removeDuplicateAppLogsSafety(appLogArray) : removeDuplicateAppLogsCleanup(appLogArray) } // export const removeDuplicateAndMaybeDeletedAppLogs = (ds: Thread, appLogArray: Applog[], removeDeletedEntities = true) => { // const logMap = new Map() // for (const eachLog of appLogArray) { // if (!removeDeletedEntities || ds.entityIsDeleted(eachLog.en)) // logMap.set(stringify(eachLog), eachLog) // } // return Array.from(logMap.values()) // } export const getHashID = (stringifiable: any, lngth = 8) => cyrb53hash(stringify(stringifiable), 31, lngth) as string export function isVariable(x: any): x is string { return typeof x === 'string' && x.startsWith('?') } export function variableNameWithoutQuestionmark(str: string) { return str.slice(1) } // export function isMatcher(x: any): x is string { // return // } export function isStaticPattern(x: any): x is ApplogValue { if (!['string', 'boolean', 'number', 'function'].includes(typeof x)) WARN(`Unhandled pattern value type:`, typeof x, x) return !isVariable(x) && ['string', 'boolean', 'number'].includes(typeof x) } // export function isIgnorePattern(x: any): boolean { // return x === '_' // } /* * In a pattern from a Query: * - variables that don't have a value in the search context: * - remove from pattern * - add to variableToFill as: { en: 'movieID' } (useful for mapTo) * - variables that have a value set: * - replace placeholder with actual value */ export function resolveOrRemoveVariables(pattern: DatalogQueryPattern, candidate: SearchContext) { let variablesToFill = {} as Partial<{ [key in keyof Applog]: string }> const newPattern: DatalogQueryPattern = {} for (const [patternKey, patternValue] of Object.entries(pattern)) { if (isVariable(patternValue)) { const varName = variableNameWithoutQuestionmark(patternValue) const candidateValue = candidate[varName] if (candidateValue) { newPattern[patternKey] = candidateValue // & not adding it to newPattern } else { variablesToFill[patternKey] = varName } } else { newPattern[patternKey] = patternValue // keep static value } } return [newPattern, variablesToFill] as const } function matchVariable(variable: string, triplePart: ApplogValue, context: SearchContext): SearchContext { if (context.hasOwnProperty(variable)) { // TODO: fix lint error with: if (Object.hasOwnProperty.call(context, variable)) { const bound = context[variable] const match = matchPart(bound, triplePart, context) // if (VERBOSE.isEnabled) VERBOSE('[matchVariable] match?', variable, bound, match) return match } // if (VERBOSE.isEnabled) VERBOSE('[matchVariable] initializing variable', variable, 'to', triplePart) return { ...context, [variable]: triplePart } } export function matchPartStatic(field: keyof Applog, patternPart: ValueOrMatcher, atomPart: ApplogValue): boolean { // if (VERBOSE.isEnabled) VERBOSE('[matchPartStatic]', field, patternPart, patternPart === atomPart ? '===' : '!==', atomPart) let result if (patternPart) { const typ = typeof patternPart if (typ === 'string') { result = patternPart === atomPart // shortcut for most common use-case } else if (typ === 'function') { result = (patternPart as Function)(atomPart) } else if (Array.isArray(patternPart)) { // A bare array is ambiguous now that `vl` may itself be an array value: it could // mean set-membership or a literal-array match. Fail loudly rather than match wrong. throw ERROR( `[matchPartStatic] a bare array is not a valid matcher for field '${field}'.` + ` Use anyOf(...) for set-membership, or a predicate (v) => isEqual(v, [...]) to match a literal array value.`, patternPart, ) } else if (typeof (patternPart as any).has === 'function') { result = (patternPart as Set).has(atomPart) } // if (field === 'at' && typ === 'string' && patternPart.endsWith('*')) { // return typeof atomPart === 'string' && atomPart.startsWith(patternPart.slice(0, -1)) // } else { // object/array values compare by deep equality; primitives via isEqual's === fast-path result = valueEq(patternPart as ApplogValue, atomPart) } } else { result = patternPart === atomPart } // if (VERBOSE.isEnabled) VERBOSE('[matchPartStatic] =>', field.startsWith('!') ? '!' : '', result) if (field.charAt(0) === '!') { return !result } else { return result } } export function matchPart(patternPart: ValueOrMatcher, atomPart: ApplogValue, context: SearchContext): ResultContext { if (!context) { // if (VERBOSE.isEnabled) VERBOSE('[matchPart] no context') return null } if (typeof patternPart === 'string') { if (isVariable(patternPart)) { return matchVariable(patternPart, atomPart, context) } /* TODO: else if (isIgnorePattern(patternPart)) { return matchVariable(patternPart, atomPart, context) } */ } // if (VERBOSE.isEnabled) VERBOSE('[matchPart]', patternPart, patternPart === atomPart ? '===' : '!==', atomPart) if (typeof patternPart === 'function') { return patternPart(atomPart) ? context : null } if (Array.isArray(patternPart)) { throw ERROR( `[matchPart] a bare array is not a valid matcher.` + ` Use anyOf(...) for set-membership, or a predicate to match a literal array value.`, patternPart, ) } if (patternPart && typeof (patternPart as any).has === 'function') { return (patternPart as Set).has(atomPart) ? context : null } return valueEq(patternPart as ApplogValue, atomPart) ? context : null } /** * Check if pattern matches triple with context substitutions */ export function matchPattern(pattern: DatalogQueryPattern, applog: Applog, context: SearchContext): ResultContext { return Object.entries(pattern).reduce((context, [field, patternValue]) => { const applogValue = applog[field] // @ts-expect-error wtf no idea //HACK: ts weird const patternValT: ValueOrMatcher = patternValue return matchPart(patternValT, applogValue, context) }, context) } export function actualize { return Object.fromEntries(find.map((findField) => { if (context === null) { throw new Error(`actualize context is null ${find}`) } return [ isVariable(findField) ? findField.replace(/^\?/, '') : findField, isVariable(findField) ? context[findField] : findField, ] })) as DatalogQueryResultEntry