/* eslint-disable */
/* eslint-comments/no-unlimited-disable */
import fs from 'fs';
import path from 'path';
import DataFactory from '@rdfjs/data-model';
import type { NamedNode, Literal } from '@rdfjs/types';
import type { AggregateExpression, ConstructQuery, Pattern, SelectQuery, Variable } from 'sparqljs';
import { Generator } from 'sparqljs';
import type { FindAllOptions, FindOptionsWhere } from '../storage/FindOptionsTypes';
import { SparqlQueryBuilder } from '../storage/query-adapter/sparql/SparqlQueryBuilder';
import {
  createSparqlBasicGraphPattern,
  createSparqlGraphPattern,
  createSparqlSelectQuery,
  createValuesPatternsForVariables,
  entityGraphTriple,
  entityVariable,
  rdfTypeNamedNode,
  rdfTypesVariable,
  rdfTypeVariable,
  createSparqlSelectGroup
} from '../util/SparqlUtil';
import { ensureArray } from '../util/Util';

/** Output modes accepted by `--format`. */
type OutputFormat = 'text' | 'json';

/**
 * One entry of the explain plan. `select` and `construct` steps carry the
 * generated SPARQL text; `note` steps carry only human-readable remarks.
 */
type PlanStep =
  | {
    kind: 'select';
    name: string;
    wouldExecute: boolean;
    sparql: string;
    notes?: string[];
  }
  | {
    kind: 'construct';
    name: string;
    wouldExecute: boolean;
    sparql: string;
    notes?: string[];
  }
  | {
    kind: 'note';
    name: string;
    wouldExecute: boolean;
    notes: string[];
  };

/** The full report printed by this tool (as JSON or as text). */
interface ExplainPlan {
  /** The raw JSON value read from the `--input` file. */
  input: unknown;
  /** The options actually handed to the query builder after normalization. */
  normalizedOptions: FindAllOptions;
  steps: PlanStep[];
  /** Summary flags computed in `main()` (pre-select decision, relations, limit, ...). */
  meta: Record<string, unknown>;
}

/**
 * Prints the usage text to stderr and terminates the process.
 *
 * @param exitCode - Process exit code to terminate with.
 */
function usageAndExit(exitCode: number): never {
  const msg = [
    'Usage:',
    ' node dist/tools/explain-findall-sparql.js --input <path> [--format text|json] [--simulate-entity-values N]',
    '',
    'Input JSON can be either:',
    ' 1) a full FindAllOptions object: { "where": { ... }, "relations": { ... }, "order": { ... }, "limit": 10, ... }',
    ' 2) a FindOptionsWhere object (treated as { where: <input> })',
    '',
    'Notes:',
    ' - This utility does not query a SPARQL endpoint. When findAll would inject VALUES(?entity) from a pre-SELECT,',
    ' you can pass --simulate-entity-values N to show an example VALUES block with N placeholder IRIs.',
    ''
  ].join('\n');
  console.error(msg);
  process.exit(exitCode);
}

/**
 * Parses the command-line arguments (without the leading `node script` pair).
 *
 * Recognized flags: `--help`/`-h`, `--input`/`-i`, `--format`/`-f` and
 * `--simulate-entity-values`. An invalid flag value or an unknown flag prints
 * the usage text and exits with code 2. Arguments that do not start with `-`
 * and are not consumed as a flag value are ignored.
 *
 * @param argv - Arguments to parse.
 * @returns The parsed settings; `input` is undefined when `--input` was not given.
 */
function parseArgs(argv: string[]): { input?: string; format: OutputFormat; simulateEntityValues: number } {
  const out: { input?: string; format: OutputFormat; simulateEntityValues: number } = {
    format: 'text',
    simulateEntityValues: 0
  };

  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];

    if (arg === '--help' || arg === '-h') {
      usageAndExit(0);
    }

    if (arg === '--input' || arg === '-i') {
      out.input = argv[i + 1];
      i += 1;
      continue;
    }

    if (arg === '--format' || arg === '-f') {
      const v = argv[i + 1] as OutputFormat | undefined;
      if (v !== 'text' && v !== 'json') {
        console.error(`Invalid --format: ${String(v)}`);
        usageAndExit(2);
      }
      out.format = v;
      i += 1;
      continue;
    }

    if (arg === '--simulate-entity-values') {
      const v = Number.parseInt(argv[i + 1] ?? '', 10);
      // parseInt yields NaN for a missing/non-numeric value; isFinite rejects it.
      if (!Number.isFinite(v) || v < 0) {
        console.error(`Invalid --simulate-entity-values: ${argv[i + 1]}`);
        usageAndExit(2);
      }
      out.simulateEntityValues = v;
      i += 1;
      continue;
    }

    if (arg.startsWith('-')) {
      console.error(`Unknown arg: ${arg}`);
      usageAndExit(2);
    }
  }

  return out;
}

/**
 * Reads a file and parses it as JSON.
 *
 * @param p - Absolute path, or a path relative to the current working directory.
 * @returns The parsed JSON value (unvalidated).
 * @throws Error when the content is not valid JSON; file-system errors from
 *   `fs.readFileSync` propagate unchanged.
 */
function readJsonFile(p: string): any {
  const abs = path.isAbsolute(p) ? p : path.join(process.cwd(), p);
  const raw = fs.readFileSync(abs, 'utf8');
  try {
    return JSON.parse(raw);
  } catch (e) {
    throw new Error(`Failed to parse JSON from ${abs}: ${(e as Error).message}`);
  }
}

/**
 * Converts a JSON-friendly variable description into a SPARQL variable term.
 *
 * @param v - Either a string (`"name"` or `"?name"`) or an object that already
 *   looks like a variable term (`termType === 'Variable'` with a string `value`).
 * @returns The variable, or undefined for falsy or unrecognized input.
 */
function coerceSparqlVariable(v: any): Variable | undefined {
  if (!v) return undefined;

  if (typeof v === 'string') {
    const name = v.startsWith('?') ? v.slice(1) : v;
    return DataFactory.variable(name) as any;
  }

  if (typeof v === 'object' && v.termType === 'Variable' && typeof v.value === 'string') {
    return v;
  }

  return undefined;
}

/**
 * Turns the parsed input JSON into a `FindAllOptions` object.
 *
 * The input is treated as a full options object when it is a non-null object
 * containing at least one of `where`, `select`, `relations`, `order`, `limit`,
 * `offset` or `subQueries`; otherwise it is wrapped as `{ where: input }`.
 * `group`, `entitySelectVariable` and each sub-query's `select` array are
 * passed through `coerceSparqlVariable`, so plain strings may be used in JSON.
 *
 * @param input - The parsed JSON value.
 * @returns A new options object; the input object itself is not modified.
 */
function normalizeFindAllOptions(input: any): FindAllOptions {
  const asOptions =
    typeof input === 'object' &&
    input !== null &&
    ('where' in input ||
      'select' in input ||
      'relations' in input ||
      'order' in input ||
      'limit' in input ||
      'offset' in input ||
      'subQueries' in input);

  const options: FindAllOptions = asOptions ? input : { where: input as FindOptionsWhere };

  // Allow simple JSON-friendly forms for variables.
  const group = coerceSparqlVariable((options as any).group);
  const entitySelectVariable = coerceSparqlVariable((options as any).entitySelectVariable);
  const subQueries = Array.isArray((options as any).subQueries)
    ? (options as any).subQueries.map((sq: any) => {
      const select = Array.isArray(sq?.select)
        ? sq.select.map(coerceSparqlVariable).filter(Boolean)
        : sq?.select;
      return { ...sq, ...(select ? { select } : {}) };
    })
    : (options as any).subQueries;

  return {
    ...options,
    ...(group ? { group } : {}),
    ...(entitySelectVariable ? { entitySelectVariable } : {}),
    ...(subQueries ? { subQueries } : {})
  };
}

/**
 * Serializes a sparqljs query object to SPARQL text.
 *
 * @param query - The SELECT or CONSTRUCT query to serialize.
 * @returns The SPARQL string produced by a fresh sparqljs `Generator`.
 */
function stringifyQuery(query: SelectQuery | ConstructQuery): string {
  const gen = new Generator();
  return gen.stringify(query);
}

/**
 * Builds the entity SELECT query used by the findAll flow.
 *
 * Mirrors SparqlQueryAdapter.buildFindAllQueryData() for the entitySelectQuery creation.
 *
 * @param selectQueryData - Patterns built by
 *   `SparqlQueryBuilder.buildEntitySelectPatternsFromOptions` (without relations).
 * @param options - The normalized find options; supplies `entitySelectVariable`,
 *   `group`, `limit` and `offset`.
 * @returns The SELECT query, or undefined when `selectQueryData.where` is empty.
 */
function buildEntitySelectQueryForFindAll(
  selectQueryData: ReturnType<SparqlQueryBuilder['buildEntitySelectPatternsFromOptions']>,
  options?: FindAllOptions
): SelectQuery | undefined {
  // Nothing to select on: bail out before doing any other work.
  if (selectQueryData.where.length === 0) {
    return undefined;
  }

  const wherePatterns: Pattern[] = [...selectQueryData.where, ...selectQueryData.graphWhere];
  wherePatterns.push({
    type: 'bgp',
    triples: [
      {
        subject: entityVariable,
        predicate: rdfTypeNamedNode,
        object: rdfTypeVariable
      }
    ]
  });

  const entitySelectVariable = options?.entitySelectVariable ?? entityVariable;

  // Copy before pushing: ensureArray may hand back the caller's own array
  // (e.g. selectQueryData.group), which must not be mutated here.
  const groupBy = [...ensureArray(selectQueryData?.group ?? options?.group ?? [])];
  groupBy.push(entitySelectVariable);

  // All non-aggregated variables in SELECT must be in GROUP BY
  for (const selectVariable of selectQueryData.selectVariables ?? []) {
    const expr = selectVariable.expression as any;
    // An entry may have no expression (see the mapping below); the `in`
    // operator throws on undefined/primitives, so guard before using it.
    // NOTE(review): entries without an expression are skipped here rather than
    // added to GROUP BY; confirm against SparqlQueryAdapter if they can occur.
    if (
      expr !== null &&
      typeof expr === 'object' &&
      !('aggregation' in (expr as AggregateExpression)) &&
      expr.constructor?.name === 'Variable'
    ) {
      groupBy.push(expr as Variable);
    }
  }

  return createSparqlSelectQuery(
    [
      entitySelectVariable,
      // (GROUP_CONCAT(DISTINCT str(?rdfType); SEPARATOR = " | ") AS ?rdfTypes)
      {
        expression: {
          type: 'aggregate',
          aggregation: 'group_concat',
          separator: ' | ',
          distinct: true,
          expression: {
            type: 'operation',
            operator: 'STR',
            args: [rdfTypeVariable]
          }
        },
        variable: rdfTypesVariable
      } as any,
      ...(selectQueryData.selectVariables?.map(({ variable, expression }) => {
        if (!expression) return variable as any;
        return { variable, expression } as any;
      }) ?? [])
    ] as any,
    wherePatterns,
    selectQueryData.orders,
    groupBy as any,
    options?.limit,
    options?.offset
  );
}

/**
 * Creates placeholder entity IRIs for the simulated VALUES block.
 *
 * @param n - Number of placeholders to create.
 * @returns Named nodes `urn:skl-dry-run:entity-1` ... `urn:skl-dry-run:entity-n`
 *   (empty when `n` is 0 or less).
 */
function simulateEntityIdValues(n: number): (NamedNode | Literal)[] {
  const values: NamedNode[] = [];
  for (let i = 1; i <= n; i += 1) {
    values.push(DataFactory.namedNode(`urn:skl-dry-run:entity-${i}`));
  }
  return values;
}

/**
 * Entry point: reads the input file, builds the SELECT/CONSTRUCT queries the
 * findAll flow would use, and prints the resulting plan as JSON or text.
 * No SPARQL endpoint is contacted.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  if (!args.input) {
    usageAndExit(2);
  }

  const input = readJsonFile(args.input);
  const options = normalizeFindAllOptions(input);

  const qb = new SparqlQueryBuilder();
  const queryData = qb.buildEntitySelectPatternsFromOptions(entityVariable, options);
  const selectQueryData = qb.buildEntitySelectPatternsFromOptions(entityVariable, {
    ...options,
    relations: undefined
  });

  // Mirrors SparqlQueryAdapter.buildFindAllQueryData() for the relations union tweak.
  if ((queryData?.relationsQueryData?.unionPatterns ?? []).length > 0) {
    queryData?.relationsQueryData?.unionPatterns.push(
      createSparqlGraphPattern(entityVariable, [createSparqlBasicGraphPattern([entityGraphTriple])])
    );
  }

  const entitySelectQuery = buildEntitySelectQueryForFindAll(selectQueryData, options);
  const wouldPreSelectForOrderingAndValues =
    queryData.orders.length > 0 && options?.limit !== 1 && !!entitySelectQuery;

  const steps: PlanStep[] = [];

  if (entitySelectQuery) {
    const notes: string[] = [];
    if (wouldPreSelectForOrderingAndValues) {
      notes.push(
        'In SparqlQueryAdapter.findAll(), this SELECT is executed first to compute entity ordering.',
        'Its results are then used to inject a VALUES block over ?entity into the main CONSTRUCT query.'
      );
      if (options?.limit === undefined) {
        notes.push('Warning: limit is undefined, so this pre-SELECT may return all matching entity IDs (potentially huge).');
      } else {
        notes.push(`VALUES size is <= limit (${options.limit}).`);
      }
    } else {
      notes.push('In some cases (relations/type constraints), findAll executes this SELECT to support framing/type handling.');
      notes.push('In that path, it also embeds this SELECT as a subquery inside the CONSTRUCT.');
    }

    steps.push({
      kind: 'select',
      name: 'Entity Pre-SELECT',
      wouldExecute: wouldPreSelectForOrderingAndValues,
      sparql: stringifyQuery(entitySelectQuery),
      notes
    });
  }

  let constructWhere = queryData.graphWhere;
  let constructNotes: string[] = [];

  if (wouldPreSelectForOrderingAndValues) {
    if (args.simulateEntityValues > 0) {
      const variableValueFilters = createValuesPatternsForVariables({
        [entityVariable.value]: simulateEntityIdValues(args.simulateEntityValues) as any
      });
      constructWhere = [...variableValueFilters, ...constructWhere];
      constructNotes = [
        `This CONSTRUCT includes a simulated VALUES(?${entityVariable.value}) with ${args.simulateEntityValues} placeholder IRIs.`,
        'In real execution, those VALUES come from the pre-SELECT results.'
      ];
    } else {
      constructNotes = [
        'In real execution, this CONSTRUCT is preceded by the pre-SELECT and will have a VALUES(?entity) block injected.',
        'Pass --simulate-entity-values N to show an example VALUES block.'
      ];
    }
  } else if (entitySelectQuery) {
    // Mirrors the else-if path where the entity select is embedded into the CONSTRUCT.
    const entitySelectGroupQuery = createSparqlSelectGroup([entitySelectQuery]);
    constructWhere = [entitySelectGroupQuery, ...constructWhere];
    constructNotes = ['This CONSTRUCT embeds the entity SELECT as a subquery (GROUP pattern) in its WHERE.'];
  }

  const constructQuery = qb.buildConstructFromEntitySelectQuery(
    constructWhere,
    queryData.graphSelectionTriples,
    options?.select,
    queryData.selectVariables
  );

  steps.push({
    kind: 'construct',
    name: 'Main CONSTRUCT',
    wouldExecute: true,
    sparql: stringifyQuery(constructQuery),
    ...(constructNotes.length > 0 ? { notes: constructNotes } : {})
  });

  if (wouldPreSelectForOrderingAndValues && options?.limit === undefined) {
    steps.push({
      kind: 'note',
      name: 'Performance Hint',
      wouldExecute: false,
      notes: [
        'findAll() with no limit triggers a pre-SELECT that can return a very large ID set, then injects it into VALUES(?entity).',
        'This often causes slow queries due to large intermediate results and huge VALUES blocks.'
      ]
    });
  }

  const plan: ExplainPlan = {
    input,
    normalizedOptions: options,
    steps,
    meta: {
      wouldPreSelectForOrderingAndValues,
      hasRelations: (queryData?.relationsQueryData?.unionPatterns ?? []).length > 0,
      hasTypeConstraint: options?.where?.type !== undefined,
      limit: options?.limit,
      offset: options?.offset
    }
  };

  if (args.format === 'json') {
    console.log(JSON.stringify(plan, null, 2));
    return;
  }

  // Text
  for (const step of plan.steps) {
    if (step.kind === 'note') {
      console.log(`\n# ${step.name}\n`);
      for (const n of step.notes) console.log(`- ${n}`);
      continue;
    }

    console.log(`\n# ${step.name}\n`);
    if ('notes' in step && step.notes?.length) {
      for (const n of step.notes) console.log(`- ${n}`);
      console.log('');
    }
    console.log(step.sparql.trim());
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});