/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

/**
 * Tier-0 search scan: linear pass over EntityTable columns that are
 * ALREADY POPULATED during normal IFC parsing. Adds zero work to the
 * load hot path — `name` and `globalId` are batch-extracted by the
 * parser for geometry entities + types via `batchExtractGlobalIdAndName`,
 * and we read them straight out of the columnar TypedArrays.
 *
 * Crucially: this never calls `extractEntityAttributesOnDemand` (see
 * AGENTS.md §2). Empty-string rows are skipped via O(1) checks against
 * the StringTable index 0 (which is the canonical empty string).
 *
 * Tier-1 (worker-built inverted index) and Tier-3 (DuckDB SQL) layer
 * on top of the same `SearchResult` shape in later phases.
 */

import type { IfcDataStore } from '@ifc-lite/parser';

/**
 * One model taking part in a scan: its identifier plus its IFC store.
 * Models whose store is `null` are skipped by the scan.
 */
export interface ScanModel {
  id: string;
  ifcDataStore: IfcDataStore | null;
}

/** Field of an entity that a query can match against. */
export type MatchField = 'globalId' | 'name' | 'type' | 'description' | 'objectType';

export interface SearchResult {
  modelId: string;
  /** Local express ID inside the source model — federation conversion happens in the UI layer. */
  expressId: number;
  typeName: string;
  name: string;
  globalId: string;
  description: string;
  objectType: string;
  /** Field that produced the highest score for this entity. */
  matchField: MatchField;
  score: number;
}

export interface ScanOptions {
  /**
   * Maximum results returned (sorted by descending score). Default 50.
   * Fractional values are rounded down; zero or negative values yield no
   * results; `NaN` falls back to the default.
   */
  limit?: number;
  /** If set, abort after this many entities scanned per model (perf safety). */
  maxScanPerModel?: number;
}

const DEFAULT_LIMIT = 50;

/** Shape of an IFC GlobalId: exactly 22 characters from the base64-like alphabet. */
const GLOBAL_ID_PATTERN = /^[A-Za-z0-9_$]{22}$/;

/** Scoring weights — higher = better. Stable enough that downstream UI can compare. */
const SCORE = {
  GUID_EXACT: 1000,
  NAME_EXACT: 500,
  NAME_PREFIX: 100,
  TYPE_EXACT: 80,
  TYPE_PREFIX: 60,
  NAME_SUBSTR: 40,
  OBJECTTYPE_SUBSTR: 20,
  DESCRIPTION_SUBSTR: 10,
} as const;

/** Turn the caller-supplied limit into an integer cap (default when absent or NaN). */
function resolveLimit(requested: number | undefined): number {
  if (requested == null || Number.isNaN(requested)) return DEFAULT_LIMIT;
  return Math.floor(requested);
}

/** Score a lower-cased entity name against the lower-cased needle (0 = no match). */
function scoreName(nameLower: string, needle: string): number {
  if (nameLower === needle) return SCORE.NAME_EXACT;
  if (nameLower.startsWith(needle)) return SCORE.NAME_PREFIX;
  if (nameLower.includes(needle)) return SCORE.NAME_SUBSTR;
  return 0;
}

/** Score a lower-cased type name against the lower-cased needle (0 = no match). */
function scoreType(typeLower: string, needle: string): number {
  if (typeLower === needle) return SCORE.TYPE_EXACT;
  if (typeLower.startsWith(needle)) return SCORE.TYPE_PREFIX;
  return 0;
}

/**
 * Run a Tier-0 search across one or more models.
 *
 * Returns up to `limit` results sorted by descending score, then by
 * (modelId, expressId) for stable ordering. Empty/whitespace queries
 * return an empty array — the caller should not even open the popover.
 *
 * @param models - Models to scan; entries without a store or with an empty entity table are skipped.
 * @param query - Raw user query; trimmed, and lower-cased for the text comparisons.
 * @param options - Result limit and optional per-model scan cap.
 * @returns De-duplicated results, at most one per (modelId, expressId).
 */
export function runTier0Scan(
  models: readonly ScanModel[],
  query: string,
  options: ScanOptions = {},
): SearchResult[] {
  const trimmed = query.trim();
  if (trimmed.length < 1) return [];

  // A non-positive limit asks for nothing; bail out before scanning at all.
  const limit = resolveLimit(options.limit);
  if (limit <= 0) return [];

  const needle = trimmed.toLowerCase();

  // GUID exact-match fast path — IFC GlobalIds are 22-char base64-like strings.
  // We test the trimmed (case-sensitive) form because GUIDs are case-sensitive.
  const looksLikeGuid = GLOBAL_ID_PATTERN.test(trimmed);

  const collected: SearchResult[] = [];

  for (const model of models) {
    const store = model.ifcDataStore;
    if (!store) continue;
    const table = store.entities;
    if (!table || table.count === 0) continue;

    // GUID fast path: direct lookup by GlobalId. The linear pass below still
    // runs for this model so other fields can match; if it reports the same
    // entity again, the dedupe step keeps the higher-scoring GUID hit.
    if (looksLikeGuid) {
      const exactExpressId = table.getExpressIdByGlobalId(trimmed);
      if (exactExpressId > 0) {
        collected.push({
          modelId: model.id,
          expressId: exactExpressId,
          typeName: table.getTypeName(exactExpressId) ?? '',
          name: table.getName(exactExpressId),
          globalId: trimmed,
          description: table.getDescription(exactExpressId),
          objectType: table.getObjectType(exactExpressId),
          matchField: 'globalId',
          score: SCORE.GUID_EXACT,
        });
      }
    }

    scanModel(model.id, store, needle, options.maxScanPerModel, collected);
  }

  // Stable sort: score desc, then modelId asc, then expressId asc.
  collected.sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score;
    if (a.modelId !== b.modelId) return a.modelId < b.modelId ? -1 : 1;
    return a.expressId - b.expressId;
  });

  // Dedupe — the GUID fast path may have added a row that the linear pass
  // also matched. Keep the highest-scoring instance per (modelId, expressId);
  // because the list is already sorted, that is the first one encountered.
  const seen = new Set<string>();
  const out: SearchResult[] = [];
  for (const r of collected) {
    const key = `${r.modelId}:${r.expressId}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(r);
    if (out.length >= limit) break;
  }
  return out;
}

/**
 * Linear pass over one model's entity table, appending every row that
 * matches `needle` to `out`.
 *
 * @param modelId - Identifier copied onto each produced result.
 * @param store - Store providing the entity columns and the string table.
 * @param needle - Lower-cased, non-empty query text.
 * @param maxScan - Optional cap on the number of rows visited.
 * @param out - Accumulator the matches are pushed onto (mutated in place).
 */
function scanModel(
  modelId: string,
  store: IfcDataStore,
  needle: string,
  maxScan: number | undefined,
  out: SearchResult[],
): void {
  const table = store.entities;
  const strings = store.strings;
  const count = table.count;
  const cap = maxScan != null ? Math.min(count, maxScan) : count;

  // Direct typed-array references — avoids per-row method-call overhead.
  const expressId = table.expressId;
  const nameIdx = table.name;
  const globalIdIdx = table.globalId;
  const descriptionIdx = table.description;
  const objectTypeIdx = table.objectType;

  for (let i = 0; i < cap; i++) {
    // Fast skip: rows where every searchable string slot is the empty
    // string (StringTable index 0) — common for non-geometry entities
    // that the parser never batch-extracted. This keeps 4M-entity scans
    // O(populated rows) in practice.
    const nIdx = nameIdx[i];
    const gIdx = globalIdIdx[i];
    const dIdx = descriptionIdx[i];
    const oIdx = objectTypeIdx[i];
    if (nIdx === 0 && gIdx === 0 && dIdx === 0 && oIdx === 0) continue;

    const id = expressId[i];

    // Score every applicable field and keep the max. An earlier version
    // stopped scoring the type once the name had produced any score, which
    // let a NAME_SUBSTR (40) hit mask a TYPE_EXACT (80) hit on the same
    // entity. Tier-1's `scoreEntry` takes the max as well, and the
    // result-merge code in the UI depends on Tier-0 / Tier-1 producing
    // comparable orderings.
    let score = 0;
    let matchField: MatchField = 'name';

    const name = nIdx !== 0 ? strings.get(nIdx) : '';
    if (name) {
      score = scoreName(name.toLowerCase(), needle);
    }

    // Type lookup uses the resolved type-name accessor (handles enum
    // → PascalCase conversion). Cheap but a method call, so it stays
    // inside the per-row loop only because it can outrank NAME_SUBSTR.
    // getTypeName returns undefined for entities the parser never typed
    // (e.g. CAT_SKIP/non-product rows); guard it — calling .toLowerCase()
    // on that undefined crashed the whole search on every keystroke (#1195).
    const typeName = table.getTypeName(id);
    if (typeName) {
      const typeScore = scoreType(typeName.toLowerCase(), needle);
      if (typeScore > score) {
        score = typeScore;
        matchField = 'type';
      }
    }

    // The two substring fields carry the lowest weights, so the comparison
    // is only worth running while nothing stronger has matched yet.
    const objectType = oIdx !== 0 ? strings.get(oIdx) : '';
    if (
      oIdx !== 0 &&
      score < SCORE.OBJECTTYPE_SUBSTR &&
      objectType.toLowerCase().includes(needle)
    ) {
      score = SCORE.OBJECTTYPE_SUBSTR;
      matchField = 'objectType';
    }

    const description = dIdx !== 0 ? strings.get(dIdx) : '';
    if (
      dIdx !== 0 &&
      score < SCORE.DESCRIPTION_SUBSTR &&
      description.toLowerCase().includes(needle)
    ) {
      score = SCORE.DESCRIPTION_SUBSTR;
      matchField = 'description';
    }

    if (score === 0) continue;

    // GlobalId is display-only here (exact GUID matching is handled by the
    // caller), so it is resolved only once the row is known to match.
    out.push({
      modelId,
      expressId: id,
      typeName: typeName ?? '',
      name,
      globalId: gIdx !== 0 ? strings.get(gIdx) : '',
      description,
      objectType,
      matchField,
      score,
    });
  }
}