import { VectorBase } from '@memberjunction/ai-vectors';
import { EntityField, EntityInfo, IMetadataProvider, UserInfo } from '@memberjunction/core';
import { MJAIModelEntity, MJEntityDocumentEntity, MJTemplateEntity, MJTemplateEntityExtended, MJVectorDatabaseEntity } from '@memberjunction/core-entities';
import { EmbeddingData, TemplateParamData, VectorEmeddingData, VectorizeEntityParams, VectorizeEntityResponse } from '../generic/vectorSync.types.js';
/**
 * Class that specializes in vectorizing entities using embedding models and upserting them into Vector Databases.
 *
 * NOTE(review): this is a generated declaration file (see the sourceMappingURL comment at the end).
 * In this copy the angle-bracket type arguments appear to have been stripped: every `Promise` return
 * type is bare, `Record` has no key/value types, and the return type of `GetTemplateData` ends in a
 * stray `>`. Regenerate the declarations from the implementation rather than hand-editing them, and
 * confirm the resolved types there.
 */
export declare class EntityVectorSyncer extends VectorBase {
    /** Start timestamp. NOTE(review): presumably the start of the current/most recent vectorization run; confirm against the implementation. */
    _startTime: Date;
    /** End timestamp. NOTE(review): presumably the end of the current/most recent vectorization run; confirm against the implementation. */
    _endTime: Date;
    /** Accumulates render errors across batches so they can be reported through the progress callback */
    private _renderErrors;
    /** Accumulates vector-DB upsert errors across batches so a failed upsert is reflected in the run's success flag */
    private _upsertErrors;
    /**
     * Returns the active metadata provider: the explicit override (via `this.Provider = ...`)
     * if set, otherwise the global default. Provided as `ProviderToUse` for backward
     * compatibility with code paths that referenced this name before the base class
     * standardized on the `Provider` getter/setter.
     */
    protected get ProviderToUse(): IMetadataProvider;
    /** @param provider - Optional request-scoped metadata provider; see {@link VectorBase} constructor. */
    constructor(provider?: IMetadataProvider | null);
    /**
     * Verbose, entity-scoped progress line. Suppressed unless verbose logging is on
     * (MJ_VERBOSE), and always prefixed with the entity document name so the lines from
     * concurrently-running vectorization pipelines stay attributable in interleaved output.
     * Regular-mode callers (e.g. the Vectorize Entity action) print the concise per-document
     * summary instead.
     */
    private vlog;
    /**
     * Refreshes the EntityDocumentCache and configures the AIEngine and TemplateEngineServer
     * @param forceRefresh If true, the cache and engines will be refreshed even if they are already loaded
     * @param contextUser The context user to use to refresh the cache and configure the engines
     */
    Config(forceRefresh: boolean, contextUser?: UserInfo): Promise;
    /**
     * Public entry point for vectorizing an entity.
     * NOTE(review): the resolved type is missing from this declaration; presumably
     * `VectorizeEntityResponse` (imported above). Confirm against the implementation.
     * @param params Parameters describing what to vectorize (see `VectorizeEntityParams`).
     * @param contextUser Optional context user for the operation.
     */
    VectorizeEntity(params: VectorizeEntityParams, contextUser?: UserInfo): Promise;
    /**
     * Build a human-readable summary of render + upsert failures accumulated during the run,
     * or an empty string when there were none.
     */
    private buildVectorizeErrorSummary;
    /**
     * Creates an AsyncBatchTransform that renders templates and generates embeddings
     * in the main thread. This replaces the worker_threads-based VectorizeTemplates
     * worker, which couldn't access ClassFactory registrations in its isolated V8 context.
     */
    private createVectorCreator;
    /**
     * Creates an AsyncBatchTransform that upserts vectors to the vector database
     * in the main thread. This replaces the worker_threads-based UpsertVectors
     * worker for the same ClassFactory reasons.
     */
    private createVectorUpserter;
    /**
     * Renders templates for a batch of entity records and generates embeddings for the rendered text.
     * Tracks render failures and attaches error info to the returned EmbeddingData array so the
     * caller can surface them through the progress callback.
     */
    private renderAndEmbedBatch;
    /** Default max chars for large text fields (nvarchar(MAX) or MaxLength > 5000) in vector metadata */
    private static readonly DEFAULT_LARGE_FIELD_TRUNCATION;
    /**
     * Parse the EntityDocumentConfiguration JSON from an entity document.
     * Returns an empty object if the Configuration column is null or invalid JSON.
     */
    private parseDocumentConfig;
    /**
     * Get fields to include in vector metadata for display in search results.
     * Respects EntityDocumentConfiguration.metadata.fieldStrategy and per-field overrides.
     *
     * Default behavior (no config or fieldStrategy = "all"):
     *   Include all fields except PKs, binary types, and system (__mj_*) fields.
     *
     * "include" strategy: only fields explicitly listed in config.metadata.fields with included=true.
     * "exclude" strategy: all eligible fields except those listed with included=false.
     */
    private getDisplayFields;
    /**
     * Get the truncation limit for a field based on its MaxLength and
     * optional per-field or global overrides from EntityDocumentConfiguration.
     */
    private getFieldTruncationLimit;
    /**
     * Upserts a batch of embedding data as vector records into the vector database.
     */
    private upsertBatchToVectorDB;
    /**
     * Starts the async data paging loop that feeds records into the stream pipeline.
     */
    private startDataPaging;
    /**
     * This method will create a default Entity Document for the given entity, vector database, and AI model.
     * NOTE(review): the resolved type is missing from this declaration; confirm it against the implementation.
     * @param EntityID ID of the entity to create the document for.
     * @param VectorDatabase The vector database entity to associate with the document.
     * @param AIModel The AI model entity to associate with the document.
     */
    CreateDefaultEntityDocument(EntityID: string, VectorDatabase: MJVectorDatabaseEntity, AIModel: MJAIModelEntity): Promise;
    /**
     * Looks up the vector database and embedding class for an Entity Document ID.
     * NOTE(review): described from the method name only; the resolved type is presumably
     * `VectorEmeddingData` (imported above). Confirm against the implementation.
     * @param entityDocumentID ID of the Entity Document to resolve.
     * @param createDocumentIfNotFound Optional flag; presumably creates the document when it is missing. Default not visible here.
     */
    protected GetVectorDatabaseAndEmbeddingClassByEntityDocumentID(entityDocumentID: string, createDocumentIfNotFound?: boolean): Promise;
    /**
     * Resolves the API key for a vector database provider. Checks the Credential Engine
     * first (if VectorDatabase.CredentialID is set), then falls back to the legacy
     * environment variable AI_VENDOR_API_KEY__.
     * NOTE(review): the environment variable name above looks truncated (a suffix placeholder
     * appears to be missing after the double underscore); confirm the full pattern.
     */
    protected ResolveVectorDBAPIKey(vectorDBEntity: MJVectorDatabaseEntity): Promise;
    /**
     * Fetches an Entity Document by ID.
     * NOTE(review): the resolved type and the not-found behavior are not visible in this declaration.
     */
    GetEntityDocument(EntityDocumentID: string): Promise;
    /**
     * Fetches an Entity Document by name.
     * NOTE(review): the resolved type and the not-found behavior are not visible in this declaration.
     */
    GetEntityDocumentByName(EntityDocumentName: string, ContextUser?: UserInfo): Promise;
    /**
     * Returns active Entity Documents for vectorization. Defaults to the
     * `Record Duplicate` document type for back-compat with the historical use
     * case (duplicate detection); pass `entityDocumentType` to target a different
     * type (e.g. `'Search'` for the search-tier vector pool that backs
     * `Provider.SearchEntity`).
     *
     * When multiple active EntityDocuments exist for the same entity (e.g. for
     * different content variants), only the first row encountered is kept; the
     * one-doc-per-entity restriction matches what existing vectorize callers
     * already expect.
     *
     * **Cache source.** Reads from `KnowledgeHubMetadataEngine`'s cached
     * EntityDocuments array (already loaded by `Config()` above), with no fresh
     * RunView. The denormalized `Type` and `Entity` columns are available on
     * the cached rows, so the type and entity filters are pure client-side
     * predicates.
     *
     * **Empty result is not an error.** When no Active documents match (none configured
     * yet, or all inactive), this returns an empty array; it does NOT throw. An unknown or
     * misspelled `entityDocumentType` is surfaced as a `LogStatus` warning, still returning
     * `[]`. Callers treat the empty case as "nothing to do" (e.g. `VectorizeEntityAction`
     * reports `NO_DOCUMENTS` success), so the unattended daily Entity Vector Sync job isn't
     * flagged as failed on a fresh DB with no Search documents.
     *
     * @param entityNames If provided, only Entity Documents for the specified entities will be returned.
     * @param entityDocumentType Name of the EntityDocumentType to filter by. Defaults to 'Record Duplicate'. Pass 'Search' for the search-tier pool.
     * @returns Active, de-duped-per-entity Entity Documents of the given type; `[]` when none match.
     */
    GetActiveEntityDocuments(entityNames?: string[], entityDocumentType?: string): Promise;
    /**
     * Resolves the VectorIndex for the given EntityDocument by looking up its VectorIndexID
     * using the cached KnowledgeHubMetadataEngine. If VectorIndexID is not set on the
     * EntityDocument, throws a descriptive error instructing the user to configure it.
     */
    private GetVectorIndexForEntityDocument;
    /**
     * Creates a template for the given Entity Document.
     * NOTE(review): described from the method name only; the resolved type is not visible here.
     */
    protected CreateTemplateForEntityDocument(entityDocument: MJEntityDocumentEntity): Promise;
    /**
     * Builds template content (returned as a string) from the given entity fields.
     * NOTE(review): the format of the generated content is not visible in this declaration.
     */
    protected BuildTemplateContent(entityFields: EntityField[]): string;
    /**
     * Gets the entity fields to use for similarity search for the given entity ID.
     * NOTE(review): described from the method name only; selection criteria and resolved type are not visible here.
     */
    protected GetEntityFieldsForSimilaritySearch(entityID: string): Promise;
    /**
     * Assembles the data passed to a template for a single record.
     * NOTE(review): described from the signature only. The type arguments of `Record` and of the
     * return type are missing in this declaration (note the stray `>`); confirm against the implementation.
     */
    protected GetTemplateData(entity: EntityInfo, record: Record, template: MJTemplateEntityExtended, relatedData: TemplateParamData[]): Promise>;
    /**
     * Gathers related template data for a batch of records.
     * NOTE(review): described from the method name only; the resolved type is presumably a
     * `TemplateParamData` collection. Confirm against the implementation.
     */
    protected GetRelatedTemplateDataForBatch(entity: EntityInfo, records: unknown[], template: MJTemplateEntityExtended): Promise;
    /**
     * Creates or updates the Entity Record Document for a single source record.
     * Thin back-compat wrapper over {@link UpsertEntityRecordDocumentBatch}; the pipeline
     * uses the batch path directly to avoid an N+1 read per record.
     */
    protected UpsertEntityRecordDocumentRecords(embeddingData: EmbeddingData, contextUser: UserInfo): Promise;
    /**
     * Creates or updates Entity Record Documents for a batch of source records.
     *
     * **Reads are batched + parallel, saves are per-record.** Previously each record did its own
     * `RunView` to find its existing ERD, an N+1 read storm (~1 query per source record)
     * that dominated the upsert phase and tripped the sequential/duplicate-RunView telemetry.
     * Here we group by (EntityID, EntityDocumentID) and issue the existence checks for ALL groups
     * in a SINGLE `RunViews` batch (one round trip, run in parallel) with a `RecordID IN (…)` filter
     * per group, map the results by RecordID, then find-or-create + `Save()` each record (the
     * per-record `Save()` is inherent to the BaseEntity contract). Grouping is usually a single read
     * within one entity's pipeline; a mixed batch produces several views, now executed together.
     */
    protected UpsertEntityRecordDocumentBatch(batch: EmbeddingData[], contextUser: UserInfo): Promise;
    /**
     * Find-or-create + persist a single Entity Record Document from its embedding data.
     * `existingErd` (when provided) is reused so the caller can resolve existence in a
     * single batched read rather than one query per record.
     */
    private saveEntityRecordDocument;
    /**
     * This method is responsible for determining if the template(s) given have aligned parameters, meaning they don't have overlapping parameter names that have
     * different meanings. It is okay for scenarios where there are > 1 template in use for a message to have different parameter names, but if they have the SAME parameter names
     * they must not have different settings.
     */
    protected ValidateTemplateContextParamAlignment(template: MJTemplateEntityExtended): boolean;
    /**
     * Build the SQL filter to select records that are in the given list.
     * For single PK entities, uses a simple IN clause.
     * For composite PK entities, uses an EXISTS clause that concatenates PK columns to match the RecordID format.
     */
    protected BuildListFilter(entity: EntityInfo, listDetailsSchema: string, listId: string): string;
}
//# sourceMappingURL=entityVectorSync.d.ts.map