/*!
 * @file ExternalizedDynamoDbRepository.ts
 * @description This file contains the ExternalizedDynamoDbRepository class, which is used to store and retrieve externalized items from DynamoDB and S3.
 * @author Dean Heffernan
 * @copyright 2025 Squiz
 */

// External
import { createHash } from 'crypto';
import { ConditionalCheckFailedException } from '@aws-sdk/client-dynamodb';

// Team Submodules
import { AbstractDynamoDbRepository, EntityDefinition, QueryOptions } from '../dynamodb/AbstractDynamoDbRepository';
import { DynamoDbManager, Transaction } from '../dynamodb/DynamoDbManager';

// Local
import { S3ExternalStorage, S3StorageLocation } from '../s3/S3ExternalStorage';

/**
 * The ExternalizedDynamoDbRepository class is used to store and retrieve externalized items from DynamoDB and S3.
 *
 * Items are first written to DynamoDB as-is. When a write is rejected with an error that
 * `isDynamoItemSizeLimitError` recognises, the full payload is saved through the S3 storage and a
 * reduced record carrying a `storageLocation` reference is written to DynamoDB instead. Reads follow
 * that reference back to S3. `storageLocation` is stripped from values returned by the public
 * create/update methods.
 *
 * @class ExternalizedDynamoDbRepository
 * @extends {AbstractDynamoDbRepository}
 * @param {string} tableName - The name of the DynamoDB table to use for storing externalized items.
 * @param {DynamoDbManager} dbManager - The DynamoDB database manager to use for storing externalized items.
 * @param {string} entityName - The name of the entity to store externalized items for.
 * @param {EntityDefinition} entityDefinition - The entity definition to use for storing externalized items.
 * @param {DATA_CLASS} classRef - The class to use for storing externalized items.
 * @param {S3ExternalStorage} storage - The S3 storage to use for storing externalized items.
 * @returns {ExternalizedDynamoDbRepository} A new instance of the ExternalizedDynamoDbRepository class.
 */
export abstract class ExternalizedDynamoDbRepository<
  SHAPE extends object,
  DATA_CLASS extends SHAPE & { storageLocation?: S3StorageLocation },
> extends AbstractDynamoDbRepository<SHAPE, DATA_CLASS> {
  /**
   * Creates a new instance of the ExternalizedDynamoDbRepository class.
   * @constructor
   * @param {string} tableName - The name of the DynamoDB table to use for storing externalized items.
   * @param {DynamoDbManager} dbManager - The DynamoDB database manager to use for storing externalized items.
   * @param {string} entityName - The name of the entity to store externalized items for.
   * @param {EntityDefinition} entityDefinition - The entity definition to use for storing externalized items.
   * @param {DATA_CLASS} classRef - The class to use for storing externalized items.
   * @param {S3ExternalStorage} storage - The S3 storage to use for storing externalized items.
   * @returns {ExternalizedDynamoDbRepository} A new instance of the ExternalizedDynamoDbRepository class.
   */
  constructor(
    tableName: string,
    dbManager: DynamoDbManager,
    entityName: string,
    entityDefinition: EntityDefinition,
    classRef: { new (data?: Record<string, unknown>): DATA_CLASS },
    private readonly storage: S3ExternalStorage,
  ) {
    super(tableName, dbManager, entityName, entityDefinition, classRef);
  }

  /**
   * Removes storageLocation from the response.
   * storageLocation is an internal S3 storage implementation detail that should never be exposed to API consumers.
   * When the property is present a new `classRef` instance is built from the remaining properties;
   * otherwise the same object is returned untouched.
   * @param {DATA_CLASS} item - The item to process.
   * @returns {DATA_CLASS} The item without storageLocation.
   */
  private removeStorageLocationFromResponse(item: DATA_CLASS): DATA_CLASS {
    if ((item as any).storageLocation !== undefined) {
      const { storageLocation: _storageLocation, ...itemWithoutStorage } = item as any;
      return new this.classRef(itemWithoutStorage);
    }
    return item;
  }

  /**
   * Creates a new item in the repository.
   * If the item exceeds DynamoDB's size limit, it will automatically externalize the content to S3.
   * @param {DATA_CLASS} value - The value to create the item for.
   * @param {Transaction} transaction - The transaction to use for creating the item.
   * @param {Partial<SHAPE>} additionalValue - Additional value to use for creating the item.
   * @param {Object} options - The options to use for creating the item.
   * @returns {Promise<DATA_CLASS>} A promise that resolves to the created item (without storageLocation).
   * @throws Rethrows any error that is not recognised as a size-limit error, and any error from the retry.
   */
  public async createItem(
    value: DATA_CLASS,
    transaction: Transaction = {},
    additionalValue: Partial<SHAPE> = {},
    options: { overrideExisting?: boolean } = { overrideExisting: false },
  ): Promise<DATA_CLASS> {
    try {
      const result = await this.createItemInternal(value, transaction, additionalValue, options);
      return this.removeStorageLocationFromResponse(result);
    } catch (error) {
      // If the item exceeds DynamoDB's size limit, externalize the content to S3 and retry
      if (this.isDynamoItemSizeLimitError(error)) {
        console.warn(
          `ExternalizedDynamoDbRepository: Item exceeded DynamoDB size limit for ${this.entityName}. Retrying with external storage.`,
        );
        const externalizedValue = await this.prepareValueForStorage(value);
        const result = await this.createItemInternal(externalizedValue, transaction, additionalValue, options);
        return this.removeStorageLocationFromResponse(result);
      }
      throw error;
    }
  }

  /**
   * Internal method to create an item in the repository.
   *
   * When `value.storageLocation` is set the DynamoDB item is assembled and written here (directly or
   * queued on the transaction); otherwise the parent's createItem is used. Afterwards a previously
   * stored S3 object with a different key is deleted, and the result is hydrated from S3 when the
   * content is externalized.
   *
   * @param {DATA_CLASS} value - The value to create the item for.
   * @param {Transaction} transaction - The transaction to use for creating the item.
   * @param {Partial<SHAPE>} additionalValue - Additional value to use for creating the item.
   * @param {Object} options - The options to use for creating the item.
   * @returns {Promise<DATA_CLASS>} A promise that resolves to the created item.
   */
  private async createItemInternal(
    value: DATA_CLASS,
    transaction: Transaction = {},
    additionalValue: Partial<SHAPE> = {},
    options: { overrideExisting?: boolean } = { overrideExisting: false },
  ): Promise<DATA_CLASS> {
    // Only an overriding create can replace an existing record, so only then look up its old S3 object
    let previousLocation: S3StorageLocation | undefined;
    if (options?.overrideExisting) {
      previousLocation = await this.fetchStoredLocation({ ...value, ...additionalValue });
    }

    // If storageLocation exists, we need to save it directly to DynamoDB
    // We can't use super.createItem because it strips storageLocation during model validation
    // So we build the DynamoDB Item manually
    let createdItem: DATA_CLASS;
    if (value.storageLocation) {
      // Manually validate the value (without storageLocation)
      const valueWithoutStorageLocation = { ...value };
      delete (valueWithoutStorageLocation as any).storageLocation;
      new this.classRef(valueWithoutStorageLocation as Record<string, unknown>);

      // Build columns from value properties
      const columns: any = {};
      for (const modelProperty of Object.keys(valueWithoutStorageLocation)) {
        columns[modelProperty] = valueWithoutStorageLocation[modelProperty as keyof DATA_CLASS];
      }

      // Convert fields defined as JSON strings (like 'layouts') to JSON strings
      this.convertSelectedValuesToJsonString(columns);

      // Manually add storageLocation to columns (after JSON conversion)
      columns.storageLocation = value.storageLocation;

      // Build key fields
      const keyFields: Record<string, unknown> = {
        [this.keys.pk.attributeName]: this.getPk({ ...value, ...additionalValue }),
        [this.keys.sk.attributeName]: this.getSk({ ...value, ...additionalValue }),
      };

      // Add index fields
      for (const [_key, index] of Object.entries(this.indexes)) {
        try {
          keyFields[index.pk.attributeName] = this.getKey({ ...value, ...additionalValue }, index.pk.attributeName);
          keyFields[index.sk.attributeName] = this.getKey({ ...value, ...additionalValue }, index.sk.attributeName);
        } catch (e) {
          // Ignore optional index errors
          if ((e as Error).name === 'MissingKeyValuesError') {
            continue;
          }
          throw e;
        }
      }

      // Execute DynamoDB put
      const putCommandInput = {
        TableName: this.tableName,
        Item: {
          ...keyFields,
          ...columns,
        },
        ConditionExpression: options.overrideExisting
          ? undefined
          : `attribute_not_exists(${this.keys.pk.attributeName})`,
      };

      if (transaction.id) {
        // Add to transaction instead of executing directly
        this.dbManager.addWriteTransactionItem(transaction.id, {
          Put: putCommandInput,
        });
      } else {
        await this.client.put(putCommandInput);
      }

      createdItem = value;
    } else {
      // No storageLocation, use parent's method
      // Strip storageLocation before passing to parent to avoid "Excess properties" error
      const { storageLocation: _, ...valueWithoutStorage } = value as any;
      createdItem = await super.createItem(valueWithoutStorage as DATA_CLASS, transaction, additionalValue, options);
    }

    // Clean up old S3 file if it exists and is different from new location
    const newLocation = createdItem.storageLocation;
    if (previousLocation && previousLocation.key !== newLocation?.key) {
      await this.storage.delete(previousLocation);
    }

    // Check if we should hydrate from S3
    // Only hydrate if content is actually stored in S3 (not just stale storageLocation)
    if (this.isContentInS3(createdItem)) {
      // Content fields are empty, so full content is in S3
      return (await this.hydrateFromExternalStorage(createdItem)) || createdItem;
    }

    return createdItem;
  }

  /**
   * Updates an item in the repository.
   * If the item exceeds DynamoDB's size limit, it will automatically externalize the content to S3.
   * @param {Partial<SHAPE>} value - The value to update the item with.
   * @returns {Promise<DATA_CLASS | undefined>} A promise that resolves to the updated item (without
   * storageLocation), or undefined when the internal update yields no item.
   * @throws Rethrows any error that is not recognised as a size-limit error, and any error from the retry.
   */
  public async updateItem(value: Partial<SHAPE>): Promise<DATA_CLASS | undefined> {
    try {
      const result = await this.updateItemInternal(value);
      return result ? this.removeStorageLocationFromResponse(result) : undefined;
    } catch (error) {
      // If the item exceeds DynamoDB's size limit, externalize the content to S3 and retry
      if (this.isDynamoItemSizeLimitError(error)) {
        console.warn(
          `ExternalizedDynamoDbRepository: Update exceeded DynamoDB size limit for ${this.entityName}. Retrying with external storage.`,
        );
        // NOTE(review): `value` may be partial here; prepareValueForStorage serializes only the fields
        // it is given, so the S3 payload can lack attributes that were not part of this update.
        // Confirm callers pass complete items when an update may exceed the limit.
        const externalizedValue = await this.prepareValueForStorage(value as DATA_CLASS);
        const result = await this.updateItemInternal(externalizedValue);
        return result ? this.removeStorageLocationFromResponse(result) : undefined;
      }
      throw error;
    }
  }

  /**
   * Internal method to update an item in the repository.
   *
   * When the update carries a `storageLocation` the UpdateExpression is assembled and executed here;
   * otherwise the parent's updateItem is used. Afterwards a stale `storageLocation` (content is inline
   * again) is removed from the record, a superseded S3 object is deleted, and the result is hydrated
   * from S3 when the content is externalized.
   *
   * @param {Partial<SHAPE>} value - The value to update the item with.
   * @returns {Promise<DATA_CLASS | undefined>} A promise that resolves to the updated item, or undefined
   * when the item does not exist or the update returned nothing.
   */
  private async updateItemInternal(value: Partial<SHAPE>): Promise<DATA_CLASS | undefined> {
    // Fetch previous storageLocation before update
    const previousLocation = await this.fetchStoredLocation(value);

    // If storageLocation exists in the update, we need to save it directly to DynamoDB
    // We can't use super.updateItem because it strips storageLocation during model validation
    let updatedItem: DATA_CLASS | undefined;
    if ((value as DATA_CLASS).storageLocation) {
      // Get old value first
      const oldValue = await super.getItem(value);
      if (!oldValue) {
        return undefined;
      }

      // Merge old and new values
      const mergedValue = { ...oldValue, ...value };

      // Validate merged value (without storageLocation)
      const mergedWithoutStorageLocation = { ...mergedValue };
      delete (mergedWithoutStorageLocation as any).storageLocation;
      new this.classRef(mergedWithoutStorageLocation as Record<string, unknown>);

      // Convert fields defined as JSON strings (like 'layouts') to JSON strings
      // Both sides are converted so the equality check below compares like with like
      const newValueCopy = { ...value };
      this.convertSelectedValuesToJsonString(newValueCopy);
      this.convertSelectedValuesToJsonString(oldValue as Record<string, unknown>);

      // Build UpdateExpression manually including storageLocation
      const updateExpression = [];
      const expressionAttributeNames: Record<string, string> = {};
      const expressionAttributeValues: Record<string, unknown> = {};
      const updatedAttributes: string[] = [];

      for (const modelProperty of Object.keys(value)) {
        const propValue = (newValueCopy as any)[modelProperty];
        if (propValue === (oldValue as any)[modelProperty]) {
          continue; // don't update unchanged properties
        }

        const propName = `#${modelProperty}`;
        const propValuePlaceHolder = `:${modelProperty}`;
        updateExpression.push(`${propName} = ${propValuePlaceHolder}`);
        expressionAttributeNames[propName] = modelProperty;
        expressionAttributeValues[propValuePlaceHolder] = propValue;
        updatedAttributes.push(modelProperty);
      }

      if (!updatedAttributes.length) {
        // nothing to update
        updatedItem = mergedValue;
      } else {
        // Execute update
        const updateCommandInput = {
          TableName: this.tableName,
          Key: {
            [this.keys.pk.attributeName]: this.getPk(value),
            [this.keys.sk.attributeName]: this.getSk(value),
          },
          UpdateExpression: `SET ${updateExpression.join(', ')}`,
          ExpressionAttributeNames: expressionAttributeNames,
          ExpressionAttributeValues: expressionAttributeValues,
          ConditionExpression: `attribute_exists(${this.keys.pk.attributeName})`,
          ReturnValues: 'ALL_NEW' as const,
        };

        const result = await this.client.update(updateCommandInput);
        updatedItem = result.Attributes ? this.hydrateItem(result.Attributes) : undefined;

        // Preserve storageLocation from DynamoDB response (hydrateItem might strip it)
        if (updatedItem && result.Attributes?.storageLocation) {
          (updatedItem as any).storageLocation = result.Attributes.storageLocation;
        }
      }
    } else {
      // No storageLocation in update value, use parent's method
      // Strip storageLocation before passing to parent to avoid "Excess properties" error
      const { storageLocation: _, ...valueWithoutStorage } = value as any;
      updatedItem = await super.updateItem(valueWithoutStorage);
    }

    if (!updatedItem) {
      return undefined;
    }

    // Check for stale storageLocation - this happens when content transitions from large (S3) to small (inline DynamoDB)
    // If storageLocation exists but content is NOT in S3 (content is inline), the storageLocation is stale
    let newLocation = updatedItem.storageLocation;
    if (newLocation && !this.isContentInS3(updatedItem)) {
      // Remove the stale storageLocation from DynamoDB
      await this.client.update({
        TableName: this.tableName,
        Key: {
          [this.keys.pk.attributeName]: this.getPk(value),
          [this.keys.sk.attributeName]: this.getSk(value),
        },
        UpdateExpression: 'REMOVE storageLocation',
      });

      // Mark that there's no longer a new S3 location (we just removed it)
      newLocation = undefined;
      delete (updatedItem as any).storageLocation;
    }

    // Clean up old S3 file if it exists and is different from new location
    if (previousLocation && previousLocation.key !== newLocation?.key) {
      await this.storage.delete(previousLocation);
    }

    // Check if we should hydrate from S3
    // Only hydrate if content is actually stored in S3 (not just stale storageLocation)
    // For large->small updates, DynamoDB might still have storageLocation but content is in DynamoDB
    if (this.isContentInS3(updatedItem)) {
      // Content fields are empty, so full content is in S3
      return (await this.hydrateFromExternalStorage(updatedItem)) || updatedItem;
    }

    return updatedItem;
  }

  /**
   * Gets an item from the repository, loading its content from S3 when it has a storageLocation.
   * @param {Partial<SHAPE>} item - The item to get.
   * @returns {Promise<DATA_CLASS | undefined>} A promise that resolves to the item.
   */
  public async getItem(item: Partial<SHAPE>): Promise<DATA_CLASS | undefined> {
    const record = await super.getItem(item);
    return await this.hydrateFromExternalStorage(record);
  }

  /**
   * Gets items from the repository, loading externalized content from S3 for each record in parallel.
   * @param {Partial<SHAPE>[]} items - The items to get.
   * @returns {Promise<DATA_CLASS[]>} A promise that resolves to the items.
   */
  public async getItems(items: Partial<SHAPE>[]): Promise<DATA_CLASS[]> {
    const records = await super.getItems(items);
    return (await Promise.all(records.map((record) => this.hydrateFromExternalStorage(record)))) as DATA_CLASS[];
  }

  /**
   * Queries items from the repository, loading externalized content from S3 for each record in parallel.
   * @param {Partial<SHAPE>} item - The item to query.
   * @param {QueryOptions} options - The options to use for querying the items.
   * @returns {Promise<DATA_CLASS[]>} A promise that resolves to the items.
   */
  public async queryItems(item: Partial<SHAPE>, options?: QueryOptions): Promise<DATA_CLASS[]> {
    const records = await super.queryItems(item, options);
    return (await Promise.all(records.map((record) => this.hydrateFromExternalStorage(record)))) as DATA_CLASS[];
  }

  /**
   * Deletes an item from the repository and, when the delete reports success, its S3 object.
   * @param {Partial<SHAPE>} partialItem - The item to delete.
   * @param {Transaction} transaction - The transaction to use for deleting the item.
   * @returns {Promise<number>} A promise that resolves to the number of items deleted.
   */
  public async deleteItem(partialItem: Partial<SHAPE>, transaction: Transaction = {}): Promise<number> {
    // The location must be read before the record is removed
    const location = await this.fetchStoredLocation(partialItem);
    const deleted = await super.deleteItem(partialItem, transaction);
    // NOTE(review): the S3 object is removed as soon as the parent reports a truthy result, even when
    // a transaction is supplied - confirm whether the DynamoDB delete is only queued in that case.
    if (deleted && location) {
      await this.storage.delete(location);
    }
    return deleted;
  }

  /**
   * Deletes items from the repository together with any S3 objects they reference.
   * Items that have no stored location are skipped when cleaning up S3.
   * @param {Partial<SHAPE>[]} items - The items to delete.
   * @returns {Promise<void>} A promise that resolves when the items are deleted.
   */
  public async deleteItems(items: Partial<SHAPE>[]): Promise<void> {
    // Locations must be read before the records are removed
    const locations = await Promise.all(items.map((item) => this.fetchStoredLocation(item)));
    await super.deleteItems(items);
    // Only records that were externalized have an S3 object to remove
    const storedLocations = locations.filter((location): location is S3StorageLocation => location != null);
    await Promise.all(storedLocations.map((location) => this.storage.delete(location)));
  }

  /**
   * Prepares a value for storage by externalizing large content to S3.
   * The input value is not mutated; a JSON round-tripped copy is saved and a reduced copy is returned.
   * @param {DATA_CLASS} value - The value to prepare for storage.
   * @returns {Promise<DATA_CLASS>} A promise that resolves to the prepared value for DynamoDB.
   * @throws {Error} If S3 storage is not configured or if saving to S3 fails.
   */
  public async prepareValueForStorage(value: DATA_CLASS): Promise<DATA_CLASS> {
    const serialized = JSON.stringify(value);
    const plainObject = JSON.parse(serialized);

    // Remove any existing storageLocation before saving
    delete plainObject.storageLocation;

    // Generate reference ID from pk and sk for consistent S3 key
    const pk = this.getPk(value);
    const sk = this.getSk(value);

    // Create hash of pk and sk for S3 key
    const hash = createHash('sha256').update(`${pk}#${sk}`).digest('hex');

    // Save full payload to S3
    const { location } = await this.storage.save(this.entityName, hash, plainObject);

    // Create minimal payload for DynamoDB with only key fields
    // Key fields are computed from the entity definition - everything else is externalized
    const minimalPayload = this.getKeyFieldsValues(plainObject);

    // Add storageLocation reference
    minimalPayload.storageLocation = location;

    // Return minimal payload to be stored in DynamoDB
    // DO NOT pass through constructor as it will strip storageLocation!
    // DO NOT mutate the original value - storageLocation is an internal implementation detail
    return minimalPayload as DATA_CLASS;
  }

  /**
   * Builds the reduced record that is kept in DynamoDB for an externalized item.
   * Large content fields (defined in fieldsAsJsonString) are set to empty values:
   * arrays become `[]`, other non-null objects become `{}`, and primitive or null values are omitted.
   * All other fields (key fields and small metadata) are preserved. `storageLocation` is never copied.
   *
   * @param {Record<string, unknown>} obj - The source object to extract key fields from.
   * @returns {Record<string, unknown>} A new object with the content fields emptied.
   */
  protected getKeyFieldsValues(obj: Record<string, unknown>): Record<string, unknown> {
    const result: Record<string, unknown> = {};

    // Copy all properties from the source object
    for (const [key, value] of Object.entries(obj)) {
      // Skip storageLocation as it's handled separately
      if (key === 'storageLocation') {
        continue;
      }

      // If this field is in fieldsAsJsonString, it's large content - set to empty
      if (this.fieldsAsJsonString.includes(key)) {
        // Set arrays to empty arrays, objects to empty objects
        if (Array.isArray(value)) {
          result[key] = [];
        } else if (value !== null && typeof value === 'object') {
          result[key] = {};
        }
        // Skip primitive large fields entirely
        continue;
      }

      // Keep all other fields (key fields and small metadata)
      result[key] = value;
    }

    return result;
  }

  /**
   * Override parent's hydrateItem to preserve storageLocation
   * The parent's hydrateItem creates a new instance which strips storageLocation
   * @param {Record<string, unknown>} item - The raw DynamoDB item.
   * @returns {DATA_CLASS} The hydrated item, with storageLocation re-attached when the raw item had one.
   */
  protected hydrateItem(item: Record<string, unknown>): DATA_CLASS {
    // Extract storageLocation before calling parent's hydrateItem
    const storageLocation = item.storageLocation as S3StorageLocation | undefined;

    // Call parent's hydrateItem which will strip storageLocation
    const hydrated = super.hydrateItem(item);

    // Add storageLocation back if it existed
    if (storageLocation) {
      (hydrated as any).storageLocation = storageLocation;
    }

    return hydrated;
  }

  /**
   * Checks if content is actually stored in S3 by examining if large content fields are empty.
   * When content is externalized to S3, fieldsAsJsonString fields are set to empty objects/arrays.
   * Fields that are undefined or null carry no information and are skipped.
   * @param {DATA_CLASS} item - The item to check.
   * @returns {boolean} True if content is in S3, false if content is in DynamoDB.
   */
  private isContentInS3(item: DATA_CLASS): boolean {
    if (!item.storageLocation) {
      return false;
    }

    // Check if large content fields are empty (indicating content is in S3)
    for (const fieldName of this.fieldsAsJsonString) {
      const fieldValue = (item as any)[fieldName];
      // typeof null is 'object' and Object.keys(null) throws, so null must be excluded explicitly
      if (fieldValue === undefined || fieldValue === null) {
        continue;
      }
      const isEmpty = typeof fieldValue === 'object' && Object.keys(fieldValue).length === 0;
      if (isEmpty) {
        return true;
      }
    }

    return false;
  }

  /**
   * Hydrates a record from external storage.
   * Records without a storageLocation are returned unchanged. When loading fails, the failure is
   * logged and the DynamoDB record is returned with storageLocation removed.
   * @param {DATA_CLASS} record - The record to hydrate.
   * @returns {Promise<DATA_CLASS | undefined>} A promise that resolves to the hydrated record (without storageLocation).
   */
  private async hydrateFromExternalStorage(record?: DATA_CLASS): Promise<DATA_CLASS | undefined> {
    // If no record or no storageLocation, return record as-is
    if (!record || !record.storageLocation) {
      return record;
    }

    try {
      // Load full content from S3
      return (await this.storage.load(record.storageLocation)) as DATA_CLASS;
    } catch (error) {
      // Graceful degradation: if S3 is not configured or content doesn't exist,
      // return the record with empty content fields instead of throwing error
      console.warn(
        `Failed to load content from S3 for ${this.entityName}:`,
        error instanceof Error ? error.message : String(error),
      );
      // Return record with empty content, remove storageLocation to avoid confusion
      return this.removeStorageLocationFromResponse(record);
    }
  }

  /**
   * Fetches the stored location of an item by reading only the `storageLocation` attribute.
   * @param {Partial<SHAPE>} partialItem - The item to fetch the stored location for.
   * @returns {Promise<S3StorageLocation | undefined>} A promise that resolves to the stored location,
   * or undefined when the record is missing or has none.
   */
  private async fetchStoredLocation(partialItem: Partial<SHAPE>): Promise<S3StorageLocation | undefined> {
    try {
      const output = await this.client.get({
        TableName: this.tableName,
        Key: {
          [this.keys.pk.attributeName]: this.getPk(partialItem),
          [this.keys.sk.attributeName]: this.getSk(partialItem),
        },
        ProjectionExpression: 'storageLocation',
      });
      return output.Item?.storageLocation as S3StorageLocation | undefined;
    } catch (error) {
      if (error instanceof ConditionalCheckFailedException) {
        return undefined;
      }
      throw error;
    }
  }

  /**
   * Check if the error is due to DynamoDB item size limit being exceeded.
   *
   * The error, its `originalError`, and any transaction cancellation reasons are inspected for a
   * validation/transaction-cancelled code and for a size-related message.
   *
   * NOTE(review): the two checks are combined with OR, so any ValidationException or
   * TransactionCanceledException is treated as a size-limit error regardless of its message -
   * confirm this breadth is intended.
   *
   * @param {unknown} error - The error to check.
   * @returns {boolean} True if the error is due to DynamoDB item size limit being exceeded, false otherwise.
   */
  protected isDynamoItemSizeLimitError(error: unknown): boolean {
    if (!error || typeof error !== 'object') {
      return false;
    }

    const err = error as Record<string, unknown>;
    const cancellationReasons = (err?.CancellationReasons ?? err?.cancellationReasons) as
      | Array<Record<string, unknown>>
      | undefined;

    const validationCodes = ['ValidationException', 'TransactionCanceledException'];
    const hasValidationCode =
      validationCodes.includes(err?.code as string) ||
      validationCodes.includes(err?.name as string) ||
      validationCodes.includes((err?.originalError as Record<string, unknown>)?.code as string) ||
      validationCodes.includes((err?.originalError as Record<string, unknown>)?.name as string) ||
      (Array.isArray(cancellationReasons) &&
        cancellationReasons.some((reason) => validationCodes.includes((reason?.Code ?? reason?.code) as string)));

    const hasSizeMessage =
      this.hasDynamoSizeKeyword(err?.message as string) ||
      this.hasDynamoSizeKeyword((err?.originalError as Record<string, unknown>)?.message as string) ||
      (Array.isArray(cancellationReasons) &&
        cancellationReasons.some((reason) =>
          this.hasDynamoSizeKeyword((reason?.Message ?? reason?.message) as string),
        ));

    return hasValidationCode || hasSizeMessage;
  }

  /**
   * Check if the message contains the DynamoDB size keyword.
   * Matching is case-insensitive; non-string input never matches.
   * @param {string} message - The message to check.
   * @returns {boolean} True if the message contains the DynamoDB size keyword, false otherwise.
   */
  private hasDynamoSizeKeyword(message?: string): boolean {
    if (typeof message !== 'string') {
      return false;
    }
    const normalized = message.toLowerCase();
    return (
      normalized.includes('item size') ||
      normalized.includes('maximum allowed size') ||
      normalized.includes('exceeds') ||
      normalized.includes('400 kb') ||
      normalized.includes('400kb')
    );
  }
}