import { Disklet } from 'disklet'
import { Memlet, navigateMemlet } from 'memlet'

import {
  checkDatabaseName,
  doesDatabaseExist,
  getBucketPath,
  getConfig,
  getConfigPath,
  getOrMakeMemlet,
  getPartitionPath,
  isPositiveInteger,
  setConfig
} from './helpers'
import { BaseType, DataDump, HashBaseConfig, HashBaseOptions } from './types'

/**
 * A key/value store where every entry lives in a "bucket" selected by the
 * first `prefixSize` characters of its hash.
 */
export interface HashBase<K> {
  databaseName: string
  /** Stores `data` under `hash`, replacing any existing entry. */
  insert: (partition: string, hash: string, data: K) => Promise<void>
  /** Looks up each hash; the result has one slot per input, in order. */
  query: (partition: string, hashes: string[]) => Promise<Array<K | undefined>>
  /** Like `query`, but also removes every entry that was found. */
  delete: (
    partition: string,
    hashes: string[]
  ) => Promise<Array<K | undefined>>
  /** Returns the config together with the stored data. */
  dumpData: (
    partition?: string
  ) => Promise<DataDump<HashBaseConfig, DataDumpDataset<K>>>
}

/** Dumped entries, grouped by the partition key they were found under. */
interface DataDumpDataset<K> {
  [partition: string]: {
    [path: string]: K
  }
}

/** Per-call working set of the buckets touched by `find`. */
interface BucketDictionary<K> {
  [bucketName: string]: {
    bucketFetcher: Promise<void>
    bucketPath: string
    bucketData: {
      [hash: string]: K
    }
  }
}

/**
 * Opens an existing HashBase.
 *
 * @param storage Disklet or Memlet holding the database.
 * @param databaseName Name the database was created with.
 * @returns The HashBase API bound to that database.
 * @throws If the stored config's `type` is not `BaseType.HashBase`, or if
 *   reading the config fails.
 */
export async function openHashBase<K>(
  storage: Disklet | Memlet,
  databaseName: string
): Promise<HashBase<K>> {
  const memlet = getOrMakeMemlet(storage)
  const configData: HashBaseConfig = await getConfig(memlet, databaseName)
  if (configData.type !== BaseType.HashBase) {
    throw new Error(
      `Tried to open HashBase, but type is ${String(configData.type)}`
    )
  }

  /**
   * Shared implementation of `query` and `delete`.
   *
   * Loads each distinct bucket once, then resolves every hash against the
   * loaded data. Hashes shorter than `prefixSize` can never have been
   * inserted, so they resolve to `undefined` without touching storage.
   * When `remove` is set, found entries are deleted and only the buckets
   * that actually changed are written back.
   */
  async function find(
    partition: string,
    hashes: string[],
    remove = false
  ): Promise<Array<K | undefined>> {
    if (hashes.length === 0) {
      return []
    }

    const { prefixSize } = configData
    const bucketFetchers: Array<Promise<void>> = []
    const bucketDict: BucketDictionary<K> = {}

    for (const hash of hashes) {
      if (hash.length < prefixSize) continue
      const bucketName = hash.substring(0, prefixSize)
      if (bucketDict[bucketName] !== undefined) continue

      const bucketPath = getBucketPath(databaseName, partition, bucketName)
      // The callbacks run asynchronously, after the dictionary entry below
      // has been created, so writing through `bucketDict[bucketName]` is safe.
      const bucketFetcher = memlet.getJson(bucketPath).then(
        bucketData => {
          bucketDict[bucketName].bucketData = bucketData
        },
        () => {
          // assume bucket doesn't exist
        }
      )
      bucketDict[bucketName] = { bucketFetcher, bucketPath, bucketData: {} }
      bucketFetchers.push(bucketFetcher)
    }
    await Promise.all(bucketFetchers)

    const results: Array<K | undefined> = []
    const modifiedBuckets = new Set<string>()
    for (const hash of hashes) {
      const bucketName = hash.substring(0, prefixSize)
      const bucket =
        hash.length < prefixSize ? undefined : bucketDict[bucketName]
      // Own-property check so inherited keys such as `constructor` are not
      // reported as stored entries.
      if (
        bucket === undefined ||
        !Object.prototype.hasOwnProperty.call(bucket.bucketData, hash)
      ) {
        results.push(undefined)
        continue
      }

      results.push(bucket.bucketData[hash])
      if (remove) {
        // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
        delete bucket.bucketData[hash]
        modifiedBuckets.add(bucketName)
      }
    }

    // Persist only buckets that lost an entry. Rewriting untouched buckets
    // would create empty buckets or overwrite one whose read had failed.
    const writes = Array.from(modifiedBuckets).map(async bucketName => {
      const { bucketPath, bucketData } = bucketDict[bucketName]
      await memlet.setJson(bucketPath, bucketData)
    })
    await Promise.all(writes)

    return results
  }

  const out: HashBase<K> = {
    databaseName,

    /**
     * Stores `data` under `hash` in the bucket named by the hash prefix.
     *
     * @throws If `hash` is shorter than the configured `prefixSize`, or if
     *   writing the bucket fails.
     */
    async insert(partition: string, hash: string, data: K): Promise<void> {
      const { prefixSize } = configData
      if (hash.length < prefixSize) {
        throw new Error(
          `hash must be a string of length at least ${prefixSize}`
        )
      }

      const prefix = hash.substring(0, prefixSize)
      const bucketPath = getBucketPath(databaseName, partition, prefix)

      let bucketData: { [hash: string]: K } | undefined
      try {
        bucketData = await memlet.getJson(bucketPath)
      } catch (error) {
        // assuming bucket doesn't exist
        bucketData = undefined
      }
      // Only a failed read is tolerated; a failed write still rejects.
      await memlet.setJson(
        bucketPath,
        Object.assign(bucketData === undefined ? {} : bucketData, {
          [hash]: data
        })
      )
    },

    async query(partition: string, hashes: string[]) {
      return await find(partition, hashes)
    },

    delete: async (partition: string, hashes: string[]) => {
      return await find(partition, hashes, true)
    },

    /**
     * Collects the config and every stored entry.
     *
     * @param partition Partition to dump; the default `''` starts at the
     *   path `getPartitionPath` returns for an empty partition name.
     * @throws If a folder is found below a partition folder, or a listing
     *   entry is neither `'file'` nor `'folder'`.
     */
    async dumpData(
      partition: string = ''
    ): Promise<DataDump<HashBaseConfig, DataDumpDataset<K>>> {
      const dataDumpDataset: DataDumpDataset<K> = {}
      const configPath = getConfigPath(databaseName)

      // Recursive function for reading files/folders in the partition
      // memlet to accumulate data into dataDumpDataset.
      const dump = async (
        folder: Memlet,
        partitionKey: string
      ): Promise<void> => {
        const listing = await folder.list()
        const promises = Object.keys(listing).map(async path => {
          // Skip any entry whose path appears inside the config path.
          if (configPath.includes(path)) return

          const type = listing[path]
          if (type === 'folder') {
            // Assert that the partition is not defined because we should
            // only expect to recurse one folder level in the memlet.
            if (partitionKey !== '') {
              throw new Error('Unexpected partition hierarchy')
            }
            // Recurse into folder using the path as the partition key.
            await dump(navigateMemlet(folder, path), path)
            return
          }
          if (type === 'file') {
            // Merge the file's entries into the partition's dataset.
            const fileData = await folder.getJson(path)
            dataDumpDataset[partitionKey] = {
              ...dataDumpDataset[partitionKey],
              ...fileData
            }
            return
          }
          throw new Error(`Unknown listing type ${String(type)}`)
        })
        await Promise.all(promises)
      }

      const partitionMemlet = navigateMemlet(
        memlet,
        getPartitionPath(databaseName, partition)
      )
      await dump(partitionMemlet, partition)

      return {
        config: configData,
        data: dataDumpDataset
      }
    }
  }

  return out
}

/**
 * Creates a new HashBase and opens it.
 *
 * @param storage Disklet or Memlet that will hold the database.
 * @param options Database name and bucket prefix size.
 * @throws If `prefixSize` fails `isPositiveInteger`, if the name is
 *   rejected by `checkDatabaseName`, or if the database already exists.
 */
export async function createHashBase<K>(
  storage: Disklet | Memlet,
  options: HashBaseOptions
): Promise<HashBase<K>> {
  const memlet = getOrMakeMemlet(storage)
  const { prefixSize } = options
  const dbName = checkDatabaseName(options.name)
  if (!isPositiveInteger(prefixSize)) {
    throw new Error(`prefixSize must be a number greater than 0`)
  }

  const databaseExists = await doesDatabaseExist(memlet, dbName)
  if (databaseExists) {
    throw new Error(`database ${dbName} already exists`)
  }

  const configData: HashBaseConfig = {
    type: BaseType.HashBase,
    prefixSize
  }
  await setConfig(memlet, dbName, configData)
  return await openHashBase<K>(memlet, dbName)
}

/**
 * Creates the HashBase, or opens it when creation fails because the
 * database already exists.
 *
 * Any other failure, including thrown values that are not `Error`
 * instances, is rethrown unchanged.
 */
export async function createOrOpenHashBase<K>(
  storage: Disklet | Memlet,
  options: HashBaseOptions
): Promise<HashBase<K>> {
  const memlet = getOrMakeMemlet(storage)
  try {
    return await createHashBase<K>(memlet, options)
  } catch (error) {
    if (error instanceof Error && error.message.includes('already exists')) {
      return await openHashBase<K>(memlet, options.name)
    }
    throw error
  }
}