/**
 * @license
 * Copyright 2025 Steven Roussey
 * SPDX-License-Identifier: Apache-2.0
 */
import { DataPortSchemaObject, FromSchema, TypedArraySchemaOptions } from "@workglow/util/schema";
import { type ITabularMigration, type ITabularMigrationApplier } from "../migrations";
import { BaseTabularStorage } from "./BaseTabularStorage";
import { AnyTabularStorage, AutoGeneratedKeys, DeleteSearchCriteria, InsertEntity, Page, PageRequest, QueryOptions, SearchCriteria, SimplifyPrimaryKey, TabularChangePayload, TabularSubscribeOptions } from "./ITabularStorage";
/**
 * Service token exported alongside the HuggingFace tabular storage.
 * NOTE(review): presumably the key under which this storage is registered in
 * the service container — confirm against the registration site.
 */
export declare const HF_TABULAR_REPOSITORY: import("@workglow/util").ServiceToken;
/** Optional settings accepted by the {@link HuggingFaceTabularStorage} constructor and `fromDataset`. */
export interface HuggingFaceTabularStorageOptions {
    /** HuggingFace API token for private datasets. */
    token?: string;
    /** Base URL for the HuggingFace Dataset Viewer API. */
    baseUrl?: string;
    /**
     * Each entry is either a single key or a readonly tuple of keys.
     * NOTE(review): presumably index definitions forwarded to the base
     * storage — how a read-only HF storage uses them is not visible here.
     */
    indexes?: readonly (keyof any | readonly (keyof any)[])[];
}
/**
 * Read-only tabular storage backed by the HuggingFace Dataset Viewer API.
 * Supports both user-provided schemas and auto-detection via {@link fromDataset}.
 *
 * NOTE(review): the type-parameter list and several generic arguments (for
 * example the bare `Promise` and `ReadonlyArray` below) look truncated in this
 * copy of the declaration — confirm against fresh compiler output before
 * relying on these signatures.
 */
export declare class HuggingFaceTabularStorage, Entity = FromSchema, PrimaryKey = SimplifyPrimaryKey, Value = Omit, InsertType extends InsertEntity> = InsertEntity>> extends BaseTabularStorage {
    // Private members appear without types, as is usual in declaration output.
    // NOTE(review): the names mirror the constructor parameters and options
    // fields, so these presumably hold those values — confirm in the source.
    private readonly dataset;
    private readonly config;
    private readonly split;
    private readonly token?;
    private readonly baseUrl;
    /**
     * @param dataset - Dataset identifier (presumably the HF `owner/name` repo id — confirm).
     * @param config - Dataset configuration name.
     * @param split - Dataset split name.
     * @param schema - User-provided schema describing the rows.
     * @param primaryKeyNames - Names of the schema properties forming the primary key.
     * @param options - Optional token, base URL and index settings.
     * @param tabularMigrations - Optional migrations; see {@link getMigrationApplier} for their limits on HF storages.
     */
    constructor(dataset: string, config: string, split: string, schema: Schema, primaryKeyNames: PrimaryKeyNames, options?: HuggingFaceTabularStorageOptions, tabularMigrations?: ReadonlyArray);
    /** Fetches the dataset features and converts them to a JSON Schema. */
    static fromDataset(dataset: string, config: string, split: string, options?: HuggingFaceTabularStorageOptions): Promise>;
    /** Validates the dataset exists and that any user-provided schema lines up. */
    setupDatabase(): Promise;
    /**
     * Returns an in-memory applier for HF tabular storage.
     *
     * NOTE: HF datasets are read-only; backfill ops will throw because `put`
     * is unsupported. DDL ops are no-ops (records are JS objects). Migrations
     * on HF storages are useful only for advancing bookkeeping in lockstep
     * with a producer that re-publishes datasets.
     *
     * The declared return type also permits `null`.
     */
    getMigrationApplier(): ITabularMigrationApplier | null;
    /** Looks up a single row by primary key. */
    get(key: PrimaryKey): Promise;
    /** Reads rows, optionally shaped by `options`. */
    getAll(options?: QueryOptions): Promise;
    /** Reads up to `limit` rows starting at row `offset`. */
    getOffsetPage(offset: number, limit: number): Promise;
    /**
     * HuggingFace datasets are read-only, so the concurrency-stability that
     * keyset pagination provides is unnecessary. The HF API also doesn't
     * expose tuple comparisons, so we drive cursor pagination from the
     * `/rows` endpoint's offset and encode the next offset in the cursor.
     *
     * The HF `/rows` endpoint caps each fetch at 100 rows, but the
     * {@link ITabularStorage.getPage} contract lets callers ask for any
     * positive limit. Loop in 100-row chunks until we either fill the
     * caller's `limit` or hit the end of the dataset, so a
     * `getPage({ limit: 200 })` doesn't silently return only 100 rows with a
     * `nextCursor` of `undefined` (which would terminate iteration despite
     * more data).
     */
    getPage(request?: PageRequest): Promise>;
    /** Resolves with the row count (presumably of the configured split — confirm). */
    size(): Promise;
    /** Write operation; `put` is unsupported because HF datasets are read-only. */
    put(_value: InsertType): Promise;
    /** Bulk write operation. NOTE(review): presumably rejected like `put` on this read-only storage — confirm. */
    putBulk(_values: InsertType[]): Promise;
    /** Delete operation. NOTE(review): presumably rejected on this read-only storage — confirm. */
    delete(_value: PrimaryKey | Entity): Promise;
    /** Delete-everything operation. NOTE(review): presumably rejected on this read-only storage — confirm. */
    deleteAll(): Promise;
    /** The HF /filter endpoint supports equality only; other operators throw. */
    query(criteria: SearchCriteria, options?: QueryOptions): Promise;
    /** Criteria-based delete. NOTE(review): presumably rejected on this read-only storage — confirm. */
    deleteSearch(_criteria: DeleteSearchCriteria): Promise;
    /**
     * Registers a change callback and returns a function.
     * NOTE(review): the returned function is presumably an unsubscribe handle;
     * whether a read-only dataset ever emits changes is not visible here.
     */
    subscribeToChanges(_callback: (change: TabularChangePayload) => void, _options?: TabularSubscribeOptions): () => void;
    /** Synchronous teardown hook; what it releases is not visible in this declaration. */
    destroy(): void;
    // Private helpers with their signatures omitted.
    // NOTE(review): going by the names, `fetchApi` presumably performs Dataset
    // Viewer API requests and `rowToEntity` maps an API row to an entity — confirm.
    private fetchApi;
    private rowToEntity;
}
//# sourceMappingURL=HuggingFaceTabularStorage.d.ts.map