/** * TypeScript type definitions for the Flare WASM module. * These map to the wasm-bindgen exports from flare-web. */ /** Check if WebGPU is available in this browser. */ export declare function webgpu_available(): boolean; /** Get JSON string with device capabilities. */ export declare function device_info(): string; /** Initialize the Flare engine (async). */ export declare function init(): Promise; /** Progress callback: (loaded_bytes, total_bytes). total_bytes is 0 when unknown. */ export type ProgressCallback = (loaded: number, total: number) => void; /** * The Flare inference engine. * Load a GGUF model, then use the streaming API (begin_stream / next_token) * or the batch API (generate_tokens) to run inference. */ export declare class FlareEngine { /** Load a GGUF model from raw bytes. */ static load(ggufBytes: Uint8Array): FlareEngine; /** Reset the KV cache (start a new conversation). */ reset(): void; /** Vocabulary size. */ readonly vocab_size: number; /** Number of transformer layers. */ readonly num_layers: number; /** Hidden dimension. */ readonly hidden_dim: number; /** * Auto-detected chat template name: "ChatML", "Llama3", "Alpaca", or "Raw". * Detected from the GGUF tokenizer.chat_template metadata, falling back to * architecture-based detection. */ readonly chat_template_name: string; /** * EOS token ID read from the GGUF model metadata, if present. * The generator stops automatically when this token is produced. */ readonly eos_token_id: number | undefined; /** * Format a user message and optional system prompt using the model's chat * template. Pass the result to FlareTokenizer.encode() before generating. * Pass an empty string for systemMessage to omit the system turn. */ apply_chat_template(userMessage: string, systemMessage: string): string; // --- Token-by-token streaming API --- /** * Prepare for token-by-token streaming. Runs the prefill pass on * promptTokens and initialises internal streaming state. Call engine.reset() * first to start a fresh conversation, then call next_token() in a * requestAnimationFrame loop. */ begin_stream(promptTokens: Uint32Array, maxTokens: number): void; /** * Generate the next token and return its ID, or undefined when the stream is * complete (EOS reached, maxTokens exhausted, or stop_stream() was called). * Call inside requestAnimationFrame so the browser can update the DOM between * tokens. */ next_token(): number | undefined; /** Signal the current stream to stop after the next next_token() call. */ stop_stream(): void; /** Whether the current stream has finished. */ readonly stream_done: boolean; // --- Batch generation API (returns all tokens at once) --- /** Generate tokens (greedy, temperature=0). Stops at EOS. Returns token ID array. */ generate_tokens(promptTokens: Uint32Array, maxTokens: number): Uint32Array; /** Generate tokens with sampling parameters. Stops at EOS. */ generate_with_params( promptTokens: Uint32Array, maxTokens: number, temperature: number, topP: number ): Uint32Array; } /** * Progressive loader: fetches a GGUF model from a URL with streaming download * progress, then parses and returns a FlareEngine. */ export declare class FlareProgressiveLoader { constructor(url: string); /** Fetch, stream, and parse the model. Calls onProgress as chunks arrive. */ load(onProgress: ProgressCallback): Promise; } /** * BPE tokenizer: encode text to token IDs and decode token IDs back to text. * Load from a HuggingFace tokenizer.json string. */ export declare class FlareTokenizer { /** Load from a tokenizer.json string. */ static from_json(json: string): FlareTokenizer; /** Encode text to token IDs. */ encode(text: string): Uint32Array; /** Decode token IDs to text. */ decode(tokens: Uint32Array): string; /** Decode a single token ID to text (for streaming). */ decode_one(tokenId: number): string; /** BOS token ID (may be undefined). */ readonly bos_token_id: number | undefined; /** EOS token ID (may be undefined). */ readonly eos_token_id: number | undefined; /** Vocabulary size. */ readonly vocab_size: number; } /** * BPE tokenizer: encode text to token IDs and decode token IDs back to text. * Load from a HuggingFace tokenizer.json string. */ export declare class FlareTokenizer { /** Load from a tokenizer.json string. */ static from_json(json: string): FlareTokenizer; /** Encode text to token IDs. */ encode(text: string): Uint32Array; /** Decode token IDs to text. */ decode(tokens: Uint32Array): string; /** Decode a single token ID to text (for streaming). */ decode_one(tokenId: number): string; /** BOS token ID (may be undefined). */ readonly bos_token_id: number | undefined; /** EOS token ID (may be undefined). */ readonly eos_token_id: number | undefined; /** Vocabulary size. */ readonly vocab_size: number; }