import * as _ai_sdk_provider from '@ai-sdk/provider'; import { EmbeddingModelV3 } from '@ai-sdk/provider'; import { ToolSet, generateText, StepResult, streamText } from 'ai'; import z$1, { z } from 'zod'; import { j as AICallLogger } from './interface-DMzwv0lD.js'; import { b as StageContext, C as CheckCompletionContext, a as Stage } from './stage-DHgQdIcT.js'; import { b as SuspendedStateSchema, C as CompletionCheckResult } from './types-ByGg__Kd.js'; import './events-B3XPPu0c.js'; /* NOTE(review): several declarations below reference type parameters that are never declared (e.g. T, TInput, TContext, K) and contain unbalanced closing angle brackets, which suggests the generic parameter lists were stripped from this declaration bundle - confirm against the build that produced it before relying on these signatures. */ /** * Schema Helpers and Utilities * * Provides common schemas and utilities for building type-safe workflows. * Reduces boilerplate and enforces best practices. */ /** * Constant for stages that don't need sequential input * Use when a stage receives data from workflowContext instead of the input parameter * * @example * export const myStage: Stage< * typeof NoInputSchema, // Explicit: this stage uses workflowContext * typeof OutputSchema, * typeof ConfigSchema * > = { * inputSchema: NoInputSchema, * // ... * }; */ declare const NoInputSchema: z.ZodObject<{}, z.core.$strip>; /** * Access previous stage output with guaranteed type safety * * Requires that the stage output exists, throws clear error if missing. * Use this for required dependencies on previous stages. 
* * @param workflowContext - The workflow context containing all previous stage outputs * @param stageId - ID of the stage to access * @param field - Optional: specific field to extract from stage output * @returns The stage output (or field within it) * @throws Error if stage or field is missing * * @example * // Get entire stage output * const extractedData = requireStageOutput( * context.workflowContext, * "data-extraction" * ); * * // Get specific field * const guidelines = requireStageOutput( * context.workflowContext, * "guidelines", * "guidelines" * ); */ declare function requireStageOutput(workflowContext: Record, stageId: string, field?: string): T; /** * Stage Factory - Simplified stage definition with auto-metrics * * Provides a `defineStage()` function that reduces boilerplate by: * - Inferring types from schemas * - Auto-calculating metrics (timing handled by executor) * - Adding fluent context helpers (require/optional) * - Supporting both sync and async-batch modes * * @example * ```typescript * export const myStage = defineStage({ * id: "my-stage", * name: "My Stage", * description: "Does something useful", * dependencies: ["previous-stage"], * * schemas: { * input: InputSchema, // or "none" for NoInputSchema * output: OutputSchema, * config: ConfigSchema, * }, * * async execute(ctx) { * const prevData = ctx.require("previous-stage"); * // ... stage logic * return { output: { ... } }; * }, * }); * ``` */ /** * Helper type to safely infer input type, handling the "none" special case */ type InferInput = TInput extends "none" ? z.infer : TInput extends z.ZodTypeAny ? 
z.infer : never; /** * Enhanced stage context with fluent helpers */ interface EnhancedStageContext> extends StageContext { /** * Require output from a previous stage (throws if not found) * * @example * const { extractedData } = ctx.require("data-extraction"); */ require: (stageId: K) => TContext[K]; /** * Optionally get output from a previous stage (returns undefined if not found) * * @example * const optionalData = ctx.optional("optional-stage"); * if (optionalData) { ... } */ optional: (stageId: K) => TContext[K] | undefined; } /** * Simplified execute result - just output and optional custom metrics */ interface SimpleStageResult { output: TOutput; /** * Custom metrics specific to this stage (e.g., itemsProcessed, sectionsFound) * Timing metrics (startTime, endTime, duration) are auto-calculated by executor * AI metrics should be added here by stages that create their own AIHelper */ customMetrics?: Record; /** * Optional artifacts to store */ artifacts?: Record; } /** * Simplified suspended result - metrics are auto-filled by the factory */ interface SimpleSuspendedResult { suspended: true; state: { batchId: string; submittedAt: string; pollInterval: number; maxWaitTime: number; metadata?: Record; apiKey?: string; }; pollConfig: { pollInterval: number; maxWaitTime: number; nextPollAt: Date; }; /** * Optional custom metrics (timing & AI metrics are auto-filled) */ customMetrics?: Record; } /** * Sync stage definition */ interface SyncStageDefinition = Record, TId extends string = string> { /** Unique stage identifier */ id: TId; /** Human-readable name */ name: string; /** Optional description */ description?: string; /** Stage IDs this stage depends on (validated at workflow build time) */ dependencies?: string[]; /** Zod schemas for validation */ schemas: { /** Input schema, or "none" for stages that use workflowContext */ input: TInput; /** Output schema */ output: TOutput; /** Configuration schema */ config: TConfig; }; /** * Execute the stage logic * 
Return just { output } - metrics are auto-calculated */ execute: (ctx: EnhancedStageContext, z.infer, TContext>) => Promise>>; /** * Optional: Estimate cost before execution */ estimateCost?: (input: InferInput, config: z.infer) => number; } /** * Async-batch stage definition (for long-running batch jobs) */ interface AsyncBatchStageDefinition = Record, TId extends string = string> extends Omit, "execute"> { /** Mark as async-batch mode */ mode: "async-batch"; /** * Execute the stage - either return result or suspend for batch processing * * When resuming from suspension, ctx.resumeState contains the suspended state. * Check this to determine whether to submit a new batch or fetch results. * * Return SimpleSuspendedResult when suspending - metrics will be auto-filled. */ execute: (ctx: EnhancedStageContext, z.infer, TContext>) => Promise> | SimpleSuspendedResult>; /** * Check if the batch job is complete * Called by the orchestrator when polling suspended stages * * Context includes workflowRunId, stageId, config, log, and storage * so you don't need to store these in metadata. */ checkCompletion: (suspendedState: z.infer, context: CheckCompletionContext>) => Promise>>; } /** * Define a sync stage with simplified API */ declare function defineStage = Record>(definition: SyncStageDefinition): Stage; /** * Define an async-batch stage with simplified API */ declare function defineStage = Record>(definition: AsyncBatchStageDefinition): Stage; /** * Define an async-batch stage with proper type inference for checkCompletion * * This is a dedicated function (not an alias) to ensure TypeScript properly * infers callback parameter types without overload resolution ambiguity. 
*/ declare function defineAsyncBatchStage = Record>(definition: AsyncBatchStageDefinition): Stage; /** * Model Helper - Centralized model selection and cost tracking for AI scripts */ interface ModelConfig { id: string; name: string; inputCostPerMillion: number; outputCostPerMillion: number; provider: string; description?: string; supportsAsyncBatch?: boolean; batchDiscountPercent?: number; isEmbeddingModel?: boolean; supportsTools?: boolean; supportsStructuredOutputs?: boolean; contextLength?: number; maxCompletionTokens?: number | null; } /** * Filter options for listModels() */ interface ModelFilter { /** Only include embedding models */ isEmbeddingModel?: boolean; /** Only include models that support function calling */ supportsTools?: boolean; /** Only include models that support structured outputs */ supportsStructuredOutputs?: boolean; /** Only include models that support async batch */ supportsAsyncBatch?: boolean; } /** * Configuration for workflow-engine.models.ts sync config */ interface ModelSyncConfig { /** Only include models matching these patterns (applied before exclude) */ include?: (string | RegExp)[]; /** Output path relative to consumer's project root (default: src/generated/models.ts) */ outputPath?: string; /** Patterns to exclude models (string for exact match, RegExp for pattern) */ exclude?: (string | RegExp)[]; /** Custom models to add (embeddings, rerankers, etc.) 
*/ customModels?: Record; } /** * Model Registry - augmented by consumer's generated file for autocomplete * Import the generated file to populate this interface */ interface ModelRegistry { } /** * Register models at runtime (called by generated file) */ declare function registerModels(models: Record): void; /** * Get a model from the runtime registry */ declare function getRegisteredModel(key: string): ModelConfig | undefined; /** * List all registered models */ declare function listRegisteredModels(): Array<{ key: string; config: ModelConfig; }>; interface ModelStats { modelId: string; modelName: string; apiCalls: number; inputTokens: number; outputTokens: number; totalTokens: number; inputCost: number; outputCost: number; totalCost: number; } /** * Static enum for built-in models - provides .enum accessor for AVAILABLE_MODELS keys */ declare const ModelKeyEnum: z$1.ZodEnum<{ "gemini-2.5-flash": "gemini-2.5-flash"; }>; /** * Type representing all available model keys * Supports both built-in enum keys AND dynamically registered keys via ModelRegistry */ type ModelKey = z$1.infer | keyof ModelRegistry; /** * Zod schema that validates model keys against both the static enum AND the runtime registry * Use ModelKey.parse() to validate and type model key strings */ declare const ModelKey: z$1.ZodPipe>; /** * Available AI models with their configurations * Prices should be updated regularly from provider pricing pages */ declare const AVAILABLE_MODELS: Record; /** * Default model selection * Change this to switch the default model across all scripts */ declare const DEFAULT_MODEL_KEY: ModelKey; /** * Get a model configuration by key * Checks both built-in AVAILABLE_MODELS and runtime MODEL_REGISTRY */ declare function getModel(key: ModelKey): ModelConfig; /** * Get the default model configuration */ declare function getDefaultModel(): ModelConfig; /** * List all available models (built-in + registered) * @param filter Optional filter to narrow down models by 
capability */ declare function listModels(filter?: ModelFilter): Array<{ key: string; config: ModelConfig; }>; /** * Check if a model supports async batch processing */ declare function modelSupportsBatch(modelKey: ModelKey): boolean; /** * Interface for model with bound recording function * Useful for parallel execution where you want to pass model + recordCall together */ interface ModelWithRecorder { id: string; name: string; recordCall: (inputTokens: number, outputTokens: number) => void; } /** * Get model by key with bound recordCall function * Perfect for parallel execution - no need to write model name twice * * Usage: * const model = getModelById("gemini-2.5-flash", modelTracker); * const result = await generateText({ * model: openRouter(model.id), * prompt: "...", * }); * model.recordCall(result.usage.inputTokens, result.usage.outputTokens); */ declare function getModelById(modelKey: ModelKey, tracker?: ModelStatsTracker): ModelWithRecorder; /** * Calculate costs based on token usage */ declare function calculateCost(modelKey: ModelKey, inputTokens: number, outputTokens: number): { inputCost: number; outputCost: number; totalCost: number; }; /** * Model stats tracker class - tracks single model OR aggregates multiple models */ declare class ModelStatsTracker { private modelKey?; private modelConfig?; private stats; private perModelStats; private isAggregating; constructor(modelKey?: ModelKey); /** * Create an aggregating tracker that combines stats from multiple models * Perfect for parallel execution where different calls use different models */ static createAggregating(): ModelStatsTracker; /** * Get a model helper with bound recordCall for parallel execution * Perfect for running multiple AI calls in parallel with different models * * Usage: * const flashModel = tracker.getModelById("gemini-2.5-flash"); * const liteModel = tracker.getModelById("gemini-2.5-flash-lite"); * * const [result1, result2] = await Promise.all([ * generateText({ * model: 
openRouter(flashModel.id), * prompt: prompt1, * }).then(r => { flashModel.recordCall(r.usage.inputTokens, r.usage.outputTokens); return r; }), * generateText({ * model: openRouter(liteModel.id), * prompt: prompt2, * }).then(r => { liteModel.recordCall(r.usage.inputTokens, r.usage.outputTokens); return r; }), * ]); */ getModelById(modelKey: ModelKey): { id: string; name: string; recordCall: (inputTokens: number, outputTokens: number) => void; }; /** * Record an API call with token usage * * For sequential execution: * tracker.switchModel("gemini-2.5-flash") * tracker.recordCall(inputTokens, outputTokens) * * For parallel execution: * tracker.recordCall(inputTokens, outputTokens, "gemini-2.5-flash") * tracker.recordCall(inputTokens, outputTokens, "gemini-2.5-pro") * * NOTE(review): no switchModel member is declared on this class in this file - confirm the sequential example above is still current. */ recordCall(inputTokens?: number, outputTokens?: number, modelKeyOverride?: ModelKey): void; /** * Estimate cost for a prompt without making an API call * Useful for dry-run mode to preview costs * * Note: This method is async because it lazy-loads the tiktoken library * to avoid bundling 2MB of tokenizer data for browser clients. 
* * @param prompt - The prompt text to estimate * @param estimatedOutputTokens - Estimated number of output tokens (default: 500) * @returns Object with token counts and cost estimates */ estimateCost(prompt: string, estimatedOutputTokens?: number): Promise<{ inputTokens: number; outputTokens: number; totalTokens: number; inputCost: number; outputCost: number; totalCost: number; }>; /** * Get current statistics (single model or aggregated) * Returns null only if tracker is in aggregating mode - use getAggregatedStats() instead */ getStats(): ModelStats | null; /** * Get aggregated statistics from all models */ getAggregatedStats(): { perModel: ModelStats[]; totals: { totalApiCalls: number; totalInputTokens: number; totalOutputTokens: number; totalTokens: number; totalInputCost: number; totalOutputCost: number; totalCost: number; }; }; /** * Print statistics to console */ printStats(): void; /** * Print aggregated statistics from all models */ printAggregatedStats(): void; /** * Reset statistics */ reset(): void; } /** * Print available models to console */ declare function printAvailableModels(): void; /** * Register a custom embedding provider factory. * * Consumers can register any AI SDK community provider (Voyage, Cohere, Jina, etc.) * without modifying the workflow engine. The factory receives the model ID and must * return an EmbeddingModelV3 instance. * * @example * ```typescript * import { registerEmbeddingProvider } from "@bratsos/workflow-engine"; * import { voyage } from "voyage-ai-provider"; * * registerEmbeddingProvider("voyage", (modelId) => voyage.embeddingModel(modelId)); * ``` */ declare function registerEmbeddingProvider(providerName: string, factory: (modelId: string) => EmbeddingModelV3): void; /** * Custom provider resolver. Given a ModelConfig, return an AI SDK * LanguageModel to use, or null/undefined to fall back to built-in resolution. 
*/ type ProviderResolver = (modelConfig: ModelConfig) => _ai_sdk_provider.LanguageModelV3 | null | undefined; type AICallType = "text" | "object" | "embed" | "stream" | "batch"; interface AITextResult { text: string; inputTokens: number; outputTokens: number; cost: number; /** Structured output when experimental_output is used */ output?: any; /** * Reasoning/thinking text emitted by the model, when available. Reasoning * models (e.g. via `providerOptions.anthropic.thinking` or OpenRouter's * `reasoning`) emit on a separate channel; this surfaces it. It is NOT part * of `text` (the answer). Undefined when the model produced no reasoning. */ reasoning?: string; } interface AIObjectResult { object: T; inputTokens: number; outputTokens: number; cost: number; } /** * Result of an embed call. * NOTE(review): presumably `embedding` is the single (or first) vector and `embeddings` holds one vector per input text - TODO confirm against the implementation. */ interface AIEmbedResult { embedding: number[]; embeddings: number[][]; dimensions: number; inputTokens: number; cost: number; } type AISDKStreamResult = ReturnType; interface AIStreamResult { stream: AsyncIterable; getUsage(): Promise<{ inputTokens: number; outputTokens: number; cost: number; }>; /** * The full answer text, after the stream completes. Consumes the stream if * not already consumed. When the model streamed nothing on the text channel * (e.g. a reasoning model), this reconciles against the buffered final text, * so a text answer is never lost. May be `""` for a reasoning-only response — * in that case the content is in {@link getReasoning}. */ getText(): Promise; /** * The model's reasoning/thinking text, when available. Reasoning is a * separate channel from the answer; a model can reason without emitting any * answer text (in which case `getText()` is empty but this is not). Resolves * to `undefined` when the model produced no reasoning. */ getReasoning(): Promise; /** The raw AI SDK result - use this for methods like toUIMessageStreamResponse */ rawResult: AISDKStreamResult; } /** * Context for logging to workflow persistence (optional). * When provided, batch operations can log to the database. 
*/ interface LogContext { workflowRunId: string; stageRecordId: string; /** Function to create a log entry in persistence */ createLog: (data: { workflowRunId: string; workflowStageId: string; level: "DEBUG" | "INFO" | "WARN" | "ERROR"; message: string; metadata?: Record; }) => Promise; } /** Log function type for batch operations */ type BatchLogFn = (level: "DEBUG" | "INFO" | "WARN" | "ERROR", message: string, meta?: Record) => void; interface TextOptions { temperature?: number; maxTokens?: number; /** Tool definitions for the model to use */ tools?: TTools; /** Tool choice: 'auto' (default), 'required' (force tool use), 'none', or specific tool name */ toolChoice?: Parameters[0]["toolChoice"]; /** Condition to stop tool execution (e.g., stepCountIs(3)) */ stopWhen?: Parameters[0]["stopWhen"]; /** Callback fired when each step completes (for collecting tool results) */ onStepFinish?: (stepResult: StepResult) => Promise | void; /** Experimental structured output - use with tools for combined tool calling + structured output */ experimental_output?: Parameters[0]["experimental_output"]; /** * Provider-specific options passed directly to the AI SDK call. Use this to * control reasoning per call, e.g. * `{ openrouter: { reasoning: { enabled: false } } }` or * `{ anthropic: { thinking: { type: "disabled" } } }`. */ providerOptions?: Record>; } interface ObjectOptions { temperature?: number; maxTokens?: number; /** Tool definitions for the model to use */ tools?: TTools; /** Condition to stop tool execution (e.g., stepCountIs(3)) */ stopWhen?: Parameters[0]["stopWhen"]; /** Callback fired when each step completes (for collecting tool results) */ onStepFinish?: (stepResult: StepResult) => Promise | void; /** * Provider-specific options passed directly to the AI SDK call (e.g. * `{ anthropic: { thinking: { type: "disabled" } } }`). 
*/ providerOptions?: Record>; } interface EmbedOptions { taskType?: "RETRIEVAL_QUERY" | "RETRIEVAL_DOCUMENT" | "SEMANTIC_SIMILARITY"; /** Override the default embedding dimensions (from embedding-config.ts) */ dimensions?: number; /** Provider-specific options passed directly to the AI SDK's embed() call */ providerOptions?: Record>; } interface StreamOptions { temperature?: number; maxTokens?: number; onChunk?: (chunk: string) => void; /** Tool definitions for the model to use */ tools?: Parameters[0]["tools"]; /** Condition to stop tool execution (e.g., stepCountIs(3)) */ stopWhen?: Parameters[0]["stopWhen"]; /** Callback fired when each step completes (for collecting tool results) */ onStepFinish?: Parameters[0]["onStepFinish"]; /** * Provider-specific options passed directly to the AI SDK call. Use this to * control reasoning per call, e.g. * `{ openrouter: { reasoning: { enabled: false } } }` or * `{ anthropic: { thinking: { type: "disabled" } } }`. */ providerOptions?: Record>; } interface MediaPart { type: "file"; data: Buffer | Uint8Array | string; mediaType: string; filename?: string; } interface TextPart { type: "text"; text: string; } type ContentPart = TextPart | MediaPart; type TextInput = string | ContentPart[]; type StreamTextInput = { prompt: string; messages?: never; system?: string; } | { messages: Parameters[0]["messages"]; prompt?: never; system?: string; }; /** Provider identifier for batch operations */ type AIBatchProvider = "google" | "anthropic" | "openai"; /** A request to be processed in a batch */ interface AIBatchRequest { /** Unique identifier for this request (used to match results) */ id: string; /** The prompt to send to the model */ prompt: string; /** Optional Zod schema for structured JSON output */ schema?: z.ZodTypeAny; } /** Result of a single request in a batch */ interface AIBatchResult { /** The request ID (matches the id from AIBatchRequest) */ id: string; /** Original prompt (may be empty if not available from provider) 
*/ prompt: string; /** The parsed result (JSON object if schema was provided, otherwise string) */ result: T; /** Input tokens used */ inputTokens: number; /** Output tokens used */ outputTokens: number; /** Status of this individual result */ status: "succeeded" | "failed"; /** Error message if status is "failed" */ error?: string; } /** Handle for tracking a submitted batch */ interface AIBatchHandle { /** Batch identifier from the provider */ id: string; /** Current status of the batch */ status: "pending" | "processing" | "completed" | "failed"; /** The provider used for this batch (for resume support) */ provider?: AIBatchProvider; } /** Interface for batch operations on an AI model */ interface AIBatch { /** Submit requests for batch processing */ submit(requests: AIBatchRequest[]): Promise; /** Check the status of a batch */ getStatus(batchId: string): Promise; /** Retrieve results from a completed batch */ getResults(batchId: string, metadata?: Record): Promise[]>; /** Check if results have been recorded for this batch */ isRecorded(batchId: string): Promise; /** Record batch results manually when batch provider integration is not implemented */ recordResults(batchId: string, results: AIBatchResult[]): Promise; } interface RecordCallParams { modelKey: ModelKey; callType: AICallType; prompt: string; response: string; inputTokens: number; outputTokens: number; metadata?: Record; } interface AIHelperStats { totalCalls: number; totalInputTokens: number; totalOutputTokens: number; totalCost: number; perModel: Record; } interface AIHelper { /** Current topic path */ readonly topic: string; generateText(modelKey: ModelKey, prompt: TextInput, options?: TextOptions): Promise; generateObject(modelKey: ModelKey, prompt: TextInput, schema: TSchema, options?: ObjectOptions): Promise>>; embed(modelKey: ModelKey, text: string | string[], options?: EmbedOptions): Promise; streamText(modelKey: ModelKey, input: StreamTextInput, options?: StreamOptions): AIStreamResult; 
batch(modelKey: ModelKey, provider?: AIBatchProvider): AIBatch; /** Derive a helper for a nested topic; per the createAIHelper example, createChild("stage", "extraction") on "workflow.abc123" yields the topic "workflow.abc123.stage.extraction". */ createChild(segment: string, id?: string): AIHelper; recordCall(params: RecordCallParams): void; recordCall(modelKey: ModelKey, prompt: string, response: string, tokens: { input: number; output: number; }, options?: { callType?: AICallType; isBatch?: boolean; metadata?: Record; }): void; getStats(): Promise; } /** * Create an AI helper instance with topic-based tracking. * * @param topic - Initial topic path (e.g., "workflow.abc123" or "reranker") * @param logger - AICallLogger instance (required by the signature) * @param logContext - Optional LogContext; when provided, batch operations can log to the database * @param providerResolver - Optional ProviderResolver; returns the model to use for a ModelConfig, or null/undefined to fall back to built-in resolution * @returns AIHelper instance * * @example * ```typescript * // Simple topic * const ai = createAIHelper("reranker", logger); * * // Hierarchical topic * const ai = createAIHelper("workflow.abc123", logger) * .createChild("stage", "extraction"); * // topic: "workflow.abc123.stage.extraction" * * // Use AI methods * const result = await ai.generateText("gemini-2.5-flash", prompt); * ``` */ declare function createAIHelper(topic: string, logger: AICallLogger, logContext?: LogContext, providerResolver?: ProviderResolver): AIHelper; export { type AIBatch as A, type BatchLogFn as B, getModelById as C, DEFAULT_MODEL_KEY as D, type EmbedOptions as E, getRegisteredModel as F, listModels as G, listRegisteredModels as H, type InferInput as I, modelSupportsBatch as J, printAvailableModels as K, type LogContext as L, ModelKey as M, NoInputSchema as N, type ObjectOptions as O, registerEmbeddingProvider as P, registerModels as Q, type RecordCallParams as R, type SimpleStageResult as S, type TextOptions as T, requireStageOutput as U, type ModelFilter as V, type AIBatchHandle as a, type AIBatchProvider as b, type AIBatchRequest as c, type AIBatchResult as d, type AICallType as e, type AIEmbedResult as f, type AIHelper as g, type AIObjectResult as h, type AIStreamResult as i, type AITextResult as j, AVAILABLE_MODELS as k, type AsyncBatchStageDefinition as l, type EnhancedStageContext as m, type ModelConfig as n, type ModelRegistry as o, type ModelStats as p, ModelStatsTracker as q, 
type ModelSyncConfig as r, type StreamOptions as s, type SyncStageDefinition as t, calculateCost as u, createAIHelper as v, defineAsyncBatchStage as w, defineStage as x, getDefaultModel as y, getModel as z };