import type { SomeJSONSchema } from 'ajv/dist/types/json-schema';
import type { Sharp } from 'sharp';
import type { BuiltInEngineName } from '../engines/index.js';
import type { Logger } from '../lib/logger.js';
import type { ModelPool } from '../pool.js';
import type { ModelStore } from '../store.js';
import type {
  AssistantMessage,
  ChatMessage,
  CompletionFinishReason,
  TextCompletionParams,
  ToolDefinition,
} from '../types/completions.js';
import type { ContextShiftStrategy } from '../engines/node-llama-cpp/types.js';
import type {
  StableDiffusionWeightType,
  StableDiffusionSamplingMethod,
  StableDiffusionSchedule,
} from '../engines/stable-diffusion-cpp/types.js';
import type {
  TransformersJsModelClass,
  TransformersJsTokenizerClass,
  TransformersJsProcessorClass,
  TransformersJsDataType,
} from '../engines/transformers-js/types.js';

export * from '../types/completions.js';

/** Task identifiers that have a dedicated request/result shape in this module. */
export type ModelTaskType =
  | 'text-completion'
  | 'embedding'
  | 'image-to-text'
  | 'image-to-image'
  | 'text-to-image'
  | 'speech-to-text';

/**
 * Options common to every model definition.
 *
 * `engine` and `task` accept the known literals plus any other string; the
 * `(string & {})` member keeps editor completion for the literals while still
 * allowing arbitrary values.
 */
export interface ModelOptionsBase {
  engine: BuiltInEngineName | (string & {});
  task: ModelTaskType | (string & {});
  prepare?: 'blocking' | 'async' | 'on-demand';
  minInstances?: number;
  maxInstances?: number;
  location?: string;
}

/** Model options narrowed to a built-in engine and a known task. */
export interface BuiltInModelOptionsBase extends ModelOptionsBase {
  engine: BuiltInEngineName;
  task: ModelTaskType;
  url?: string;
  location?: string;
}

/** Model options with an `id`, required instance bounds and a cache path. */
export interface ModelConfigBase extends ModelOptionsBase {
  id: string;
  minInstances: number;
  maxInstances: number;
  modelsCachePath: string;
}

/** Model configuration as carried by {@link EngineContext} by default. */
export interface ModelConfig extends ModelConfigBase {
  url?: string;
  location?: string;
  task: ModelTaskType | (string & {});
  engine: BuiltInEngineName | (string & {});
  ttl?: number;
  prefix?: string;
  initialMessages?: ChatMessage[];
  device?: {
    gpu?: boolean | 'auto' | (string & {});
  };
}

/** A piece of completion output: token ids and their text. */
export interface CompletionChunk {
  tokens: number[];
  text: string;
}

/** Per-call processing controls shared by all tasks. */
export interface ProcessingOptions {
  timeout?: number;
  signal?: AbortSignal;
}

/** An image represented by a `sharp` handle plus its dimensions. */
export interface Image {
  handle: Sharp;
  width: number;
  height: number;
  channels: 1 | 2 | 3 | 4;
}

/** Processing options for completions, with an optional chunk callback. */
export interface CompletionProcessingOptions extends ProcessingOptions {
  onChunk?: (chunk: CompletionChunk) => void;
}

/** Processing options for speech-to-text, with an optional text-chunk callback. */
export interface SpeechToTextProcessingOptions extends ProcessingOptions {
  onChunk?: (chunk: { text: string }) => void;
}

/**
 * Context object passed to engine functions.
 *
 * NOTE(review): the type parameter list was missing from this declaration
 * while the body referenced `TModelConfig` and `TModelMeta`; the defaults
 * chosen here (`ModelConfig` / `unknown`) are a reconstruction - confirm
 * against the engine implementations.
 *
 * @typeParam TModelConfig - Type of the `config` member.
 * @typeParam TModelMeta - Type of the optional `meta` member.
 */
export interface EngineContext<TModelConfig = ModelConfig, TModelMeta = unknown> {
  config: TModelConfig;
  meta?: TModelMeta;
  log: Logger;
}

/** Fields shared by text and chat completion requests. */
export interface TextCompletionRequestBase extends TextCompletionParams {
  model: string;
  stream?: boolean;
}

/** Completion request driven by an optional raw prompt. */
export interface TextCompletionRequest extends TextCompletionRequestBase {
  prompt?: string;
}

/** Completion request driven by a list of chat messages. */
export interface ChatCompletionRequest extends TextCompletionRequestBase {
  messages: ChatMessage[];
  grammar?: string;
  /** Tool definitions keyed by string. */
  tools?: Record<string, ToolDefinition>;
}

/** Embedding input holding text content. */
export interface TextEmbeddingInput {
  type: 'text';
  content: string;
}

/** Embedding input holding an {@link Image}. */
export interface ImageEmbeddingInput {
  type: 'image';
  content: Image;
}

/** Any accepted embedding input; a bare string is allowed alongside the tagged forms. */
export type EmbeddingInput = TextEmbeddingInput | ImageEmbeddingInput | string;

/** Request for embeddings of one input or a list of inputs. */
export interface EmbeddingRequest {
  model: string;
  input: EmbeddingInput | EmbeddingInput[];
  dimensions?: number;
  pooling?: 'cls' | 'mean';
}

/** Request to produce text from an image. */
export interface ImageToTextRequest {
  model: string;
  image: Image;
  prompt?: string;
  maxTokens?: number;
}

/** Generation parameters shared by the text-to-image and image-to-image requests. */
export interface StableDiffusionRequest {
  negativePrompt?: string;
  guidance?: number;
  styleRatio?: number;
  strength?: number;
  sampleSteps?: number;
  batchCount?: number;
  samplingMethod?: StableDiffusionSamplingMethod;
  cfgScale?: number;
  controlStrength?: number;
}

/** Request to generate images from a text prompt. */
export interface TextToImageRequest extends StableDiffusionRequest {
  model: string;
  prompt: string;
  width?: number;
  height?: number;
  seed?: number;
}

/** Request to generate images from an input image and a text prompt. */
export interface ImageToImageRequest extends StableDiffusionRequest {
  model: string;
  image: Image;
  prompt: string;
  width?: number;
  height?: number;
  seed?: number;
}

/** Request to transcribe audio referenced by `url` or `file`. */
export interface SpeechToTextRequest {
  model: string;
  url?: string;
  file?: string;
  language?: string;
  prompt?: string;
  maxTokens?: number;
}

/** Bookkeeping attached to a request: a sequence number and an abort controller. */
export interface ModelRequestMeta {
  sequence: number;
  abortController: AbortController;
}

/** Union of the request shapes listed here (the two image-generation requests are not members). */
export type IncomingRequest =
  | TextCompletionRequest
  | ChatCompletionRequest
  | EmbeddingRequest
  | ImageToTextRequest
  | SpeechToTextRequest;

/** An incoming request combined with its {@link ModelRequestMeta}. */
export type ModelInstanceRequest = ModelRequestMeta & IncomingRequest;

/** Arguments for {@link ModelEngine.processTextCompletionTask}. */
export interface EngineTextCompletionArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  onChunk?: (chunk: CompletionChunk) => void;
  resetContext?: boolean;
  request: TextCompletionRequest;
}

/** Arguments for {@link ModelEngine.processChatCompletionTask}. */
export interface EngineChatCompletionArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  onChunk?: (chunk: CompletionChunk) => void;
  resetContext?: boolean;
  request: ChatCompletionRequest;
}

/** Arguments for {@link ModelEngine.processEmbeddingTask}. */
export interface EngineEmbeddingArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: EmbeddingRequest;
}

/** Arguments for {@link ModelEngine.processImageToTextTask}. */
export interface EngineImageToTextArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: ImageToTextRequest;
}

/** Arguments for {@link ModelEngine.processTextToImageTask}. */
export interface EngineTextToImageArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: TextToImageRequest;
}

/** Arguments for {@link ModelEngine.processImageToImageTask}. */
export interface EngineImageToImageArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: ImageToImageRequest;
}

/** Arguments for {@link ModelEngine.processSpeechToTextTask}. */
export interface EngineSpeechToTextArgs<TModelConfig = ModelConfig, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: SpeechToTextRequest;
  onChunk?: (chunk: { text: string }) => void;
}

/** Progress report for a single file download. */
export interface FileDownloadProgress {
  file: string;
  loadedBytes: number;
  totalBytes: number;
}

/** Context handed to {@link ModelEngine.start}. */
export interface EngineStartContext {
  pool: ModelPool;
  store: ModelStore;
}

/**
 * Contract an engine implements: model preparation, instance lifecycle, and
 * one optional handler per task type.
 *
 * NOTE(review): the type parameter list and every `Promise` type argument
 * were missing from this declaration. The `process*Task` results are matched
 * to the same-named `Engine*Result` interfaces of this module; the parameter
 * order and defaults, and `prepareModel` resolving to `TModelMeta`, are a
 * reconstruction - confirm against the engine implementations.
 *
 * @typeParam TInstance - Value produced by `createInstance` and passed back to
 *   `disposeInstance` and the task handlers.
 * @typeParam TModelConfig - Config type carried in the engine context.
 * @typeParam TModelMeta - Meta type carried in the engine context.
 */
export interface ModelEngine<
  TInstance = unknown,
  TModelConfig = ModelConfig,
  TModelMeta = unknown,
> {
  autoGpu?: boolean;
  start?: (ctx: EngineStartContext) => Promise<void>;
  prepareModel: (
    ctx: EngineContext<TModelConfig, TModelMeta>,
    onProgress?: (progress: FileDownloadProgress) => void,
    signal?: AbortSignal,
  ) => Promise<TModelMeta>;
  createInstance: (
    ctx: EngineContext<TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TInstance>;
  disposeInstance: (instance: TInstance) => Promise<void>;
  processChatCompletionTask?: (
    args: EngineChatCompletionArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineChatCompletionResult>;
  processTextCompletionTask?: (
    args: EngineTextCompletionArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineTextCompletionResult>;
  processEmbeddingTask?: (
    args: EngineEmbeddingArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineEmbeddingResult>;
  processImageToTextTask?: (
    args: EngineImageToTextArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineImageToTextResult>;
  processSpeechToTextTask?: (
    args: EngineSpeechToTextArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineSpeechToTextResult>;
  processTextToImageTask?: (
    args: EngineTextToImageArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineTextToImageResult>;
  processImageToImageTask?: (
    args: EngineImageToImageArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineImageToImageResult>;
}

/** Task tag mixed into embedding model options. */
interface EmbeddingModelOptions {
  task: 'embedding';
}

/** A grammar given either as a string or as a JSON schema. */
export type TextCompletionGrammar = string | SomeJSONSchema;

/** Options mixed into text-completion model definitions. */
interface TextCompletionModelOptions {
  task: 'text-completion';
  contextSize?: number;
  /** Grammars keyed by string. */
  grammars?: Record<string, TextCompletionGrammar>;
  completionDefaults?: TextCompletionParams;
  initialMessages?: ChatMessage[];
  prefix?: string;
  batchSize?: number;
}

/** Options common to both `node-llama-cpp` model variants. */
interface LlamaCppModelOptionsBase extends BuiltInModelOptionsBase {
  engine: 'node-llama-cpp';
  task: 'text-completion' | 'embedding';
  sha256?: string;
  batchSize?: number;
  contextShiftStrategy?: ContextShiftStrategy;
  tools?: {
    /** Tool definitions keyed by string. */
    definitions: Record<string, ToolDefinition>;
    includeParamsDocumentation?: boolean;
    parallelism?: number;
  };
  device?: {
    gpu?: boolean | 'auto' | (string & {});
    gpuLayers?: number;
    cpuThreads?: number;
    memLock?: boolean;
  };
}

/** `node-llama-cpp` model used for embeddings. */
interface LlamaCppEmbeddingModelOptions
  extends LlamaCppModelOptionsBase,
    EmbeddingModelOptions {
  task: 'embedding';
}

/** `node-llama-cpp` model used for text completion. */
export interface LlamaCppTextCompletionModelOptions
  extends LlamaCppModelOptionsBase,
    TextCompletionModelOptions {
  task: 'text-completion';
}

/** Options common to both `gpt4all` model variants. */
interface GPT4AllModelOptions extends BuiltInModelOptionsBase {
  engine: 'gpt4all';
  task: 'text-completion' | 'embedding';
  md5?: string;
  device?: {
    gpu?: boolean | 'auto' | (string & {});
    gpuLayers?: number;
    cpuThreads?: number;
  };
}

/** `gpt4all` model used for text completion. */
type GPT4AllTextCompletionModelOptions = TextCompletionModelOptions & GPT4AllModelOptions;

/** `gpt4all` model used for embeddings. */
type GPT4AllEmbeddingModelOptions = GPT4AllModelOptions & EmbeddingModelOptions;

/** Settings for one sub-model of a `transformers-js` model definition. */
export interface TransformersJsModel {
  processor?: {
    url?: string;
    file?: string;
  };
  processorClass?: TransformersJsProcessorClass;
  tokenizerClass?: TransformersJsTokenizerClass;
  modelClass?: TransformersJsModelClass;
  /** Either one data type, or data types keyed by string. */
  dtype?: Record<string, TransformersJsDataType> | TransformersJsDataType;
}

/** Options for models run by the `transformers-js` engine. */
interface TransformersJsModelOptions extends BuiltInModelOptionsBase {
  engine: 'transformers-js';
  task: 'image-to-text' | 'speech-to-text' | 'text-completion' | 'embedding';
  textModel?: TransformersJsModel;
  visionModel?: TransformersJsModel;
  speechModel?: TransformersJsModel;
  device?: {
    gpu?: boolean | 'auto' | (string & {});
  };
}

/** Reference to a model file by `url` and/or `file`, with an optional SHA-256. */
export interface ModelFileSource {
  url?: string;
  file?: string;
  sha256?: string;
}

/** Options for models run by the `stable-diffusion-cpp` engine. */
interface StableDiffusionModelOptions extends BuiltInModelOptionsBase {
  engine: 'stable-diffusion-cpp';
  task: 'image-to-text' | 'text-to-image' | 'image-to-image';
  sha256?: string;
  url?: string;
  diffusionModel?: boolean;
  vae?: ModelFileSource;
  clipL?: ModelFileSource;
  clipG?: ModelFileSource;
  t5xxl?: ModelFileSource;
  taesd?: ModelFileSource;
  controlNet?: ModelFileSource;
  samplingMethod?: StableDiffusionSamplingMethod;
  weightType?: StableDiffusionWeightType;
  schedule?: StableDiffusionSchedule;
  loras?: ModelFileSource[];
}

/** Options for a model served by a non-built-in engine; adds nothing to the base. */
export interface CustomEngineModelOptions extends ModelOptionsBase {}

/** Union of the option shapes of all built-in engines. */
export type BuiltInModelOptions =
  | LlamaCppTextCompletionModelOptions
  | LlamaCppEmbeddingModelOptions
  | GPT4AllTextCompletionModelOptions
  | GPT4AllEmbeddingModelOptions
  | TransformersJsModelOptions
  | StableDiffusionModelOptions;

/** Any accepted model definition, built-in or custom. */
export type ModelOptions = BuiltInModelOptions | CustomEngineModelOptions;

/** Result of an embedding task. */
export interface EngineEmbeddingResult {
  embeddings: Float32Array[];
  inputTokens: number;
}

/** An engine chat completion result extended with `id` and `model`. */
export interface ChatCompletionResult extends EngineChatCompletionResult {
  id: string;
  model: string;
}

/** Result of a chat completion task. */
export interface EngineChatCompletionResult {
  message: AssistantMessage;
  finishReason: CompletionFinishReason;
  promptTokens: number;
  completionTokens: number;
  contextTokens: number;
}

/** Result of a text completion task; unlike the chat result, `finishReason` is optional. */
export interface EngineTextCompletionResult {
  text: string;
  finishReason?: CompletionFinishReason;
  promptTokens: number;
  completionTokens: number;
  contextTokens: number;
}

/** Result of an image-to-text task. */
export interface EngineImageToTextResult {
  text: string;
}

/** Result of a text-to-image task. */
export interface EngineTextToImageResult {
  images: Image[];
  seed: number;
}

/** Result of an image-to-image task. */
export interface EngineImageToImageResult {
  images: Image[];
  seed: number;
}

/** Result of a speech-to-text task. */
export interface EngineSpeechToTextResult {
  text: string;
}