/**
 * @license
 * Copyright 2025 Steven Roussey
 * SPDX-License-Identifier: Apache-2.0
 */
import { CreateWorkflow, IExecuteContext, Task } from "@workglow/task-graph";
import type { IRunConfig, TaskConfig } from "@workglow/task-graph";
import { DataPortSchema } from "@workglow/util/schema";
import type { Capability } from "../capability/Capabilities";

/**
 * Names of the chunking strategies accepted by {@link TextChunkerTaskInput.strategy}.
 * The companion type alias below is the union of this object's string values.
 */
export declare const ChunkingStrategy: {
  readonly FIXED: "fixed";
  readonly SENTENCE: "sentence";
  readonly PARAGRAPH: "paragraph";
  readonly SEMANTIC: "semantic";
};
export type ChunkingStrategy = (typeof ChunkingStrategy)[keyof typeof ChunkingStrategy];

/**
 * Input accepted by {@link TextChunkerTask}. Only `text` is required.
 */
export type TextChunkerTaskInput = {
  /** Optional identifier of the document the text belongs to. */
  doc_id?: string | undefined;
  /** Which chunking strategy to apply; one of the {@link ChunkingStrategy} values. */
  strategy?: "fixed" | "sentence" | "paragraph" | "semantic" | undefined;
  /** Target chunk size. NOTE(review): unit (characters vs. tokens) is not visible here — confirm. */
  chunkSize?: number | undefined;
  /** Overlap between consecutive chunks. NOTE(review): same unit as `chunkSize`, presumably — confirm. */
  chunkOverlap?: number | undefined;
  /** The plain text to split. */
  text: string;
};

/**
 * Output produced by {@link TextChunkerTask}.
 */
export type TextChunkerTaskOutput = {
  doc_id?: string | undefined;
  /** NOTE(review): presumably the text of each chunk, parallel to `chunks` — confirm. */
  text: string[];
  /** Chunk records; the index signature allows additional, untyped properties per record. */
  chunks: {
    [x: string]: unknown;
    leafNodeId?: string | undefined;
    summary?: string | undefined;
    entities?:
      | {
          type: string;
          text: string;
          score: number;
        }[]
      | undefined;
    parentSummaries?: string[] | undefined;
    sectionTitles?: string[] | undefined;
    doc_title?: string | undefined;
    text: string;
    doc_id: string;
    chunkId: string;
    nodePath: string[];
    depth: number;
  }[];
  /** NOTE(review): presumably the number of entries in `chunks` — confirm. */
  count: number;
};

/** Configuration type for {@link TextChunkerTask}; currently an alias of `TaskConfig`. */
export type TextChunkerTaskConfig = TaskConfig;

/**
 * Task for chunking plain text into smaller segments with configurable strategies.
 * Emits `ChunkRecord[]` so the output is interchangeable with HierarchicalChunkerTask
 * and can feed directly into TextEmbeddingTask → ChunkVectorUpsertTask.
 *
 * Deterministic: identical inputs produce identical `chunkId`s (no random UUIDs),
 * so this task is safe to mark cacheable.
 *
 * NOTE(review): `Task` is extended without type arguments, as in the original
 * declaration. If `Task` is generic over input/output/config, confirm that relying
 * on its defaults is intended.
 */
export declare class TextChunkerTask extends Task {
  static type: string;
  /** Pure-compute chunking task — no provider capability required. */
  static readonly requires: readonly Capability[];
  static category: string;
  static title: string;
  static description: string;
  static cacheable: boolean;
  static inputSchema(): DataPortSchema;
  static outputSchema(): DataPortSchema;
  /**
   * Runs the chunker on `input`.
   *
   * @param input - Text plus optional chunking parameters.
   * @param context - Execution context supplied by the task graph.
   * @returns A promise for the chunking result. The type argument was missing from
   *   the declaration (`Promise` requires one); `TextChunkerTaskOutput` is the
   *   task's declared output type.
   */
  execute(input: TextChunkerTaskInput, context: IExecuteContext): Promise<TextChunkerTaskOutput>;
  /** Fixed-size chunking with overlap */
  private chunkFixed;
  /** Sentence-based chunking that respects sentence boundaries */
  private chunkBySentence;
  /** Paragraph-based chunking that respects paragraph boundaries */
  private chunkByParagraph;
}

/**
 * Function-style entry point for the text chunker.
 *
 * @param input - Text plus optional chunking parameters.
 * @param config - Optional task configuration.
 * @param runConfig - Optional partial run configuration.
 * @returns A promise for the chunking result.
 */
export declare const textChunker: (
  input: TextChunkerTaskInput,
  config?: TextChunkerTaskConfig,
  runConfig?: Partial<IRunConfig>
) => Promise<TextChunkerTaskOutput>;

declare module "@workglow/task-graph" {
  interface Workflow {
    // NOTE(review): `CreateWorkflow` is referenced without type arguments, as in the
    // original declaration. If it is generic without defaults, its arguments must be
    // restored from the corresponding source file.
    textChunker: CreateWorkflow;
  }
}
//# sourceMappingURL=TextChunkerTask.d.ts.map