import { BaseDocumentTransformer, Document } from "@langchain/core/documents";
import * as tiktoken from "js-tiktoken";

//#region src/text_splitter.d.ts
/**
 * Configuration fields shared by every text splitter declared in this file.
 */
interface TextSplitterParams {
  /** NOTE(review): presumably the target size of a chunk, in units of `lengthFunction` — confirm against the implementation. */
  chunkSize: number;
  /** NOTE(review): presumably how much adjacent chunks overlap, in the same units as `chunkSize` — confirm. */
  chunkOverlap: number;
  /** NOTE(review): presumably controls whether separators are retained in the output chunks — confirm. */
  keepSeparator: boolean;
  /** Optional measure of a text's length; may answer synchronously or through a promise. */
  lengthFunction?: ((text: string) => number) | ((text: string) => Promise<number>);
}
/**
 * Optional header settings accepted by `transformDocuments`, `createDocuments`
 * and `splitDocuments`. All fields are optional.
 */
type TextSplitterChunkHeaderOptions = {
  chunkHeader?: string;
  chunkOverlapHeader?: string;
  appendChunkOverlapHeader?: boolean;
};
/**
 * Abstract base class for the splitters in this file. It is a
 * `BaseDocumentTransformer` and carries the `TextSplitterParams` fields;
 * concrete subclasses must implement `splitText`.
 */
declare abstract class TextSplitter extends BaseDocumentTransformer implements TextSplitterParams {
  lc_namespace: string[];
  chunkSize: number;
  chunkOverlap: number;
  keepSeparator: boolean;
  lengthFunction: ((text: string) => number) | ((text: string) => Promise<number>);
  /**
   * @param fields - Any subset of `TextSplitterParams`.
   */
  constructor(fields?: Partial<TextSplitterParams>);
  /**
   * @param documents - Documents to transform.
   * @param chunkHeaderOptions - Optional header settings.
   * @returns A promise of the resulting documents.
   */
  transformDocuments(documents: Document[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
  /**
   * Splits a single text into pieces. Must be provided by each subclass.
   *
   * @param text - The text to split.
   * @returns A promise of the resulting strings.
   */
  abstract splitText(text: string): Promise<string[]>;
  /** Synchronous helper available to subclasses; takes a text and a separator and returns strings. */
  protected splitOnSeparator(text: string, separator: string): string[];
  /**
   * @param texts - Input texts.
   * @param metadatas - Optional array of metadata records.
   *   NOTE(review): presumably index-aligned with `texts` — confirm.
   * @param chunkHeaderOptions - Optional header settings.
   * @returns A promise of the created documents.
   */
  createDocuments(
    texts: string[],
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    metadatas?: Record<string, any>[],
    chunkHeaderOptions?: TextSplitterChunkHeaderOptions
  ): Promise<Document[]>;
  private numberOfNewLines;
  /**
   * @param documents - Documents to split.
   * @param chunkHeaderOptions - Optional header settings.
   * @returns A promise of the resulting documents.
   */
  splitDocuments(documents: Document[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
  private joinDocs;
  /**
   * @param splits - Strings to merge.
   * @param separator - Separator string.
   *   NOTE(review): presumably inserted between merged pieces — confirm.
   * @returns A promise of the merged strings.
   */
  mergeSplits(splits: string[], separator: string): Promise<string[]>;
}
/** `TextSplitterParams` plus a single `separator` string. */
interface CharacterTextSplitterParams extends TextSplitterParams {
  separator: string;
}
/** Concrete `TextSplitter` configured with one `separator` string. */
declare class CharacterTextSplitter extends TextSplitter implements CharacterTextSplitterParams {
  static lc_name(): string;
  separator: string;
  /**
   * @param fields - Any subset of `CharacterTextSplitterParams`.
   */
  constructor(fields?: Partial<CharacterTextSplitterParams>);
  splitText(text: string): Promise<string[]>;
}
/** `TextSplitterParams` plus a list of `separators`. */
interface RecursiveCharacterTextSplitterParams extends TextSplitterParams {
  separators: string[];
}
/** Language identifiers accepted by `RecursiveCharacterTextSplitter.fromLanguage` and `getSeparatorsForLanguage`. */
declare const SupportedTextSplitterLanguages: readonly ["cpp", "go", "java", "js", "php", "proto", "python", "rst", "ruby", "rust", "scala", "swift", "markdown", "latex", "html", "sol"];
/** Union of the string literals listed in `SupportedTextSplitterLanguages`. */
type SupportedTextSplitterLanguage = (typeof SupportedTextSplitterLanguages)[number];
/** Concrete `TextSplitter` configured with a list of `separators`. */
declare class RecursiveCharacterTextSplitter extends TextSplitter implements RecursiveCharacterTextSplitterParams {
  static lc_name(): string;
  separators: string[];
  /**
   * @param fields - Any subset of `RecursiveCharacterTextSplitterParams`.
   */
  constructor(fields?: Partial<RecursiveCharacterTextSplitterParams>);
  private _splitText;
  splitText(text: string): Promise<string[]>;
  /**
   * Static factory that builds a splitter for one of the supported languages.
   *
   * @param language - One of `SupportedTextSplitterLanguages`.
   * @param options - Any subset of `RecursiveCharacterTextSplitterParams`.
   * @returns A `RecursiveCharacterTextSplitter` instance.
   */
  static fromLanguage(language: SupportedTextSplitterLanguage, options?: Partial<RecursiveCharacterTextSplitterParams>): RecursiveCharacterTextSplitter;
  /**
   * @param language - One of `SupportedTextSplitterLanguages`.
   * @returns The separator strings associated with that language.
   */
  static getSeparatorsForLanguage(language: SupportedTextSplitterLanguage): string[];
}
/** `TextSplitterParams` plus tokenizer settings: an encoding name and allowed/disallowed special tokens. */
interface TokenTextSplitterParams extends TextSplitterParams {
  encodingName: tiktoken.TiktokenEncoding;
  allowedSpecial: "all" | Array<string>;
  disallowedSpecial: "all" | Array<string>;
}
/**
 * Implementation of splitter which looks at tokens.
 */
declare class TokenTextSplitter extends TextSplitter implements TokenTextSplitterParams {
  static lc_name(): string;
  encodingName: tiktoken.TiktokenEncoding;
  allowedSpecial: "all" | Array<string>;
  disallowedSpecial: "all" | Array<string>;
  private tokenizer;
  /**
   * @param fields - Any subset of `TokenTextSplitterParams`.
   */
  constructor(fields?: Partial<TokenTextSplitterParams>);
  splitText(text: string): Promise<string[]>;
}
/** Alias of `TextSplitterParams`; `MarkdownTextSplitter` adds no fields of its own. */
type MarkdownTextSplitterParams = TextSplitterParams;
/** `RecursiveCharacterTextSplitter` subclass whose constructor takes only the base `TextSplitterParams` fields. */
declare class MarkdownTextSplitter extends RecursiveCharacterTextSplitter implements MarkdownTextSplitterParams {
  /**
   * @param fields - Any subset of `MarkdownTextSplitterParams`.
   */
  constructor(fields?: Partial<MarkdownTextSplitterParams>);
}
/** Alias of `TextSplitterParams`; `LatexTextSplitter` adds no fields of its own. */
type LatexTextSplitterParams = TextSplitterParams;
/** `RecursiveCharacterTextSplitter` subclass whose constructor takes only the base `TextSplitterParams` fields. */
declare class LatexTextSplitter extends RecursiveCharacterTextSplitter implements LatexTextSplitterParams {
  /**
   * @param fields - Any subset of `LatexTextSplitterParams`.
   */
  constructor(fields?: Partial<LatexTextSplitterParams>);
}
//#endregion
export { CharacterTextSplitter, CharacterTextSplitterParams, LatexTextSplitter, LatexTextSplitterParams, MarkdownTextSplitter, MarkdownTextSplitterParams, RecursiveCharacterTextSplitter, RecursiveCharacterTextSplitterParams, SupportedTextSplitterLanguage, SupportedTextSplitterLanguages, TextSplitter, TextSplitterChunkHeaderOptions, TextSplitterParams, TokenTextSplitter, TokenTextSplitterParams };
//# sourceMappingURL=text_splitter.d.ts.map