import type { Context } from "../context.ts";
import { type SnakeToCamel } from "../util/snake-to-camel.ts";
import { type AiSearchNamespace } from "./ai-search-namespace.ts";
import { AiSearchToken } from "./ai-search-token.ts";
import { type CloudflareApi, type CloudflareApiOptions } from "./api.ts";
import { type R2Bucket, type R2BucketJurisdiction } from "./bucket.ts";

// NOTE(review): this declaration file had lost every generic type argument
// (`Record`, `Promise`, `SnakeToCamel`, `Context` appeared bare), which does
// not type-check. The arguments below were reconstructed from the surrounding
// declarations; each reconstructed site is marked `NOTE(review)`. Confirm them
// against the implementation source, or simply regenerate this file.

/**
 * Options shared by every `AiSearchProps` variant.
 */
interface BaseAiSearchProps extends CloudflareApiOptions {
  /**
   * Name of the AI Search instance
   * @default `${app}-${stage}-${id}`
   * @minLength 1
   * @maxLength 32
   */
  name?: string;
  /**
   * Data source for indexing.
   *
   * Accepts three forms:
   * - **R2Bucket (shorthand)**: pass an `R2Bucket` resource directly for
   *   default indexing. `prefix`, `includePaths`, and `excludePaths` cannot
   *   be set in this form — use the full R2 config form below.
   * - **R2 config**: `{ type: "r2", bucket, prefix?, includePaths?, excludePaths?, jurisdiction? }`.
   * - **Web crawler**: `{ type: "web-crawler", domain, ... }`.
   *
   * When omitted, creates a built-in storage instance for manual file uploads
   * (via the Items API or the AI Search binding).
   */
  source?: R2Bucket | AiSearchR2Source | AiSearchWebCrawlerSource;
  /**
   * The namespace this instance belongs to.
   * Can be a namespace name string or an AiSearchNamespace resource.
   *
   * @remarks
   * Single-instance Worker bindings (`bindings: { MY: aiSearch }`) can only
   * bind instances in the `default` namespace. To bind instances in a
   * non-default namespace, use an `AiSearchNamespace` binding instead and
   * access the instance via `env.NS.get(name)`.
   *
   * Changing `namespace` on an existing instance triggers a replace
   * (delete + create) because namespaces are immutable on the Cloudflare
   * side.
   *
   * @default "default"
   */
  namespace?: string | AiSearchNamespace;
  /**
   * Controls which storage backends are used during indexing.
   * Defaults to vector-only. Set both `vector` and `keyword` to `true` for hybrid search.
   */
  indexMethod?: {
    vector?: boolean;
    keyword?: boolean;
  };
  /**
   * Fusion method for combining vector and keyword results.
   *
   * @default "rrf"
   */
  fusionMethod?: "max" | "rrf";
  /**
   * Text generation model for AI responses
   *
   * @default "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
   */
  aiSearchModel?: AiSearch.Model;
  /**
   * Embedding model for vectorization
   *
   * @default "@cf/baai/bge-m3"
   */
  embeddingModel?: AiSearch.EmbeddingModel;
  /**
   * Enable chunking of source documents
   *
   * @default true
   */
  chunk?: boolean;
  /**
   * Size of each chunk (minimum 64)
   *
   * @default 256
   */
  chunkSize?: number;
  /**
   * Overlap between chunks (0-30)
   *
   * @default 10
   */
  chunkOverlap?: number;
  /**
   * Maximum search results (1-50)
   *
   * @default 10
   */
  maxNumResults?: number;
  /**
   * Minimum match score (0-1)
   *
   * @default 0.4
   */
  scoreThreshold?: number;
  /**
   * Enable result reranking
   *
   * @default false
   */
  reranking?: boolean;
  /**
   * Reranking model
   *
   * @default "@cf/baai/bge-reranker-base"
   */
  rerankingModel?: AiSearch.RerankingModel;
  /**
   * Enable query rewriting for better retrieval
   *
   * @default false
   */
  rewriteQuery?: boolean;
  /**
   * Query rewriting model
   *
   * @default "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
   */
  rewriteModel?: AiSearch.Model;
  /**
   * Enable similarity caching
   *
   * @default false
   */
  cache?: boolean;
  /**
   * Cache similarity threshold
   *
   * @default "close_enough"
   */
  cacheThreshold?:
    | "super_strict_match"
    | "close_enough"
    | "flexible_friend"
    | "anything_goes";
  /**
   * Custom metadata
   */
  // NOTE(review): type arguments reconstructed; `unknown` values are the
  // widest safe choice — narrow if the implementation is more specific.
  metadata?: Record<string, unknown>;
  /**
   * Whether to index the source documents when the AI Search instance is created.
   * Only applicable when a source is provided.
   * @default true
   */
  indexOnCreate?: boolean;
  /**
   * Whether to delete the AI Search instance when removed from Alchemy
   * @default true
   */
  delete?: boolean;
  /**
   * Whether to adopt the AI Search instance if it already exists
   * @default false
   */
  adopt?: boolean;
}

/**
 * Properties accepted by `AiSearch`: the shared base options plus either an
 * optional `token` resource or a required `tokenId` string.
 */
export type AiSearchProps = BaseAiSearchProps &
  (
    | {
        token?: AiSearchToken;
      }
    | {
        tokenId: string;
      }
  );

/**
 * Full R2 data-source configuration (the `type: "r2"` form of `source`).
 */
export interface AiSearchR2Source {
  /**
   * Source type
   */
  type: "r2";
  /**
   * R2 bucket - can be bucket name string or R2Bucket resource
   */
  bucket: string | R2Bucket;
  /**
   * Jurisdiction for the R2 bucket
   * @default "default"
   */
  jurisdiction?: R2BucketJurisdiction;
  /**
   * Prefix for included items from the R2 bucket
   */
  prefix?: string;
  /**
   * Path patterns to include in the R2 bucket (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  includePaths?: string[];
  /**
   * Path patterns to exclude from the R2 bucket (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  excludePaths?: string[];
}

/**
 * Web-crawler data-source configuration (the `type: "web-crawler"` form of
 * `source`).
 */
export interface AiSearchWebCrawlerSource {
  /**
   * Source type
   */
  type: "web-crawler";
  /**
   * Domain to crawl. Must be a domain that is onboarded to your Cloudflare account
   * (added as a zone with active nameservers pointing to Cloudflare).
   *
   * Can be provided as just the domain (e.g., "docs.example.com") or with protocol
   * (e.g., "https://docs.example.com") - the protocol will be stripped automatically.
   *
   * @example "docs.example.com"
   * @example "https://example.com" // Protocol will be stripped
   */
  domain: string;
  /**
   * Path patterns to include in crawling (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  includePaths?: string[];
  /**
   * Path patterns to exclude from crawling (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  excludePaths?: string[];
  /** Crawler parsing options; keys use the API's snake_case spelling. */
  parseOptions?: {
    // NOTE(review): type arguments reconstructed as a header-name → value map.
    include_headers?: Record<string, string>;
    include_images?: boolean;
    specific_sitemaps?: string[];
    use_browser_rendering?: boolean;
  };
  /** How the crawler discovers pages. */
  parseType?: "sitemap" | "feed-rss";
  /** Storage target options for crawled content. */
  storeOptions?: {
    storage_id: string;
    jurisdiction?: R2BucketJurisdiction;
    storage_type?: "r2";
  };
}

/**
 * Type guard for AiSearch
 */
export declare function isAiSearch(resource: unknown): resource is AiSearch;

// NOTE(review): the `SnakeToCamel` type argument was reconstructed as
// `AiSearch.ApiResponse` (the camelCased API response shape) — confirm.
export type AiSearch = SnakeToCamel<AiSearch.ApiResponse> & {
  /**
   * The instance name on the Cloudflare side. Equal to `id`. This is what
   * gets emitted as `instance_name` in single-instance `ai_search` bindings.
   */
  name: string;
  /**
   * The namespace this instance belongs to.
   *
   * Optional for backwards compatibility with state files that predate
   * namespace support; at write-time this is always populated (defaults to
   * `"default"` when the user did not specify a namespace).
   */
  namespace?: string;
};

/**
 * An AI Search instance: a managed search index with optional built-in
 * storage and optional external data source (R2 or web crawler).
 *
 * @see https://developers.cloudflare.com/ai-search/
 *
 * @example
 * ## Built-in storage (no source)
 *
 * Creates an instance whose content is uploaded directly via the items API.
 *
 * ```ts
 * const kb = await AiSearch("knowledge-base", {
 *   name: "knowledge-base",
 * });
 * ```
 *
 * @example
 * ## R2-backed instance
 *
 * ```ts
 * const bucket = await R2Bucket("docs");
 * const search = await AiSearch("docs-search", {
 *   name: "docs-search",
 *   source: bucket,
 * });
 * ```
 *
 * @example
 * ## Instance in a custom namespace
 *
 * ```ts
 * const ns = await AiSearchNamespace("tenants", { name: "tenants" });
 * const search = await AiSearch("tenant-a", {
 *   name: "tenant-a",
 *   namespace: ns,
 * });
 * ```
 */
// NOTE(review): `Context<AiSearch>` and `Promise<AiSearch>` are reconstructed
// type arguments — confirm against `../context.ts` and the implementation.
export declare const AiSearch:
  | (((this: any, id: string, props?: {}) => never) &
      (new (_: never) => never))
  | ((
      this: Context<AiSearch>,
      id: string,
      props: AiSearchProps,
    ) => Promise<AiSearch>);

export declare namespace AiSearch {
  /**
   * Known text-generation model identifiers. The trailing `(string & {})`
   * keeps editor completion for the literals while accepting any string.
   */
  type Model =
    | "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
    | "@cf/meta/llama-3.1-8b-instruct-fast"
    | "@cf/meta/llama-3.1-8b-instruct-fp8"
    | "@cf/meta/llama-4-scout-17b-16e-instruct"
    | "@cf/qwen/qwen3-30b-a3b-fp8"
    | "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
    | "@cf/moonshotai/kimi-k2-instruct"
    | "anthropic/claude-3-7-sonnet"
    | "anthropic/claude-sonnet-4"
    | "anthropic/claude-opus-4"
    | "anthropic/claude-3-5-haiku"
    | "cerebras/qwen-3-235b-a22b-instruct"
    | "cerebras/qwen-3-235b-a22b-thinking"
    | "cerebras/llama-3.3-70b"
    | "cerebras/llama-4-maverick-17b-128e-instruct"
    | "cerebras/llama-4-scout-17b-16e-instruct"
    | "cerebras/gpt-oss-120b"
    | "google-ai-studio/gemini-2.5-flash"
    | "google-ai-studio/gemini-2.5-pro"
    | "grok/grok-4"
    | "groq/llama-3.3-70b-versatile"
    | "groq/llama-3.1-8b-instant"
    | "openai/gpt-5"
    | "openai/gpt-5-mini"
    | "openai/gpt-5-nano"
    | (string & {});

  /** Known embedding model identifiers; any other string is also accepted. */
  type EmbeddingModel =
    | "@cf/qwen/qwen3-embedding-0.6b"
    | "@cf/baai/bge-m3"
    | "@cf/baai/bge-large-en-v1.5"
    | "@cf/google/embeddinggemma-300m"
    | "google-ai-studio/gemini-embedding-001"
    | "openai/text-embedding-3-small"
    | "openai/text-embedding-3-large"
    | (string & {});

  /** Known reranking model identifiers; any other string is also accepted. */
  type RerankingModel = "@cf/baai/bge-reranker-base" | (string & {});

  /**
   * Request body shape taken by `createAiSearchInstance` and
   * `updateAiSearchInstance` (snake_case, as sent to the API).
   */
  interface ApiPayload {
    id: string;
    source?: string;
    type?: "r2" | "web-crawler";
    ai_gateway_id?: string;
    ai_search_model?: Model;
    cache?: boolean;
    cache_threshold?:
      | "super_strict_match"
      | "close_enough"
      | "flexible_friend"
      | "anything_goes";
    chunk?: boolean;
    chunk_overlap?: number;
    chunk_size?: number;
    custom_metadata?: Array<{
      data_type: "text" | "number" | "boolean";
      /**
       * @minLength 1
       * @maxLength 64
       */
      field_name: string;
    }>;
    embedding_model?: EmbeddingModel;
    hybrid_search_enabled?: boolean;
    index_method?: {
      vector?: boolean;
      keyword?: boolean;
    };
    fusion_method?: "max" | "rrf";
    max_num_results?: number;
    metadata?: {
      created_from_aisearch_wizard?: boolean;
      worker_domain?: string;
    };
    public_endpoint_params?: {
      authorized_hosts?: string[];
      chat_completions_endpoint?: {
        disabled?: boolean;
      };
      enabled?: boolean;
      mcp?: {
        disabled?: boolean;
      };
      rate_limit?: {
        /**
         * Maximum: 3,600,000, Minimum: 60,000
         */
        period_ms?: number;
        /**
         * Minimum: 1
         */
        requests?: number;
        technique?: "fixed" | "sliding";
      };
      search_endpoint?: {
        disabled?: boolean;
      };
    };
    reranking?: boolean;
    reranking_model?: RerankingModel;
    rewrite_model?: Model;
    rewrite_query?: boolean;
    /**
     * Maximum: 1, Minimum: 0, Default: 0.4
     */
    score_threshold?: number;
    source_params?: {
      exclude_items?: string[];
      include_items?: string[];
      prefix?: string;
      r2_jurisdiction?: string;
      web_crawler?: {
        /**
         * Default: {"parse_type":"sitemap"}
         */
        parse_options?: {
          // NOTE(review): type arguments reconstructed as header-name → value.
          include_headers?: Record<string, string>;
          include_images?: boolean;
          specific_sitemaps?: string[];
          use_browser_rendering?: boolean;
        };
        parse_type?: "sitemap" | "feed-rss";
        store_options?: {
          storage_id: string;
          r2_jurisdiction?: string;
          storage_type?: "r2";
        };
      };
    };
    token_id?: string;
  }

  /**
   * Instance record shape in the API's snake_case spelling.
   */
  interface ApiResponse {
    id: string;
    account_id: string;
    account_tag: string;
    created_at: string;
    internal_id: string;
    modified_at: string;
    source?: string;
    type?: "r2" | "web-crawler";
    vectorize_name: string;
    namespace?: string;
    ai_gateway_id?: string;
    ai_search_model?: Model;
    cache?: boolean;
    cache_threshold?:
      | "super_strict_match"
      | "close_enough"
      | "flexible_friend"
      | "anything_goes";
    chunk?: boolean;
    chunk_overlap?: number;
    chunk_size?: number;
    created_by?: string;
    custom_metadata?: Array<{
      data_type: "text" | "number" | "boolean";
      field_name: string;
    }>;
    embedding_model?: EmbeddingModel;
    enable?: boolean;
    engine_version?: number;
    hybrid_search_enabled?: boolean;
    index_method?: {
      vector?: boolean;
      keyword?: boolean;
    };
    fusion_method?: "max" | "rrf";
    last_activity?: string;
    max_num_results?: number;
    metadata?: {
      created_from_aisearch_wizard?: boolean;
      worker_domain?: string;
    };
    modified_by?: string;
    paused?: boolean;
    public_endpoint_id?: string;
    public_endpoint_params?: {
      authorized_hosts?: string[];
      chat_completions_endpoint?: {
        disabled?: boolean;
      };
      enabled?: boolean;
      mcp?: {
        disabled?: boolean;
      };
      rate_limit?: {
        period_ms?: number;
        requests?: number;
        technique?: "fixed" | "sliding";
      };
      search_endpoint?: {
        disabled?: boolean;
      };
    };
    reranking?: boolean;
    reranking_model?: RerankingModel;
    rewrite_model?: Model;
    rewrite_query?: boolean;
    score_threshold?: number;
    source_params?: {
      exclude_items?: string[];
      include_items?: string[];
      prefix?: string;
      r2_jurisdiction?: string;
      web_crawler?: {
        parse_options?: {
          // NOTE(review): type arguments reconstructed as header-name → value.
          include_headers?: Record<string, string>;
          include_images?: boolean;
          specific_sitemaps?: string[];
          use_browser_rendering?: boolean;
        };
        parse_type?: "sitemap" | "feed-rss";
        store_options?: {
          storage_id: string;
          r2_jurisdiction?: string;
          storage_type?: "r2";
        };
      };
    };
    status?: "waiting" | "ready" | "indexing" | "error";
    summarization?: boolean;
    // The previous inline union here was member-for-member identical to
    // `Model`, so the alias is used instead of repeating the list.
    summarization_model?: Model;
    system_prompt_ai_search?: string;
    system_prompt_index_summarization?: string;
    system_prompt_rewrite_query?: string;
    token_id?: string;
    vectorize_active_namespace?: string;
  }
}

// NOTE(review): every `Promise<…>` argument below is reconstructed. Instance
// helpers are assumed to resolve to `AiSearch.ApiResponse` (array for the
// list call); where nothing in this file indicates the resolved value,
// `unknown` is used so callers cannot rely on an unverified shape.

/** Instance-listing helper; `namespace` may be omitted. */
export declare function listAiSearchInstances(
  api: CloudflareApi,
  namespace?: string,
): Promise<AiSearch.ApiResponse[]>;

/** Instance-creation helper taking an `AiSearch.ApiPayload`. */
export declare function createAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  payload: AiSearch.ApiPayload,
): Promise<AiSearch.ApiResponse>;

/** Instance-lookup helper addressed by `namespace` and `id`. */
export declare function getAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  id: string,
): Promise<AiSearch.ApiResponse>;

/** Instance-update helper taking an `AiSearch.ApiPayload`. */
export declare function updateAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  id: string,
  payload: AiSearch.ApiPayload,
): Promise<AiSearch.ApiResponse>;

/** Instance-deletion helper. Resolved value unverified — TODO confirm. */
export declare function deleteAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  id: string,
): Promise<unknown>;

/** Job record shape; all timestamp fields are nullable strings. */
interface AiSearchJobApiResponse {
  id: string;
  source: "user" | "schedule";
  end_reason: string | null;
  ended_at: string | null;
  last_seen_at: string | null;
  started_at: string | null;
}

/** Job-listing helper for one instance. */
export declare function listAiSearchJobs(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
): Promise<AiSearchJobApiResponse[]>;

/** Job-creation helper for one instance. */
export declare function createAiSearchJob(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
): Promise<AiSearchJobApiResponse>;

/** Job-lookup helper addressed by `jobId`. */
export declare function getAiSearchJob(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
  jobId: string,
): Promise<AiSearchJobApiResponse>;

/** Single job log entry; `created_at` and `message_type` are numeric. */
interface AiSearchJobLogItem {
  id: number;
  created_at: number;
  message: string;
  message_type: number;
}

/** Job-log listing helper for one job. */
export declare function listAiSearchJobLogs(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
  jobId: string,
): Promise<AiSearchJobLogItem[]>;

/**
 * Job-running helper that receives a `log` callback for messages.
 * Resolved value unverified — TODO confirm.
 */
export declare function runAiSearchJob(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
  log: (message: string) => void,
): Promise<unknown>;

export {};
//# sourceMappingURL=ai-search.d.ts.map