import type { Context } from "../context.ts";
import { Resource, ResourceKind } from "../resource.ts";
import { logger } from "../util/logger.ts";
import { poll } from "../util/poll.ts";
import { sleep } from "../util/sleep.ts";
import {
  snakeToCamelObjectDeep,
  type SnakeToCamel,
} from "../util/snake-to-camel.ts";
import {
  type AiSearchNamespace,
  isAiSearchNamespace,
} from "./ai-search-namespace.ts";
import { AiSearchToken } from "./ai-search-token.ts";
import { CloudflareApiError, isCloudflareApiError } from "./api-error.ts";
import {
  extractCloudflareResult,
  type CloudflareApiErrorPayload,
} from "./api-response.ts";
import {
  createCloudflareApi,
  type CloudflareApi,
  type CloudflareApiOptions,
} from "./api.ts";
import {
  getBucket,
  isBucket,
  type R2Bucket,
  type R2BucketJurisdiction,
} from "./bucket.ts";
import { deleteIndex } from "./vectorize-index.ts";

// NOTE(review): several generic types in this file (`Record`, `Promise`,
// `SnakeToCamel`, `Context`) appear without type arguments. Confirm the
// intended type arguments before relying on these signatures.

/**
 * Properties shared by every way of configuring an AI Search instance.
 * `AiSearchProps` combines this with the token options.
 */
interface BaseAiSearchProps extends CloudflareApiOptions {
  /**
   * Name of the AI Search instance
   * @default `${app}-${stage}-${id}`
   * @minLength 1
   * @maxLength 32
   */
  name?: string;

  /**
   * Data source for indexing.
   *
   * Accepts three forms:
   * - **R2Bucket (shorthand)**: pass an `R2Bucket` resource directly for
   *   default indexing. `prefix`, `includePaths`, and `excludePaths` cannot
   *   be set in this form — use the full R2 config form below.
   * - **R2 config**: `{ type: "r2", bucket, prefix?, includePaths?, excludePaths?, jurisdiction? }`.
   * - **Web crawler**: `{ type: "web-crawler", domain, ... }`.
   *
   * When omitted, creates a built-in storage instance for manual file uploads
   * (via the Items API or the AI Search binding).
   */
  source?: R2Bucket | AiSearchR2Source | AiSearchWebCrawlerSource;

  /**
   * The namespace this instance belongs to.
   * Can be a namespace name string or an AiSearchNamespace resource.
   *
   * @remarks
   * Single-instance Worker bindings (`bindings: { MY: aiSearch }`) can only
   * bind instances in the `default` namespace. To bind instances in a
   * non-default namespace, use an `AiSearchNamespace` binding instead and
   * access the instance via `env.NS.get(name)`.
   *
   * Changing `namespace` on an existing instance triggers a replace
   * (delete + create) because namespaces are immutable on the Cloudflare
   * side.
   *
   * @default "default"
   */
  namespace?: string | AiSearchNamespace;

  /**
   * Controls which storage backends are used during indexing.
   * Defaults to vector-only. Set both `vector` and `keyword` to `true` for hybrid search.
   */
  indexMethod?: { vector?: boolean; keyword?: boolean };

  /**
   * Fusion method for combining vector and keyword results.
   *
   * @default "rrf"
   */
  fusionMethod?: "max" | "rrf";

  /**
   * Text generation model for AI responses
   *
   * @default "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
   */
  aiSearchModel?: AiSearch.Model;

  /**
   * Embedding model for vectorization
   *
   * @default "@cf/baai/bge-m3"
   */
  embeddingModel?: AiSearch.EmbeddingModel;

  /**
   * Enable chunking of source documents
   *
   * @default true
   */
  chunk?: boolean;

  /**
   * Size of each chunk (minimum 64)
   *
   * @default 256
   */
  chunkSize?: number;

  /**
   * Overlap between chunks (0-30)
   *
   * @default 10
   */
  chunkOverlap?: number;

  /**
   * Maximum search results (1-50)
   *
   * @default 10
   */
  maxNumResults?: number;

  /**
   * Minimum match score (0-1)
   *
   * @default 0.4
   */
  scoreThreshold?: number;

  /**
   * Enable result reranking
   *
   * @default false
   */
  reranking?: boolean;

  /**
   * Reranking model
   *
   * @default "@cf/baai/bge-reranker-base"
   */
  rerankingModel?: AiSearch.RerankingModel;

  /**
   * Enable query rewriting for better retrieval
   *
   * @default false
   */
  rewriteQuery?: boolean;

  /**
   * Query rewriting model
   *
   * @default "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
   */
  rewriteModel?: AiSearch.Model;

  /**
   * Enable similarity caching
   *
   * @default false
   */
  cache?: boolean;

  /**
   * Cache similarity threshold
   *
   * @default "close_enough"
   */
  cacheThreshold?:
    | "super_strict_match"
    | "close_enough"
    | "flexible_friend"
    | "anything_goes";

  /**
   * Custom metadata. Forwarded unchanged as the `metadata` field of the
   * create/update payload.
   *
   * NOTE(review): `Record` appears without type arguments here — confirm the
   * intended key/value types.
   */
  metadata?: Record;

  /**
   * Whether to index the source documents when the AI Search instance is created.
   * Only applicable when a source is provided.
   * @default true
   */
  indexOnCreate?: boolean;

  /**
   * Whether to delete the AI Search instance when removed from Alchemy
   * @default true
   */
  delete?: boolean;

  /**
   * Whether to adopt the AI Search instance if it already exists
   * @default false
   */
  adopt?: boolean;
}

/**
 * Properties for creating or updating an AI Search instance.
 *
 * The service token is supplied either as an `AiSearchToken` resource
 * (`token`) or as a raw token ID (`tokenId`). The handler only resolves a
 * token for R2 sources; when neither is given for an R2 source it creates an
 * `AiSearchToken` child resource itself.
 */
export type AiSearchProps = BaseAiSearchProps &
  (
    | {
        token?: AiSearchToken;
      }
    | {
        tokenId: string;
      }
  );

/**
 * Full-form configuration for an R2-backed data source.
 */
export interface AiSearchR2Source {
  /**
   * Source type
   */
  type: "r2";

  /**
   * R2 bucket - can be bucket name string or R2Bucket resource
   */
  bucket: string | R2Bucket;

  /**
   * Jurisdiction for the R2 bucket.
   *
   * When `bucket` is an `R2Bucket` resource, the handler uses the bucket's
   * own `jurisdiction` and this value is overwritten.
   *
   * @default "default"
   */
  jurisdiction?: R2BucketJurisdiction;

  /**
   * Prefix for included items from the R2 bucket
   */
  prefix?: string;

  /**
   * Path patterns to include in the R2 bucket (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  includePaths?: string[];

  /**
   * Path patterns to exclude from the R2 bucket (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  excludePaths?: string[];
}

/**
 * Configuration for a web-crawler data source.
 */
export interface AiSearchWebCrawlerSource {
  /**
   * Source type
   */
  type: "web-crawler";

  /**
   * Domain to crawl. Must be a domain that is onboarded to your Cloudflare account
   * (added as a zone with active nameservers pointing to Cloudflare).
   *
   * Provide the bare domain only (e.g., "docs.example.com"). Validation in
   * `validateWebCrawlerSourceDomain` rejects values that contain a protocol
   * (`://`) or a path (`/`) with an error; nothing is stripped automatically.
   *
   * @example "docs.example.com"
   */
  domain: string;

  /**
   * Path patterns to include in crawling (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  includePaths?: string[];

  /**
   * Path patterns to exclude from crawling (up to 10 patterns).
   * Supports wildcards: `*` matches any characters except `/`, `**` matches any characters including `/`.
   */
  excludePaths?: string[];

  /**
   * Forwarded unchanged as `source_params.web_crawler.parse_options`.
   */
  parseOptions?: {
    include_headers?: Record;
    include_images?: boolean;
    specific_sitemaps?: string[];
    use_browser_rendering?: boolean;
  };

  /**
   * Forwarded as `source_params.web_crawler.parse_type`.
   */
  parseType?: "sitemap" | "feed-rss";

  /**
   * Forwarded unchanged as `source_params.web_crawler.store_options`.
   *
   * NOTE(review): the payload type `AiSearch.ApiPayload` declares the key
   * `r2_jurisdiction` inside `store_options`, while this object uses
   * `jurisdiction` and is passed through without renaming — confirm which
   * key the API expects.
   */
  storeOptions?: {
    storage_id: string;
    jurisdiction?: R2BucketJurisdiction;
    storage_type?: "r2";
  };
}

/**
 * Type guard for AiSearch.
 *
 * Returns true when `resource` is a non-null object whose `ResourceKind`
 * property equals `"cloudflare::AiSearch"`.
 */
export function isAiSearch(resource: unknown): resource is AiSearch {
  return (
    typeof resource === "object" &&
    resource !== null &&
    (resource as any)[ResourceKind] === "cloudflare::AiSearch"
  );
}

/**
 * Output of the `AiSearch` resource: the camel-cased API response plus the
 * `name` and `namespace` fields set by the handler.
 */
export type AiSearch = SnakeToCamel & {
  /**
   * The instance name on the Cloudflare side. Equal to `id`. This is what
   * gets emitted as `instance_name` in single-instance `ai_search` bindings.
   */
  name: string;

  /**
   * The namespace this instance belongs to.
   *
   * Optional for backwards compatibility with state files that predate
   * namespace support; at write-time this is always populated (defaults to
   * `"default"` when the user did not specify a namespace).
   */
  namespace?: string;
};

/**
 * An AI Search instance: a managed search index with optional built-in
 * storage and optional external data source (R2 or web crawler).
 *
 * @see https://developers.cloudflare.com/ai-search/
 *
 * @example
 * ## Built-in storage (no source)
 *
 * Creates an instance whose content is uploaded directly via the items API.
 *
 * ```ts
 * const kb = await AiSearch("knowledge-base", {
 *   name: "knowledge-base",
 * });
 * ```
 *
 * @example
 * ## R2-backed instance
 *
 * ```ts
 * const bucket = await R2Bucket("docs");
 * const search = await AiSearch("docs-search", {
 *   name: "docs-search",
 *   source: bucket,
 * });
 * ```
 *
 * @example
 * ## Instance in a custom namespace
 *
 * ```ts
 * const ns = await AiSearchNamespace("tenants", { name: "tenants" });
 * const search = await AiSearch("tenant-a", {
 *   name: "tenant-a",
 *   namespace: ns,
 * });
 * ```
 */
export const AiSearch = Resource(
  "cloudflare::AiSearch",
  async function (
    this: Context,
    id: string,
    props: AiSearchProps,
  ): Promise {
    const adopt = props.adopt ?? this.scope.adopt;

    // Resolve namespace: AiSearchNamespace resource → string, default to "default"
    const namespace = resolveNamespace(props.namespace);

    // NOTE: AI Search is an always-remote binding (no Miniflare-native
    // implementation). `alchemy dev` wires the worker binding via
    // `remote-binding-proxy`, which requires the instance to actually exist
    // on Cloudflare at preview-token creation time — a locally mocked
    // resource would cause the Worker deploy to fail with error 10360
    // ("instance … not found"). Follow the same pattern as Vectorize and
    // skip the `scope.local` mock branch entirely.
    const api = await createCloudflareApi(props);

    // Confirms the bucket can be fetched via `getBucket` before it is used as
    // a source. Throws if an R2Bucket resource is local-only (no
    // `dev.remote`) while the scope is local, or if the lookup fails.
    const validateBucketSource = async (
      bucket: R2Bucket | string,
      jurisdiction: R2BucketJurisdiction = "default",
    ) => {
      let name: string;
      if (typeof bucket === "string") {
        name = bucket;
      } else {
        if (this.scope.local && !bucket.dev?.remote) {
          throw new Error(
            [
              `AI Search "${id}" depends on an R2Bucket that is running locally, but AI Search requires the bucket to be deployed.`,
              "Add `dev: { remote: true }` to the R2Bucket to use it with AI Search.",
            ].join("\n"),
          );
        }
        name = bucket.name;
      }
      try {
        await getBucket(api, name, { jurisdiction });
      } catch (error) {
        // Re-throw with the bucket/instance context; the original error is
        // preserved as `cause`.
        throw new Error(
          `Failed to validate R2 bucket "${name}" (${jurisdiction}) for AI search "${id}": ${
            error instanceof Error ? error.message : String(error)
          }`,
          { cause: error },
        );
      }
    };

    // Validates the source and converts each accepted form into either an R2
    // config whose `bucket` is a plain name, or the web-crawler config
    // unchanged. Returns undefined when no source was given.
    const normalizeSource = async (
      source:
        | R2Bucket
        | AiSearchR2Source
        | AiSearchWebCrawlerSource
        | undefined,
    ): Promise<
      | (AiSearchR2Source & { bucket: string })
      | AiSearchWebCrawlerSource
      | undefined
    > => {
      if (!source) return undefined;
      if (isBucket(source)) {
        // Shorthand form: an R2Bucket resource passed directly.
        await validateBucketSource(source, source.jurisdiction);
        return {
          type: "r2",
          bucket: source.name,
          jurisdiction: source.jurisdiction,
        };
      } else if (source.type === "r2" && isBucket(source.bucket)) {
        // R2 config holding an R2Bucket resource: the bucket's own
        // jurisdiction replaces any `source.jurisdiction`.
        await validateBucketSource(source.bucket, source.bucket.jurisdiction);
        return {
          ...source,
          type: "r2",
          bucket: source.bucket.name,
          jurisdiction: source.bucket.jurisdiction,
        };
      } else if (source.type === "web-crawler") {
        await validateWebCrawlerSourceDomain(api, source.domain);
        return source;
      } else {
        // R2 config holding a bucket name string.
        await validateBucketSource(source.bucket, source.jurisdiction);
        return source as AiSearchR2Source & { bucket: string };
      }
    };

    // Throws unless `id` has the 8-4-4-4-12 hexadecimal UUID shape
    // (case-insensitive).
    const validateTokenId = (id: string): void => {
      if (
        !/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(
          id,
        )
      ) {
        throw new Error(
          `Invalid token ID: "${id}"\n` +
            "The token ID must be a valid UUID for an AI Search service token.\n" +
            "See https://developers.cloudflare.com/ai-search/get-started/api/#2-create-a-service-api-token",
        );
      }
    };

    /**
     * Only resolve token for R2 sources. Web-crawler and built-in storage
     * instances do not require a token.
     */
    const normalizeTokenId = async (
      source:
        | (AiSearchR2Source & { bucket: string })
        | AiSearchWebCrawlerSource
        | undefined,
    ): Promise => {
      // Token only needed for R2 sources
      if (!source || source.type !== "r2") {
        return undefined;
      }
      if ("tokenId" in props) {
        validateTokenId(props.tokenId);
        return props.tokenId;
      } else if ("token" in props && props.token) {
        validateTokenId(props.token.tokenId);
        return props.token.tokenId;
      } else {
        // Auto-created tokens are an implementation detail of this resource —
        // `adopt: true` so updates reuse a prior auto-created token with the
        // same stable child id. We forward `delete: false` when the user
        // preserves the instance, because Cloudflare rejects token deletion
        // while the token is still referenced by the preserved instance
        // (error 7076 `token_in_use_by_instances`). When the instance IS
        // being deleted normally, the token deletes with it.
        const token = await AiSearchToken("token", {
          baseUrl: props.baseUrl,
          profile: props.profile,
          apiKey: props.apiKey,
          apiToken: props.apiToken,
          accountId: props.accountId,
          email: props.email,
          adopt: true,
          ...(props.delete === false ? { delete: false } : {}),
        });
        return token.tokenId;
      }
    };

    if (this.phase === "delete") {
      // Remote deletion is skipped when `delete: false` or when there is no
      // recorded instance id; the resource is destroyed either way.
      if (props.delete !== false && this.output?.id) {
        if (this.output.vectorizeName) {
          await deleteIndex(api, this.output.vectorizeName);
        }
        // Outputs without a namespace are treated as "default".
        const deleteNs = this.output.namespace ?? "default";
        await deleteAiSearchInstance(api, deleteNs, this.output.id);
      }
      return this.destroy();
    }

    const name = props.name ?? this.scope.createPhysicalName(id, "-", 32);
    if (name.length < 1 || name.length > 32) {
      throw new Error(
        `AI Search instance name must be 1-32 characters, got ${name.length} ("${name}")`,
      );
    }

    const source = await normalizeSource(props.source);
    const tokenId = await normalizeTokenId(source);

    // Request body shared by the create and update calls.
    const payload: AiSearch.ApiPayload = {
      id: name,
      // `source` is the bucket name for R2 and the domain for the web crawler.
      source: source
        ? source.type === "r2"
          ? source.bucket
          : source.domain
        : undefined,
      type: source?.type,
      ai_search_model: props.aiSearchModel,
      source_params: source
        ? {
            include_items: source.includePaths,
            exclude_items: source.excludePaths,
            ...(source.type === "r2"
              ? {
                  // The "default" jurisdiction is sent as undefined.
                  r2_jurisdiction:
                    source.jurisdiction !== "default"
                      ? source.jurisdiction
                      : undefined,
                  prefix: source.prefix,
                }
              : {
                  web_crawler: {
                    parse_type: source.parseType,
                    parse_options: source.parseOptions,
                    // NOTE(review): `storeOptions` carries `jurisdiction`,
                    // but the payload type declares `r2_jurisdiction` here —
                    // confirm whether the key needs renaming.
                    store_options: source.storeOptions,
                  },
                }),
          }
        : undefined,
      embedding_model: props.embeddingModel,
      chunk: props.chunk,
      chunk_size: props.chunkSize,
      chunk_overlap: props.chunkOverlap,
      max_num_results: props.maxNumResults,
      score_threshold: props.scoreThreshold,
      reranking: props.reranking,
      reranking_model: props.rerankingModel,
      rewrite_query: props.rewriteQuery,
      rewrite_model: props.rewriteModel,
      cache: props.cache,
      cache_threshold: props.cacheThreshold,
      metadata: props.metadata,
      token_id: tokenId,
      index_method: props.indexMethod,
      fusion_method: props.fusionMethod,
    };

    let instance: AiSearch.ApiResponse;
    if (this.phase === "update" && this.output?.id) {
      // A change of source type or source value forces a replacement.
      const replace =
        "source" in this.output &&
        (payload.type !== this.output.type ||
          payload.source !== this.output.source);
      // the development version of this resource had different properties, so check those to avoid an unnecessary replacement
      const replaceLegacy =
        "sourceType" in this.output &&
        (payload.type !== this.output.sourceType ||
          (payload.type === "r2" &&
            "sourceBucket" in this.output &&
            payload.source !== this.output.sourceBucket) ||
          (payload.type === "web-crawler" &&
            "sourceDomain" in this.output &&
            payload.source !== this.output.sourceDomain));
      // Namespace is immutable: moving an instance between namespaces must
      // replace (delete old, create new) rather than attempt an in-place
      // update against a non-existent `PUT /namespaces/{new}/instances/{id}`.
      // Default to "default" for legacy state files that predate the
      // namespace prop.
      const currentNamespace =
        ("namespace" in this.output && this.output.namespace) || "default";
      const namespaceChanged = currentNamespace !== namespace;
      if (replace || replaceLegacy || namespaceChanged) {
        return this.replace(true);
      }
      instance = await updateAiSearchInstance(
        api,
        namespace,
        this.output.id,
        payload,
      );
    } else {
      // Pre-flight adopt: if `adopt: true` and an instance already exists,
      // adopt it directly via GET→PUT. This avoids racing on 400/7022 error
      // codes and the fragile `errorData as CloudflareApiErrorPayload[]` cast.
      if (adopt) {
        // A 404 from the lookup means "nothing to adopt"; any other error
        // propagates.
        const existing = await getAiSearchInstance(api, namespace, name).catch(
          (e: unknown) => {
            if (e instanceof CloudflareApiError && e.status === 404) {
              return undefined;
            }
            throw e;
          },
        );
        if (existing) {
          instance = await updateAiSearchInstance(
            api,
            namespace,
            existing.id,
            payload,
          );
        } else {
          instance = await createAiSearchInstance(api, namespace, payload);
        }
      } else {
        try {
          instance = await createAiSearchInstance(api, namespace, payload);
        } catch (error) {
          // Wrap "already exists" errors with a clearer message pointing at
          // the adoption path (AGENTS.md convention).
          const errorData = Array.isArray(
            (error as CloudflareApiError | undefined)?.errorData,
          )
            ? ((error as CloudflareApiError)
                .errorData as CloudflareApiErrorPayload[])
            : [];
          // "Already exists" is recognised as HTTP 400 with error code 7022.
          const isAlreadyExistsError =
            error instanceof CloudflareApiError &&
            error.status === 400 &&
            errorData.some((e) => e.code === 7022);
          if (isAlreadyExistsError) {
            throw new Error(
              `AI Search instance "${name}" already exists in namespace "${namespace}". Use \`adopt: true\` to adopt it.`,
              { cause: error },
            );
          }
          throw error;
        }
      }

      // Runs for both freshly created and adopted instances, unless
      // `indexOnCreate` is explicitly false or there is no source.
      if (props.indexOnCreate !== false && source) {
        await runAiSearchJob(api, namespace, instance.id, (message) =>
          logger.task(id, {
            prefix: "index",
            prefixColor: "gray",
            resource: id,
            message,
          }),
        );
      } else if (props.indexOnCreate === true && !source) {
        logger.warn(
          `AI Search "${id}": \`indexOnCreate: true\` has no effect because no \`source\` was provided.`,
        );
      }
    }

    return {
      ...snakeToCamelObjectDeep(instance),
      name: instance.id,
      // The API response may include a `namespace` field; explicitly use the
      // resolved local value to ensure the output always matches the prop
      // (and defaults to "default" when unspecified).
      namespace,
    };
  },
);

/**
 * Resolve a namespace prop to a string name.
 *
 * Falsy input (including the empty string) resolves to `"default"`, a string
 * is returned as-is, and an `AiSearchNamespace` resource resolves to its
 * `namespace` property. Anything else falls back to `"default"`.
 */
function resolveNamespace(ns: string | AiSearchNamespace | undefined): string {
  if (!ns) return "default";
  if (typeof ns === "string") return ns;
  if (isAiSearchNamespace(ns)) return ns.namespace;
  return "default";
}

/**
 * Validate a web-crawler source domain.
 *
 * First checks the format locally: a value containing `://` or `/` is
 * rejected with a helpful error. Then posts the domain to the account's
 * `ai-search/domains` endpoint and throws, listing every reported error, when
 * the response body does not have `success: true`.
 *
 * @throws Error when the format check or the remote validation fails.
 */
async function validateWebCrawlerSourceDomain(
  api: CloudflareApi,
  domain: string,
): Promise {
  if (domain.includes("://")) {
    throw new Error(
      `Invalid domain format "${domain}". Provide just the domain (e.g., "docs.example.com"), not a URL. ` +
        `For URL-based crawling, use AiCrawler instead of AiSearch.`,
    );
  }
  if (domain.includes("/")) {
    throw new Error(
      `Invalid domain format "${domain}". Provide just the domain without paths (e.g., "docs.example.com"). ` +
        `Use includePaths to filter specific paths, or use AiCrawler for URL-based crawling.`,
    );
  }

  // The Cloudflare dashboard uses this undocumented endpoint to validate domains.
  // If we don't validate here, the create request fails with a 500 error.
  const response = await api.post(
    `/accounts/${api.accountId}/ai-search/domains`,
    {
      domain,
    },
  );

  // The error message is more like an error code. This mapping is from the Cloudflare dashboard.
  const errorMap = {
    not_a_valid_domain: "Not a valid domain.",
    invalid_domain:
      "Invalid domain. The domain needs to belong to this account.",
    fail_to_find_domain_info: "Failed to find domain information.",
    missing_sitemap: "Sitemap not found. Please check your robots.txt.",
    domain_not_owned_by_user: "The domain needs to belong to this account.",
    forbidden_robots_txt:
      "Failed to fetch robots.txt: The file is inaccessible.",
    forbidden_sitemap:
      "Failed to fetch your sitemap: The file is inaccessible.",
  };

  const json = (await response.json()) as {
    success: boolean;
    errors: Array<{ code: number; message: string }>;
  };
  if (json.success) return;

  throw new Error(
    [
      `Failed to validate domain "${domain}" (${response.status}):`,
      // Known error codes are translated via `errorMap`; others are shown
      // verbatim. NOTE(review): the `in` operator also matches inherited
      // keys such as "toString".
      ...json.errors.map(
        (e) =>
          `- [${e.code}] ${
            e.message in errorMap
              ? errorMap[e.message as keyof typeof errorMap]
              : e.message
          }`,
      ),
      "Learn more: https://developers.cloudflare.com/ai-search/configuration/data-source/website/",
    ].join("\n"),
  );
}

/**
 * Types for the AI Search API: model identifiers plus the request
 * (`ApiPayload`) and response (`ApiResponse`) shapes used by the helpers
 * below.
 */
export declare namespace AiSearch {
  /**
   * Text generation model identifier. The trailing `(string & {})` member
   * also accepts any other string.
   */
  type Model =
    | "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
    | "@cf/meta/llama-3.1-8b-instruct-fast"
    | "@cf/meta/llama-3.1-8b-instruct-fp8"
    | "@cf/meta/llama-4-scout-17b-16e-instruct"
    | "@cf/qwen/qwen3-30b-a3b-fp8"
    | "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
    | "@cf/moonshotai/kimi-k2-instruct"
    | "anthropic/claude-3-7-sonnet"
    | "anthropic/claude-sonnet-4"
    | "anthropic/claude-opus-4"
    | "anthropic/claude-3-5-haiku"
    | "cerebras/qwen-3-235b-a22b-instruct"
    | "cerebras/qwen-3-235b-a22b-thinking"
    | "cerebras/llama-3.3-70b"
    | "cerebras/llama-4-maverick-17b-128e-instruct"
    | "cerebras/llama-4-scout-17b-16e-instruct"
    | "cerebras/gpt-oss-120b"
    | "google-ai-studio/gemini-2.5-flash"
    | "google-ai-studio/gemini-2.5-pro"
    | "grok/grok-4"
    | "groq/llama-3.3-70b-versatile"
    | "groq/llama-3.1-8b-instant"
    | "openai/gpt-5"
    | "openai/gpt-5-mini"
    | "openai/gpt-5-nano"
    | (string & {});

  /**
   * Embedding model identifier; any other string is also accepted.
   */
  type EmbeddingModel =
    | "@cf/qwen/qwen3-embedding-0.6b"
    | "@cf/baai/bge-m3"
    | "@cf/baai/bge-large-en-v1.5"
    | "@cf/google/embeddinggemma-300m"
    | "google-ai-studio/gemini-embedding-001"
    | "openai/text-embedding-3-small"
    | "openai/text-embedding-3-large"
    | (string & {});

  /**
   * Reranking model identifier; any other string is also accepted.
   */
  type RerankingModel = "@cf/baai/bge-reranker-base" | (string & {});

  /**
   * Request body for creating or updating an instance.
   */
  interface ApiPayload {
    id: string;
    source?: string;
    type?: "r2" | "web-crawler";
    ai_gateway_id?: string;
    ai_search_model?: Model;
    cache?: boolean;
    cache_threshold?:
      | "super_strict_match"
      | "close_enough"
      | "flexible_friend"
      | "anything_goes";
    chunk?: boolean;
    chunk_overlap?: number;
    chunk_size?: number;
    custom_metadata?: Array<{
      data_type: "text" | "number" | "boolean";
      /**
       * @minLength 1
       * @maxLength 64
       */
      field_name: string;
    }>;
    embedding_model?: EmbeddingModel;
    hybrid_search_enabled?: boolean;
    index_method?: { vector?: boolean; keyword?: boolean };
    fusion_method?: "max" | "rrf";
    max_num_results?: number;
    metadata?: {
      created_from_aisearch_wizard?: boolean;
      worker_domain?: string;
    };
    public_endpoint_params?: {
      authorized_hosts?: string[];
      chat_completions_endpoint?: {
        disabled?: boolean;
      };
      enabled?: boolean;
      mcp?: {
        disabled?: boolean;
      };
      rate_limit?: {
        /**
         * Maximum: 3,600,000, Minimum: 60,000
         */
        period_ms?: number;
        /**
         * Minimum: 1
         */
        requests?: number;
        technique?: "fixed" | "sliding";
      };
      search_endpoint?: {
        disabled?: boolean;
      };
    };
    reranking?: boolean;
    reranking_model?: RerankingModel;
    rewrite_model?: Model;
    rewrite_query?: boolean;
    /**
     * Maximum: 1, Minimum: 0, Default: 0.4
     */
    score_threshold?: number;
    source_params?: {
      exclude_items?: string[];
      include_items?: string[];
      prefix?: string;
      r2_jurisdiction?: string; // Default: "default"
      web_crawler?: {
        /**
         * Default: {"parse_type":"sitemap"}
         */
        parse_options?: {
          include_headers?: Record;
          include_images?: boolean;
          specific_sitemaps?: string[]; // Only valid when parse_type is 'sitemap'
          use_browser_rendering?: boolean;
        };
        parse_type?: "sitemap" | "feed-rss"; // Default: "sitemap"
        store_options?: {
          storage_id: string;
          r2_jurisdiction?: string; // Default: "default"
          storage_type?: "r2";
        };
      };
    };
    token_id?: string;
  }

  /**
   * Instance object as returned by the API.
   */
  interface ApiResponse {
    id: string;
    account_id: string;
    account_tag: string;
    created_at: string;
    internal_id: string;
    modified_at: string;
    source?: string;
    type?: "r2" | "web-crawler";
    vectorize_name: string;
    namespace?: string;
    ai_gateway_id?: string;
    ai_search_model?: Model;
    cache?: boolean; // default: true
    cache_threshold?:
      | "super_strict_match"
      | "close_enough"
      | "flexible_friend"
      | "anything_goes"; // default: "close_enough"
    chunk?: boolean; // default: true
    chunk_overlap?: number; // maximum: 30, minimum: 0, default: 10
    chunk_size?: number; // minimum: 64, default: 256
    created_by?: string;
    custom_metadata?: Array<{
      data_type: "text" | "number" | "boolean";
      field_name: string;
    }>;
    embedding_model?: EmbeddingModel;
    enable?: boolean;
    engine_version?: number; // default: 1
    hybrid_search_enabled?: boolean;
    index_method?: { vector?: boolean; keyword?: boolean };
    fusion_method?: "max" | "rrf";
    last_activity?: string;
    max_num_results?: number; // maximum: 50, minimum: 1, default: 10
    metadata?: {
      created_from_aisearch_wizard?: boolean;
      worker_domain?: string;
    };
    modified_by?: string;
    paused?: boolean;
    public_endpoint_id?: string;
    public_endpoint_params?: {
      authorized_hosts?: string[];
      chat_completions_endpoint?: {
        disabled?: boolean;
      };
      enabled?: boolean;
      mcp?: {
        disabled?: boolean;
      };
      rate_limit?: {
        period_ms?: number; // maximum: 3600000, minimum: 60000
        requests?: number;
        technique?: "fixed" | "sliding";
      };
      search_endpoint?: {
        disabled?: boolean;
      };
    };
    reranking?: boolean;
    reranking_model?: RerankingModel;
    rewrite_model?: Model;
    rewrite_query?: boolean;
    score_threshold?: number;
    source_params?: {
      exclude_items?: string[];
      include_items?: string[];
      prefix?: string;
      r2_jurisdiction?: string; // default: "default"
      web_crawler?: {
        parse_options?: {
          include_headers?: Record;
          include_images?: boolean;
          specific_sitemaps?: string[]; // valid with 'sitemap' parse_type
          use_browser_rendering?: boolean;
        };
        parse_type?: "sitemap" | "feed-rss"; // default: "sitemap"
        store_options?: {
          storage_id: string;
          r2_jurisdiction?: string; // default: "default"
          storage_type?: "r2";
        };
      };
    };
    status?: "waiting" | "ready" | "indexing" | "error";
    summarization?: boolean;
    summarization_model?:
      | "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
      | "@cf/meta/llama-3.1-8b-instruct-fast"
      | "@cf/meta/llama-3.1-8b-instruct-fp8"
      | "@cf/meta/llama-4-scout-17b-16e-instruct"
      | "@cf/qwen/qwen3-30b-a3b-fp8"
      | "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
      | "@cf/moonshotai/kimi-k2-instruct"
      | "anthropic/claude-3-7-sonnet"
      | "anthropic/claude-sonnet-4"
      | "anthropic/claude-opus-4"
      | "anthropic/claude-3-5-haiku"
      | "cerebras/qwen-3-235b-a22b-instruct"
      | "cerebras/qwen-3-235b-a22b-thinking"
      | "cerebras/llama-3.3-70b"
      | "cerebras/llama-4-maverick-17b-128e-instruct"
      | "cerebras/llama-4-scout-17b-16e-instruct"
      | "cerebras/gpt-oss-120b"
      | "google-ai-studio/gemini-2.5-flash"
      | "google-ai-studio/gemini-2.5-pro"
      | "grok/grok-4"
      | "groq/llama-3.3-70b-versatile"
      | "groq/llama-3.1-8b-instant"
      | "openai/gpt-5"
      | "openai/gpt-5-mini"
      | "openai/gpt-5-nano"
      | (string & {});
    system_prompt_ai_search?: string;
    system_prompt_index_summarization?: string;
    system_prompt_rewrite_query?: string;
    token_id?: string;
    vectorize_active_namespace?: string;
  }
}

// ─── Namespace-scoped API Helper Functions ───────────────────────────────────

/**
 * Base path for namespace-scoped instance operations.
 *
 * NOTE(review): `namespace` is interpolated into the path without URL
 * encoding — presumably namespace names are restricted to URL-safe
 * characters; confirm.
 */
function aiSearchInstanceBasePath(
  api: CloudflareApi,
  namespace: string,
): string {
  return `/accounts/${api.accountId}/ai-search/namespaces/${namespace}/instances`;
}

/**
 * List the instances in a namespace (the `"default"` namespace when omitted).
 */
export async function listAiSearchInstances(
  api: CloudflareApi,
  namespace = "default",
): Promise {
  return await extractCloudflareResult(
    "list AI Search instances",
    api.get(aiSearchInstanceBasePath(api, namespace)),
  );
}

/**
 * Create an instance in `namespace` by POSTing `payload` to the instance
 * collection.
 */
export async function createAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  payload: AiSearch.ApiPayload,
): Promise {
  return await extractCloudflareResult(
    `create AI Search instance "${payload.id}"`,
    api.post(aiSearchInstanceBasePath(api, namespace), payload),
  );
}

/**
 * Fetch a single instance by id from `namespace`.
 */
export async function getAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  id: string,
): Promise {
  return await extractCloudflareResult(
    `get AI Search instance "${id}"`,
    api.get(`${aiSearchInstanceBasePath(api, namespace)}/${id}`),
  );
}

/**
 * Update an existing instance by PUTting `payload` to its path.
 */
export async function updateAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  id: string,
  payload: AiSearch.ApiPayload,
): Promise {
  return await extractCloudflareResult(
    `update AI Search instance "${id}"`,
    api.put(`${aiSearchInstanceBasePath(api, namespace)}/${id}`, payload),
  );
}

/**
 * Delete an instance and wait until a GET for it returns 404.
 *
 * A 404 from the DELETE itself is treated as "already gone" and returns
 * immediately, without polling. Any other error is re-thrown.
 */
export async function deleteAiSearchInstance(
  api: CloudflareApi,
  namespace: string,
  id: string,
): Promise {
  try {
    await extractCloudflareResult(
      `delete AI Search instance "${id}"`,
      api.delete(`${aiSearchInstanceBasePath(api, namespace)}/${id}`),
    );
  } catch (error) {
    if (error instanceof CloudflareApiError && error.status === 404) {
      return;
    }
    throw error;
  }

  // Cloudflare's DELETE returns 204 before the instance fully disappears
  // from the backing services. A subsequent GET can return the stale row
  // for a bounded window — same-colo is invalidated immediately via the
  // edge cache, cross-colo is bounded by a 60s KV TTL.
  //
  // Actively wait here so the destroy phase presents a strongly-
  // consistent "instance is gone" guarantee to its callers (child token
  // delete, user-facing teardown assertions, dependent resources).
  await poll({
    description: `wait for AI Search instance "${id}" deletion to propagate`,
    fn: () => api.get(`${aiSearchInstanceBasePath(api, namespace)}/${id}`),
    predicate: (res) => res.status === 404,
    initialDelay: 500,
    maxDelay: 5000,
    timeout: 90_000,
  });
}

// ─── Job API Helper Functions ────────────────────────────────────────────────

/**
 * Job object as returned by the jobs endpoints. `runAiSearchJob` treats a
 * non-null `ended_at` as "job finished".
 */
interface AiSearchJobApiResponse {
  id: string;
  source: "user" | "schedule";
  end_reason: string | null;
  ended_at: string | null;
  last_seen_at: string | null;
  started_at: string | null;
}

/**
 * List the jobs of an instance.
 */
export async function listAiSearchJobs(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
): Promise {
  return await extractCloudflareResult(
    `list AI Search jobs for instance "${aiSearchId}"`,
    api.get(`${aiSearchInstanceBasePath(api, namespace)}/${aiSearchId}/jobs`),
  );
}

/**
 * Create a job for an instance by POSTing an empty body to its jobs
 * endpoint.
 */
export async function createAiSearchJob(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
): Promise {
  return await extractCloudflareResult(
    `create AI Search job for instance "${aiSearchId}"`,
    api.post(
      `${aiSearchInstanceBasePath(api, namespace)}/${aiSearchId}/jobs`,
      {},
    ),
  );
}

/**
 * Fetch a single job of an instance.
 */
export async function getAiSearchJob(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
  jobId: string,
): Promise {
  return await extractCloudflareResult(
    `get AI Search job "${jobId}" for instance "${aiSearchId}"`,
    api.get(
      `${aiSearchInstanceBasePath(api, namespace)}/${aiSearchId}/jobs/${jobId}`,
    ),
  );
}

/**
 * One entry of a job's log. `runAiSearchJob` uses `id` to skip entries it
 * has already emitted.
 */
interface AiSearchJobLogItem {
  id: number;
  created_at: number;
  message: string;
  message_type: number;
}

/**
 * List up to 500 log entries (`per_page=500`) for a job.
 *
 * Returns an empty list when the API reports error code 7002
 * (`ai_search_not_found`); any other error is re-thrown.
 */
export async function listAiSearchJobLogs(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
  jobId: string,
): Promise {
  try {
    return await extractCloudflareResult(
      `list AI Search job logs for job "${jobId}" for instance "${aiSearchId}"`,
      api.get(
        `${aiSearchInstanceBasePath(api, namespace)}/${aiSearchId}/jobs/${jobId}/logs?per_page=500`,
      ),
    );
  } catch (error) {
    if (
      isCloudflareApiError(error, { code: 7002 }) // ai_search_not_found
    ) {
      return [];
    }
    throw error;
  }
}
/**
 * Start an indexing job for an AI Search instance and wait for it to end,
 * forwarding the job's log messages to `log` while it runs.
 *
 * Log streaming is best-effort and runs in the background: it fetches the
 * job log every 3 seconds and emits entries whose `id` is greater than the
 * last one emitted. A failure while fetching logs is reported through `log`
 * and stops the streaming, but never fails the job wait and never surfaces
 * as an unhandled promise rejection.
 *
 * @param api Cloudflare API client.
 * @param namespace Namespace the instance belongs to.
 * @param aiSearchId ID of the instance to index.
 * @param log Callback that receives progress and job log messages.
 * @returns Resolves once the job reports a non-null `ended_at`.
 * @throws Whatever `createAiSearchJob` or the status `poll` rejects with.
 */
export async function runAiSearchJob(
  api: CloudflareApi,
  namespace: string,
  aiSearchId: string,
  log: (message: string) => void,
): Promise<void> {
  log("Preparing to index...");
  const job = await createAiSearchJob(api, namespace, aiSearchId);

  let lastLogId = 0;
  let done = false;

  // Iterative (not recursive) log tail, so a long-running job does not grow
  // an ever-deeper chain of pending promises.
  const tailLogs = async (): Promise<void> => {
    for (;;) {
      const logs = await listAiSearchJobLogs(
        api,
        namespace,
        aiSearchId,
        job.id,
      );
      // Walk from the last element to the first and emit only entries newer
      // than the last one seen.
      for (let i = logs.length - 1; i >= 0; i--) {
        const item = logs[i];
        if (item.id > lastLogId) {
          lastLogId = item.id;
          log(item.message);
        }
      }
      // `done` is checked after fetching, so one final fetch still happens
      // once the job has ended.
      if (done) return;
      await sleep(3000);
    }
  };

  // Fire-and-forget on purpose, but with the rejection handled.
  void tailLogs().catch((error: unknown) => {
    log(
      `Unable to stream job logs: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  });

  try {
    const result = await poll({
      description: `run AI Search job "${job.id}" for instance "${aiSearchId}"`,
      fn: () => getAiSearchJob(api, namespace, aiSearchId, job.id),
      predicate: (result) => result.ended_at !== null,
    });
    log(`Sync completed: ${result.end_reason}`);
  } finally {
    // Always stop the log tail, including when polling rejects; otherwise
    // the tail would keep fetching every 3 seconds indefinitely.
    done = true;
  }
}