Namespaces

List namespaces.

client.aiSearch.namespaces.list(params, options?): V4PagePaginationArray<NamespaceListResponse { created_at, name, description }>

GET /accounts/{account_id}/ai-search/namespaces

Create namespace.

client.aiSearch.namespaces.create(params, options?): NamespaceCreateResponse { created_at, name, description }

POST /accounts/{account_id}/ai-search/namespaces

Read namespace.

client.aiSearch.namespaces.read(name, params, options?): NamespaceReadResponse { created_at, name, description }

GET /accounts/{account_id}/ai-search/namespaces/{name}

Update namespace.

client.aiSearch.namespaces.update(name, params, options?): NamespaceUpdateResponse { created_at, name, description }

PUT /accounts/{account_id}/ai-search/namespaces/{name}

Delete namespace.

client.aiSearch.namespaces.delete(name, params, options?): NamespaceDeleteResponse

DELETE /accounts/{account_id}/ai-search/namespaces/{name}

Multi-Instance Search

client.aiSearch.namespaces.search(name, params, options?): NamespaceSearchResponse { chunks, search_query, errors }

POST /accounts/{account_id}/ai-search/namespaces/{name}/search

Multi-Instance Chat Completions

client.aiSearch.namespaces.chatCompletions(name, params, options?): NamespaceChatCompletionsResponse { choices, chunks, id, 3 more }

POST /accounts/{account_id}/ai-search/namespaces/{name}/chat/completions

Models

NamespaceListResponse { created_at, name, description }

created_at: string

format: date-time

description?: string | null

Optional description for the namespace. Max 256 characters.

maxLength: 256

NamespaceCreateResponse { created_at, name, description }

created_at: string

format: date-time

description?: string | null

Optional description for the namespace. Max 256 characters.

maxLength: 256

NamespaceReadResponse { created_at, name, description }

created_at: string

format: date-time

description?: string | null

Optional description for the namespace. Max 256 characters.

maxLength: 256

NamespaceUpdateResponse { created_at, name, description }

created_at: string

format: date-time

description?: string | null

Optional description for the namespace. Max 256 characters.

maxLength: 256

NamespaceDeleteResponse = unknown

NamespaceSearchResponse { chunks, search_query, errors }

chunks: Array<Chunk>

id: string

instance_id: string

score: number

maximum: 1

minimum: 0

text: string

type: string

item?: Item { key, metadata, timestamp }

key: string

metadata?: Record<string, unknown>

timestamp?: number

scoring_details?: ScoringDetails { fusion_method, keyword_rank, keyword_score, 3 more }

fusion_method?: "rrf" | "max"

One of the following:

"rrf"

"max"

keyword_rank?: number

keyword_score?: number

minimum: 0

reranking_score?: number

maximum: 1

minimum: 0

vector_rank?: number

vector_score?: number

maximum: 1

minimum: 0

search_query: string

errors?: Array<Error>

instance_id: string

message: string

NamespaceChatCompletionsResponse { choices, chunks, id, 3 more }

choices: Array<Choice>

message: Message { content, role }

content: string | null

role: "system" | "developer" | "user" | 2 more

One of the following:

"system"

"developer"

"user"

"assistant"

"tool"

index?: number

chunks: Array<Chunk>

id: string

instance_id: string

score: number

maximum: 1

minimum: 0

text: string

type: string

item?: Item { key, metadata, timestamp }

key: string

metadata?: Record<string, unknown>

timestamp?: number

scoring_details?: ScoringDetails { fusion_method, keyword_rank, keyword_score, 3 more }

fusion_method?: "rrf" | "max"

One of the following:

"rrf"

"max"

keyword_rank?: number

keyword_score?: number

minimum: 0

reranking_score?: number

maximum: 1

minimum: 0

vector_rank?: number

vector_score?: number

maximum: 1

minimum: 0

id?: string

errors?: Array<Error>

instance_id: string

message: string

model?: string

object?: string

Namespaces > Instances

List instances.

client.aiSearch.namespaces.instances.list(name, params, options?): V4PagePaginationArray<InstanceListResponse { id, created_at, modified_at, 36 more }>

GET /accounts/{account_id}/ai-search/namespaces/{name}/instances

Create new instance.

client.aiSearch.namespaces.instances.create(name, params, options?): InstanceCreateResponse { id, created_at, modified_at, 36 more }

POST /accounts/{account_id}/ai-search/namespaces/{name}/instances

Read instance.

client.aiSearch.namespaces.instances.read(name, id, params, options?): InstanceReadResponse { id, created_at, modified_at, 36 more }

GET /accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}

Update instance.

client.aiSearch.namespaces.instances.update(name, id, params, options?): InstanceUpdateResponse { id, created_at, modified_at, 36 more }

PUT /accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}

Delete instance.

client.aiSearch.namespaces.instances.delete(name, id, params, options?): InstanceDeleteResponse { id, created_at, modified_at, 36 more }

DELETE /accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}

Stats

client.aiSearch.namespaces.instances.stats(name, id, params, options?): InstanceStatsResponse { completed, degraded, engine, 8 more }

GET /accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/stats

Search

client.aiSearch.namespaces.instances.search(name, id, params, options?): InstanceSearchResponse { chunks, search_query }

POST /accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/search

Chat Completions

client.aiSearch.namespaces.instances.chatCompletions(name, id, params, options?): InstanceChatCompletionsResponse { choices, chunks, id, 2 more }

POST /accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/chat/completions

Models

InstanceListResponse { id, created_at, modified_at, 36 more }

id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength: 64

minLength: 1

created_at: string

format: date-time

modified_at: string

format: date-time

ai_gateway_id?: string | null

ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

cache?: boolean

cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"

One of the following:

"super_strict_match"

"close_enough"

"flexible_friend"

"anything_goes"

cache_ttl?: 600 | 1800 | 3600 | 7 more

Cache entry TTL in seconds. Allowed values: 600 (10min), 1800 (30min), 3600 (1h), 7200 (2h), 21600 (6h), 43200 (12h), 86400 (24h), 172800 (48h), 259200 (72h), 518400 (6d).

One of the following:

600

1800

3600

7200

21600

43200

86400

172800

259200

518400

chunk_overlap?: number

maximum: 30

minimum: 0

chunk_size?: number

minimum: 64

created_by?: string | null

custom_metadata?: Array<CustomMetadata>

data_type: "text" | "number" | "boolean" | "datetime"

One of the following:

"text"

"number"

"boolean"

"datetime"

field_name: string

maxLength: 64

minLength: 1

embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null

One of the following:

"@cf/qwen/qwen3-embedding-0.6b"

"@cf/baai/bge-m3"

"@cf/baai/bge-large-en-v1.5"

"@cf/google/embeddinggemma-300m"

"google-ai-studio/gemini-embedding-001"

"google-ai-studio/gemini-embedding-2-preview"

"openai/text-embedding-3-small"

"openai/text-embedding-3-large"

enable?: boolean

engine_version?: number

fusion_method?: "max" | "rrf"

One of the following:

"max"

"rrf"

[Deprecated] hybrid_search_enabled?: boolean

Deprecated — use index_method instead.

index_method?: IndexMethod { keyword, vector }

Controls which storage backends are used during indexing. Defaults to vector-only.

keyword: boolean

Enable keyword (BM25) storage backend.

vector: boolean

Enable vector (embedding) storage backend.

indexing_options?: IndexingOptions | null

keyword_tokenizer?: "porter" | "trigram"

Tokenizer used for keyword search indexing. porter provides word-level tokenization with Porter stemming (good for natural language queries). trigram enables character-level substring matching (good for partial matches, code, identifiers). Changing this triggers a full re-index. Defaults to porter.

One of the following:

"porter"

"trigram"

last_activity?: string | null

format: date-time

max_num_results?: number

maximum: 50

minimum: 1

metadata?: Metadata { created_from_aisearch_wizard, worker_domain }

created_from_aisearch_wizard?: boolean

worker_domain?: string

modified_by?: string | null

namespace?: string | null

paused?: boolean

public_endpoint_id?: string | null

public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }

authorized_hosts?: Array<string>

chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }

disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean

mcp?: Mcp { description, disabled }

description?: string

disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }

period_ms?: number

maximum: 3600000

minimum: 60000

requests?: number

minimum: 1

technique?: "fixed" | "sliding"

One of the following:

"fixed"

"sliding"

search_endpoint?: SearchEndpoint { disabled }

disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean

reranking_model?: "@cf/baai/bge-reranker-base" | "" | null

One of the following:

"@cf/baai/bge-reranker-base"

""

retrieval_options?: RetrievalOptions | null

boost_by?: Array<BoostBy>

Metadata fields to boost search results by. Each entry specifies a metadata field and an optional direction. Direction defaults to ‘asc’ for numeric/datetime fields and ‘exists’ for text/boolean fields. Fields must match ‘timestamp’ or a defined custom_metadata field.

field: string

Metadata field name to boost by. Use ‘timestamp’ for document freshness, or any custom_metadata field. Numeric and datetime fields support all four directions (asc, desc, exists, not_exists); text/boolean fields only support exists/not_exists.

maxLength: 64

minLength: 1

direction?: "asc" | "desc" | "exists" | "not_exists"

Boost direction. ‘desc’ = higher values rank higher (e.g. newer timestamps). ‘asc’ = lower values rank higher. ‘exists’ = boost chunks that have the field. ‘not_exists’ = boost chunks that lack the field. Optional — defaults to ‘asc’ for numeric/datetime fields, ‘exists’ for text/boolean fields.

One of the following:

"asc"

"desc"

"exists"

"not_exists"

keyword_match_mode?: "and" | "or"

Controls which documents are candidates for BM25 scoring. ‘and’ restricts candidates to documents containing all query terms; ‘or’ includes any document containing at least one term, ranked by BM25 relevance. Defaults to ‘and’.

One of the following:

"and"

"or"

rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

rewrite_query?: boolean

score_threshold?: number

maximum: 1

minimum: 0

source?: string | null

source_params?: SourceParams | null

exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string

r2_jurisdiction?: string

web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }

crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }

depth?: number

maximum: 100000

minimum: 1

include_external_links?: boolean

include_subdomains?: boolean

max_age?: number

maximum: 604800

minimum: 0

source?: "all" | "sitemaps" | "links"

One of the following:

"all"

"sitemaps"

"links"

parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }

content_selector?: Array<ContentSelector>

List of path-to-selector mappings for extracting specific content from crawled pages. Each entry pairs a URL glob pattern with a CSS selector. The first matching path wins. Only the matched HTML fragment is stored and indexed. Omit the field to disable content selection — empty arrays are rejected.

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength: 200

minLength: 1

selector: string

CSS selector to extract content from pages matching the path pattern. Must not contain disallowed characters (;, `, $, {, }, \). Must target a single element; if multiple elements match, the selector is ignored and the full page is used.

maxLength: 200

minLength: 1

include_headers?: Record<string, string>

Up to 5 custom HTTP headers sent with each crawl request. Names must be RFC-7230 token characters (no spaces, colons, or control characters); values must be HTAB + printable ASCII (no CR/LF).

include_images?: boolean

specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is ‘sitemap’.

use_browser_rendering?: boolean

parse_type?: "sitemap" | "feed-rss" | "crawl"

One of the following:

"sitemap"

"feed-rss"

"crawl"

store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }

storage_id: string

r2_jurisdiction?: string

storage_type?: Provider { }

status?: string

sync_interval?: 900 | 1800 | 3600 | 5 more

Interval between automatic syncs, in seconds. Allowed values: 900 (15min), 1800 (30min), 3600 (1h), 7200 (2h), 14400 (4h), 21600 (6h), 43200 (12h), 86400 (24h).

One of the following:

900

1800

3600

7200

14400

21600

43200

86400

token_id?: string

format: uuid

type?: "r2" | "web-crawler" | null

One of the following:

"r2"

"web-crawler"

InstanceCreateResponse { id, created_at, modified_at, 36 more }

id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength: 64

minLength: 1

created_at: string

format: date-time

modified_at: string

format: date-time

ai_gateway_id?: string | null

ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

cache?: boolean

cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"

One of the following:

"super_strict_match"

"close_enough"

"flexible_friend"

"anything_goes"

cache_ttl?: 600 | 1800 | 3600 | 7 more

Cache entry TTL in seconds. Allowed values: 600 (10min), 1800 (30min), 3600 (1h), 7200 (2h), 21600 (6h), 43200 (12h), 86400 (24h), 172800 (48h), 259200 (72h), 518400 (6d).

One of the following:

600

1800

3600

7200

21600

43200

86400

172800

259200

518400

chunk_overlap?: number

maximum: 30

minimum: 0

chunk_size?: number

minimum: 64

created_by?: string | null

custom_metadata?: Array<CustomMetadata>

data_type: "text" | "number" | "boolean" | "datetime"

One of the following:

"text"

"number"

"boolean"

"datetime"

field_name: string

maxLength: 64

minLength: 1

embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null

One of the following:

"@cf/qwen/qwen3-embedding-0.6b"

"@cf/baai/bge-m3"

"@cf/baai/bge-large-en-v1.5"

"@cf/google/embeddinggemma-300m"

"google-ai-studio/gemini-embedding-001"

"google-ai-studio/gemini-embedding-2-preview"

"openai/text-embedding-3-small"

"openai/text-embedding-3-large"

enable?: boolean

engine_version?: number

fusion_method?: "max" | "rrf"

One of the following:

"max"

"rrf"

[Deprecated] hybrid_search_enabled?: boolean

Deprecated — use index_method instead.

index_method?: IndexMethod { keyword, vector }

Controls which storage backends are used during indexing. Defaults to vector-only.

keyword: boolean

Enable keyword (BM25) storage backend.

vector: boolean

Enable vector (embedding) storage backend.

indexing_options?: IndexingOptions | null

keyword_tokenizer?: "porter" | "trigram"

One of the following:

"porter"

"trigram"

last_activity?: string | null

format: date-time

max_num_results?: number

maximum: 50

minimum: 1

metadata?: Metadata { created_from_aisearch_wizard, worker_domain }

created_from_aisearch_wizard?: boolean

worker_domain?: string

modified_by?: string | null

namespace?: string | null

paused?: boolean

public_endpoint_id?: string | null

public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }

authorized_hosts?: Array<string>

chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }

disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean

mcp?: Mcp { description, disabled }

description?: string

disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }

period_ms?: number

maximum: 3600000

minimum: 60000

requests?: number

minimum: 1

technique?: "fixed" | "sliding"

One of the following:

"fixed"

"sliding"

search_endpoint?: SearchEndpoint { disabled }

disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean

reranking_model?: "@cf/baai/bge-reranker-base" | "" | null

One of the following:

"@cf/baai/bge-reranker-base"

""

retrieval_options?: RetrievalOptions | null

boost_by?: Array<BoostBy>

field: string

maxLength: 64

minLength: 1

direction?: "asc" | "desc" | "exists" | "not_exists"

One of the following:

"asc"

"desc"

"exists"

"not_exists"

keyword_match_mode?: "and" | "or"

One of the following:

"and"

"or"

rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

rewrite_query?: boolean

score_threshold?: number

maximum: 1

minimum: 0

source?: string | null

source_params?: SourceParams | null

exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string

r2_jurisdiction?: string

web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }

crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }

depth?: number

maximum: 100000

minimum: 1

include_external_links?: boolean

include_subdomains?: boolean

max_age?: number

maximum: 604800

minimum: 0

source?: "all" | "sitemaps" | "links"

One of the following:

"all"

"sitemaps"

"links"

parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }

content_selector?: Array<ContentSelector>

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength: 200

minLength: 1

selector: string

maxLength: 200

minLength: 1

include_headers?: Record<string, string>

Up to 5 custom HTTP headers sent with each crawl request. Names must be RFC-7230 token characters (no spaces, colons, or control characters); values must be HTAB + printable ASCII (no CR/LF).

include_images?: boolean

specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is ‘sitemap’.

use_browser_rendering?: boolean

parse_type?: "sitemap" | "feed-rss" | "crawl"

One of the following:

"sitemap"

"feed-rss"

"crawl"

store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }

storage_id: string

r2_jurisdiction?: string

storage_type?: Provider { }

status?: string

sync_interval?: 900 | 1800 | 3600 | 5 more

Interval between automatic syncs, in seconds. Allowed values: 900 (15min), 1800 (30min), 3600 (1h), 7200 (2h), 14400 (4h), 21600 (6h), 43200 (12h), 86400 (24h).

One of the following:

900

1800

3600

7200

14400

21600

43200

86400

token_id?: string

format: uuid

type?: "r2" | "web-crawler" | null

One of the following:

"r2"

"web-crawler"

InstanceReadResponse { id, created_at, modified_at, 36 more }

id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength: 64

minLength: 1

created_at: string

format: date-time

modified_at: string

format: date-time

ai_gateway_id?: string | null

ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

cache?: boolean

cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"

One of the following:

"super_strict_match"

"close_enough"

"flexible_friend"

"anything_goes"

cache_ttl?: 600 | 1800 | 3600 | 7 more

Cache entry TTL in seconds. Allowed values: 600 (10min), 1800 (30min), 3600 (1h), 7200 (2h), 21600 (6h), 43200 (12h), 86400 (24h), 172800 (48h), 259200 (72h), 518400 (6d).

One of the following:

600

1800

3600

7200

21600

43200

86400

172800

259200

518400

chunk_overlap?: number

maximum: 30

minimum: 0

chunk_size?: number

minimum: 64

created_by?: string | null

custom_metadata?: Array<CustomMetadata>

data_type: "text" | "number" | "boolean" | "datetime"

One of the following:

"text"

"number"

"boolean"

"datetime"

field_name: string

maxLength: 64

minLength: 1

embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null

One of the following:

"@cf/qwen/qwen3-embedding-0.6b"

"@cf/baai/bge-m3"

"@cf/baai/bge-large-en-v1.5"

"@cf/google/embeddinggemma-300m"

"google-ai-studio/gemini-embedding-001"

"google-ai-studio/gemini-embedding-2-preview"

"openai/text-embedding-3-small"

"openai/text-embedding-3-large"

enable?: boolean

engine_version?: number

fusion_method?: "max" | "rrf"

One of the following:

"max"

"rrf"

[Deprecated] hybrid_search_enabled?: boolean

Deprecated — use index_method instead.

index_method?: IndexMethod { keyword, vector }

Controls which storage backends are used during indexing. Defaults to vector-only.

keyword: boolean

Enable keyword (BM25) storage backend.

vector: boolean

Enable vector (embedding) storage backend.

indexing_options?: IndexingOptions | null

keyword_tokenizer?: "porter" | "trigram"

One of the following:

"porter"

"trigram"

last_activity?: string | null

format: date-time

max_num_results?: number

maximum: 50

minimum: 1

metadata?: Metadata { created_from_aisearch_wizard, worker_domain }

created_from_aisearch_wizard?: boolean

worker_domain?: string

modified_by?: string | null

namespace?: string | null

paused?: boolean

public_endpoint_id?: string | null

public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }

authorized_hosts?: Array<string>

chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }

disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean

mcp?: Mcp { description, disabled }

description?: string

disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }

period_ms?: number

maximum: 3600000

minimum: 60000

requests?: number

minimum: 1

technique?: "fixed" | "sliding"

One of the following:

"fixed"

"sliding"

search_endpoint?: SearchEndpoint { disabled }

disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean

reranking_model?: "@cf/baai/bge-reranker-base" | "" | null

One of the following:

"@cf/baai/bge-reranker-base"

""

retrieval_options?: RetrievalOptions | null

boost_by?: Array<BoostBy>

field: string

maxLength: 64

minLength: 1

direction?: "asc" | "desc" | "exists" | "not_exists"

One of the following:

"asc"

"desc"

"exists"

"not_exists"

keyword_match_mode?: "and" | "or"

One of the following:

"and"

"or"

rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

rewrite_query?: boolean

score_threshold?: number

maximum: 1

minimum: 0

source?: string | null

source_params?: SourceParams | null

exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string

r2_jurisdiction?: string

web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }

crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }

depth?: number

maximum: 100000

minimum: 1

include_external_links?: boolean

include_subdomains?: boolean

max_age?: number

maximum: 604800

minimum: 0

source?: "all" | "sitemaps" | "links"

One of the following:

"all"

"sitemaps"

"links"

parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }

content_selector?: Array<ContentSelector>

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength: 200

minLength: 1

selector: string

maxLength: 200

minLength: 1

include_headers?: Record<string, string>

Up to 5 custom HTTP headers sent with each crawl request. Names must be RFC-7230 token characters (no spaces, colons, or control characters); values must be HTAB + printable ASCII (no CR/LF).

include_images?: boolean

specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is ‘sitemap’.

use_browser_rendering?: boolean

parse_type?: "sitemap" | "feed-rss" | "crawl"

One of the following:

"sitemap"

"feed-rss"

"crawl"

store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }

storage_id: string

r2_jurisdiction?: string

storage_type?: Provider { }

status?: string

sync_interval?: 900 | 1800 | 3600 | 5 more

Interval between automatic syncs, in seconds. Allowed values: 900 (15min), 1800 (30min), 3600 (1h), 7200 (2h), 14400 (4h), 21600 (6h), 43200 (12h), 86400 (24h).

One of the following:

900

1800

3600

7200

14400

21600

43200

86400

token_id?: string

formatuuid

type?: "r2" | "web-crawler" | null

One of the following:

"r2"

"web-crawler"

InstanceUpdateResponse { id, created_at, modified_at, 36 more }

id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64

minLength1

created_at: string

formatdate-time

modified_at: string

formatdate-time

ai_gateway_id?: string | null

ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

cache?: boolean

cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"

One of the following:

"super_strict_match"

"close_enough"

"flexible_friend"

"anything_goes"

cache_ttl?: 600 | 1800 | 3600 | 7 more

Cache entry TTL in seconds. Allowed values: 600 (10min), 1800 (30min), 3600 (1h), 7200 (2h), 21600 (6h), 43200 (12h), 86400 (24h), 172800 (48h), 259200 (72h), 518400 (6d).

One of the following:

600

1800

3600

7200

21600

43200

86400

172800

259200

518400

chunk_overlap?: number

maximum30

minimum0

chunk_size?: number

minimum64

created_by?: string | null

custom_metadata?: Array<CustomMetadata>

data_type: "text" | "number" | "boolean" | "datetime"

One of the following:

"text"

"number"

"boolean"

"datetime"

field_name: string

maxLength64

minLength1

embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null

One of the following:

"@cf/qwen/qwen3-embedding-0.6b"

"@cf/baai/bge-m3"

"@cf/baai/bge-large-en-v1.5"

"@cf/google/embeddinggemma-300m"

"google-ai-studio/gemini-embedding-001"

"google-ai-studio/gemini-embedding-2-preview"

"openai/text-embedding-3-small"

"openai/text-embedding-3-large"

enable?: boolean

engine_version?: number

fusion_method?: "max" | "rrf"

One of the following:

"max"

"rrf"

hybrid_search_enabled?: boolean (Deprecated)

Deprecated — use index_method instead.

index_method?: IndexMethod { keyword, vector }

Controls which storage backends are used during indexing. Defaults to vector-only.

keyword: boolean

Enable keyword (BM25) storage backend.

vector: boolean

Enable vector (embedding) storage backend.

indexing_options?: IndexingOptions | null

keyword_tokenizer?: "porter" | "trigram"

One of the following:

"porter"

"trigram"

last_activity?: string | null

formatdate-time

max_num_results?: number

maximum50

minimum1

metadata?: Metadata { created_from_aisearch_wizard, worker_domain }

created_from_aisearch_wizard?: boolean

worker_domain?: string

modified_by?: string | null

namespace?: string | null

paused?: boolean

public_endpoint_id?: string | null

public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }

authorized_hosts?: Array<string>

chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }

disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean

mcp?: Mcp { description, disabled }

description?: string

disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }

period_ms?: number

maximum3600000

minimum60000

requests?: number

minimum1

technique?: "fixed" | "sliding"

One of the following:

"fixed"

"sliding"

search_endpoint?: SearchEndpoint { disabled }

disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean

reranking_model?: "@cf/baai/bge-reranker-base" | "" | null

One of the following:

"@cf/baai/bge-reranker-base"

""

retrieval_options?: RetrievalOptions | null

boost_by?: Array<BoostBy>

field: string

maxLength64

minLength1

direction?: "asc" | "desc" | "exists" | "not_exists"

One of the following:

"asc"

"desc"

"exists"

"not_exists"

keyword_match_mode?: "and" | "or"

One of the following:

"and"

"or"

rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

rewrite_query?: boolean

score_threshold?: number

maximum1

minimum0

source?: string | null

source_params?: SourceParams | null

exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string

r2_jurisdiction?: string

web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }

crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }

depth?: number

maximum100000

minimum1

include_external_links?: boolean

include_subdomains?: boolean

max_age?: number

maximum604800

minimum0

source?: "all" | "sitemaps" | "links"

One of the following:

"all"

"sitemaps"

"links"

parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }

content_selector?: Array<ContentSelector>

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200

minLength1

selector: string

maxLength200

minLength1

include_headers?: Record<string, string>

Up to 5 custom HTTP headers sent with each crawl request. Names must consist of RFC 7230 token characters (no spaces, colons, or control characters); values may contain only HTAB and printable ASCII characters (no CR/LF).

include_images?: boolean

specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is ‘sitemap’.

use_browser_rendering?: boolean

parse_type?: "sitemap" | "feed-rss" | "crawl"

One of the following:

"sitemap"

"feed-rss"

"crawl"

store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }

storage_id: string

r2_jurisdiction?: string

storage_type?: Provider { }

status?: string

sync_interval?: 900 | 1800 | 3600 | 5 more

Interval between automatic syncs, in seconds. Allowed values: 900 (15min), 1800 (30min), 3600 (1h), 7200 (2h), 14400 (4h), 21600 (6h), 43200 (12h), 86400 (24h).

One of the following:

900

1800

3600

7200

14400

21600

43200

86400

token_id?: string

formatuuid

type?: "r2" | "web-crawler" | null

One of the following:

"r2"

"web-crawler"

InstanceDeleteResponse { id, created_at, modified_at, 36 more }

id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64

minLength1

created_at: string

formatdate-time

modified_at: string

formatdate-time

ai_gateway_id?: string | null

ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

cache?: boolean

cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"

One of the following:

"super_strict_match"

"close_enough"

"flexible_friend"

"anything_goes"

cache_ttl?: 600 | 1800 | 3600 | 7 more

Cache entry TTL in seconds. Allowed values: 600 (10min), 1800 (30min), 3600 (1h), 7200 (2h), 21600 (6h), 43200 (12h), 86400 (24h), 172800 (48h), 259200 (72h), 518400 (6d).

One of the following:

600

1800

3600

7200

21600

43200

86400

172800

259200

518400

chunk_overlap?: number

maximum30

minimum0

chunk_size?: number

minimum64

created_by?: string | null

custom_metadata?: Array<CustomMetadata>

data_type: "text" | "number" | "boolean" | "datetime"

One of the following:

"text"

"number"

"boolean"

"datetime"

field_name: string

maxLength64

minLength1

embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null

One of the following:

"@cf/qwen/qwen3-embedding-0.6b"

"@cf/baai/bge-m3"

"@cf/baai/bge-large-en-v1.5"

"@cf/google/embeddinggemma-300m"

"google-ai-studio/gemini-embedding-001"

"google-ai-studio/gemini-embedding-2-preview"

"openai/text-embedding-3-small"

"openai/text-embedding-3-large"

enable?: boolean

engine_version?: number

fusion_method?: "max" | "rrf"

One of the following:

"max"

"rrf"

hybrid_search_enabled?: boolean (Deprecated)

Deprecated — use index_method instead.

index_method?: IndexMethod { keyword, vector }

Controls which storage backends are used during indexing. Defaults to vector-only.

keyword: boolean

Enable keyword (BM25) storage backend.

vector: boolean

Enable vector (embedding) storage backend.

indexing_options?: IndexingOptions | null

keyword_tokenizer?: "porter" | "trigram"

One of the following:

"porter"

"trigram"

last_activity?: string | null

formatdate-time

max_num_results?: number

maximum50

minimum1

metadata?: Metadata { created_from_aisearch_wizard, worker_domain }

created_from_aisearch_wizard?: boolean

worker_domain?: string

modified_by?: string | null

namespace?: string | null

paused?: boolean

public_endpoint_id?: string | null

public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }

authorized_hosts?: Array<string>

chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }

disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean

mcp?: Mcp { description, disabled }

description?: string

disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }

period_ms?: number

maximum3600000

minimum60000

requests?: number

minimum1

technique?: "fixed" | "sliding"

One of the following:

"fixed"

"sliding"

search_endpoint?: SearchEndpoint { disabled }

disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean

reranking_model?: "@cf/baai/bge-reranker-base" | "" | null

One of the following:

"@cf/baai/bge-reranker-base"

""

retrieval_options?: RetrievalOptions | null

boost_by?: Array<BoostBy>

field: string

maxLength64

minLength1

direction?: "asc" | "desc" | "exists" | "not_exists"

One of the following:

"asc"

"desc"

"exists"

"not_exists"

keyword_match_mode?: "and" | "or"

One of the following:

"and"

"or"

rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 27 more | null

One of the following:

"@cf/meta/llama-3.3-70b-instruct-fp8-fast"

"@cf/zai-org/glm-4.7-flash"

"@cf/meta/llama-3.1-8b-instruct-fast"

"@cf/meta/llama-3.1-8b-instruct-fp8"

"@cf/meta/llama-4-scout-17b-16e-instruct"

"@cf/qwen/qwen3-30b-a3b-fp8"

"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"

"@cf/moonshotai/kimi-k2-instruct"

"@cf/google/gemma-3-12b-it"

"@cf/google/gemma-4-26b-a4b-it"

"@cf/moonshotai/kimi-k2.5"

"anthropic/claude-3-7-sonnet"

"anthropic/claude-sonnet-4"

"anthropic/claude-opus-4"

"anthropic/claude-3-5-haiku"

"cerebras/qwen-3-235b-a22b-instruct"

"cerebras/qwen-3-235b-a22b-thinking"

"cerebras/llama-3.3-70b"

"cerebras/llama-4-maverick-17b-128e-instruct"

"cerebras/llama-4-scout-17b-16e-instruct"

"cerebras/gpt-oss-120b"

"google-ai-studio/gemini-2.5-flash"

"google-ai-studio/gemini-2.5-pro"

"grok/grok-4"

"groq/llama-3.3-70b-versatile"

"groq/llama-3.1-8b-instant"

"openai/gpt-5"

"openai/gpt-5-mini"

"openai/gpt-5-nano"

rewrite_query?: boolean

score_threshold?: number

maximum1

minimum0

source?: string | null

source_params?: SourceParams | null

exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string

r2_jurisdiction?: string

web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }

crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }

depth?: number

maximum100000

minimum1

include_external_links?: boolean

include_subdomains?: boolean

max_age?: number

maximum604800

minimum0

source?: "all" | "sitemaps" | "links"

One of the following:

"all"

"sitemaps"

"links"

parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }

content_selector?: Array<ContentSelector>

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200

minLength1

selector: string

maxLength200

minLength1

include_headers?: Record<string, string>

Up to 5 custom HTTP headers sent with each crawl request. Names must consist of RFC 7230 token characters (no spaces, colons, or control characters); values may contain only HTAB and printable ASCII characters (no CR/LF).

include_images?: boolean

specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is ‘sitemap’.

use_browser_rendering?: boolean

parse_type?: "sitemap" | "feed-rss" | "crawl"

One of the following:

"sitemap"

"feed-rss"

"crawl"

store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }

storage_id: string

r2_jurisdiction?: string

storage_type?: Provider { }

status?: string

sync_interval?: 900 | 1800 | 3600 | 5 more

Interval between automatic syncs, in seconds. Allowed values: 900 (15min), 1800 (30min), 3600 (1h), 7200 (2h), 14400 (4h), 21600 (6h), 43200 (12h), 86400 (24h).

One of the following:

900

1800

3600

7200

14400

21600

43200

86400

token_id?: string

formatuuid

type?: "r2" | "web-crawler" | null

One of the following:

"r2"

"web-crawler"

InstanceStatsResponse { completed, degraded, engine, 8 more }

completed?: number

degraded?: boolean

True when status counts are unavailable (e.g. legacy stats query exceeded D1 statement-size limit). Counts are omitted in this case.

engine?: Engine { r2, vectorize }

Engine-specific metadata. Present only for managed (v3) instances.

r2?: R2 { metadataSizeBytes, objectCount, payloadSizeBytes }

R2 bucket storage usage in bytes.

metadataSizeBytes: number

objectCount: number

payloadSizeBytes: number

vectorize?: Vectorize { dimensions, vectorsCount }

Vectorize index metadata (dimensions, vector count).

dimensions: number

vectorsCount: number

error?: number

file_embed_errors?: Record<string, unknown>

index_source_errors?: Record<string, unknown>

last_activity?: string

formatdate-time

outdated?: number

queued?: number

running?: number

skipped?: number

InstanceSearchResponse { chunks, search_query }

chunks: Array<Chunk>

id: string

score: number

maximum1

minimum0

text: string

type: string

item?: Item { key, metadata, timestamp }

key: string

metadata?: Record<string, unknown>

timestamp?: number

scoring_details?: ScoringDetails { fusion_method, keyword_rank, keyword_score, 3 more }

fusion_method?: "rrf" | "max"

One of the following:

"rrf"

"max"

keyword_rank?: number

keyword_score?: number

minimum0

reranking_score?: number

maximum1

minimum0

vector_rank?: number

vector_score?: number

maximum1

minimum0

search_query: string

InstanceChatCompletionsResponse { choices, chunks, id, 2 more }

choices: Array<Choice>

message: Message { content, role }

content: string | null

role: "system" | "developer" | "user" | 2 more

One of the following:

"system"

"developer"

"user"

"assistant"

"tool"

index?: number

chunks: Array<Chunk>

id: string

score: number

maximum1

minimum0

text: string

type: string

item?: Item { key, metadata, timestamp }

key: string

metadata?: Record<string, unknown>

timestamp?: number

scoring_details?: ScoringDetails { fusion_method, keyword_rank, keyword_score, 3 more }

fusion_method?: "rrf" | "max"

One of the following:

"rrf"

"max"

keyword_rank?: number

keyword_score?: number

minimum0

reranking_score?: number

maximum1

minimum0

vector_rank?: number

vector_score?: number

maximum1

minimum0

id?: string

model?: string

object?: string

Namespaces › Instances › Jobs

List Jobs

client.aiSearch.namespaces.instances.jobs.list(, , , ?): V4PagePaginationArray<JobListResponse { id, source, description, 4 more } >

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/jobs

Create new job

client.aiSearch.namespaces.instances.jobs.create(, , , ?): JobCreateResponse { id, source, description, 4 more }

POST/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/jobs

Get Job Details

client.aiSearch.namespaces.instances.jobs.get(, , , , ?): JobGetResponse { id, source, description, 4 more }

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/jobs/{job_id}

Change Job Status

client.aiSearch.namespaces.instances.jobs.update(, , , , ?): JobUpdateResponse { id, source, description, 4 more }

PATCH/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/jobs/{job_id}

List Job Logs

client.aiSearch.namespaces.instances.jobs.logs(, , , , ?): JobLogsResponse { id, created_at, message, message_type }

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/jobs/{job_id}/logs

ModelsExpand Collapse

JobListResponse { id, source, description, 4 more }

id: string

source: "user" | "schedule"

One of the following:

"user"

"schedule"

description?: string

end_reason?: string

ended_at?: string

last_seen_at?: string

started_at?: string

JobCreateResponse { id, source, description, 4 more }

id: string

source: "user" | "schedule"

One of the following:

"user"

"schedule"

description?: string

end_reason?: string

ended_at?: string

last_seen_at?: string

started_at?: string

JobGetResponse { id, source, description, 4 more }

id: string

source: "user" | "schedule"

One of the following:

"user"

"schedule"

description?: string

end_reason?: string

ended_at?: string

last_seen_at?: string

started_at?: string

JobUpdateResponse { id, source, description, 4 more }

id: string

source: "user" | "schedule"

One of the following:

"user"

"schedule"

description?: string

end_reason?: string

ended_at?: string

last_seen_at?: string

started_at?: string

JobLogsResponse = Array<JobLogsResponseItem>

id: number

created_at: number

message: string

message_type: number

Namespaces › Instances › Items

Items List.

client.aiSearch.namespaces.instances.items.list(, , , ?): V4PagePaginationArray<ItemListResponse { id, checksum, chunks_count, 9 more } >

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items

Upload Item.

client.aiSearch.namespaces.instances.items.upload(, , , ?): ItemUploadResponse { id, checksum, chunks_count, 9 more }

POST/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items

Create or Update Item.

client.aiSearch.namespaces.instances.items.createOrUpdate(, , , ?): ItemCreateOrUpdateResponse { id, checksum, chunks_count, 9 more }

PUT/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items

Get Item.

client.aiSearch.namespaces.instances.items.get(, , , , ?): ItemGetResponse { id, checksum, chunks_count, 9 more }

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items/{item_id}

Sync Item.

client.aiSearch.namespaces.instances.items.sync(, , , , ?): ItemSyncResponse { id, checksum, chunks_count, 9 more }

PATCH/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items/{item_id}

Delete Item.

client.aiSearch.namespaces.instances.items.delete(, , , , ?): ItemDeleteResponse { key }

DELETE/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items/{item_id}

Download Item Content.

client.aiSearch.namespaces.instances.items.download(, , , , ?): Response

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items/{item_id}/download

Item Logs.

client.aiSearch.namespaces.instances.items.logs(, , , , ?): ItemLogsResponse { action, chunkCount, errorType, 4 more }

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items/{item_id}/logs

List Item Chunks.

client.aiSearch.namespaces.instances.items.chunks(, , , , ?): ItemChunksResponse { id, item, text, 2 more }

GET/accounts/{account_id}/ai-search/namespaces/{name}/instances/{id}/items/{item_id}/chunks

ModelsExpand Collapse

ItemListResponse { id, checksum, chunks_count, 9 more }

id: string

checksum: string

chunks_count: number | null

created_at: string

formatdate-time

file_size: number | null

key: string

last_seen_at: string

formatdate-time

namespace: string

next_action: "INDEX" | "DELETE" | null

One of the following:

"INDEX"

"DELETE"

source_id: string | null

Identifies which data source this item belongs to. “builtin” for uploaded files, “{type}:{source}” for external sources, null for legacy items.

status: "queued" | "running" | "completed" | 3 more

One of the following:

"queued"

"running"

"completed"

"error"

"skipped"

"outdated"

error?: string

ItemUploadResponse { id, checksum, chunks_count, 9 more }

id: string

checksum: string

chunks_count: number | null

created_at: string

formatdate-time

file_size: number | null

key: string

last_seen_at: string

formatdate-time

namespace: string

next_action: "INDEX" | "DELETE" | null

One of the following:

"INDEX"

"DELETE"

source_id: string | null

Identifies which data source this item belongs to. “builtin” for uploaded files, “{type}:{source}” for external sources, null for legacy items.

status: "queued" | "running" | "completed" | 3 more

One of the following:

"queued"

"running"

"completed"

"error"

"skipped"

"outdated"

error?: string

ItemCreateOrUpdateResponse { id, checksum, chunks_count, 9 more }

id: string

checksum: string

chunks_count: number | null

created_at: string

formatdate-time

file_size: number | null

key: string

last_seen_at: string

formatdate-time

namespace: string

next_action: "INDEX" | "DELETE" | null

One of the following:

"INDEX"

"DELETE"

source_id: string | null

Identifies which data source this item belongs to. “builtin” for uploaded files, “{type}:{source}” for external sources, null for legacy items.

status: "queued" | "running" | "completed" | 3 more

One of the following:

"queued"

"running"

"completed"

"error"

"skipped"

"outdated"

error?: string

ItemGetResponse { id, checksum, chunks_count, 9 more }

id: string

checksum: string

chunks_count: number | null

created_at: string

formatdate-time

file_size: number | null

key: string

last_seen_at: string

formatdate-time

namespace: string

next_action: "INDEX" | "DELETE" | null

One of the following:

"INDEX"

"DELETE"

source_id: string | null

Identifies which data source this item belongs to. “builtin” for uploaded files, “{type}:{source}” for external sources, null for legacy items.

status: "queued" | "running" | "completed" | 3 more

One of the following:

"queued"

"running"

"completed"

"error"

"skipped"

"outdated"

error?: string

ItemSyncResponse { id, checksum, chunks_count, 9 more }

id: string

checksum: string

chunks_count: number | null

created_at: string

formatdate-time

file_size: number | null

key: string

last_seen_at: string

formatdate-time

namespace: string

next_action: "INDEX" | "DELETE" | null

One of the following:

"INDEX"

"DELETE"

source_id: string | null

Identifies which data source this item belongs to. “builtin” for uploaded files, “{type}:{source}” for external sources, null for legacy items.

status: "queued" | "running" | "completed" | 3 more

One of the following:

"queued"

"running"

"completed"

"error"

"skipped"

"outdated"

error?: string

ItemDeleteResponse { key }

key: string

ItemLogsResponse = Array<ItemLogsResponseItem>

action: string

chunkCount: number | null

errorType: string | null

fileKey: string

message: string | null

processingTimeMs: number | null

timestamp: string

formatdate-time

ItemChunksResponse = Array<ItemChunksResponseItem>

id: string

item: Item { key, metadata, timestamp }

key: string

metadata?: Record<string, unknown>

timestamp?: number

text: string

end_byte?: number

start_byte?: number