Skip to content
Start here

AI Search

AI Search / Instances

List instances.
client.aiSearch.instances.list(InstanceListParams { account_id, namespace, order_by, 4 more } params, RequestOptions options?): V4PagePaginationArray<InstanceListResponse { id, created_at, modified_at, 33 more }>
GET /accounts/{account_id}/ai-search/instances
Create new instances.
client.aiSearch.instances.create(InstanceCreateParams { account_id, id, ai_gateway_id, 24 more } params, RequestOptions options?): InstanceCreateResponse { id, created_at, modified_at, 33 more }
POST /accounts/{account_id}/ai-search/instances
Read instances.
client.aiSearch.instances.read(string id, InstanceReadParams { account_id } params, RequestOptions options?): InstanceReadResponse { id, created_at, modified_at, 33 more }
GET /accounts/{account_id}/ai-search/instances/{id}
Update instances.
client.aiSearch.instances.update(string id, InstanceUpdateParams { account_id, ai_gateway_id, ai_search_model, 27 more } params, RequestOptions options?): InstanceUpdateResponse { id, created_at, modified_at, 33 more }
PUT /accounts/{account_id}/ai-search/instances/{id}
Delete instances.
client.aiSearch.instances.delete(string id, InstanceDeleteParams { account_id } params, RequestOptions options?): InstanceDeleteResponse { id, created_at, modified_at, 33 more }
DELETE /accounts/{account_id}/ai-search/instances/{id}
Stats
client.aiSearch.instances.stats(string id, InstanceStatsParams { account_id } params, RequestOptions options?): InstanceStatsResponse { completed, error, file_embed_errors, 6 more }
GET /accounts/{account_id}/ai-search/instances/{id}/stats
Search
client.aiSearch.instances.search(string id, InstanceSearchParams { account_id, messages, ai_search_options } params, RequestOptions options?): InstanceSearchResponse { chunks, search_query }
POST /accounts/{account_id}/ai-search/instances/{id}/search
Chat Completions
client.aiSearch.instances.chatCompletions(string id, InstanceChatCompletionsParams { account_id, messages, ai_search_options, 2 more } params, RequestOptions options?): InstanceChatCompletionsResponse { choices, chunks, id, 2 more }
POST /accounts/{account_id}/ai-search/instances/{id}/chat/completions
Models (Expand / Collapse)
InstanceListResponse { id, created_at, modified_at, 33 more }
id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64
minLength1
created_at: string
formatdate-time
modified_at: string
formatdate-time
ai_gateway_id?: string | null
ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
cache?: boolean
cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"
One of the following:
"super_strict_match"
"close_enough"
"flexible_friend"
"anything_goes"
chunk_overlap?: number
maximum30
minimum0
chunk_size?: number
minimum64
created_by?: string | null
custom_metadata?: Array<CustomMetadata>
data_type: "text" | "number" | "boolean" | "datetime"
One of the following:
"text"
"number"
"boolean"
"datetime"
field_name: string
maxLength64
minLength1
embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null
One of the following:
"@cf/qwen/qwen3-embedding-0.6b"
"@cf/baai/bge-m3"
"@cf/baai/bge-large-en-v1.5"
"@cf/google/embeddinggemma-300m"
"google-ai-studio/gemini-embedding-001"
"google-ai-studio/gemini-embedding-2-preview"
"openai/text-embedding-3-small"
"openai/text-embedding-3-large"
""
enable?: boolean
engine_version?: number
fusion_method?: "max" | "rrf"
One of the following:
"max"
"rrf"
hybrid_search_enabled?: boolean
indexing_options?: IndexingOptions | null
keyword_tokenizer?: "porter" | "trigram"

Tokenizer used for keyword search indexing. porter provides word-level tokenization with Porter stemming (good for natural language queries). trigram enables character-level substring matching (good for partial matches, code, identifiers). Changing this triggers a full re-index. Defaults to porter.

One of the following:
"porter"
"trigram"
last_activity?: string | null
formatdate-time
max_num_results?: number
maximum50
minimum1
metadata?: Metadata { created_from_aisearch_wizard, worker_domain }
created_from_aisearch_wizard?: boolean
worker_domain?: string
modified_by?: string | null
namespace?: string | null
maxLength32
minLength1
paused?: boolean
public_endpoint_id?: string | null
public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }
authorized_hosts?: Array<string>
chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }
disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean
mcp?: Mcp { description, disabled }
description?: string
disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }
period_ms?: number
maximum3600000
minimum60000
requests?: number
minimum1
technique?: "fixed" | "sliding"
One of the following:
"fixed"
"sliding"
search_endpoint?: SearchEndpoint { disabled }
disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean
reranking_model?: "@cf/baai/bge-reranker-base" | "" | null
One of the following:
"@cf/baai/bge-reranker-base"
""
retrieval_options?: RetrievalOptions | null
boost_by?: Array<BoostBy>

Metadata fields to boost search results by. Each entry specifies a metadata field and an optional direction. Direction defaults to 'asc' for numeric/datetime fields and 'exists' for text/boolean fields. Fields must match 'timestamp' or a defined custom_metadata field.

field: string

Metadata field name to boost by. Use 'timestamp' for document freshness, or any custom_metadata field. Numeric and datetime fields support asc/desc directions; text/boolean fields support exists/not_exists.

maxLength64
minLength1
direction?: "asc" | "desc" | "exists" | "not_exists"

Boost direction. 'desc' = higher values rank higher (e.g. newer timestamps). 'asc' = lower values rank higher. 'exists' = boost chunks that have the field. 'not_exists' = boost chunks that lack the field. Optional — defaults to 'asc' for numeric/datetime fields, 'exists' for text/boolean fields.

One of the following:
"asc"
"desc"
"exists"
"not_exists"
keyword_match_mode?: "and" | "or"

Controls which documents are candidates for BM25 scoring. 'and' restricts candidates to documents containing all query terms; 'or' includes any document containing at least one term, ranked by BM25 relevance. Defaults to 'and'. Legacy values 'exact_match' and 'fuzzy_match' are accepted and map to 'and' and 'or' respectively.

One of the following:
"and"
"or"
rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
rewrite_query?: boolean
score_threshold?: number
maximum1
minimum0
source?: string | null
source_params?: SourceParams | null
exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string
r2_jurisdiction?: string
web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }
crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }
depth?: number
maximum100000
minimum1
include_subdomains?: boolean
max_age?: number
maximum604800
minimum0
source?: "all" | "sitemaps" | "links"
One of the following:
"all"
"sitemaps"
"links"
parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }
content_selector?: Array<ContentSelector>

List of path-to-selector mappings for extracting specific content from crawled pages. Each entry pairs a URL glob pattern with a CSS selector. The first matching path wins. Only the matched HTML fragment is stored and indexed.

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200
selector: string

CSS selector to extract content from pages matching the path pattern. Supports standard CSS selectors including class, ID, element, and attribute selectors.

maxLength200
include_headers?: Record<string, string>
include_images?: boolean
specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is 'sitemap'.

use_browser_rendering?: boolean
parse_type?: "sitemap" | "feed-rss" | "crawl"
One of the following:
"sitemap"
"feed-rss"
"crawl"
store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }
storage_id: string
r2_jurisdiction?: string
storage_type?: Provider { }
status?: string
token_id?: string
formatuuid
type?: "r2" | "web-crawler" | null
One of the following:
"r2"
"web-crawler"
InstanceCreateResponse { id, created_at, modified_at, 33 more }
id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64
minLength1
created_at: string
formatdate-time
modified_at: string
formatdate-time
ai_gateway_id?: string | null
ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
cache?: boolean
cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"
One of the following:
"super_strict_match"
"close_enough"
"flexible_friend"
"anything_goes"
chunk_overlap?: number
maximum30
minimum0
chunk_size?: number
minimum64
created_by?: string | null
custom_metadata?: Array<CustomMetadata>
data_type: "text" | "number" | "boolean" | "datetime"
One of the following:
"text"
"number"
"boolean"
"datetime"
field_name: string
maxLength64
minLength1
embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null
One of the following:
"@cf/qwen/qwen3-embedding-0.6b"
"@cf/baai/bge-m3"
"@cf/baai/bge-large-en-v1.5"
"@cf/google/embeddinggemma-300m"
"google-ai-studio/gemini-embedding-001"
"google-ai-studio/gemini-embedding-2-preview"
"openai/text-embedding-3-small"
"openai/text-embedding-3-large"
""
enable?: boolean
engine_version?: number
fusion_method?: "max" | "rrf"
One of the following:
"max"
"rrf"
hybrid_search_enabled?: boolean
indexing_options?: IndexingOptions | null
keyword_tokenizer?: "porter" | "trigram"

Tokenizer used for keyword search indexing. porter provides word-level tokenization with Porter stemming (good for natural language queries). trigram enables character-level substring matching (good for partial matches, code, identifiers). Changing this triggers a full re-index. Defaults to porter.

One of the following:
"porter"
"trigram"
last_activity?: string | null
formatdate-time
max_num_results?: number
maximum50
minimum1
metadata?: Metadata { created_from_aisearch_wizard, worker_domain }
created_from_aisearch_wizard?: boolean
worker_domain?: string
modified_by?: string | null
namespace?: string | null
maxLength32
minLength1
paused?: boolean
public_endpoint_id?: string | null
public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }
authorized_hosts?: Array<string>
chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }
disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean
mcp?: Mcp { description, disabled }
description?: string
disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }
period_ms?: number
maximum3600000
minimum60000
requests?: number
minimum1
technique?: "fixed" | "sliding"
One of the following:
"fixed"
"sliding"
search_endpoint?: SearchEndpoint { disabled }
disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean
reranking_model?: "@cf/baai/bge-reranker-base" | "" | null
One of the following:
"@cf/baai/bge-reranker-base"
""
retrieval_options?: RetrievalOptions | null
boost_by?: Array<BoostBy>

Metadata fields to boost search results by. Each entry specifies a metadata field and an optional direction. Direction defaults to 'asc' for numeric/datetime fields and 'exists' for text/boolean fields. Fields must match 'timestamp' or a defined custom_metadata field.

field: string

Metadata field name to boost by. Use 'timestamp' for document freshness, or any custom_metadata field. Numeric and datetime fields support asc/desc directions; text/boolean fields support exists/not_exists.

maxLength64
minLength1
direction?: "asc" | "desc" | "exists" | "not_exists"

Boost direction. 'desc' = higher values rank higher (e.g. newer timestamps). 'asc' = lower values rank higher. 'exists' = boost chunks that have the field. 'not_exists' = boost chunks that lack the field. Optional — defaults to 'asc' for numeric/datetime fields, 'exists' for text/boolean fields.

One of the following:
"asc"
"desc"
"exists"
"not_exists"
keyword_match_mode?: "and" | "or"

Controls which documents are candidates for BM25 scoring. 'and' restricts candidates to documents containing all query terms; 'or' includes any document containing at least one term, ranked by BM25 relevance. Defaults to 'and'. Legacy values 'exact_match' and 'fuzzy_match' are accepted and map to 'and' and 'or' respectively.

One of the following:
"and"
"or"
rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
rewrite_query?: boolean
score_threshold?: number
maximum1
minimum0
source?: string | null
source_params?: SourceParams | null
exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string
r2_jurisdiction?: string
web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }
crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }
depth?: number
maximum100000
minimum1
include_subdomains?: boolean
max_age?: number
maximum604800
minimum0
source?: "all" | "sitemaps" | "links"
One of the following:
"all"
"sitemaps"
"links"
parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }
content_selector?: Array<ContentSelector>

List of path-to-selector mappings for extracting specific content from crawled pages. Each entry pairs a URL glob pattern with a CSS selector. The first matching path wins. Only the matched HTML fragment is stored and indexed.

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200
selector: string

CSS selector to extract content from pages matching the path pattern. Supports standard CSS selectors including class, ID, element, and attribute selectors.

maxLength200
include_headers?: Record<string, string>
include_images?: boolean
specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is 'sitemap'.

use_browser_rendering?: boolean
parse_type?: "sitemap" | "feed-rss" | "crawl"
One of the following:
"sitemap"
"feed-rss"
"crawl"
store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }
storage_id: string
r2_jurisdiction?: string
storage_type?: Provider { }
status?: string
token_id?: string
formatuuid
type?: "r2" | "web-crawler" | null
One of the following:
"r2"
"web-crawler"
InstanceReadResponse { id, created_at, modified_at, 33 more }
id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64
minLength1
created_at: string
formatdate-time
modified_at: string
formatdate-time
ai_gateway_id?: string | null
ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
cache?: boolean
cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"
One of the following:
"super_strict_match"
"close_enough"
"flexible_friend"
"anything_goes"
chunk_overlap?: number
maximum30
minimum0
chunk_size?: number
minimum64
created_by?: string | null
custom_metadata?: Array<CustomMetadata>
data_type: "text" | "number" | "boolean" | "datetime"
One of the following:
"text"
"number"
"boolean"
"datetime"
field_name: string
maxLength64
minLength1
embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null
One of the following:
"@cf/qwen/qwen3-embedding-0.6b"
"@cf/baai/bge-m3"
"@cf/baai/bge-large-en-v1.5"
"@cf/google/embeddinggemma-300m"
"google-ai-studio/gemini-embedding-001"
"google-ai-studio/gemini-embedding-2-preview"
"openai/text-embedding-3-small"
"openai/text-embedding-3-large"
""
enable?: boolean
engine_version?: number
fusion_method?: "max" | "rrf"
One of the following:
"max"
"rrf"
hybrid_search_enabled?: boolean
indexing_options?: IndexingOptions | null
keyword_tokenizer?: "porter" | "trigram"

Tokenizer used for keyword search indexing. porter provides word-level tokenization with Porter stemming (good for natural language queries). trigram enables character-level substring matching (good for partial matches, code, identifiers). Changing this triggers a full re-index. Defaults to porter.

One of the following:
"porter"
"trigram"
last_activity?: string | null
formatdate-time
max_num_results?: number
maximum50
minimum1
metadata?: Metadata { created_from_aisearch_wizard, worker_domain }
created_from_aisearch_wizard?: boolean
worker_domain?: string
modified_by?: string | null
namespace?: string | null
maxLength32
minLength1
paused?: boolean
public_endpoint_id?: string | null
public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }
authorized_hosts?: Array<string>
chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }
disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean
mcp?: Mcp { description, disabled }
description?: string
disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }
period_ms?: number
maximum3600000
minimum60000
requests?: number
minimum1
technique?: "fixed" | "sliding"
One of the following:
"fixed"
"sliding"
search_endpoint?: SearchEndpoint { disabled }
disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean
reranking_model?: "@cf/baai/bge-reranker-base" | "" | null
One of the following:
"@cf/baai/bge-reranker-base"
""
retrieval_options?: RetrievalOptions | null
boost_by?: Array<BoostBy>

Metadata fields to boost search results by. Each entry specifies a metadata field and an optional direction. Direction defaults to 'asc' for numeric/datetime fields and 'exists' for text/boolean fields. Fields must match 'timestamp' or a defined custom_metadata field.

field: string

Metadata field name to boost by. Use 'timestamp' for document freshness, or any custom_metadata field. Numeric and datetime fields support asc/desc directions; text/boolean fields support exists/not_exists.

maxLength64
minLength1
direction?: "asc" | "desc" | "exists" | "not_exists"

Boost direction. 'desc' = higher values rank higher (e.g. newer timestamps). 'asc' = lower values rank higher. 'exists' = boost chunks that have the field. 'not_exists' = boost chunks that lack the field. Optional — defaults to 'asc' for numeric/datetime fields, 'exists' for text/boolean fields.

One of the following:
"asc"
"desc"
"exists"
"not_exists"
keyword_match_mode?: "and" | "or"

Controls which documents are candidates for BM25 scoring. 'and' restricts candidates to documents containing all query terms; 'or' includes any document containing at least one term, ranked by BM25 relevance. Defaults to 'and'. Legacy values 'exact_match' and 'fuzzy_match' are accepted and map to 'and' and 'or' respectively.

One of the following:
"and"
"or"
rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
rewrite_query?: boolean
score_threshold?: number
maximum1
minimum0
source?: string | null
source_params?: SourceParams | null
exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string
r2_jurisdiction?: string
web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }
crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }
depth?: number
maximum100000
minimum1
include_subdomains?: boolean
max_age?: number
maximum604800
minimum0
source?: "all" | "sitemaps" | "links"
One of the following:
"all"
"sitemaps"
"links"
parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }
content_selector?: Array<ContentSelector>

List of path-to-selector mappings for extracting specific content from crawled pages. Each entry pairs a URL glob pattern with a CSS selector. The first matching path wins. Only the matched HTML fragment is stored and indexed.

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200
selector: string

CSS selector to extract content from pages matching the path pattern. Supports standard CSS selectors including class, ID, element, and attribute selectors.

maxLength200
include_headers?: Record<string, string>
include_images?: boolean
specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is 'sitemap'.

use_browser_rendering?: boolean
parse_type?: "sitemap" | "feed-rss" | "crawl"
One of the following:
"sitemap"
"feed-rss"
"crawl"
store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }
storage_id: string
r2_jurisdiction?: string
storage_type?: Provider { }
status?: string
token_id?: string
formatuuid
type?: "r2" | "web-crawler" | null
One of the following:
"r2"
"web-crawler"
InstanceUpdateResponse { id, created_at, modified_at, 33 more }
id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64
minLength1
created_at: string
formatdate-time
modified_at: string
formatdate-time
ai_gateway_id?: string | null
ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
cache?: boolean
cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"
One of the following:
"super_strict_match"
"close_enough"
"flexible_friend"
"anything_goes"
chunk_overlap?: number
maximum30
minimum0
chunk_size?: number
minimum64
created_by?: string | null
custom_metadata?: Array<CustomMetadata>
data_type: "text" | "number" | "boolean" | "datetime"
One of the following:
"text"
"number"
"boolean"
"datetime"
field_name: string
maxLength64
minLength1
embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null
One of the following:
"@cf/qwen/qwen3-embedding-0.6b"
"@cf/baai/bge-m3"
"@cf/baai/bge-large-en-v1.5"
"@cf/google/embeddinggemma-300m"
"google-ai-studio/gemini-embedding-001"
"google-ai-studio/gemini-embedding-2-preview"
"openai/text-embedding-3-small"
"openai/text-embedding-3-large"
""
enable?: boolean
engine_version?: number
fusion_method?: "max" | "rrf"
One of the following:
"max"
"rrf"
hybrid_search_enabled?: boolean
indexing_options?: IndexingOptions | null
keyword_tokenizer?: "porter" | "trigram"

Tokenizer used for keyword search indexing. porter provides word-level tokenization with Porter stemming (good for natural language queries). trigram enables character-level substring matching (good for partial matches, code, identifiers). Changing this triggers a full re-index. Defaults to porter.

One of the following:
"porter"
"trigram"
last_activity?: string | null
formatdate-time
max_num_results?: number
maximum50
minimum1
metadata?: Metadata { created_from_aisearch_wizard, worker_domain }
created_from_aisearch_wizard?: boolean
worker_domain?: string
modified_by?: string | null
namespace?: string | null
maxLength32
minLength1
paused?: boolean
public_endpoint_id?: string | null
public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }
authorized_hosts?: Array<string>
chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }
disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean
mcp?: Mcp { description, disabled }
description?: string
disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }
period_ms?: number
maximum3600000
minimum60000
requests?: number
minimum1
technique?: "fixed" | "sliding"
One of the following:
"fixed"
"sliding"
search_endpoint?: SearchEndpoint { disabled }
disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean
reranking_model?: "@cf/baai/bge-reranker-base" | "" | null
One of the following:
"@cf/baai/bge-reranker-base"
""
retrieval_options?: RetrievalOptions | null
boost_by?: Array<BoostBy>

Metadata fields to boost search results by. Each entry specifies a metadata field and an optional direction. Direction defaults to 'asc' for numeric fields and 'exists' for text/boolean fields. Fields must match 'timestamp' or a defined custom_metadata field.

field: string

Metadata field name to boost by. Use 'timestamp' for document freshness, or any custom_metadata field. Numeric and datetime fields support asc/desc directions; text/boolean fields support exists/not_exists.

maxLength64
minLength1
direction?: "asc" | "desc" | "exists" | "not_exists"

Boost direction. 'desc' = higher values rank higher (e.g. newer timestamps). 'asc' = lower values rank higher. 'exists' = boost chunks that have the field. 'not_exists' = boost chunks that lack the field. Optional — defaults to 'asc' for numeric/datetime fields, 'exists' for text/boolean fields.

One of the following:
"asc"
"desc"
"exists"
"not_exists"
keyword_match_mode?: "and" | "or"

Controls which documents are candidates for BM25 scoring. 'and' restricts candidates to documents containing all query terms; 'or' includes any document containing at least one term, ranked by BM25 relevance. Defaults to 'and'. Legacy values 'exact_match' and 'fuzzy_match' are accepted and map to 'and' and 'or' respectively.

One of the following:
"and"
"or"
rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
rewrite_query?: boolean
score_threshold?: number
maximum1
minimum0
source?: string | null
source_params?: SourceParams | null
exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string
r2_jurisdiction?: string
web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }
crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }
depth?: number
maximum100000
minimum1
include_subdomains?: boolean
max_age?: number
maximum604800
minimum0
source?: "all" | "sitemaps" | "links"
One of the following:
"all"
"sitemaps"
"links"
parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }
content_selector?: Array<ContentSelector>

List of path-to-selector mappings for extracting specific content from crawled pages. Each entry pairs a URL glob pattern with a CSS selector. The first matching path wins. Only the matched HTML fragment is stored and indexed.

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200
selector: string

CSS selector to extract content from pages matching the path pattern. Supports standard CSS selectors including class, ID, element, and attribute selectors.

maxLength200
include_headers?: Record<string, string>
include_images?: boolean
specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is 'sitemap'.

use_browser_rendering?: boolean
parse_type?: "sitemap" | "feed-rss" | "crawl"
One of the following:
"sitemap"
"feed-rss"
"crawl"
store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }
storage_id: string
r2_jurisdiction?: string
storage_type?: Provider { }
status?: string
token_id?: string
formatuuid
type?: "r2" | "web-crawler" | null
One of the following:
"r2"
"web-crawler"
InstanceDeleteResponse { id, created_at, modified_at, 33 more }
id: string

AI Search instance ID. Lowercase alphanumeric, hyphens, and underscores.

maxLength64
minLength1
created_at: string
formatdate-time
modified_at: string
formatdate-time
ai_gateway_id?: string | null
ai_search_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
cache?: boolean
cache_threshold?: "super_strict_match" | "close_enough" | "flexible_friend" | "anything_goes"
One of the following:
"super_strict_match"
"close_enough"
"flexible_friend"
"anything_goes"
chunk_overlap?: number
maximum30
minimum0
chunk_size?: number
minimum64
created_by?: string | null
custom_metadata?: Array<CustomMetadata>
data_type: "text" | "number" | "boolean" | "datetime"
One of the following:
"text"
"number"
"boolean"
"datetime"
field_name: string
maxLength64
minLength1
embedding_model?: "@cf/qwen/qwen3-embedding-0.6b" | "@cf/baai/bge-m3" | "@cf/baai/bge-large-en-v1.5" | 6 more | null
One of the following:
"@cf/qwen/qwen3-embedding-0.6b"
"@cf/baai/bge-m3"
"@cf/baai/bge-large-en-v1.5"
"@cf/google/embeddinggemma-300m"
"google-ai-studio/gemini-embedding-001"
"google-ai-studio/gemini-embedding-2-preview"
"openai/text-embedding-3-small"
"openai/text-embedding-3-large"
""
enable?: boolean
engine_version?: number
fusion_method?: "max" | "rrf"
One of the following:
"max"
"rrf"
hybrid_search_enabled?: boolean
indexing_options?: IndexingOptions | null
keyword_tokenizer?: "porter" | "trigram"

Tokenizer used for keyword search indexing. porter provides word-level tokenization with Porter stemming (good for natural language queries). trigram enables character-level substring matching (good for partial matches, code, identifiers). Changing this triggers a full re-index. Defaults to porter.

One of the following:
"porter"
"trigram"
last_activity?: string | null
formatdate-time
max_num_results?: number
maximum50
minimum1
metadata?: Metadata { created_from_aisearch_wizard, worker_domain }
created_from_aisearch_wizard?: boolean
worker_domain?: string
modified_by?: string | null
namespace?: string | null
maxLength32
minLength1
paused?: boolean
public_endpoint_id?: string | null
public_endpoint_params?: PublicEndpointParams { authorized_hosts, chat_completions_endpoint, enabled, 3 more }
authorized_hosts?: Array<string>
chat_completions_endpoint?: ChatCompletionsEndpoint { disabled }
disabled?: boolean

Disable chat completions endpoint for this public endpoint

enabled?: boolean
mcp?: Mcp { description, disabled }
description?: string
disabled?: boolean

Disable MCP endpoint for this public endpoint

rate_limit?: RateLimit { period_ms, requests, technique }
period_ms?: number
maximum3600000
minimum60000
requests?: number
minimum1
technique?: "fixed" | "sliding"
One of the following:
"fixed"
"sliding"
search_endpoint?: SearchEndpoint { disabled }
disabled?: boolean

Disable search endpoint for this public endpoint

reranking?: boolean
reranking_model?: "@cf/baai/bge-reranker-base" | "" | null
One of the following:
"@cf/baai/bge-reranker-base"
""
retrieval_options?: RetrievalOptions | null
boost_by?: Array<BoostBy>

Metadata fields to boost search results by. Each entry specifies a metadata field and an optional direction. Direction defaults to 'asc' for numeric fields and 'exists' for text/boolean fields. Fields must match 'timestamp' or a defined custom_metadata field.

field: string

Metadata field name to boost by. Use 'timestamp' for document freshness, or any custom_metadata field. Numeric and datetime fields support asc/desc directions; text/boolean fields support exists/not_exists.

maxLength64
minLength1
direction?: "asc" | "desc" | "exists" | "not_exists"

Boost direction. 'desc' = higher values rank higher (e.g. newer timestamps). 'asc' = lower values rank higher. 'exists' = boost chunks that have the field. 'not_exists' = boost chunks that lack the field. Optional — defaults to 'asc' for numeric/datetime fields, 'exists' for text/boolean fields.

One of the following:
"asc"
"desc"
"exists"
"not_exists"
keyword_match_mode?: "and" | "or"

Controls which documents are candidates for BM25 scoring. 'and' restricts candidates to documents containing all query terms; 'or' includes any document containing at least one term, ranked by BM25 relevance. Defaults to 'and'. Legacy values 'exact_match' and 'fuzzy_match' are accepted and map to 'and' and 'or' respectively.

One of the following:
"and"
"or"
rewrite_model?: "@cf/meta/llama-3.3-70b-instruct-fp8-fast" | "@cf/zai-org/glm-4.7-flash" | "@cf/meta/llama-3.1-8b-instruct-fast" | 25 more | null
One of the following:
"@cf/meta/llama-3.3-70b-instruct-fp8-fast"
"@cf/zai-org/glm-4.7-flash"
"@cf/meta/llama-3.1-8b-instruct-fast"
"@cf/meta/llama-3.1-8b-instruct-fp8"
"@cf/meta/llama-4-scout-17b-16e-instruct"
"@cf/qwen/qwen3-30b-a3b-fp8"
"@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
"@cf/moonshotai/kimi-k2-instruct"
"@cf/google/gemma-3-12b-it"
"anthropic/claude-3-7-sonnet"
"anthropic/claude-sonnet-4"
"anthropic/claude-opus-4"
"anthropic/claude-3-5-haiku"
"cerebras/qwen-3-235b-a22b-instruct"
"cerebras/qwen-3-235b-a22b-thinking"
"cerebras/llama-3.3-70b"
"cerebras/llama-4-maverick-17b-128e-instruct"
"cerebras/llama-4-scout-17b-16e-instruct"
"cerebras/gpt-oss-120b"
"google-ai-studio/gemini-2.5-flash"
"google-ai-studio/gemini-2.5-pro"
"grok/grok-4"
"groq/llama-3.3-70b-versatile"
"groq/llama-3.1-8b-instant"
"openai/gpt-5"
"openai/gpt-5-mini"
"openai/gpt-5-nano"
""
rewrite_query?: boolean
score_threshold?: number
maximum1
minimum0
source?: string | null
source_params?: SourceParams | null
exclude_items?: Array<string>

List of path patterns to exclude. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /admin/** matches /admin/users and /admin/settings/advanced)

include_items?: Array<string>

List of path patterns to include. Uses micromatch glob syntax: * matches within a path segment, ** matches across path segments (e.g., /blog/** matches /blog/post and /blog/2024/post)

prefix?: string
r2_jurisdiction?: string
web_crawler?: WebCrawler { crawl_options, parse_options, parse_type, store_options }
crawl_options?: CrawlOptions { depth, include_external_links, include_subdomains, 2 more }
depth?: number
maximum100000
minimum1
include_subdomains?: boolean
max_age?: number
maximum604800
minimum0
source?: "all" | "sitemaps" | "links"
One of the following:
"all"
"sitemaps"
"links"
parse_options?: ParseOptions { content_selector, include_headers, include_images, 2 more }
content_selector?: Array<ContentSelector>

List of path-to-selector mappings for extracting specific content from crawled pages. Each entry pairs a URL glob pattern with a CSS selector. The first matching path wins. Only the matched HTML fragment is stored and indexed.

path: string

Glob pattern to match against the page URL path. Uses standard glob syntax: * matches within a segment, ** crosses directories.

maxLength200
selector: string

CSS selector to extract content from pages matching the path pattern. Supports standard CSS selectors including class, ID, element, and attribute selectors.

maxLength200
include_headers?: Record<string, string>
include_images?: boolean
specific_sitemaps?: Array<string>

List of specific sitemap URLs to use for crawling. Only valid when parse_type is 'sitemap'.

use_browser_rendering?: boolean
parse_type?: "sitemap" | "feed-rss" | "crawl"
One of the following:
"sitemap"
"feed-rss"
"crawl"
store_options?: StoreOptions { storage_id, r2_jurisdiction, storage_type }
storage_id: string
r2_jurisdiction?: string
storage_type?: Provider { }
status?: string
token_id?: string
formatuuid
type?: "r2" | "web-crawler" | null
One of the following:
"r2"
"web-crawler"
InstanceStatsResponse { completed, error, file_embed_errors, 6 more }
completed?: number
error?: number
file_embed_errors?: Record<string, unknown>
index_source_errors?: Record<string, unknown>
last_activity?: string
formatdate-time
outdated?: number
queued?: number
running?: number
skipped?: number
InstanceSearchResponse { chunks, search_query }
chunks: Array<Chunk>
id: string
score: number
maximum1
minimum0
text: string
type: string
item?: Item { key, metadata, timestamp }
key: string
metadata?: Record<string, unknown>
timestamp?: number
scoring_details?: ScoringDetails { fusion_method, keyword_rank, keyword_score, 3 more }
fusion_method?: "rrf" | "max"
One of the following:
"rrf"
"max"
keyword_rank?: number
keyword_score?: number
minimum0
reranking_score?: number
maximum1
minimum0
vector_rank?: number
vector_score?: number
maximum1
minimum0
search_query: string
InstanceChatCompletionsResponse { choices, chunks, id, 2 more }
choices: Array<Choice>
message: Message { content, role }
content: string | null
role: "system" | "developer" | "user" | 2 more
One of the following:
"system"
"developer"
"user"
"assistant"
"tool"
index?: number
chunks: Array<Chunk>
id: string
score: number
maximum1
minimum0
text: string
type: string
item?: Item { key, metadata, timestamp }
key: string
metadata?: Record<string, unknown>
timestamp?: number
scoring_details?: ScoringDetails { fusion_method, keyword_rank, keyword_score, 3 more }
fusion_method?: "rrf" | "max"
One of the following:
"rrf"
"max"
keyword_rank?: number
keyword_score?: number
minimum0
reranking_score?: number
maximum1
minimum0
vector_rank?: number
vector_score?: number
maximum1
minimum0
id?: string
model?: string
object?: string

AI SearchInstancesItems

AI SearchInstancesJobs

List Jobs
client.aiSearch.instances.jobs.list(stringid, JobListParams { account_id, page, per_page } params, RequestOptionsoptions?): V4PagePaginationArray<JobListResponse { id, source, description, 4 more } >
GET/accounts/{account_id}/ai-search/instances/{id}/jobs
Create new job
client.aiSearch.instances.jobs.create(stringid, JobCreateParams { account_id, description } params, RequestOptionsoptions?): JobCreateResponse { id, source, description, 4 more }
POST/accounts/{account_id}/ai-search/instances/{id}/jobs
Get Job Details
client.aiSearch.instances.jobs.get(stringid, stringjobId, JobGetParams { account_id } params, RequestOptionsoptions?): JobGetResponse { id, source, description, 4 more }
GET/accounts/{account_id}/ai-search/instances/{id}/jobs/{job_id}
List Job Logs
client.aiSearch.instances.jobs.logs(stringid, stringjobId, JobLogsParams { account_id, page, per_page } params, RequestOptionsoptions?): JobLogsResponse { id, created_at, message, message_type }
GET/accounts/{account_id}/ai-search/instances/{id}/jobs/{job_id}/logs
ModelsExpand Collapse
JobListResponse { id, source, description, 4 more }
id: string
source: "user" | "schedule"
One of the following:
"user"
"schedule"
description?: string
end_reason?: string
ended_at?: string
last_seen_at?: string
started_at?: string
JobCreateResponse { id, source, description, 4 more }
id: string
source: "user" | "schedule"
One of the following:
"user"
"schedule"
description?: string
end_reason?: string
ended_at?: string
last_seen_at?: string
started_at?: string
JobGetResponse { id, source, description, 4 more }
id: string
source: "user" | "schedule"
One of the following:
"user"
"schedule"
description?: string
end_reason?: string
ended_at?: string
last_seen_at?: string
started_at?: string
JobLogsResponse = Array<JobLogsResponseItem>
id: number
created_at: number
message: string
message_type: number

AI SearchTokens

List tokens.
client.aiSearch.tokens.list(TokenListParams { account_id, order_by, order_by_direction, 2 more } params, RequestOptionsoptions?): V4PagePaginationArray<TokenListResponse { id, cf_api_id, created_at, 6 more } >
GET/accounts/{account_id}/ai-search/tokens
Create new tokens.
client.aiSearch.tokens.create(TokenCreateParams { account_id, cf_api_id, cf_api_key, name } params, RequestOptionsoptions?): TokenCreateResponse { id, cf_api_id, created_at, 6 more }
POST/accounts/{account_id}/ai-search/tokens
Read tokens.
client.aiSearch.tokens.read(stringid, TokenReadParams { account_id } params, RequestOptionsoptions?): TokenReadResponse { id, cf_api_id, created_at, 6 more }
GET/accounts/{account_id}/ai-search/tokens/{id}
Update tokens.
client.aiSearch.tokens.update(stringid, TokenUpdateParams { account_id, cf_api_id, cf_api_key, name } params, RequestOptionsoptions?): TokenUpdateResponse { id, cf_api_id, created_at, 6 more }
PUT/accounts/{account_id}/ai-search/tokens/{id}
Delete tokens.
client.aiSearch.tokens.delete(stringid, TokenDeleteParams { account_id } params, RequestOptionsoptions?): TokenDeleteResponse { id, cf_api_id, created_at, 6 more }
DELETE/accounts/{account_id}/ai-search/tokens/{id}
ModelsExpand Collapse
TokenListResponse { id, cf_api_id, created_at, 6 more }
id: string
formatuuid
cf_api_id: string
created_at: string
formatdate-time
modified_at: string
formatdate-time
name: string
created_by?: string | null
enabled?: boolean
legacy?: boolean
modified_by?: string | null
TokenCreateResponse { id, cf_api_id, created_at, 6 more }
id: string
formatuuid
cf_api_id: string
created_at: string
formatdate-time
modified_at: string
formatdate-time
name: string
created_by?: string | null
enabled?: boolean
legacy?: boolean
modified_by?: string | null
TokenReadResponse { id, cf_api_id, created_at, 6 more }
id: string
formatuuid
cf_api_id: string
created_at: string
formatdate-time
modified_at: string
formatdate-time
name: string
created_by?: string | null
enabled?: boolean
legacy?: boolean
modified_by?: string | null
TokenUpdateResponse { id, cf_api_id, created_at, 6 more }
id: string
formatuuid
cf_api_id: string
created_at: string
formatdate-time
modified_at: string
formatdate-time
name: string
created_by?: string | null
enabled?: boolean
legacy?: boolean
modified_by?: string | null
TokenDeleteResponse { id, cf_api_id, created_at, 6 more }
id: string
formatuuid
cf_api_id: string
created_at: string
formatdate-time
modified_at: string
formatdate-time
name: string
created_by?: string | null
enabled?: boolean
legacy?: boolean
modified_by?: string | null