glm-4.7-flash
Text Generation • zai-org
GLM-4.7-Flash is a fast and efficient multilingual text generation model with a 131,072 token context window. Optimized for dialogue, instruction-following, and multi-turn tool calling across 100+ languages.
| Model Info | |
|---|---|
| Context Window ↗ | 131,072 tokens |
| Function calling ↗ | Yes |
| Unit Pricing | $0.06 per M input tokens, $0.40 per M output tokens |
Playground
Try out this model with the Workers AI LLM Playground. It does not require any setup or authentication and is an instant way to preview and test a model directly in the browser.
Launch the LLM Playground

Usage
export interface Env { AI: Ai;}
export default { async fetch(request, env): Promise<Response> {
const messages = [ { role: "system", content: "You are a friendly assistant" }, { role: "user", content: "What is the origin of the phrase Hello, World", }, ];
const stream = await env.AI.run("@cf/zai-org/glm-4.7-flash", { messages, stream: true, });
return new Response(stream, { headers: { "content-type": "text/event-stream" }, }); },} satisfies ExportedHandler<Env>;

export interface Env { AI: Ai;}
export default { async fetch(request, env): Promise<Response> {
const messages = [ { role: "system", content: "You are a friendly assistant" }, { role: "user", content: "What is the origin of the phrase Hello, World", }, ]; const response = await env.AI.run("@cf/zai-org/glm-4.7-flash", { messages });
return Response.json(response); },} satisfies ExportedHandler<Env>;

import os
import requests
ACCOUNT_ID = "your-account-id"
AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
prompt = "Tell me all about PEP-8"
response = requests.post(
    f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/zai-org/glm-4.7-flash",
    headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
    json={
        "messages": [
            {"role": "system", "content": "You are a friendly assistant"},
            {"role": "user", "content": prompt}
        ]
    }
)
result = response.json()
print(result)

curl https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/ai/run/@cf/zai-org/glm-4.7-flash \
  -X POST \
  -H "Authorization: Bearer $CLOUDFLARE_AUTH_TOKEN" \
  -d '{ "messages": [{ "role": "system", "content": "You are a friendly assistant" }, { "role": "user", "content": "Why is pizza so good" }]}'

Parameters
* indicates a required field
Input
-
0object-
promptstring required min 1The input text prompt for the model to generate a response.
-
modelstringID of the model to use (e.g. '@cf/zai-org/glm-4.7-flash', etc.).
-
audioobjectParameters for audio output. Required when modalities includes 'audio'.
-
voiceone of required-
0string -
1object-
idstring required
-
-
-
formatstring required
-
-
frequency_penalty-
0number 0 min -2 max 2Penalizes new tokens based on their existing frequency in the text so far.
-
1null 0Penalizes new tokens based on their existing frequency in the text so far.
-
-
logit_bias-
0objectModify the likelihood of specified tokens appearing in the completion. Maps token IDs to bias values from -100 to 100.
-
1nullModify the likelihood of specified tokens appearing in the completion. Maps token IDs to bias values from -100 to 100.
-
-
logprobs-
0booleanWhether to return log probabilities of the output tokens.
-
1nullWhether to return log probabilities of the output tokens.
-
-
top_logprobs-
0integer min 0 max 20How many top log probabilities to return at each token position (0-20). Requires logprobs=true.
-
1nullHow many top log probabilities to return at each token position (0-20). Requires logprobs=true.
-
-
max_tokens-
0integerDeprecated in favor of max_completion_tokens. The maximum number of tokens to generate.
-
1nullDeprecated in favor of max_completion_tokens. The maximum number of tokens to generate.
-
-
max_completion_tokens-
0integerAn upper bound for the number of tokens that can be generated for a completion.
-
1nullAn upper bound for the number of tokens that can be generated for a completion.
-
-
metadata-
0objectSet of 16 key-value pairs that can be attached to the object.
-
1nullSet of 16 key-value pairs that can be attached to the object.
-
-
modalities-
0arrayOutput types requested from the model (e.g. ['text'] or ['text', 'audio']).
-
itemsstring
-
-
1nullOutput types requested from the model (e.g. ['text'] or ['text', 'audio']).
-
-
n-
0integer default 1 min 1 max 128How many chat completion choices to generate for each input message.
-
1null default 1How many chat completion choices to generate for each input message.
-
-
parallel_tool_callsboolean default trueWhether to enable parallel function calling during tool use.
-
predictionobject-
typestring required -
contentrequired-
0string -
1array-
itemsobject-
typestring required -
textstring required
-
-
-
-
-
presence_penalty-
0number 0 min -2 max 2Penalizes new tokens based on whether they appear in the text so far.
-
1null 0Penalizes new tokens based on whether they appear in the text so far.
-
-
reasoning_effort-
0stringConstrains effort on reasoning for reasoning models (o1, o3-mini, etc.).
-
1nullConstrains effort on reasoning for reasoning models (o1, o3-mini, etc.).
-
-
response_formatone ofSpecifies the format the model must output.
-
0object-
typestring required
-
-
1object-
typestring required
-
-
2object-
typestring required -
json_schemaobject required-
namestring required -
descriptionstring -
schemaobject -
strict-
0boolean -
1null
-
-
-
-
-
seed-
0integerIf specified, the system will make a best effort to sample deterministically.
-
1nullIf specified, the system will make a best effort to sample deterministically.
-
-
service_tier-
0string default autoSpecifies the processing type used for serving the request.
-
1null default autoSpecifies the processing type used for serving the request.
-
-
stop-
0nullUp to 4 sequences where the API will stop generating further tokens.
-
1stringUp to 4 sequences where the API will stop generating further tokens.
-
2arrayUp to 4 sequences where the API will stop generating further tokens.
-
itemsstring
-
-
-
store-
0booleanWhether to store the output for model distillation / evals.
-
1nullWhether to store the output for model distillation / evals.
-
-
stream-
0booleanIf true, partial message deltas will be sent as server-sent events.
-
1nullIf true, partial message deltas will be sent as server-sent events.
-
-
stream_optionsobject-
include_usageboolean -
include_obfuscationboolean
-
-
temperature-
0number default 1 min 0 max 2Sampling temperature between 0 and 2.
-
1null default 1Sampling temperature between 0 and 2.
-
-
tool_choiceone ofControls which (if any) tool is called by the model. 'none' = no tools, 'auto' = model decides, 'required' = must call a tool.
-
0string -
1objectForce a specific function tool.
-
typestring required -
functionobject required-
namestring required
-
-
-
2objectForce a specific custom tool.
-
typestring required -
customobject required-
namestring required
-
-
-
3objectConstrain to an allowed subset of tools.
-
typestring required -
allowed_toolsobject required-
modestring required -
toolsarray required-
itemsobject
-
-
-
-
-
toolsarrayA list of tools the model may call.
-
itemsone of-
0object-
typestring required -
functionobject required-
namestring requiredThe name of the function to be called.
-
descriptionstringA description of what the function does.
-
parametersobjectThe parameters the function accepts, described as a JSON Schema object.
-
strict-
0booleanWhether to enable strict schema adherence.
-
1nullWhether to enable strict schema adherence.
-
-
-
-
1object-
typestring required -
customobject required-
namestring required -
descriptionstring -
formatone of-
0object-
typestring required
-
-
1object-
typestring required -
grammarobject required-
definitionstring required -
syntaxstring required
-
-
-
-
-
-
-
-
top_p-
0number default 1 min 0 max 1Nucleus sampling: considers the results of the tokens with top_p probability mass.
-
1null default 1Nucleus sampling: considers the results of the tokens with top_p probability mass.
-
-
userstringA unique identifier representing your end-user, for abuse monitoring.
-
web_search_optionsobjectOptions for the web search tool (when using built-in web search).
-
search_context_sizestring default medium -
user_locationobject-
typestring required -
approximateobject required-
citystring -
countrystring -
regionstring -
timezonestring
-
-
-
-
function_call-
0string -
1object-
namestring required
-
-
-
functionsarray-
itemsobject-
namestring requiredThe name of the function to be called.
-
descriptionstringA description of what the function does.
-
parametersobjectThe parameters the function accepts, described as a JSON Schema object.
-
strict-
0booleanWhether to enable strict schema adherence.
-
1nullWhether to enable strict schema adherence.
-
-
-
-
-
1object-
messagesarray requiredA list of messages comprising the conversation so far.
-
itemsone of-
0object-
rolestring required -
contentrequired-
0string -
1array-
itemsobject-
typestring required -
textstring required
-
-
-
-
namestring
-
-
1object-
rolestring required -
contentrequired-
0string -
1array-
itemsobject-
typestring required -
textstring required
-
-
-
-
namestring
-
-
2object-
rolestring required -
contentrequired-
0string -
1array-
itemsobject-
typestring required -
textstring -
image_urlobject-
urlstring -
detailstring default auto
-
-
input_audioobject-
datastring -
formatstring
-
-
fileobject-
file_datastring -
file_idstring -
filenamestring
-
-
-
-
-
namestring
-
-
3object-
rolestring required -
content-
0string -
1null -
2array-
itemsobject-
typestring required -
textstring -
refusalstring
-
-
-
-
refusal-
0string -
1null
-
-
namestring -
audioobject-
idstring required
-
-
tool_callsarray-
itemsone of-
0object-
idstring required -
typestring required -
functionobject required-
namestring required -
argumentsstring requiredJSON-encoded arguments string.
-
-
-
1object-
idstring required -
typestring required -
customobject required-
namestring required -
inputstring required
-
-
-
-
-
function_callobject-
namestring required -
argumentsstring required
-
-
-
4object-
rolestring required -
contentrequired-
0string -
1array-
itemsobject-
typestring required -
textstring required
-
-
-
-
tool_call_idstring required
-
-
5object-
rolestring required -
contentstring required -
namestring required
-
-
-
-
modelstringID of the model to use (e.g. '@cf/zai-org/glm-4.7-flash', etc.).
-
audioobjectParameters for audio output. Required when modalities includes 'audio'.
-
voiceone of required-
0string -
1object-
idstring required
-
-
-
formatstring required
-
-
frequency_penalty-
0number 0 min -2 max 2Penalizes new tokens based on their existing frequency in the text so far.
-
1null 0Penalizes new tokens based on their existing frequency in the text so far.
-
-
logit_bias-
0objectModify the likelihood of specified tokens appearing in the completion. Maps token IDs to bias values from -100 to 100.
-
1nullModify the likelihood of specified tokens appearing in the completion. Maps token IDs to bias values from -100 to 100.
-
-
logprobs-
0booleanWhether to return log probabilities of the output tokens.
-
1nullWhether to return log probabilities of the output tokens.
-
-
top_logprobs-
0integer min 0 max 20How many top log probabilities to return at each token position (0-20). Requires logprobs=true.
-
1nullHow many top log probabilities to return at each token position (0-20). Requires logprobs=true.
-
-
max_tokens-
0integerDeprecated in favor of max_completion_tokens. The maximum number of tokens to generate.
-
1nullDeprecated in favor of max_completion_tokens. The maximum number of tokens to generate.
-
-
max_completion_tokens-
0integerAn upper bound for the number of tokens that can be generated for a completion.
-
1nullAn upper bound for the number of tokens that can be generated for a completion.
-
-
metadata-
0objectSet of 16 key-value pairs that can be attached to the object.
-
1nullSet of 16 key-value pairs that can be attached to the object.
-
-
modalities-
0arrayOutput types requested from the model (e.g. ['text'] or ['text', 'audio']).
-
itemsstring
-
-
1nullOutput types requested from the model (e.g. ['text'] or ['text', 'audio']).
-
-
n-
0integer default 1 min 1 max 128How many chat completion choices to generate for each input message.
-
1null default 1How many chat completion choices to generate for each input message.
-
-
parallel_tool_callsboolean default trueWhether to enable parallel function calling during tool use.
-
predictionobject-
typestring required -
contentrequired-
0string -
1array-
itemsobject-
typestring required -
textstring required
-
-
-
-
-
presence_penalty-
0number 0 min -2 max 2Penalizes new tokens based on whether they appear in the text so far.
-
1null 0Penalizes new tokens based on whether they appear in the text so far.
-
-
reasoning_effort-
0stringConstrains effort on reasoning for reasoning models (o1, o3-mini, etc.).
-
1nullConstrains effort on reasoning for reasoning models (o1, o3-mini, etc.).
-
-
response_formatone ofSpecifies the format the model must output.
-
0object-
typestring required
-
-
1object-
typestring required
-
-
2object-
typestring required -
json_schemaobject required-
namestring required -
descriptionstring -
schemaobject -
strict-
0boolean -
1null
-
-
-
-
-
seed-
0integerIf specified, the system will make a best effort to sample deterministically.
-
1nullIf specified, the system will make a best effort to sample deterministically.
-
-
service_tier-
0string default autoSpecifies the processing type used for serving the request.
-
1null default autoSpecifies the processing type used for serving the request.
-
-
stop-
0nullUp to 4 sequences where the API will stop generating further tokens.
-
1stringUp to 4 sequences where the API will stop generating further tokens.
-
2arrayUp to 4 sequences where the API will stop generating further tokens.
-
itemsstring
-
-
-
store-
0booleanWhether to store the output for model distillation / evals.
-
1nullWhether to store the output for model distillation / evals.
-
-
stream-
0booleanIf true, partial message deltas will be sent as server-sent events.
-
1nullIf true, partial message deltas will be sent as server-sent events.
-
-
stream_optionsobject-
include_usageboolean -
include_obfuscationboolean
-
-
temperature-
0number default 1 min 0 max 2Sampling temperature between 0 and 2.
-
1null default 1Sampling temperature between 0 and 2.
-
-
tool_choiceone ofControls which (if any) tool is called by the model. 'none' = no tools, 'auto' = model decides, 'required' = must call a tool.
-
0string -
1objectForce a specific function tool.
-
typestring required -
functionobject required-
namestring required
-
-
-
2objectForce a specific custom tool.
-
typestring required -
customobject required-
namestring required
-
-
-
3objectConstrain to an allowed subset of tools.
-
typestring required -
allowed_toolsobject required-
modestring required -
toolsarray required-
itemsobject
-
-
-
-
-
toolsarrayA list of tools the model may call.
-
itemsone of-
0object-
typestring required -
functionobject required-
namestring requiredThe name of the function to be called.
-
descriptionstringA description of what the function does.
-
parametersobjectThe parameters the function accepts, described as a JSON Schema object.
-
strict-
0booleanWhether to enable strict schema adherence.
-
1nullWhether to enable strict schema adherence.
-
-
-
-
1object-
typestring required -
customobject required-
namestring required -
descriptionstring -
formatone of-
0object-
typestring required
-
-
1object-
typestring required -
grammarobject required-
definitionstring required -
syntaxstring required
-
-
-
-
-
-
-
-
top_p-
0number default 1 min 0 max 1Nucleus sampling: considers the results of the tokens with top_p probability mass.
-
1null default 1Nucleus sampling: considers the results of the tokens with top_p probability mass.
-
-
userstringA unique identifier representing your end-user, for abuse monitoring.
-
web_search_optionsobjectOptions for the web search tool (when using built-in web search).
-
search_context_sizestring default medium -
user_locationobject-
typestring required -
approximateobject required-
citystring -
countrystring -
regionstring -
timezonestring
-
-
-
-
function_call-
0string -
1object-
namestring required
-
-
-
functionsarray-
itemsobject-
namestring requiredThe name of the function to be called.
-
descriptionstringA description of what the function does.
-
parametersobjectThe parameters the function accepts, described as a JSON Schema object.
-
strict-
0booleanWhether to enable strict schema adherence.
-
1nullWhether to enable strict schema adherence.
-
-
-
-
Output
-
0object-
idstring requiredA unique identifier for the chat completion.
-
objectstring required -
createdinteger requiredUnix timestamp (seconds) of when the completion was created.
-
modelstring requiredThe model used for the chat completion.
-
choicesarray required-
itemsobject-
indexinteger required -
messageobject required-
rolestring required -
contentrequired-
0string -
1null
-
-
refusalrequired-
0string -
1null
-
-
annotationsarray-
itemsobject-
typestring required -
url_citationobject required-
urlstring required -
titlestring required -
start_indexinteger required -
end_indexinteger required
-
-
-
-
audioobject-
idstring required -
datastring requiredBase64 encoded audio bytes.
-
expires_atinteger required -
transcriptstring required
-
-
tool_callsarray-
itemsone of-
0object-
idstring required -
typestring required -
functionobject required-
namestring required -
argumentsstring requiredJSON-encoded arguments string.
-
-
-
1object-
idstring required -
typestring required -
customobject required-
namestring required -
inputstring required
-
-
-
-
-
function_call-
0object-
namestring required -
argumentsstring required
-
-
1null
-
-
-
finish_reasonstring required -
logprobsrequired-
0object-
content-
0array-
itemsobject-
tokenstring required -
logprobnumber required -
bytesrequired-
0array-
itemsinteger
-
-
1null
-
-
top_logprobsarray required-
itemsobject-
tokenstring required -
logprobnumber required -
bytesrequired-
0array-
itemsinteger
-
-
1null
-
-
-
-
-
-
1null
-
-
refusal-
0array-
itemsobject-
tokenstring required -
logprobnumber required -
bytesrequired-
0array-
itemsinteger
-
-
1null
-
-
top_logprobsarray required-
itemsobject-
tokenstring required -
logprobnumber required -
bytesrequired-
0array-
itemsinteger
-
-
1null
-
-
-
-
-
-
1null
-
-
-
1null
-
-
-
-
usageobject-
prompt_tokensinteger required -
completion_tokensinteger required -
total_tokensinteger required -
prompt_tokens_detailsobject-
cached_tokensinteger -
audio_tokensinteger
-
-
completion_tokens_detailsobject-
reasoning_tokensinteger -
audio_tokensinteger -
accepted_prediction_tokensinteger -
rejected_prediction_tokensinteger
-
-
-
system_fingerprint-
0string -
1null
-
-
service_tier-
0string -
1null
-
-
-
1string
API Schemas
The following schemas are based on JSON Schema
{ "type": "object", "oneOf": [ { "title": "Prompt", "properties": { "prompt": { "type": "string", "minLength": 1, "description": "The input text prompt for the model to generate a response." }, "model": { "type": "string", "description": "ID of the model to use (e.g. '@cf/zai-org/glm-4.7-flash, etc')." }, "audio": { "anyOf": [ { "type": "object", "description": "Parameters for audio output. Required when modalities includes 'audio'.", "properties": { "voice": { "oneOf": [ { "type": "string" }, { "type": "object", "properties": { "id": { "type": "string" } }, "required": [ "id" ] } ] }, "format": { "type": "string", "enum": [ "wav", "aac", "mp3", "flac", "opus", "pcm16" ] } }, "required": [ "voice", "format" ] } ] }, "frequency_penalty": { "anyOf": [ { "type": "number", "minimum": -2, "maximum": 2 }, { "type": "null" } ], "default": 0, "description": "Penalizes new tokens based on their existing frequency in the text so far." }, "logit_bias": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "description": "Modify the likelihood of specified tokens appearing in the completion. Maps token IDs to bias values from -100 to 100." }, "logprobs": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to return log probabilities of the output tokens." }, "top_logprobs": { "anyOf": [ { "type": "integer", "minimum": 0, "maximum": 20 }, { "type": "null" } ], "description": "How many top log probabilities to return at each token position (0-20). Requires logprobs=true." }, "max_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "description": "Deprecated in favor of max_completion_tokens. The maximum number of tokens to generate." }, "max_completion_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "description": "An upper bound for the number of tokens that can be generated for a completion." 
}, "metadata": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "description": "Set of 16 key-value pairs that can be attached to the object." }, "modalities": { "anyOf": [ { "type": "array", "items": { "type": "string", "enum": [ "text", "audio" ] } }, { "type": "null" } ], "description": "Output types requested from the model (e.g. ['text'] or ['text', 'audio'])." }, "n": { "anyOf": [ { "type": "integer", "minimum": 1, "maximum": 128 }, { "type": "null" } ], "default": 1, "description": "How many chat completion choices to generate for each input message." }, "parallel_tool_calls": { "type": "boolean", "default": true, "description": "Whether to enable parallel function calling during tool use." }, "prediction": { "anyOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "content" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] }, "text": { "type": "string" } }, "required": [ "type", "text" ] } } ] } }, "required": [ "type", "content" ] } ] }, "presence_penalty": { "anyOf": [ { "type": "number", "minimum": -2, "maximum": 2 }, { "type": "null" } ], "default": 0, "description": "Penalizes new tokens based on whether they appear in the text so far." }, "reasoning_effort": { "anyOf": [ { "type": "string", "enum": [ "low", "medium", "high" ] }, { "type": "null" } ], "description": "Constrains effort on reasoning for reasoning models (o1, o3-mini, etc.)." 
}, "response_format": { "anyOf": [ { "description": "Specifies the format the model must output.", "oneOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] } }, "required": [ "type" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "json_object" ] } }, "required": [ "type" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "json_schema" ] }, "json_schema": { "type": "object", "properties": { "name": { "type": "string" }, "description": { "type": "string" }, "schema": { "type": "object" }, "strict": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ] } }, "required": [ "name" ] } }, "required": [ "type", "json_schema" ] } ] } ] }, "seed": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "description": "If specified, the system will make a best effort to sample deterministically." }, "service_tier": { "anyOf": [ { "type": "string", "enum": [ "auto", "default", "flex", "scale", "priority" ] }, { "type": "null" } ], "default": "auto", "description": "Specifies the processing type used for serving the request." }, "stop": { "description": "Up to 4 sequences where the API will stop generating further tokens.", "anyOf": [ { "type": "null" }, { "type": "string" }, { "type": "array", "items": { "type": "string" }, "minItems": 1, "maxItems": 4 } ] }, "store": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to store the output for model distillation / evals." }, "stream": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "If true, partial message deltas will be sent as server-sent events." 
}, "stream_options": { "anyOf": [ { "type": "object", "properties": { "include_usage": { "type": "boolean" }, "include_obfuscation": { "type": "boolean" } } } ] }, "temperature": { "anyOf": [ { "type": "number", "minimum": 0, "maximum": 2 }, { "type": "null" } ], "default": 1, "description": "Sampling temperature between 0 and 2." }, "tool_choice": { "anyOf": [ { "description": "Controls which (if any) tool is called by the model. 'none' = no tools, 'auto' = model decides, 'required' = must call a tool.", "oneOf": [ { "type": "string", "enum": [ "none", "auto", "required" ] }, { "type": "object", "description": "Force a specific function tool.", "properties": { "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "properties": { "name": { "type": "string" } }, "required": [ "name" ] } }, "required": [ "type", "function" ] }, { "type": "object", "description": "Force a specific custom tool.", "properties": { "type": { "type": "string", "enum": [ "custom" ] }, "custom": { "type": "object", "properties": { "name": { "type": "string" } }, "required": [ "name" ] } }, "required": [ "type", "custom" ] }, { "type": "object", "description": "Constrain to an allowed subset of tools.", "properties": { "type": { "type": "string", "enum": [ "allowed_tools" ] }, "allowed_tools": { "type": "object", "properties": { "mode": { "type": "string", "enum": [ "auto", "required" ] }, "tools": { "type": "array", "items": { "type": "object" } } }, "required": [ "mode", "tools" ] } }, "required": [ "type", "allowed_tools" ] } ] } ] }, "tools": { "type": "array", "description": "A list of tools the model may call.", "items": { "oneOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "properties": { "name": { "type": "string", "description": "The name of the function to be called." }, "description": { "type": "string", "description": "A description of what the function does." 
}, "parameters": { "type": "object", "description": "The parameters the function accepts, described as a JSON Schema object." }, "strict": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to enable strict schema adherence." } }, "required": [ "name" ] } }, "required": [ "type", "function" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "custom" ] }, "custom": { "type": "object", "properties": { "name": { "type": "string" }, "description": { "type": "string" }, "format": { "oneOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] } }, "required": [ "type" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "grammar" ] }, "grammar": { "type": "object", "properties": { "definition": { "type": "string" }, "syntax": { "type": "string", "enum": [ "lark", "regex" ] } }, "required": [ "definition", "syntax" ] } }, "required": [ "type", "grammar" ] } ] } }, "required": [ "name" ] } }, "required": [ "type", "custom" ] } ] } }, "top_p": { "anyOf": [ { "type": "number", "minimum": 0, "maximum": 1 }, { "type": "null" } ], "default": 1, "description": "Nucleus sampling: considers the results of the tokens with top_p probability mass." }, "user": { "type": "string", "description": "A unique identifier representing your end-user, for abuse monitoring." 
}, "web_search_options": { "anyOf": [ { "type": "object", "description": "Options for the web search tool (when using built-in web search).", "properties": { "search_context_size": { "type": "string", "enum": [ "low", "medium", "high" ], "default": "medium" }, "user_location": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "approximate" ] }, "approximate": { "type": "object", "properties": { "city": { "type": "string" }, "country": { "type": "string" }, "region": { "type": "string" }, "timezone": { "type": "string" } } } }, "required": [ "type", "approximate" ] } } } ] }, "function_call": { "anyOf": [ { "type": "string", "enum": [ "none", "auto" ] }, { "type": "object", "properties": { "name": { "type": "string" } }, "required": [ "name" ] } ] }, "functions": { "type": "array", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "The name of the function to be called." }, "description": { "type": "string", "description": "A description of what the function does." }, "parameters": { "type": "object", "description": "The parameters the function accepts, described as a JSON Schema object." }, "strict": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to enable strict schema adherence." 
} }, "required": [ "name" ] }, "minItems": 1, "maxItems": 128 } }, "required": [ "prompt" ] }, { "title": "Messages", "properties": { "messages": { "type": "array", "description": "A list of messages comprising the conversation so far.", "items": { "oneOf": [ { "type": "object", "properties": { "role": { "type": "string", "enum": [ "developer" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] }, "text": { "type": "string" } }, "required": [ "type", "text" ] } } ] }, "name": { "type": "string" } }, "required": [ "role", "content" ] }, { "type": "object", "properties": { "role": { "type": "string", "enum": [ "system" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] }, "text": { "type": "string" } }, "required": [ "type", "text" ] } } ] }, "name": { "type": "string" } }, "required": [ "role", "content" ] }, { "type": "object", "properties": { "role": { "type": "string", "enum": [ "user" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text", "image_url", "input_audio", "file" ] }, "text": { "type": "string" }, "image_url": { "type": "object", "properties": { "url": { "type": "string" }, "detail": { "type": "string", "enum": [ "auto", "low", "high" ], "default": "auto" } } }, "input_audio": { "type": "object", "properties": { "data": { "type": "string" }, "format": { "type": "string", "enum": [ "wav", "mp3" ] } } }, "file": { "type": "object", "properties": { "file_data": { "type": "string" }, "file_id": { "type": "string" }, "filename": { "type": "string" } } } }, "required": [ "type" ] }, "minItems": 1 } ] }, "name": { "type": "string" } }, "required": [ "role", "content" ] }, { "type": "object", "properties": { "role": { "type": "string", 
"enum": [ "assistant" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "null" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text", "refusal" ] }, "text": { "type": "string" }, "refusal": { "type": "string" } }, "required": [ "type" ] } } ] }, "refusal": { "anyOf": [ { "type": "string" }, { "type": "null" } ] }, "name": { "type": "string" }, "audio": { "anyOf": [ { "type": "object", "properties": { "id": { "type": "string" } }, "required": [ "id" ] } ] }, "tool_calls": { "type": "array", "items": { "oneOf": [ { "type": "object", "properties": { "id": { "type": "string" }, "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "properties": { "name": { "type": "string" }, "arguments": { "type": "string", "description": "JSON-encoded arguments string." } }, "required": [ "name", "arguments" ] } }, "required": [ "id", "type", "function" ] }, { "type": "object", "properties": { "id": { "type": "string" }, "type": { "type": "string", "enum": [ "custom" ] }, "custom": { "type": "object", "properties": { "name": { "type": "string" }, "input": { "type": "string" } }, "required": [ "name", "input" ] } }, "required": [ "id", "type", "custom" ] } ] } }, "function_call": { "anyOf": [ { "type": "object", "properties": { "name": { "type": "string" }, "arguments": { "type": "string" } }, "required": [ "name", "arguments" ] } ] } }, "required": [ "role" ] }, { "type": "object", "properties": { "role": { "type": "string", "enum": [ "tool" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] }, "text": { "type": "string" } }, "required": [ "type", "text" ] } } ] }, "tool_call_id": { "type": "string" } }, "required": [ "role", "content", "tool_call_id" ] }, { "type": "object", "properties": { "role": { "type": "string", "enum": [ "function" ] }, "content": { "type": "string" 
}, "name": { "type": "string" } }, "required": [ "role", "content", "name" ] } ] }, "minItems": 1 }, "model": { "type": "string", "description": "ID of the model to use (e.g. '@cf/zai-org/glm-4.7-flash, etc')." }, "audio": { "anyOf": [ { "type": "object", "description": "Parameters for audio output. Required when modalities includes 'audio'.", "properties": { "voice": { "oneOf": [ { "type": "string" }, { "type": "object", "properties": { "id": { "type": "string" } }, "required": [ "id" ] } ] }, "format": { "type": "string", "enum": [ "wav", "aac", "mp3", "flac", "opus", "pcm16" ] } }, "required": [ "voice", "format" ] } ] }, "frequency_penalty": { "anyOf": [ { "type": "number", "minimum": -2, "maximum": 2 }, { "type": "null" } ], "default": 0, "description": "Penalizes new tokens based on their existing frequency in the text so far." }, "logit_bias": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "description": "Modify the likelihood of specified tokens appearing in the completion. Maps token IDs to bias values from -100 to 100." }, "logprobs": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to return log probabilities of the output tokens." }, "top_logprobs": { "anyOf": [ { "type": "integer", "minimum": 0, "maximum": 20 }, { "type": "null" } ], "description": "How many top log probabilities to return at each token position (0-20). Requires logprobs=true." }, "max_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "description": "Deprecated in favor of max_completion_tokens. The maximum number of tokens to generate." }, "max_completion_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "description": "An upper bound for the number of tokens that can be generated for a completion." }, "metadata": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "description": "Set of 16 key-value pairs that can be attached to the object." 
}, "modalities": { "anyOf": [ { "type": "array", "items": { "type": "string", "enum": [ "text", "audio" ] } }, { "type": "null" } ], "description": "Output types requested from the model (e.g. ['text'] or ['text', 'audio'])." }, "n": { "anyOf": [ { "type": "integer", "minimum": 1, "maximum": 128 }, { "type": "null" } ], "default": 1, "description": "How many chat completion choices to generate for each input message." }, "parallel_tool_calls": { "type": "boolean", "default": true, "description": "Whether to enable parallel function calling during tool use." }, "prediction": { "anyOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "content" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] }, "text": { "type": "string" } }, "required": [ "type", "text" ] } } ] } }, "required": [ "type", "content" ] } ] }, "presence_penalty": { "anyOf": [ { "type": "number", "minimum": -2, "maximum": 2 }, { "type": "null" } ], "default": 0, "description": "Penalizes new tokens based on whether they appear in the text so far." }, "reasoning_effort": { "anyOf": [ { "type": "string", "enum": [ "low", "medium", "high" ] }, { "type": "null" } ], "description": "Constrains effort on reasoning for reasoning models (o1, o3-mini, etc.)." 
}, "response_format": { "anyOf": [ { "description": "Specifies the format the model must output.", "oneOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] } }, "required": [ "type" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "json_object" ] } }, "required": [ "type" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "json_schema" ] }, "json_schema": { "type": "object", "properties": { "name": { "type": "string" }, "description": { "type": "string" }, "schema": { "type": "object" }, "strict": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ] } }, "required": [ "name" ] } }, "required": [ "type", "json_schema" ] } ] } ] }, "seed": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "description": "If specified, the system will make a best effort to sample deterministically." }, "service_tier": { "anyOf": [ { "type": "string", "enum": [ "auto", "default", "flex", "scale", "priority" ] }, { "type": "null" } ], "default": "auto", "description": "Specifies the processing type used for serving the request." }, "stop": { "description": "Up to 4 sequences where the API will stop generating further tokens.", "anyOf": [ { "type": "null" }, { "type": "string" }, { "type": "array", "items": { "type": "string" }, "minItems": 1, "maxItems": 4 } ] }, "store": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to store the output for model distillation / evals." }, "stream": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "If true, partial message deltas will be sent as server-sent events." 
}, "stream_options": { "anyOf": [ { "type": "object", "properties": { "include_usage": { "type": "boolean" }, "include_obfuscation": { "type": "boolean" } } } ] }, "temperature": { "anyOf": [ { "type": "number", "minimum": 0, "maximum": 2 }, { "type": "null" } ], "default": 1, "description": "Sampling temperature between 0 and 2." }, "tool_choice": { "anyOf": [ { "description": "Controls which (if any) tool is called by the model. 'none' = no tools, 'auto' = model decides, 'required' = must call a tool.", "oneOf": [ { "type": "string", "enum": [ "none", "auto", "required" ] }, { "type": "object", "description": "Force a specific function tool.", "properties": { "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "properties": { "name": { "type": "string" } }, "required": [ "name" ] } }, "required": [ "type", "function" ] }, { "type": "object", "description": "Force a specific custom tool.", "properties": { "type": { "type": "string", "enum": [ "custom" ] }, "custom": { "type": "object", "properties": { "name": { "type": "string" } }, "required": [ "name" ] } }, "required": [ "type", "custom" ] }, { "type": "object", "description": "Constrain to an allowed subset of tools.", "properties": { "type": { "type": "string", "enum": [ "allowed_tools" ] }, "allowed_tools": { "type": "object", "properties": { "mode": { "type": "string", "enum": [ "auto", "required" ] }, "tools": { "type": "array", "items": { "type": "object" } } }, "required": [ "mode", "tools" ] } }, "required": [ "type", "allowed_tools" ] } ] } ] }, "tools": { "type": "array", "description": "A list of tools the model may call.", "items": { "oneOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "properties": { "name": { "type": "string", "description": "The name of the function to be called." }, "description": { "type": "string", "description": "A description of what the function does." 
}, "parameters": { "type": "object", "description": "The parameters the function accepts, described as a JSON Schema object." }, "strict": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to enable strict schema adherence." } }, "required": [ "name" ] } }, "required": [ "type", "function" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "custom" ] }, "custom": { "type": "object", "properties": { "name": { "type": "string" }, "description": { "type": "string" }, "format": { "oneOf": [ { "type": "object", "properties": { "type": { "type": "string", "enum": [ "text" ] } }, "required": [ "type" ] }, { "type": "object", "properties": { "type": { "type": "string", "enum": [ "grammar" ] }, "grammar": { "type": "object", "properties": { "definition": { "type": "string" }, "syntax": { "type": "string", "enum": [ "lark", "regex" ] } }, "required": [ "definition", "syntax" ] } }, "required": [ "type", "grammar" ] } ] } }, "required": [ "name" ] } }, "required": [ "type", "custom" ] } ] } }, "top_p": { "anyOf": [ { "type": "number", "minimum": 0, "maximum": 1 }, { "type": "null" } ], "default": 1, "description": "Nucleus sampling: considers the results of the tokens with top_p probability mass." }, "user": { "type": "string", "description": "A unique identifier representing your end-user, for abuse monitoring." 
}, "web_search_options": { "anyOf": [ { "type": "object", "description": "Options for the web search tool (when using built-in web search).", "properties": { "search_context_size": { "type": "string", "enum": [ "low", "medium", "high" ], "default": "medium" }, "user_location": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "approximate" ] }, "approximate": { "type": "object", "properties": { "city": { "type": "string" }, "country": { "type": "string" }, "region": { "type": "string" }, "timezone": { "type": "string" } } } }, "required": [ "type", "approximate" ] } } } ] }, "function_call": { "anyOf": [ { "type": "string", "enum": [ "none", "auto" ] }, { "type": "object", "properties": { "name": { "type": "string" } }, "required": [ "name" ] } ] }, "functions": { "type": "array", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "The name of the function to be called." }, "description": { "type": "string", "description": "A description of what the function does." }, "parameters": { "type": "object", "description": "The parameters the function accepts, described as a JSON Schema object." }, "strict": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to enable strict schema adherence." } }, "required": [ "name" ] }, "minItems": 1, "maxItems": 128 } }, "required": [ "messages" ] } ]}{ "oneOf": [ { "type": "object", "contentType": "application/json", "properties": { "id": { "type": "string", "description": "A unique identifier for the chat completion." }, "object": { "type": "string" }, "created": { "type": "integer", "description": "Unix timestamp (seconds) of when the completion was created." }, "model": { "type": "string", "description": "The model used for the chat completion." 
}, "choices": { "type": "array", "items": { "anyOf": [ { "type": "object", "properties": { "index": { "type": "integer" }, "message": { "anyOf": [ { "type": "object", "properties": { "role": { "type": "string", "enum": [ "assistant" ] }, "content": { "anyOf": [ { "type": "string" }, { "type": "null" } ] }, "refusal": { "anyOf": [ { "type": "string" }, { "type": "null" } ] }, "annotations": { "type": "array", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "url_citation" ] }, "url_citation": { "type": "object", "properties": { "url": { "type": "string" }, "title": { "type": "string" }, "start_index": { "type": "integer" }, "end_index": { "type": "integer" } }, "required": [ "url", "title", "start_index", "end_index" ] } }, "required": [ "type", "url_citation" ] } }, "audio": { "anyOf": [ { "type": "object", "properties": { "id": { "type": "string" }, "data": { "type": "string", "description": "Base64 encoded audio bytes." }, "expires_at": { "type": "integer" }, "transcript": { "type": "string" } }, "required": [ "id", "data", "expires_at", "transcript" ] } ] }, "tool_calls": { "type": "array", "items": { "oneOf": [ { "type": "object", "properties": { "id": { "type": "string" }, "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "properties": { "name": { "type": "string" }, "arguments": { "type": "string", "description": "JSON-encoded arguments string." 
} }, "required": [ "name", "arguments" ] } }, "required": [ "id", "type", "function" ] }, { "type": "object", "properties": { "id": { "type": "string" }, "type": { "type": "string", "enum": [ "custom" ] }, "custom": { "type": "object", "properties": { "name": { "type": "string" }, "input": { "type": "string" } }, "required": [ "name", "input" ] } }, "required": [ "id", "type", "custom" ] } ] } }, "function_call": { "anyOf": [ { "type": "object", "properties": { "name": { "type": "string" }, "arguments": { "type": "string" } }, "required": [ "name", "arguments" ] }, { "type": "null" } ] } }, "required": [ "role", "content", "refusal" ] } ] }, "finish_reason": { "type": "string", "enum": [ "stop", "length", "tool_calls", "content_filter", "function_call" ] }, "logprobs": { "anyOf": [ { "type": "object", "properties": { "content": { "anyOf": [ { "type": "array", "items": { "type": "object", "properties": { "token": { "type": "string" }, "logprob": { "type": "number" }, "bytes": { "anyOf": [ { "type": "array", "items": { "type": "integer" } }, { "type": "null" } ] }, "top_logprobs": { "type": "array", "items": { "type": "object", "properties": { "token": { "type": "string" }, "logprob": { "type": "number" }, "bytes": { "anyOf": [ { "type": "array", "items": { "type": "integer" } }, { "type": "null" } ] } }, "required": [ "token", "logprob", "bytes" ] } } }, "required": [ "token", "logprob", "bytes", "top_logprobs" ] } }, { "type": "null" } ] }, "refusal": { "anyOf": [ { "type": "array", "items": { "type": "object", "properties": { "token": { "type": "string" }, "logprob": { "type": "number" }, "bytes": { "anyOf": [ { "type": "array", "items": { "type": "integer" } }, { "type": "null" } ] }, "top_logprobs": { "type": "array", "items": { "type": "object", "properties": { "token": { "type": "string" }, "logprob": { "type": "number" }, "bytes": { "anyOf": [ { "type": "array", "items": { "type": "integer" } }, { "type": "null" } ] } }, "required": [ "token", "logprob", 
"bytes" ] } } }, "required": [ "token", "logprob", "bytes", "top_logprobs" ] } }, { "type": "null" } ] } } }, { "type": "null" } ] } }, "required": [ "index", "message", "finish_reason", "logprobs" ] } ] }, "minItems": 1 }, "usage": { "anyOf": [ { "type": "object", "properties": { "prompt_tokens": { "type": "integer" }, "completion_tokens": { "type": "integer" }, "total_tokens": { "type": "integer" }, "prompt_tokens_details": { "type": "object", "properties": { "cached_tokens": { "type": "integer" }, "audio_tokens": { "type": "integer" } } }, "completion_tokens_details": { "type": "object", "properties": { "reasoning_tokens": { "type": "integer" }, "audio_tokens": { "type": "integer" }, "accepted_prediction_tokens": { "type": "integer" }, "rejected_prediction_tokens": { "type": "integer" } } } }, "required": [ "prompt_tokens", "completion_tokens", "total_tokens" ] } ] }, "system_fingerprint": { "anyOf": [ { "type": "string" }, { "type": "null" } ] }, "service_tier": { "anyOf": [ { "type": "string", "enum": [ "auto", "default", "flex", "scale", "priority" ] }, { "type": "null" } ] } }, "required": [ "id", "object", "created", "model", "choices" ] }, { "type": "string", "contentType": "text/event-stream", "format": "binary" } ]}