Skip to content
AssemblyAI logo

Universal 3 Pro

Automatic Speech Recognition · AssemblyAI · Proxied

AssemblyAI's Universal 3 Pro speech recognition model for high-accuracy transcription.

Model Info
Terms and License (link)
More information (link)

Usage

TypeScript
const response = await env.AI.run(
'assemblyai/universal-3-pro',
{
audio_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav',
},
{
gateway: { id: 'default' },
}
)
console.log(response)
Input / Output JSON
{
"audio_url": "https://cdn.openai.com/API/docs/audio/alloy.wav"
}

Examples

With Language Code — Transcribe with an explicit language code
TypeScript
const response = await env.AI.run(
'assemblyai/universal-3-pro',
{
audio_url: 'https://cdn.openai.com/API/docs/audio/echo.wav',
language_code: 'en',
},
{
gateway: { id: 'default' },
}
)
console.log(response)
Input / Output JSON
{
"audio_url": "https://cdn.openai.com/API/docs/audio/echo.wav",
"language_code": "en"
}
With Key Terms — Improve accuracy for domain-specific vocabulary
TypeScript
const response = await env.AI.run(
'assemblyai/universal-3-pro',
{
audio_url: 'https://cdn.openai.com/API/docs/audio/nova.wav',
keyterms_prompt: [
'Kubernetes',
'microservices',
'containerization',
'load balancer',
],
},
{
gateway: { id: 'default' },
}
)
console.log(response)
Input / Output JSON
{
"audio_url": "https://cdn.openai.com/API/docs/audio/nova.wav",
"keyterms_prompt": [
"Kubernetes",
"microservices",
"containerization",
"load balancer"
]
}
Speaker Diarization — Identify different speakers in the audio
TypeScript
const response = await env.AI.run(
'assemblyai/universal-3-pro',
{
audio_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav',
speaker_labels: true,
},
{
gateway: { id: 'default' },
}
)
console.log(response)
Input / Output JSON
{
"audio_url": "https://cdn.openai.com/API/docs/audio/onyx.wav",
"speaker_labels": true
}

Parameters

audio_url
string, required — The URL of the audio file to transcribe. Can be a publicly accessible URL or a data URI (data:audio/...;base64,...). For data URIs, the audio will be uploaded to AssemblyAI automatically.
language_code
string — The language code for the audio file (e.g., "en", "es", "fr"). Defaults to automatic language detection.
language_detection
boolean — Enable automatic language detection. When enabled with speech_models, the system will automatically select the best model for the detected language.
prompt
string — A custom prompt to guide transcription style, formatting, and output characteristics. Maximum 1,500 words.
temperature
number, minimum: 0, maximum: 1 — Controls randomness in model output (0.0-1.0). Lower values make output more deterministic. Default is 0.0.
speaker_labels
boolean — Enable speaker diarization to identify different speakers in the audio.
speakers_expected
integer, minimum: 1, maximum: 50 — Expected number of speakers for speaker diarization.
auto_chapters
boolean — Enable automatic chapter detection.
entity_detection
boolean — Enable detection of entities like names, organizations, and locations.
sentiment_analysis
boolean — Enable sentiment analysis for each sentence.
auto_highlights
boolean — Enable automatic extraction of key phrases and highlights.
content_safety
boolean — Enable content safety detection for sensitive content.
iab_categories
boolean — Enable IAB (Interactive Advertising Bureau) content taxonomy classification.
disfluencies
boolean — Include filler words like "um", "uh", etc. in the transcript.
multichannel
boolean — Process each audio channel separately for multi-channel audio files.
dual_channel
boolean — Process audio as dual-channel (stereo) for better accuracy.
webhook_url
string, format: uri — URL to receive webhook notifications when transcription is complete.
audio_start_from
integer, minimum: 0 — Timestamp (in milliseconds) to start transcription from.
audio_end_at
integer, minimum: 0 — Timestamp (in milliseconds) to end transcription at.
boost_param
string, enum: low, default, high — How much to boost the words in word_boost (the legacy array-of-words parameter defined in the API schema).
filter_profanity
boolean — Filter profanity from the transcription.
redact_pii
boolean — Redact personally identifiable information.
redact_pii_audio
boolean — Generate a redacted audio file with PII removed.
redact_pii_sub
string, enum: entity_name, hash — Strategy for substituting redacted PII.
speech_threshold
number, minimum: 0, maximum: 1 — Confidence threshold for speech detection.

API Schemas

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {
"audio_url": {
"description": "The URL of the audio file to transcribe. Can be a publicly accessible URL or a data URI (data:audio/...;base64,...). For data URIs, the audio will be uploaded to AssemblyAI automatically.",
"type": "string"
},
"language_code": {
"description": "The language code for the audio file (e.g., \"en\", \"es\", \"fr\"). Defaults to automatic language detection.",
"type": "string"
},
"language_detection": {
"description": "Enable automatic language detection. When enabled with speech_models, the system will automatically select the best model for the detected language.",
"type": "boolean"
},
"prompt": {
"description": "A custom prompt to guide transcription style, formatting, and output characteristics. Maximum 1,500 words.",
"type": "string"
},
"keyterms_prompt": {
"description": "An array of up to 1,000 words or phrases (max 6 words per phrase) to improve transcription accuracy. Cannot be used with the prompt parameter.",
"type": "array",
"items": {
"type": "string"
}
},
"temperature": {
"description": "Controls randomness in model output (0.0-1.0). Lower values make output more deterministic. Default is 0.0.",
"type": "number",
"minimum": 0,
"maximum": 1
},
"speaker_labels": {
"description": "Enable speaker diarization to identify different speakers in the audio.",
"type": "boolean"
},
"speakers_expected": {
"description": "Expected number of speakers for speaker diarization.",
"type": "integer",
"minimum": 1,
"maximum": 50
},
"auto_chapters": {
"description": "Enable automatic chapter detection.",
"type": "boolean"
},
"entity_detection": {
"description": "Enable detection of entities like names, organizations, and locations.",
"type": "boolean"
},
"sentiment_analysis": {
"description": "Enable sentiment analysis for each sentence.",
"type": "boolean"
},
"auto_highlights": {
"description": "Enable automatic extraction of key phrases and highlights.",
"type": "boolean"
},
"content_safety": {
"description": "Enable content safety detection for sensitive content.",
"type": "boolean"
},
"iab_categories": {
"description": "Enable IAB (Interactive Advertising Bureau) content taxonomy classification.",
"type": "boolean"
},
"custom_spelling": {
"description": "Custom spelling rules to replace specific words or phrases in the transcription output.",
"type": "array",
"items": {
"type": "object",
"properties": {
"from": {
"type": "array",
"items": {
"type": "string"
}
},
"to": {
"type": "string"
}
},
"required": [
"from",
"to"
],
"additionalProperties": false
}
},
"disfluencies": {
"description": "Include filler words like \"um\", \"uh\", etc. in the transcript.",
"type": "boolean"
},
"multichannel": {
"description": "Process each audio channel separately for multi-channel audio files.",
"type": "boolean"
},
"dual_channel": {
"description": "Process audio as dual-channel (stereo) for better accuracy.",
"type": "boolean"
},
"webhook_url": {
"description": "URL to receive webhook notifications when transcription is complete.",
"type": "string",
"format": "uri"
},
"audio_start_from": {
"description": "Timestamp (in milliseconds) to start transcription from.",
"type": "integer",
"minimum": 0
},
"audio_end_at": {
"description": "Timestamp (in milliseconds) to end transcription at.",
"type": "integer",
"minimum": 0
},
"word_boost": {
"description": "Array of words to boost recognition accuracy (legacy - use keyterms_prompt instead).",
"type": "array",
"items": {
"type": "string"
}
},
"boost_param": {
"description": "How much to boost the words in word_boost.",
"type": "string",
"enum": [
"low",
"default",
"high"
]
},
"filter_profanity": {
"description": "Filter profanity from the transcription.",
"type": "boolean"
},
"redact_pii": {
"description": "Redact personally identifiable information.",
"type": "boolean"
},
"redact_pii_audio": {
"description": "Generate a redacted audio file with PII removed.",
"type": "boolean"
},
"redact_pii_policies": {
"description": "Specific PII policies to apply for redaction.",
"type": "array",
"items": {
"type": "string"
}
},
"redact_pii_sub": {
"description": "Strategy for substituting redacted PII.",
"type": "string",
"enum": [
"entity_name",
"hash"
]
},
"speech_threshold": {
"description": "Confidence threshold for speech detection.",
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"audio_url"
],
"additionalProperties": false
}