Speech 2.8 HD

Text-to-Speech • MiniMax • Proxied

MiniMax Speech 2.8 HD focuses on studio-grade audio generation with emotion control, multilingual support (40+ languages), and voice cloning.

Model Info
Terms and License	link ↗
More information	link ↗

Usage

// Minimal request: only `text` is supplied; no other input fields are set.
const speechRequest = {
  text: 'Hello! Welcome to Cloudflare AI Gateway. Let me show you what we can do.',
};

// Run options: select the gateway whose id is 'default'.
const runOptions = { gateway: { id: 'default' } };

const response = await env.AI.run('minimax/speech-2.8-hd', speechRequest, runOptions);
console.log(response);

Response 200

Examples

Custom Voice — Use a specific voice and adjust speed

// Request with an explicit voice and a slightly slower-than-default speed
// (the schema's default speed is 1; allowed range is 0.5 to 2).
const speechRequest = {
  text: 'The weather today is sunny with a high of 72 degrees. Perfect for a walk in the park.',
  voice_id: 'English_expressive_narrator',
  speed: 0.9,
};

// Run options: select the gateway whose id is 'default'.
const runOptions = { gateway: { id: 'default' } };

const response = await env.AI.run('minimax/speech-2.8-hd', speechRequest, runOptions);
console.log(response);

Response 200

With Emotion — Apply emotional tone to speech

// Request that sets the optional `emotion` field; 'happy' is one of the
// values listed in the input schema's emotion enum.
const speechRequest = {
  text: "Congratulations! You've just won the grand prize! This is absolutely incredible news!",
  voice_id: 'English_expressive_narrator',
  emotion: 'happy',
};

// Run options: select the gateway whose id is 'default'.
const runOptions = { gateway: { id: 'default' } };

const response = await env.AI.run('minimax/speech-2.8-hd', speechRequest, runOptions);
console.log(response);

Response 200

High Sample Rate — Studio-quality audio at a 44.1 kHz sample rate

// Request that sets `sample_rate` to 44100, the highest value the input
// schema allows for this field.
const speechRequest = {
  text: 'This recording is generated at studio quality sample rate for the highest possible audio fidelity.',
  sample_rate: 44100,
};

// Run options: select the gateway whose id is 'default'.
const runOptions = { gateway: { id: 'default' } };

const response = await env.AI.run('minimax/speech-2.8-hd', speechRequest, runOptions);
console.log(response);

Response 200

text

string • required • maxLength: 10000 — The text to convert to speech. Maximum 10,000 characters.

voice_id

string • required • default: English_expressive_narrator — The voice ID to use for synthesis

speed

number • required • default: 1 • minimum: 0.5 • maximum: 2 — Speech speed (0.5 to 2)

volume

number • required • default: 1 • minimum: 0 • maximum: 10 — Speech volume (0 to 10)

pitch

integer • required • default: 0 • minimum: -12 • maximum: 12 — Pitch adjustment (-12 to 12)

emotion

string • enum: happy, sad, angry, fearful, disgusted, surprised, calm, fluent — Emotion control for synthesized speech

format

string • required • default: mp3 • enum: mp3, flac, wav — Output audio format

sample_rate

number • one of: 8000, 16000, 22050, 24000, 32000, 44100 — Audio sample rate

audio

string • required — URL to the generated audio file

API Schemas

Input
Output

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "text": {
      "description": "The text to convert to speech. Maximum 10,000 characters.",
      "type": "string",
      "maxLength": 10000
    },
    "voice_id": {
      "description": "The voice ID to use for synthesis",
      "default": "English_expressive_narrator",
      "type": "string"
    },
    "speed": {
      "description": "Speech speed (0.5 to 2)",
      "default": 1,
      "type": "number",
      "minimum": 0.5,
      "maximum": 2
    },
    "volume": {
      "description": "Speech volume (0 to 10)",
      "default": 1,
      "type": "number",
      "minimum": 0,
      "maximum": 10
    },
    "pitch": {
      "description": "Pitch adjustment (-12 to 12)",
      "default": 0,
      "type": "integer",
      "minimum": -12,
      "maximum": 12
    },
    "emotion": {
      "description": "Emotion control for synthesized speech",
      "type": "string",
      "enum": [
        "happy",
        "sad",
        "angry",
        "fearful",
        "disgusted",
        "surprised",
        "calm",
        "fluent"
      ]
    },
    "format": {
      "description": "Output audio format",
      "default": "mp3",
      "type": "string",
      "enum": [
        "mp3",
        "flac",
        "wav"
      ]
    },
    "sample_rate": {
      "description": "Audio sample rate",
      "anyOf": [
        {
          "type": "number",
          "const": 8000
        },
        {
          "type": "number",
          "const": 16000
        },
        {
          "type": "number",
          "const": 22050
        },
        {
          "type": "number",
          "const": 24000
        },
        {
          "type": "number",
          "const": 32000
        },
        {
          "type": "number",
          "const": 44100
        }
      ]
    }
  },
  "required": [
    "text",
    "voice_id",
    "speed",
    "volume",
    "pitch",
    "format"
  ],
  "additionalProperties": false
}

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "audio": {
      "description": "URL to the generated audio file",
      "type": "string"
    }
  },
  "required": [
    "audio"
  ],
  "additionalProperties": false
}