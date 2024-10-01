{
  "type": "object",
  "contentType": "application/json",
  "properties": {
    "transcription_info": {
      "type": "object",
      "properties": {
        "language": {
          "type": "string",
          "description": "The language of the audio being transcribed or translated."
        },
        "language_probability": {
          "type": "number",
          "description": "The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1."
        },
        "duration": {
          "type": "number",
          "description": "The total duration of the original audio file, in seconds."
        },
        "duration_after_vad": {
          "type": "number",
          "description": "The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds."
        }
      }
    },
    "text": {
      "type": "string",
      "description": "The complete transcription of the audio."
    },
    "word_count": {
      "type": "number",
      "description": "The total number of words in the transcription."
    },
    "segments": {
      "type": "object",
      "properties": {
        "start": {
          "type": "number",
          "description": "The starting time of the segment within the audio, in seconds."
        },
        "end": {
          "type": "number",
          "description": "The ending time of the segment within the audio, in seconds."
        },
        "text": {
          "type": "string",
          "description": "The transcription of the segment."
        },
        "temperature": {
          "type": "number",
          "description": "The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs."
        },
        "avg_logprob": {
          "type": "number",
          "description": "The average log probability of the predictions for the words in this segment, indicating overall confidence."
        },
        "compression_ratio": {
          "type": "number",
          "description": "The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process."
        },
        "no_speech_prob": {
          "type": "number",
          "description": "The probability that the segment contains no speech, represented as a decimal between 0 and 1."
        },
        "words": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "word": {
                "type": "string",
                "description": "The individual word transcribed from the audio."
              },
              "start": {
                "type": "number",
                "description": "The starting time of the word within the audio, in seconds."
              },
              "end": {
                "type": "number",
                "description": "The ending time of the word within the audio, in seconds."
              }
            }
          }
        }
      }
    },
    "vtt": {
      "type": "string",
      "description": "The transcription in WebVTT format, which includes timing and text information for use in subtitles."
    }
  },
  "required": [
    "text"
  ]
}