Lightning-Fast Speech Recognition with OpenAI Quality
Experience the powerful speech recognition capabilities of Whisper Turbo directly in your browser
About 8x faster than Large V3 thanks to a decoder trimmed to just 4 layers, optimized for real-time transcription while maintaining exceptional accuracy.
Transcribe speech in 99 languages with robust performance across diverse accents and dialects.
MIT licensed with support for popular frameworks like Hugging Face Transformers and MLX for Apple Silicon (a minimal MLX sketch follows just after this list).
Achieves accuracy on par with Large V2, with a WER roughly 1% lower than Distil-Whisper's.
Compatible with existing Whisper Large V3 inference code, requiring no modifications to your pipeline.
Approximately 50% smaller than Large V3 at just 1.6GB, perfect for edge deployment and faster loading.
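On Apple Silicon, the MLX path mentioned above takes only a few lines. A minimal sketch, assuming the community mlx-whisper package and the mlx-community/whisper-large-v3-turbo conversion on the Hugging Face Hub:

import mlx_whisper

# Transcribe with the MLX-converted turbo checkpoint (downloaded on first use)
result = mlx_whisper.transcribe(
    "speech.mp3",
    path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
)
print(result["text"])

The examples that follow run the same transcription with the reference openai-whisper package, Hugging Face Transformers, and faster-whisper.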
import whisper
# Load the turbo model
model = whisper.load_model("turbo")
# Load and process audio
audio = whisper.load_audio("speech.mp3")
audio = whisper.pad_or_trim(audio)
# Generate the log-Mel spectrogram, matching the model's mel bin count (turbo uses 128)
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
# Detect language
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")
# Transcribe
options = whisper.DecodingOptions()
result = whisper.decode(model, mel, options)
print(result.text)
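The openai-whisper package also installs a command-line interface, so the same transcription can be run without writing any Python; the turbo checkpoint is selected with the --model flag:

whisper speech.mp3 --model turbo

The next example runs the same model through Hugging Face Transformers.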
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# Load model and processor
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(model_id)
# Build the ASR pipeline
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
# Transcribe
result = pipe("audio.mp3")
print(result["text"])
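The pipeline can also return timestamps and transcribe long recordings in 30-second chunks. A short sketch, assuming the standard return_timestamps and chunk_length_s options of the Transformers ASR pipeline:

# Chunked long-form transcription with segment-level timestamps
result = pipe("audio.mp3", return_timestamps=True, chunk_length_s=30)
for chunk in result["chunks"]:
    print(chunk["timestamp"], chunk["text"])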
from faster_whisper import WhisperModel, BatchedInferencePipeline
# Initialize model with turbo
model = WhisperModel("turbo", device="cuda", compute_type="float16")
# Enable batched inference for better performance
batched_model = BatchedInferencePipeline(model=model)
# Transcribe with batch processing
segments, info = batched_model.transcribe(
    "audio.mp3",
    batch_size=16,
    language="en"
)
# Print results with timestamps
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
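Note that faster-whisper yields segments lazily, so transcription only happens as the generator is consumed (as in the loop above). The accompanying info object carries run metadata; a minimal sketch, assuming the TranscriptionInfo fields exposed by recent faster-whisper releases:

# info reports metadata such as audio duration and the detected (or requested) language
print(f"Transcribed {info.duration:.1f}s of audio in language '{info.language}'")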