"""Gradio app that transcribes Swedish audio with KBLab's KB-Whisper model.

The model and processor are loaded once at start-up, wrapped in a
Transformers automatic-speech-recognition pipeline, and exposed through a
single-function Gradio interface.
"""

from typing import Optional

import gradio as gr
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cpu"  # Free CPU only
torch_dtype = torch.float32

model_id = "KBLab/kb-whisper-large"

# Load the weights once at start-up so every request reuses the same model.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=device,
    torch_dtype=torch_dtype,
)


def transcribe(audio: Optional[str]) -> str:
    """Return the Swedish transcription of an audio file.

    Args:
        audio: Path to the audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input. May be ``None`` or empty
            when the form is submitted without any audio.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when no audio was provided.
    """
    # Without a file there is nothing to decode; calling the pipeline with
    # None would only surface an opaque error in the UI.
    if not audio:
        return ""

    result = pipe(
        audio,
        chunk_length_s=30,
        generate_kwargs={"task": "transcribe", "language": "sv"},
    )
    return result["text"]


demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Swedish Audio"),
    outputs=gr.Textbox(label="Transcribed Text"),
    title="KB-Whisper Transcriber (Swedish, Free CPU)",
    description="Transcribes Swedish audio using KBLab's Whisper Large model. Running on free CPU — may be slow.",
)

demo.launch(share=True)