Spaces:

Issamohammed
/

Transcriber

Running

Issamohammed committed on Apr 16

Commit

884123a

verified ·

1 Parent(s): ebceedf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,17 +1,35 @@
 import gradio as gr
-from transformers import pipeline
-# Use whisper-base for better Swedish transcription on CPU
-asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=-1)
 def transcribe(audio):
-    result = asr(audio)
     return result["text"]
 gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(source="upload", type="filepath", label="Upload Swedish Audio"),
-    outputs=gr.Textbox(label="Transcribed Text (Swedish)"),
-    title="Swedish Whisper Transcriber",
-    description="This app uses OpenAI Whisper (base) to transcribe spoken Swedish into text for free."
 ).launch()

+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import gradio as gr
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model_id = "KBLab/kb-whisper-large"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch_dtype
+).to(device)
+processor = AutoProcessor.from_pretrained(model_id)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    device=device,
+    torch_dtype=torch_dtype,
+)
 def transcribe(audio):
+    result = pipe(audio, chunk_length_s=30, generate_kwargs={"task": "transcribe", "language": "sv"})
     return result["text"]
 gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(source="upload", type="filepath", label="Upload Swedish Audio"),
+    outputs=gr.Textbox(label="Transcribed Text"),
+    title="KB-Whisper Swedish Transcriber",
+    description="Transcribes Swedish audio using KBLab's Whisper Large model."
 ).launch()