Issamohammed committed on
Commit
884123a
·
verified ·
1 Parent(s): ebceedf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -7
app.py CHANGED
@@ -1,17 +1,35 @@
 
 
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
- # Use whisper-base for better Swedish transcription on CPU
5
- asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=-1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def transcribe(audio):
8
- result = asr(audio)
9
  return result["text"]
10
 
11
  gr.Interface(
12
  fn=transcribe,
13
  inputs=gr.Audio(source="upload", type="filepath", label="Upload Swedish Audio"),
14
- outputs=gr.Textbox(label="Transcribed Text (Swedish)"),
15
- title="Swedish Whisper Transcriber",
16
- description="This app uses OpenAI Whisper (base) to transcribe spoken Swedish into text for free."
17
  ).launch()
 
1
+ import torch
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  import gradio as gr
 
4
 
# Choose the accelerator and a matching precision: float16 on CUDA,
# float32 when only the CPU is available.
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# Hub identifier of the checkpoint used for both the model and its processor.
model_id = "KBLab/kb-whisper-large"

# Load the weights in the selected precision, then move them to the device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype)
model = model.to(device)

# The processor supplies the tokenizer and feature extractor the pipeline needs.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline built around the already-loaded model.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=device,
    torch_dtype=torch_dtype,
)
24
 
def transcribe(audio):
    """Transcribe an audio file to Swedish text.

    Args:
        audio: Path of the audio file to transcribe (the Gradio input in
            this file is configured with ``type="filepath"``), or ``None``
            when nothing was uploaded.

    Returns:
        The text produced by the speech-recognition pipeline, or an empty
        string when ``audio`` is ``None``.
    """
    # Nothing uploaded: return early instead of handing None to the pipeline.
    if audio is None:
        return ""

    result = pipe(
        audio,
        chunk_length_s=30,  # process the recording in 30-second chunks
        # Force Swedish transcription rather than translation or auto-detect.
        generate_kwargs={"task": "transcribe", "language": "sv"},
    )
    return result["text"]
28
 
# Web UI: a single audio-upload input wired to `transcribe`, with the
# transcription shown in a text box.
# NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ expects
# `sources=["upload"]` — confirm against the installed Gradio version.
audio_input = gr.Audio(source="upload", type="filepath", label="Upload Swedish Audio")
text_output = gr.Textbox(label="Transcribed Text")

gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=text_output,
    title="KB-Whisper Swedish Transcriber",
    description="Transcribes Swedish audio using KBLab's Whisper Large model.",
).launch()