import os
import gradio as gr
from transformers import pipeline

# Build the ASR pipeline once at import time (downloads the model on first run).
# Alternative model previously tried here: "openai/whisper-large".
# NOTE(review): this rebinds the name `pipeline`, shadowing the factory imported
# from `transformers`. Kept as-is so the module-level lookup in `transcribe`
# still resolves; a rename (e.g. `asr_pipeline`) would be clearer if all uses
# are updated together.
pipeline = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german")

def transcribe(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* to text.

    Delegates to the module-level ASR pipeline and returns the
    recognized text from its result dict.
    """
    result = pipeline(audio_path)
    return result["text"]


# Web UI: upload an audio file, receive its transcription as text.
# Fix: `gr.inputs.Audio` is the deprecated pre-3.0 Gradio namespace and was
# removed in later releases (raises AttributeError); the top-level `gr.Audio`
# component is the supported equivalent.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(label="Upload audio file", type="filepath"),
    outputs="text",
)


if __name__ == "__main__":
    # Start the Gradio server only when executed as a script (not on import).
    demo.launch()