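# Gradio app: transcribe English speech with Distil-Whisper, then translate
# the transcript into a user-selected FLORES-200 language with NLLB-200.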
from transformers import pipeline
from transformers.utils import logging
import torch
import pandas as pd
import time
import gradio as gr
logging.set_verbosity_error()
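# Speech-to-text pipeline: Distil-Whisper (English-only) for transcription.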
asr = pipeline(task="automatic-speech-recognition",
               # model="./models/distil-whisper/distil-small.en")
               model="distil-whisper/distil-small.en")
translator = pipeline(task="translation",
                      # model="./models/facebook/nllb-200-distilled-600M",
                      model="facebook/nllb-200-distilled-600M",
                      torch_dtype=torch.bfloat16)
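# Load the FLORES-200 language table and build a language-name -> code lookup.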
flores_200_df = pd.read_csv("Flores200_language_codes.csv", encoding='cp1252')
flores_200 = dict(zip(flores_200_df['Language'], flores_200_df['FLORES_200_code']))
flores_200_languages = list(flores_200.keys())
def transcribe_audio(filepath, tgt_language):
    """Transcribe English audio at `filepath` and translate the transcript into
    the selected language (`tgt_language` is the dropdown's index into the
    FLORES-200 table)."""
    target_language = flores_200_df.loc[int(tgt_language), 'Language']
    print(f"Selected Target Language: {target_language}")
    time.sleep(5)
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Speech-to-text: transcribe the audio in 30-second chunks.
    english_transcript = asr(
        filepath,
        # max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )['text']
    print(english_transcript)
    # Translate the English transcript into the selected FLORES-200 language.
    output = translator(english_transcript, src_lang="eng_Latn",
                        tgt_lang=flores_200_df.loc[int(tgt_language), 'FLORES_200_code'])[0]['translation_text']
    print(output)
    return output
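# Gradio UI: a microphone tab and a file-upload tab, both driven by transcribe_audio.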
demo = gr.Blocks()
mic_transcribe = gr.Interface(
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="microphone", type="filepath"),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index',
                        label='Select Target Language')],
    outputs=gr.Textbox(label="Transcription in Selected Target Language",
                       lines=3),
    allow_flagging="never")
file_transcribe = gr.Interface(
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="upload", type="filepath"),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index',
                        label='Select Target Language')],
    outputs=gr.Textbox(label="Transcription in Selected Target Language",
                       lines=3),
    allow_flagging="never",
)
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Speak Through Microphone", "Upload Audio File"],
    )
demo.launch(debug=True)