Spaces:

moustaphasidibe
/

Automatical_speech_prediction

Running

File size: 2,731 Bytes

b59bc30

# importer gradio
import gradio as gr
from transformers import pipeline
# Importer nemo.collections.asr
import nemo.collections.asr as nemo_asr
# Pretrained model identifiers for the two speech-recognition back ends.
CANARY_MODEL_ID = "nvidia/canary-1b-flash"
WHISPER_MODEL_ID = "openai/whisper-small"

# Load the Canary model through NeMo's ASRModel.from_pretrained.
asr_canary = nemo_asr.models.ASRModel.from_pretrained(CANARY_MODEL_ID)
# Build the Transformers automatic-speech-recognition pipeline for Whisper.
asr_whisper = pipeline(task="automatic-speech-recognition", model=WHISPER_MODEL_ID)

# Whisper transcription callback
def transcrire1(fpath):
    """Transcribe an audio file with the Whisper pipeline.

    Args:
        fpath: Path of the audio file to transcribe. May be ``None`` or
            empty when the user submits without providing any audio.

    Returns:
        The ``"text"`` entry of the pipeline output, or an empty string
        when no audio was provided.
    """
    # Gradio passes None when nothing was recorded or uploaded; forwarding
    # that to the pipeline would raise instead of producing a result.
    if not fpath:
        return ""
    output = asr_whisper(fpath)
    return output["text"]

# Canary (canary-1b-flash) transcription callback
def transcrire2(fpath, source_lang, target_lang):
    """Transcribe an audio file with the Canary model.

    Args:
        fpath: Path of the audio file to transcribe. May be ``None`` or
            empty when the user submits without providing any audio.
        source_lang: Forwarded unchanged as ``source_lang`` to
            ``asr_canary.transcribe``.
        target_lang: Forwarded unchanged as ``target_lang`` to
            ``asr_canary.transcribe``.

    Returns:
        The ``text`` attribute of the first transcription result, or an
        empty string when no audio was provided.
    """
    # Gradio passes None when nothing was recorded or uploaded; skip the
    # model call rather than handing it an invalid path.
    if not fpath:
        return ""
    transcriptions = asr_canary.transcribe(
        [fpath],
        source_lang=source_lang,
        target_lang=target_lang,
    )
    # A one-item list was submitted, so the first result is the one we want.
    return transcriptions[0].text

# Top-level Blocks container that will host the tabs, styled with the named theme
THEME_NAME = 'JohnSmith9982/small_and_pretty'
demo = gr.Blocks(theme=THEME_NAME)
# Whisper ASR interface fed by the microphone.
# No examples are defined for this interface, so no example caching is requested.
mic_transcrire = gr.Interface(
    fn=transcrire1,
    # type="filepath" makes the callback receive a path to the recorded audio.
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    title='Transcrire par microphone - Whisper',
)

# Whisper ASR interface fed by an uploaded audio file.
# Components are created input first, then output, before wiring them up.
_whisper_upload_audio = gr.Audio(type="filepath", sources="upload")
_whisper_upload_text = gr.Textbox(lines=3, label="Transcription")
fich_transcrire = gr.Interface(
    title='Transcrire un fichier audio - Whisper',
    fn=transcrire1,
    inputs=_whisper_upload_audio,
    outputs=_whisper_upload_text,
)


# Canary ASR interface fed by the microphone.
# The three inputs map positionally onto transcrire2(fpath, source_lang, target_lang).
# No examples are defined for this interface, so no example caching is requested.
mic_transcrire1 = gr.Interface(
    fn=transcrire2,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Dropdown(choices=['fr', 'en'], label='Source language'),
        gr.Dropdown(choices=['fr', 'en'], label='Target language'),
    ],
    outputs=gr.Textbox(label="Transcription", lines=3),
    title='Transcrire par microphone - Canary',
)

# Canary ASR interface fed by an uploaded audio file.
# The three inputs map positionally onto transcrire2(fpath, source_lang, target_lang).
fich_transcrire1 = gr.Interface(
    fn=transcrire2,
    inputs=[
        gr.Audio(sources="upload", type="filepath"),
        gr.Dropdown(choices=['fr', 'en'], label='Source language'),
        gr.Dropdown(choices=['fr', 'en'], label='Target language'),
    ],
    outputs=gr.Textbox(label="Transcription", lines=3),
    title='Transcrire un fichier audio - Canary',
)

# Gather the four interfaces into tabs inside demo, then start the app.
with demo:
    gr.TabbedInterface(
        [
            mic_transcrire,
            fich_transcrire,
            mic_transcrire1,
            fich_transcrire1,
        ],
        # Tab names are listed in the same order as the interfaces above.
        # Each name carries its model so the Whisper and Canary tabs can be
        # told apart.
        [
            "Transcrire Microphone - Whisper",
            "Transcrire Audio - Whisper",
            "Transcrire Microphone - Canary",
            "Transcrire Audio - Canary",
        ],
    )

demo.launch()