import gradio as gr
import numpy as np
from transformers import pipeline

# Checkpoint identifiers handed to `pipeline`. Both models are built once at
# import time so every request served by the app reuses the same instances.
ASR_MODEL_ID = "B-K/ReVoiceAI-W2V-BERT-Thai-IPA"
G2P_MODEL_ID = "B-K/umt5-thai-g2p-v2-0.5k"

# Speech -> text; judging by the checkpoint name the text is Thai IPA.
asr = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)
# Text -> text, run through the "translation" task; presumably
# grapheme-to-phoneme (per the checkpoint name).
g2p = pipeline(task="translation", model=G2P_MODEL_ID)


def respond(
    audio,
    target
):
    """Return the reference and the recognized phoneme strings.

    Args:
        audio: Value produced by the ``gr.Audio`` input. Either a
            ``(sample_rate, samples)`` tuple (Gradio's numpy format),
            any input the ASR pipeline accepts directly (e.g. a file
            path), or ``None`` when nothing was recorded.
        target: Text to run through the ``g2p`` pipeline.

    Returns:
        A two-item list ``[target_phoneme, input_phoneme]``. The first
        item is the ``g2p`` output for ``target`` with spaces removed; the
        second is the ``asr`` transcription of ``audio`` (an empty string
        when there is no audio to transcribe).
    """
    target_phoneme = g2p(target)[0]["translation_text"].replace(" ", "")

    # Nothing recorded/uploaded: still report the target phonemes.
    if audio is None:
        return [target_phoneme, ""]

    if isinstance(audio, tuple):
        # Gradio's numpy format. The ASR pipeline does not accept the raw
        # tuple, so convert it to the {"sampling_rate", "raw"} dict form
        # with mono float32 samples.
        sample_rate, samples = audio
        samples = np.asarray(samples)
        if samples.size == 0:
            return [target_phoneme, ""]
        if samples.ndim > 1:
            # (n_samples, n_channels) -> mono
            samples = samples.mean(axis=1)
        samples = samples.astype(np.float32)
        peak = np.max(np.abs(samples))
        if peak > 0:  # avoid dividing by zero on pure silence
            samples /= peak
        audio = {"sampling_rate": sample_rate, "raw": samples}

    result = asr(audio)
    # A single input yields a dict; tolerate a one-element list as well so
    # either return shape works.
    if isinstance(result, list):
        result = result[0]
    input_phoneme = result["text"]
    return [target_phoneme, input_phoneme]


"""
For information on how to customize the Interface, peruse the gradio docs: https://www.gradio.app/docs/gradio/interface
"""
# Web UI: one audio input and one text input, wired to respond().
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Audio(label="speak", waveform_options=gr.WaveformOptions(sample_rate=16000)),
        gr.Textbox(label="target"),
    ],
    # respond() returns two strings, so declare one output component per
    # value; with a single "text" output the whole list would be handed to
    # one textbox as a single value.
    outputs=[
        gr.Textbox(label="target phoneme"),
        gr.Textbox(label="input phoneme"),
    ],
)


if __name__ == "__main__":
    # Launch the app only when executed as a script, not when imported.
    demo.launch()