import gradio as gr
import tensorflow as tf
import numpy as np
import soundfile as sf
import os

# Path of the saved Keras model handed to ``load_model`` below.
MODEL_PATH = "model/clone_tts_model.h5"
# Input text is padded or truncated to exactly this many characters
# before it is encoded for the model (see ``synthesize``).
TEXT_MAX_LEN = 100
# Sample rate passed to ``sf.write`` when the generated audio is saved.
SAMPLE_RATE = 22050

# Load the model once at import time so every call to ``synthesize``
# reuses the same in-memory instance instead of reloading it from disk.
model = tf.keras.models.load_model(MODEL_PATH)

def synthesize(text):
    """Generate speech audio for *text* and return the path of the WAV file.

    The text is padded with spaces (or truncated) to exactly
    ``TEXT_MAX_LEN`` characters, encoded as one Unicode code point per
    character, and passed to the module-level ``model`` as a batch of one.
    The first item of the prediction is written to
    ``output/generated.wav`` using ``SAMPLE_RATE``.

    Args:
        text: The text to synthesize. ``None`` is treated as an empty
            string, which (like ``""``) produces an all-spaces input.

    Returns:
        The path of the written audio file, ``"output/generated.wav"``.
    """
    # Guard against None: ``None.ljust`` would raise AttributeError.
    if text is None:
        text = ""

    # Pad first, then slice, so the sequence is always exactly
    # TEXT_MAX_LEN characters long regardless of the input length.
    fixed_width = text.ljust(TEXT_MAX_LEN)[:TEXT_MAX_LEN]
    # Nested list -> array of shape (1, TEXT_MAX_LEN): a batch of one.
    x_input = np.array([[ord(c) for c in fixed_width]])

    # Take the single batch item from the prediction.
    audio = model.predict(x_input)[0]

    # NOTE(review): the output path is fixed, so every call overwrites the
    # previous file. If requests can run concurrently, consider a unique
    # file per request — confirm against the app's queue settings.
    output_path = "output/generated.wav"
    os.makedirs("output", exist_ok=True)
    sf.write(output_path, audio, SAMPLE_RATE)
    return output_path

# UI wiring: a single text box feeds ``synthesize``, and the file path it
# returns is rendered by the audio component.
_text_in = gr.Textbox(label="Enter Text")
_speech_out = gr.Audio(label="Generated Speech", type="filepath")

demo = gr.Interface(
    fn=synthesize,
    inputs=_text_in,
    outputs=_speech_out,
    title="Clone TTS",
    description="A simple Text-to-Speech model trained on the 'clone' dataset.",
)

# Launch the Gradio app only when this file is run as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    demo.launch()