"""Gradio demo that turns text into speech with a pre-trained Keras model.

The model at ``MODEL_PATH`` is loaded once at import time; each request
encodes the input text as a fixed-length row of Unicode code points, runs
the model, and writes the result to a WAV file that Gradio plays back.
"""

import os
import uuid

import gradio as gr
import numpy as np
import soundfile as sf
import tensorflow as tf

MODEL_PATH = "model/clone_tts_model.h5"
TEXT_MAX_LEN = 100
SAMPLE_RATE = 22050
OUTPUT_DIR = "output"

# Load model once
model = tf.keras.models.load_model(MODEL_PATH)


def synthesize(text: str) -> str:
    """Generate speech for *text* and return the path of the WAV file.

    The text is right-padded with spaces (or truncated) to exactly
    ``TEXT_MAX_LEN`` characters, and each character is encoded with
    ``ord``, giving a model input of shape ``(1, TEXT_MAX_LEN)``.

    Args:
        text: The text to speak. ``None`` is treated as an empty string.

    Returns:
        Path of the written WAV file inside ``OUTPUT_DIR``.
    """
    # A cleared textbox may deliver None; fall back to "" so that .ljust
    # does not raise AttributeError.
    padded = (text or "").ljust(TEXT_MAX_LEN)[:TEXT_MAX_LEN]

    # NOTE(review): ord() yields arbitrary Unicode code points; presumably
    # the model was trained on a bounded character range -- confirm that
    # non-ASCII input cannot exceed the model's vocabulary.
    x_input = np.array([[ord(c) for c in padded]])

    # The batch holds a single sample, so take the first prediction.
    audio = model.predict(x_input)[0]

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Use a unique file name per request: a single fixed path would let
    # concurrent requests overwrite each other's audio before playback.
    # NOTE: files accumulate in OUTPUT_DIR and need periodic cleanup.
    output_path = os.path.join(OUTPUT_DIR, f"generated_{uuid.uuid4().hex}.wav")
    sf.write(output_path, audio, SAMPLE_RATE)
    return output_path


demo = gr.Interface(
    fn=synthesize,
    inputs=gr.Textbox(label="Enter Text"),
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Clone TTS",
    description="A simple Text-to-Speech model trained on the 'clone' dataset.",
)

if __name__ == "__main__":
    demo.launch()