"""Gradio demo for OpenF5 TTS running on a Hugging Face Space.

The app downloads an OpenF5 checkpoint on first use, caches the loaded model
per variant, and synthesizes speech for the given text using a reference
audio clip.
"""

# NOTE(review): `spaces` is deliberately kept as the first import, as in the
# original file — presumably so it is loaded before any torch/CUDA-using
# package; confirm against the ZeroGPU docs before reordering.
import spaces
import tempfile
import gradio as gr
from f5_tts.api import F5TTS
from huggingface_hub import hf_hub_download
import os

REPO_ID = "mrfakename/openf5-v2"
VARIANTS = ["model_1000000"]

# Cache of loaded models keyed by variant name (global inside GPU process).
models = {}


def _load_model(variant):
    """Return the F5TTS model for *variant*, downloading and caching it on first use."""
    if variant not in models:
        print(f"⏬ Loading model for variant: {variant}")
        token = os.getenv("HF_TOKEN")
        ckpt_file = hf_hub_download(REPO_ID, f"{variant}.pt", token=token)
        vocab_file = hf_hub_download(REPO_ID, "vocab.txt", token=token)
        models[variant] = F5TTS(ckpt_file=ckpt_file, vocab_file=vocab_file)
    return models[variant]


@spaces.GPU
def generate(text, ref_audio, variant, progress=gr.Progress()):
    """Synthesize *text* with the selected model variant and return a WAV path.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        ref_audio: Filesystem path of the reference audio clip.
        variant: Checkpoint name; must be one of ``VARIANTS``.
        progress: Gradio progress tracker forwarded to ``F5TTS.infer``. The
            ``gr.Progress()`` default is how Gradio injects the tracker.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is not deleted by this function.

    Raises:
        gr.Error: If the text is blank, the reference audio is missing, or
            the variant is not listed in ``VARIANTS``.
    """
    # Fail early with a user-facing message instead of an opaque model error.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not ref_audio:
        raise gr.Error("Please provide a reference audio clip.")
    # The variant name is interpolated into the checkpoint filename, so only
    # accept the names this app actually offers.
    if variant not in VARIANTS:
        raise gr.Error(f"Unknown model variant: {variant}")

    api = _load_model(variant)

    # Reserve a unique output path, then close our handle before the model
    # writes to it, so two open handles never target the same file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        out_path = f.name

    api.infer(
        ref_file=ref_audio,
        # Empty reference transcript is passed through unchanged; presumably
        # F5TTS transcribes the reference clip itself — TODO confirm.
        ref_text="",
        gen_text=text,
        progress=progress,
        file_wave=out_path,
    )
    print(out_path)
    return out_path


with gr.Blocks() as demo:
    gr.Markdown("# Demo for OpenF5 TTS\n\nTry out [OpenF5-TTS](https://huggingface.co/mrfakename/OpenF5-TTS)")
    textbox = gr.Textbox(label="Text")
    audio = gr.Audio(label="Reference Audio", type="filepath")
    variant = gr.Radio(choices=VARIANTS, value=VARIANTS[0], label="Variant")
    btn = gr.Button("Generate", variant="primary")
    output = gr.Audio(label="Output", type="filepath")
    btn.click(generate, [textbox, audio, variant], outputs=[output])

demo.queue().launch()