"""Gradio app: upload a DOCX script and get back a WAV voiceover (Coqui TTS)."""

import os
import tempfile

# Set before the third-party imports below: librosa (a dependency of the TTS
# package) configures its cache from this variable when it is first imported,
# so assigning it after `from TTS.api import TTS` has no effect.
os.environ["LIBROSA_CACHE_DIR"] = "/tmp/librosa_cache"

import gradio as gr  # noqa: E402
from docx import Document  # noqa: E402
from TTS.api import TTS  # noqa: E402

# Load the TTS model once at start-up so every request reuses it.
# Alternative model (kept for reference): "tts_models/en/vctk/vits".
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)


def extract_text(docx_file):
    """Return the text of a DOCX file, one non-blank paragraph per line.

    Args:
        docx_file: Path (or file-like object) accepted by ``docx.Document``.

    Returns:
        The paragraph texts joined with newlines; paragraphs that are empty
        or whitespace-only are skipped. Returns ``""`` when nothing remains.
    """
    doc = Document(docx_file)
    return "\n".join(para.text for para in doc.paragraphs if para.text.strip())


def generate_audio(docx_file):
    """Synthesize a WAV voiceover from an uploaded DOCX file.

    Args:
        docx_file: Value delivered by the ``gr.File`` input. Either an object
            exposing the upload's path as ``.name`` or a plain path string,
            depending on the Gradio version/configuration.

    Returns:
        Path of the generated ``.wav`` file. The file is not deleted here;
        it is returned so the ``gr.Audio`` output can serve it.

    Raises:
        gr.Error: If no file was uploaded or the document contains no text.
    """
    if docx_file is None:
        raise gr.Error("Please upload a .docx file first.")

    # Accept both the tempfile-wrapper form (has .name) and a bare path string.
    docx_path = getattr(docx_file, "name", docx_file)

    text = extract_text(docx_path)
    if not text:
        raise gr.Error("The uploaded document contains no readable text.")

    # Reserve a unique path and close our handle before the TTS engine writes
    # to it; writing to a path that is still open fails on some platforms.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts.tts_to_file(text=text, file_path=wav_path)
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        os.remove(wav_path)
        raise
    return wav_path


# Gradio UI
interface = gr.Interface(
    fn=generate_audio,
    inputs=gr.File(file_types=[".docx"], label="Upload your DOCX script"),
    outputs=gr.Audio(label="Realistic Voiceover", type="filepath"),
    title="DOCX to Voiceover (Offline, Realistic)",
    description="Upload a .docx script and get a realistic WAV voiceover using Coqui TTS.",
)

interface.launch()