import os
import tempfile

# librosa reads LIBROSA_CACHE_DIR when it is first imported, so the variable
# must be in the environment before any third-party import that may load
# librosa (the TTS package below is expected to). Setting it after those
# imports would be too late to take effect.
os.environ["LIBROSA_CACHE_DIR"] = "/tmp/librosa_cache"

import gradio as gr  # noqa: E402
from docx import Document  # noqa: E402
from TTS.api import TTS  # noqa: E402

# Load the TTS model once at import time so every request reuses it.
# Alternative model previously tried here: "tts_models/en/vctk/vits".
_TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(
    model_name=_TTS_MODEL_NAME,
    progress_bar=False,
    gpu=False,
)

def extract_text(docx_file):
    """Return the document's non-blank paragraphs joined by newlines.

    ``docx_file`` is handed straight to ``Document``. Paragraphs whose text
    is empty or whitespace-only are skipped; the paragraphs that are kept
    are emitted unmodified (not stripped).
    """
    paragraphs = Document(docx_file).paragraphs
    non_blank = (p.text for p in paragraphs if p.text.strip())
    return "\n".join(non_blank)

def generate_audio(docx_file):
    """Synthesize a WAV voiceover from an uploaded DOCX script.

    Args:
        docx_file: The upload supplied by the Gradio ``File`` component.
            Either a filesystem path or an object exposing the path as
            ``.name`` is accepted, because the form Gradio passes depends
            on the component's ``type`` setting and the Gradio version.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False`` and is not removed by this function on success.

    Raises:
        gr.Error: If no file was uploaded or the document has no text.
    """
    if docx_file is None:
        raise gr.Error("Please upload a .docx file first.")

    # Plain path strings have no ``.name``; fall back to the value itself.
    docx_path = getattr(docx_file, "name", docx_file)
    text = extract_text(docx_path)
    if not text.strip():
        raise gr.Error("The uploaded document contains no readable text.")

    # Reserve a unique output path, then close our handle before synthesis
    # so the TTS engine is the only writer to the file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        audio_path = temp_audio.name

    try:
        tts.tts_to_file(text=text, file_path=audio_path)
    except Exception:
        # Do not leave an orphaned temp file behind when synthesis fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path

# Gradio UI: a single DOCX upload in, a single audio player out.
_docx_input = gr.File(file_types=[".docx"], label="Upload your DOCX script")
_audio_output = gr.Audio(label="Realistic Voiceover", type="filepath")

interface = gr.Interface(
    fn=generate_audio,
    inputs=_docx_input,
    outputs=_audio_output,
    title="DOCX to Voiceover (Offline, Realistic)",
    description="Upload a .docx script and get a realistic WAV voiceover using Coqui TTS.",
)

# Launch the app as soon as this module is executed.
interface.launch()