import gradio as gr
from pdfminer.high_level import extract_text
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

def read_pdf(file):
    """Extract the text content of a PDF file.

    Args:
        file: Either a filesystem path given as a ``str``, or an object whose
            ``name`` attribute holds the path (presumably the upload wrapper
            Gradio hands to the callback -- confirm against the installed
            Gradio version).

    Returns:
        The value returned by ``pdfminer.high_level.extract_text`` for the
        opened file.

    Raises:
        OSError: If the path cannot be opened for reading.
    """
    # A plain string already is the path; anything else is expected to carry
    # it in ``.name``, which is the only form the original code supported.
    pdf_path = file if isinstance(file, str) else file.name
    with open(pdf_path, "rb") as pdf_stream:
        return extract_text(pdf_stream)

def set_model(model_name_str):
    """Load a Balacoon model and describe the matching speaker choices.

    Downloads ``model_name_str`` from the ``balacoon/tts`` Hugging Face repo,
    rebinds the module-level ``tts`` engine to it and asks the engine which
    speakers it offers.

    Args:
        model_name_str: File name of the model inside ``balacoon/tts``. A
            falsy value (nothing selected) skips loading and leaves any
            previously loaded engine untouched.

    Returns:
        A ``gr.update(...)`` payload carrying the new ``choices`` and
        ``value``. It is meant to be routed to the speaker dropdown through
        the ``outputs=`` argument of an event listener; this function does
        not touch any UI component itself.
    """
    global tts
    if not model_name_str:
        return gr.update(choices=[], value=None)
    model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
    tts = TTS(model_path)
    speakers = tts.get_speakers()
    # Preselect the first speaker so a synthesis request is valid right away.
    return gr.update(choices=speakers, value=speakers[0] if speakers else None)


def synthesize_audio(file, model_name_str, speaker_str):
    """Turn the first 1024 characters of a PDF into speech.

    Args:
        file: Uploaded PDF, in whatever form ``read_pdf`` accepts.
        model_name_str: Model file name; used to load the engine on demand
            when no model has been loaded yet.
        speaker_str: Speaker to synthesize with. When falsy, the first
            speaker reported by the loaded model is used.

    Returns:
        A ``(sampling_rate, samples)`` tuple built from the engine's
        ``get_sampling_rate()`` and ``synthesize()`` results.

    Raises:
        ValueError: If no file was given, no model is loaded or selected,
            the model reports no speakers, or the PDF yields no text.
    """
    if file is None:
        raise ValueError("Please upload a PDF file first.")

    # ``tts`` only exists once set_model() has run; load it on demand instead
    # of failing with a NameError when synthesis is requested first.
    if globals().get("tts") is None:
        if not model_name_str:
            raise ValueError("Please select a model first.")
        set_model(model_name_str)

    # Only the first 1024 characters are synthesized, as before.
    text_str = read_pdf(file)[:1024]
    if not text_str.strip():
        raise ValueError("No text could be extracted from the PDF.")

    if not speaker_str:
        speakers = tts.get_speakers()
        if not speakers:
            raise ValueError("The selected model does not provide any speakers.")
        speaker_str = speakers[0]

    samples = tts.synthesize(text_str, speaker_str)
    return (tts.get_sampling_rate(), samples)


def main():
    """Build the PDF-to-speech UI and start the Gradio server.

    The model dropdown lists every ``*_cpu.addon`` file found in the
    ``balacoon/tts`` repo. Changing it runs ``set_model`` and sends the
    result to the speaker dropdown; the button runs ``synthesize_audio`` and
    sends the result to the audio player.
    """
    repo_files = list_repo_files(repo_id="balacoon/tts")
    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

    with gr.Blocks(title="PDF TO SPEECH CONVERTER") as demo:
        gr.Markdown("# PDF TO SPEECH CONVERTER")
        with gr.Row():
            file_input = gr.File(label="Select a PDF File")
            model_name_dropdown = gr.Dropdown(label="Model", choices=model_files)
            speaker_dropdown = gr.Dropdown(label="Speaker", choices=[])
        synthesize_button = gr.Button("Synthesize")
        audio = gr.Audio(label="Generated Audio")

        # The handler's return value is delivered to ``speaker_dropdown``;
        # that is how the speaker list follows the selected model.
        model_name_dropdown.change(
            fn=set_model,
            inputs=model_name_dropdown,
            outputs=speaker_dropdown,
        )
        synthesize_button.click(
            fn=synthesize_audio,
            inputs=[file_input, model_name_dropdown, speaker_dropdown],
            outputs=audio,
        )

    # ``debug`` is an option of launch(), not of the UI constructor.
    demo.launch(debug=True)


# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()