"""Gradio app that converts the text of an uploaded PDF to speech."""

import logging
from typing import Optional, cast

import gradio as gr
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
from pdfminer.high_level import extract_text

# Hugging Face repository the model files are listed in and downloaded from.
MODEL_REPO_ID = "balacoon/tts"
# Only this many leading characters of the extracted PDF text are synthesized.
MAX_TEXT_CHARS = 1024

# Global tts module, initialized from a model selected
tts: Optional[TTS] = None
# File name of the model currently held in `tts`; None until `set_model` runs.
_loaded_model_name: Optional[str] = None


def read_pdf(file):
    """Return the text that pdfminer extracts from an uploaded PDF.

    Args:
        file: Object whose ``name`` attribute is the path of the PDF on disk
            (presumably Gradio's uploaded-file wrapper -- confirm).

    Returns:
        The result of ``extract_text`` on the opened file.
    """
    with open(file.name, "rb") as f:
        return extract_text(f)


def set_model(model_name_str):
    """Download a model and install it as the global ``tts`` object.

    Args:
        model_name_str: File name of the model inside ``MODEL_REPO_ID``.

    Returns:
        A ``(speakers, value)`` tuple: the speakers reported by the freshly
        loaded model, and the first of them (``None`` when there are none).
    """
    global tts, _loaded_model_name
    model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=model_name_str)
    tts = TTS(model_path)
    # Remember which model is loaded so synthesis can skip redundant reloads.
    _loaded_model_name = model_name_str
    speakers = tts.get_speakers()
    value = speakers[0] if speakers else None
    return speakers, value


def synthesize_audio(file, model_name_str, speaker_str):
    """Synthesize speech from the text of the selected PDF.

    Args:
        file: Uploaded PDF (see ``read_pdf``); may be ``None``.
        model_name_str: Model file name chosen in the "Model" dropdown.
        speaker_str: Speaker chosen in the "Speaker" dropdown.

    Returns:
        ``(sampling_rate, samples)`` on success, or ``None`` when there is no
        file, no usable model, or no text to read.
    """
    if file is None or file.name == "":
        logging.info("No file selected.")
        return None

    # Load on demand: do not rely on a dropdown callback having populated the
    # global `tts` before the first synthesis request arrives.
    if model_name_str and model_name_str != _loaded_model_name:
        set_model(model_name_str)
    if tts is None:
        logging.info("No model selected.")
        return None
    engine = cast(TTS, tts)

    # Slicing already handles texts shorter than the limit.
    text_str = read_pdf(file)[:MAX_TEXT_CHARS]
    if not text_str.strip():
        logging.info("No text could be extracted from the PDF.")
        return None

    # The speaker dropdown is created without choices, so the incoming value
    # may not be a speaker of the loaded model; fall back to the first one.
    speakers = engine.get_speakers()
    if speakers and speaker_str not in speakers:
        logging.info(
            "Speaker %r is not available; using %r.", speaker_str, speakers[0]
        )
        speaker_str = speakers[0]

    samples = engine.synthesize(text_str, speaker_str)
    return engine.get_sampling_rate(), samples


def main():
    """Build the Gradio interface and launch it."""
    logging.basicConfig(level=logging.INFO)

    repo_files = list_repo_files(repo_id=MODEL_REPO_ID)
    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

    model_name_dropdown = gr.inputs.Dropdown(label="Model", choices=model_files)
    speaker_dropdown = gr.inputs.Dropdown(label="Speaker", choices=[])
    file_input = gr.inputs.File(label="Select a PDF File", type="file")
    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    iface = gr.Interface(
        fn=synthesize_audio,
        inputs=[file_input, model_name_dropdown, speaker_dropdown],
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
        layout="rows",
        debug=True,
    )

    # NOTE(review): `set_action` does not appear to be a documented Gradio
    # component method -- confirm against the installed version. The hook is
    # registered once (it used to be registered twice) and only when present,
    # so a missing method no longer aborts the app before launch;
    # `synthesize_audio` loads the selected model itself either way.
    register_action = getattr(model_name_dropdown, "set_action", None)
    if callable(register_action):
        register_action(set_model)
    else:
        logging.warning(
            "Dropdown has no set_action(); the model is loaded on first synthesis."
        )

    iface.launch()


if __name__ == "__main__":
    main()