"""Gradio app that converts the text of an uploaded PDF to speech."""

import logging
import os

from pdfminer.high_level import extract_text
from huggingface_hub import hf_hub_download
from balacoon_tts import TTS
import gradio as gr

# Hugging Face repository that hosts the TTS model files.
MODEL_REPO_ID = "balacoon/tts"
# Set the desired model file here. hf_hub_download needs the name of a file
# inside the repository, not just the repository id.
# NOTE(review): confirm this file name against the repository's file listing.
MODEL_FILENAME = "en_us_hifi_jets_cpu.addon"
# Only this many leading characters of the PDF text are synthesized.
MAX_TEXT_LENGTH = 1024

# Global TTS module, initialized from a selected model
tts = None


def read_pdf(file):
    """Extract the text of a PDF.

    Args:
        file: A filesystem path (``str`` or ``os.PathLike``), or an object
            that exposes the path of the PDF as its ``name`` attribute.

    Returns:
        The text returned by ``pdfminer``'s ``extract_text`` for that path.
    """
    # Path-like objects also have a ``name`` attribute, but it holds only the
    # basename, so plain paths must be passed through untouched.
    if isinstance(file, (str, os.PathLike)):
        return extract_text(file)
    return extract_text(file.name)


def synthesize_audio(file):
    """Synthesize speech for the beginning of the text of an uploaded PDF.

    Args:
        file: The uploaded PDF, in any form accepted by ``read_pdf``.

    Returns:
        A ``(sampling_rate, samples)`` tuple, the order Gradio expects for
        numpy audio output.

    Raises:
        ValueError: If no file was uploaded or the PDF has no extractable text.
        RuntimeError: If the global ``tts`` model has not been initialized.
    """
    if file is None:
        raise ValueError("No PDF file was uploaded")
    if tts is None:
        raise RuntimeError("TTS model is not initialized; run main() first")

    # Slicing is a no-op for shorter texts, so no length check is needed.
    text_str = read_pdf(file)[:MAX_TEXT_LENGTH]
    if not text_str.strip():
        raise ValueError("No extractable text found in the uploaded PDF")

    # synthesize() needs a speaker supported by the loaded model.
    # NOTE(review): the last listed speaker is an arbitrary default - confirm
    # which voice is wanted.
    speaker = tts.get_speakers()[-1]
    samples = tts.synthesize(text_str, speaker)
    return (tts.get_sampling_rate(), samples)


def main():
    """Load the TTS model and serve the PDF-to-speech Gradio interface."""
    global tts

    logging.basicConfig(level=logging.INFO)

    model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
    tts = TTS(model_path)

    file_input = gr.inputs.File(label="Upload PDF")
    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    iface = gr.Interface(
        fn=synthesize_audio,
        inputs=file_input,
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
        layout="vertical",
    )
    # ``debug`` is an option of launch(), not of the Interface constructor.
    iface.launch(debug=True)


if __name__ == "__main__":
    main()