# Source capture of a Hugging Face Space (app.py, 2,477 bytes).
# Space status at capture time: Runtime error.
# Revisions contributing to this file: 4e01295, c764bfb, 6fa7850.
import gradio as gr
from pdfminer.high_level import extract_text
import logging
from typing import cast
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
# Module-wide TTS engine shared by the UI callbacks. It starts as None and is
# rebound (via `global tts`) to a `TTS` instance once a model has been
# selected and downloaded; synthesis reads it from here.
tts = None
def read_pdf(file):
    """Return the text extracted from a PDF file.

    Args:
        file: Either a filesystem path (``str``, ``bytes`` or ``os.PathLike``)
            or an upload-style object that exposes the path of the file on
            disk through its ``name`` attribute.

    Returns:
        Whatever ``pdfminer.high_level.extract_text`` produces for the
        document (the extracted text as a string).

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    import os

    # Real paths are used as they are. The explicit type check matters for
    # ``pathlib.Path``, whose own ``.name`` is only the basename and would
    # otherwise be mistaken for an upload wrapper's full path.
    if isinstance(file, (str, bytes, os.PathLike)):
        path = file
    else:
        path = file.name

    with open(path, "rb") as pdf_stream:
        return extract_text(pdf_stream)
def main():
    """Build the PDF-to-speech web UI and start serving it.

    The page offers a model dropdown (every ``*_cpu.addon`` file listed in the
    ``balacoon/tts`` hub repository), a speaker dropdown that is filled in
    once a model has been loaded, a PDF upload, and a button that synthesizes
    speech from the text extracted from the uploaded PDF.

    The UI is assembled with ``gr.Blocks`` because the speaker list depends on
    the selected model: the ``change`` listener has to return an update for
    the speaker dropdown, which a single-function ``gr.Interface`` cannot
    express.
    """
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks(title="PDF TO SPEECH CONVERTER") as demo:
        gr.Markdown("# PDF TO SPEECH CONVERTER")

        repo_files = list_repo_files(repo_id="balacoon/tts")
        model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

        with gr.Row():
            model_name = gr.Dropdown(label="Model", choices=model_files)
            speaker = gr.Dropdown(label="Speaker", choices=[])

        def set_model(model_name_str):
            """
            Gets value from `model_name`, loads the model,
            re-initializes the tts object, and returns an update that puts
            the speakers supported by the model into `speaker`.
            """
            global tts
            if not model_name_str:
                # Selection was cleared: nothing to load, empty the speakers.
                return gr.update(choices=[], value=None)
            model_path = hf_hub_download(
                repo_id="balacoon/tts", filename=model_name_str
            )
            tts = TTS(model_path)
            speakers = tts.get_speakers()
            # Preselect the last speaker, as before; tolerate an empty list.
            value = speakers[-1] if speakers else None
            # Component state lives in the browser, so the new choices must be
            # returned as an update rather than assigned to `speaker` here.
            return gr.update(choices=speakers, value=value)

        model_name.change(set_model, inputs=model_name, outputs=speaker)

        # The upload value is handed to `read_pdf` unchanged.
        file_input = gr.File(label="Select a PDF File")
        generate = gr.Button("Generate")
        audio = gr.Audio(label="Generated Audio")

        def synthesize_audio(file, model_name_str, speaker_str):
            """
            Gets the selected PDF `file`, model name from `model_name`,
            and speaker name from `speaker`. Synthesizes the audio waveform
            from the text extracted from the PDF and returns it as a
            `(sampling_rate, samples)` tuple, or None when there is nothing
            to synthesize.

            `model_name_str` is accepted because the model dropdown is wired
            in as an input; the model itself is already loaded into the
            global `tts` by `set_model`.
            """
            global tts
            if file is None or getattr(file, "name", None) == "":
                logging.info("No file selected.")
                return None
            if tts is None or not speaker_str:
                logging.info("No model or speaker selected.")
                return None

            # Only the first 1024 characters are synthesized (same cap as
            # before); slicing is a no-op for shorter text.
            text_str = read_pdf(file)[:1024]
            if not text_str.strip():
                logging.info("No text could be extracted from the PDF.")
                return None

            engine = cast(TTS, tts)
            samples = engine.synthesize(text_str, speaker_str)
            return (engine.get_sampling_rate(), samples)

        generate.click(
            synthesize_audio,
            inputs=[file_input, model_name, speaker],
            outputs=audio,
        )

    # `debug` is an option of `launch`, not of the UI constructor.
    demo.launch(debug=True)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|