Spaces:
Runtime error
Runtime error
File size: 2,426 Bytes
4e01295 c764bfb 6fa7850 c764bfb 4e01295 6fa7850 4e01295 c764bfb d1d085b 1c30e88 d1d085b 6fa7850 d1d085b c764bfb cda7031 c764bfb d1d085b c764bfb d1d085b 6fa7850 d1d085b 6fa7850 d1d085b c764bfb 18b6735 c764bfb c5fd4f3 d1d085b c764bfb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
from pdfminer.high_level import extract_text
import logging
from typing import cast
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
# Module-level TTS engine shared by the Gradio callbacks defined in main().
# It stays None until a model has been loaded; set_model() rebinds it via
# `global tts`, and synthesize_audio() reads it to produce audio.
tts = None
def read_pdf(file):
    """Return the text extracted from an uploaded PDF.

    `file` is any object whose `name` attribute is the path of the PDF on
    disk. The file is opened in binary mode and handed to pdfminer's
    `extract_text`; whatever that call returns is passed back unchanged.
    """
    pdf_path = file.name
    with open(pdf_path, "rb") as pdf_stream:
        return extract_text(pdf_stream)
def main():
    """Build and launch the PDF-to-speech Gradio interface.

    The list of selectable models is every file in the `balacoon/tts` hub
    repository whose name ends with `_cpu.addon`. The user picks a PDF, a
    model and (optionally) a speaker; the first 1024 characters of the PDF
    text are synthesized and returned as audio.

    The model is loaded lazily inside the synthesis callback, so the shared
    module-level `tts` object is always initialized before it is used.
    """
    logging.basicConfig(level=logging.INFO)

    # Upper bound on the number of characters sent to the synthesizer.
    max_chars = 1024

    repo_files = list_repo_files(repo_id="balacoon/tts")
    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

    model_name_dropdown = gr.inputs.Dropdown(label="Model", choices=model_files)
    # NOTE(review): the speaker list depends on the selected model, and a
    # plain gr.Interface offers no hook to refill this dropdown after a model
    # change, so it starts empty. synthesize_audio() falls back to the model's
    # last speaker whenever the submitted value is not valid for that model.
    speaker = gr.inputs.Dropdown(label="Speaker", choices=[])
    file_input = gr.inputs.File(label="Select a PDF File", type="file")
    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    # Name of the model currently held in the global `tts`, so the same model
    # is not downloaded and re-initialized on every request.
    loaded_model_name = None

    def set_model(model_name_str):
        """
        Makes sure the global `tts` object holds the model `model_name_str`,
        downloading and (re-)initializing it only when a different model is
        requested. Returns the list of speakers the model supports.
        """
        nonlocal loaded_model_name
        global tts
        if tts is None or model_name_str != loaded_model_name:
            model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
            tts = TTS(model_path)
            loaded_model_name = model_name_str
        return tts.get_speakers()

    def synthesize_audio(file, model_name_str, speaker_str):
        """
        Gets the selected PDF `file`, the model name and the speaker name.
        Synthesizes the audio waveform from the text extracted from the PDF
        and returns it as a `(sampling_rate, samples)` tuple, or None when
        there is nothing to synthesize (no file, no model, no speaker
        available, or no extractable text).
        """
        if file is None or file.name == "":
            logging.info("No file selected.")
            return None
        if not model_name_str:
            logging.info("No model selected.")
            return None

        speakers = set_model(model_name_str)
        if not speakers:
            logging.info("Model %s reports no speakers.", model_name_str)
            return None
        if speaker_str not in speakers:
            # Same default the original set_model() picked: the last speaker.
            speaker_str = speakers[-1]

        # Slicing is a no-op for shorter texts, so no length check is needed.
        text_str = read_pdf(file)[:max_chars]
        if not text_str.strip():
            logging.info("No text could be extracted from the PDF.")
            return None

        engine = cast(TTS, tts)
        samples = engine.synthesize(text_str, speaker_str)
        return (engine.get_sampling_rate(), samples)

    iface = gr.Interface(
        fn=synthesize_audio,
        # Input components, in the order of synthesize_audio's parameters.
        inputs=[file_input, model_name_dropdown, speaker],
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
    )
    # `debug` is an option of launch(), not of the Interface constructor.
    iface.launch(debug=True)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|