Spaces:
Runtime error
Runtime error
File size: 2,257 Bytes
4e01295 c764bfb 6fa7850 c764bfb 4e01295 6fa7850 4e01295 bed109a 7337c95 bed109a c764bfb d1d085b 1c30e88 58791c2 6fa7850 d1d085b c764bfb d1d085b 6fa7850 d1d085b 6fa7850 d1d085b c764bfb 18b6735 c764bfb c5fd4f3 5b13b80 c5fd4f3 7337c95 868b522 c764bfb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import gradio as gr
from pdfminer.high_level import extract_text
import logging
from typing import cast
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
# Module-level TTS engine shared by the functions in this file. It stays None
# until set_model() replaces it with a TTS instance built from the chosen model.
tts = None
def read_pdf(file):
    """Return the text that pdfminer extracts from an uploaded PDF.

    `file` must expose a `name` attribute holding the path of the PDF on
    disk; the file is opened in binary mode and closed again before the
    function returns.
    """
    with open(file.name, "rb") as pdf_stream:
        return extract_text(pdf_stream)
def set_model(model_name_str):
    """Download a model add-on and make it the active TTS engine.

    The file named `model_name_str` is fetched from the "balacoon/tts" hub
    repository and the module-level `tts` object is rebuilt from it.

    Returns a `(speakers, value)` pair: the list of speakers reported by the
    freshly loaded model and the last entry of that list, which serves as
    the default speaker.
    """
    global tts
    addon_path = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
    tts = TTS(addon_path)
    supported = tts.get_speakers()
    return supported, supported[-1]
def main():
    """Build and launch the Gradio app that reads a PDF aloud.

    The app offers every `*_cpu.addon` file of the "balacoon/tts" hub
    repository as a model choice, takes a PDF upload, and returns audio
    synthesized from the beginning of the PDF's text.
    """
    logging.basicConfig(level=logging.INFO)

    # Only the CPU add-ons of the model repository are offered.
    repo_files = list_repo_files(repo_id="balacoon/tts")
    model_files = [name for name in repo_files if name.endswith("_cpu.addon")]

    # NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are deprecated
    # in Gradio 3 and absent from Gradio 4 - confirm the pinned version.
    model_name_dropdown = gr.inputs.Dropdown(label="Model", choices=model_files)
    # Speaker names are strings in the TTS API, so the choice is one as well.
    speaker_dropdown = gr.inputs.Dropdown(label="Speaker", choices=["92"])
    file_input = gr.inputs.File(label="Select a PDF File", type="file")
    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    # Upper bound on the number of characters sent to the synthesizer.
    max_chars = 1024

    # Bookkeeping for the model currently held in the module-level `tts`, so
    # the model is only (re)loaded when the dropdown selection changes.
    loaded = {"model": None, "speakers": [], "default": None}

    def synthesize_audio(file, model_name_str, speaker_str):
        """Synthesize speech for the text of the uploaded PDF.

        Parameters:
            file: uploaded PDF (object with a `name` path attribute) or None.
            model_name_str: file name of the model add-on to use.
            speaker_str: requested speaker; falls back to the model's default
                speaker when the model does not support it.

        Returns:
            `(sampling_rate, samples)` for the audio output, or None when
            there is no file, no model selection, or no extractable text.
        """
        if file is None or file.name == "":
            logging.info("No file selected.")
            return None
        if not model_name_str:
            logging.info("No model selected.")
            return None

        # Load the model on first use and whenever a different one is picked;
        # without this the global engine would still be None here.
        if tts is None or loaded["model"] != model_name_str:
            speakers, default_speaker = set_model(model_name_str)
            loaded["model"] = model_name_str
            loaded["speakers"] = [str(name) for name in speakers]
            loaded["default"] = str(default_speaker)

        # The speaker list differs per model, so a selection that the loaded
        # model does not know is replaced by that model's default speaker.
        speaker = "" if speaker_str is None else str(speaker_str)
        if speaker not in loaded["speakers"]:
            logging.info(
                "Speaker %r is not supported by %s; using %r instead.",
                speaker,
                model_name_str,
                loaded["default"],
            )
            speaker = loaded["default"]

        text_str = read_pdf(file)[:max_chars]
        if not text_str.strip():
            logging.info("No text could be extracted from the PDF.")
            return None

        engine = cast(TTS, tts)
        samples = engine.synthesize(text_str, speaker)
        return (engine.get_sampling_rate(), samples)

    iface = gr.Interface(
        fn=synthesize_audio,
        inputs=[file_input, model_name_dropdown, speaker_dropdown],
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
        layout="rows",
    )
    # `debug` is an option of launch(), not of the Interface constructor.
    iface.launch(debug=True)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|