Sambhavnoobcoder's picture
this is a chatgpt modification completely. so if things go to shit, revert back immediately
6fa7850
raw
history blame
2.48 kB
import gradio as gr
from pdfminer.high_level import extract_text
import logging
from typing import cast
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
# Global TTS engine shared by the UI callbacks in main(). It starts out as
# None, is (re)assigned by set_model() when a model is selected, and is read
# by synthesize_audio() to generate the waveform.
tts = None
def read_pdf(file):
    """Return the text extracted from an uploaded PDF.

    Args:
        file: Object whose ``name`` attribute is the path of the PDF on disk.

    Returns:
        Whatever ``extract_text`` (from ``pdfminer.high_level``) produces for
        the opened file.
    """
    pdf_path = file.name
    # Binary mode: the stream is handed to pdfminer as-is, without decoding.
    with open(pdf_path, "rb") as pdf_stream:
        return extract_text(pdf_stream)
def main():
    """Build and launch the PDF-to-speech Gradio app.

    The UI offers every ``*_cpu.addon`` file found in the ``balacoon/tts``
    Hub repository as a model, lists the speakers of the selected model,
    accepts a PDF upload and, on "Generate", synthesizes the first 1024
    characters of the text extracted from that PDF.

    ``gr.Blocks`` is used because the speaker dropdown depends on the model
    dropdown: that requires an event listener whose return value updates
    another component, which a plain ``gr.Interface`` cannot express.

    NOTE(review): written against the Gradio 3.x Blocks API, where
    ``gr.File(type="file")`` passes the callback an object with a ``.name``
    path (which ``read_pdf`` relies on) -- confirm against the pinned
    Gradio version.
    """
    logging.basicConfig(level=logging.INFO)

    # Only the first `max_chars` characters of the PDF text are synthesized.
    max_chars = 1024

    with gr.Blocks(title="PDF TO SPEECH CONVERTER") as demo:
        gr.Markdown("# PDF TO SPEECH CONVERTER")

        repo_files = list_repo_files(repo_id="balacoon/tts")
        model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

        with gr.Row():
            model_name = gr.Dropdown(label="Model", choices=model_files)
            speaker = gr.Dropdown(label="Speaker", choices=[])

        def set_model(model_name_str):
            """
            Gets value from `model_name`, loads the model,
            re-initializes the tts object, and returns an update for
            `speaker` holding the speakers that the model supports.
            """
            global tts
            if not model_name_str:
                # Selection was cleared: nothing to load, empty the speakers.
                return gr.update(choices=[], value=None)
            model_path = hf_hub_download(
                repo_id="balacoon/tts", filename=model_name_str
            )
            tts = TTS(model_path)
            speakers = tts.get_speakers()
            # Pre-select the last speaker, if the model reports any.
            value = speakers[-1] if speakers else None
            # Mutating `speaker.choices` here would never reach the browser;
            # the change has to be returned as an update for the component.
            return gr.update(choices=speakers, value=value)

        model_name.change(set_model, inputs=model_name, outputs=speaker)

        file_input = gr.File(label="Select a PDF File", type="file")
        generate = gr.Button("Generate")
        audio = gr.Audio(label="Generated Audio")

        def synthesize_audio(file, model_name_str, speaker_str):
            """
            Gets the selected PDF `file`, model name from `model_name`,
            and speaker name from `speaker`. Synthesizes the audio waveform
            from the text extracted from the PDF and returns it as a
            `(sampling_rate, samples)` tuple, or None when there is nothing
            to synthesize.
            """
            if file is None or file.name == "":
                logging.info("No file selected.")
                return None
            # `tts` stays None until set_model() has loaded a model.
            if tts is None or not speaker_str:
                logging.info("No model or speaker selected.")
                return None
            text_str = read_pdf(file)[:max_chars]
            if not text_str.strip():
                logging.info("No text could be extracted from the PDF.")
                return None
            engine = cast(TTS, tts)
            samples = engine.synthesize(text_str, speaker_str)
            return (engine.get_sampling_rate(), samples)

        generate.click(
            synthesize_audio,
            inputs=[file_input, model_name, speaker],
            outputs=audio,
        )

    # `debug` is a launch() option, not a constructor option.
    demo.launch(debug=True)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()