# Hugging Face Space: Sambhavnoobcoder/PDF-text-extractor_sd_1
# Space status when this file was captured: "Runtime error" (file size: 1,605 bytes).
# The web-view page header and per-line commit-hash gutter were dropped here
# because they are not part of the program.

import gradio as gr
from pdfminer.high_level import extract_text
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

def read_pdf(file):
    """Return the text extracted from an uploaded PDF.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path of the uploaded file as ``.name``
            (e.g. a temporary-file wrapper handed over by the upload widget).

    Returns:
        The text produced by pdfminer's ``extract_text`` for that file.
    """
    import os

    # Paths are checked first on purpose: ``pathlib.Path`` also has a ``.name``
    # attribute, but it holds only the basename, not a usable location.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = file
    else:
        pdf_path = file.name

    with open(pdf_path, "rb") as pdf_stream:
        return extract_text(pdf_stream)

# Upper bound on the number of characters sent to the synthesizer per request.
MAX_TEXT_CHARS = 1024

# Active TTS engine and the addon file it was built from. Both stay ``None``
# until a model has been loaded, so handlers can test for that state instead
# of hitting an undefined global.
tts = None
_loaded_model_name = None


def _load_model(model_name_str):
    """Make ``model_name_str`` the active TTS engine and return its speakers.

    The addon file is fetched from the ``balacoon/tts`` hub repository. The
    engine is rebuilt only when the requested addon differs from the one that
    is currently loaded.
    """
    global tts, _loaded_model_name
    if tts is None or _loaded_model_name != model_name_str:
        model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
        tts = TTS(model_path)
        _loaded_model_name = model_name_str
    return tts.get_speakers()


def set_model(model_name_str):
    """Load the selected model and produce an update for the speaker dropdown.

    Meant to be wired as the ``change`` handler of the model dropdown with the
    speaker dropdown as its output; it no longer reaches for UI components
    through global names.

    Args:
        model_name_str: Addon file name inside the ``balacoon/tts`` repository.

    Returns:
        A Gradio update that replaces the speaker choices with the speakers of
        the loaded model and preselects the first one. When no model is
        selected, or the model reports no speakers, the selection is cleared.
    """
    if not model_name_str:
        return gr.update(choices=[], value=None)
    speakers = list(_load_model(model_name_str))
    return gr.update(choices=speakers, value=speakers[0] if speakers else None)


def synthesize_audio(file, model_name_str, speaker_str):
    """Synthesize speech for the beginning of the uploaded PDF.

    Args:
        file: Uploaded PDF as delivered by the file component.
        model_name_str: Addon file name selected in the model dropdown.
        speaker_str: Speaker selected in the speaker dropdown.

    Returns:
        The pair ``(tts.get_sampling_rate(), samples)`` for the audio output.

    Raises:
        gr.Error: If the file, model or speaker is missing, or if no text
            could be extracted from the PDF.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file.")
    if not model_name_str:
        raise gr.Error("Please select a model.")
    if not speaker_str:
        raise gr.Error("Please select a speaker.")

    # Ensures ``tts`` exists and corresponds to the selected model even if the
    # dropdown's change event has not run in this process.
    _load_model(model_name_str)

    # Only the first MAX_TEXT_CHARS characters are synthesized.
    text_str = read_pdf(file)[:MAX_TEXT_CHARS]
    if not text_str.strip():
        raise gr.Error("No text could be extracted from the PDF.")

    samples = tts.synthesize(text_str, speaker_str)
    return (tts.get_sampling_rate(), samples)


def main():
    """Build the PDF-to-speech UI and start the Gradio server."""
    repo_files = list_repo_files(repo_id="balacoon/tts")
    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

    # ``gr.Blocks`` is used instead of ``gr.Interface`` because the speaker
    # choices have to be refreshed whenever a different model is picked, which
    # requires an event whose output is another input component.
    with gr.Blocks(title="PDF TO SPEECH CONVERTER") as demo:
        gr.Markdown("# PDF TO SPEECH CONVERTER")

        with gr.Row():
            # ``type`` is left at the installed Gradio's default.
            # NOTE(review): confirm the value delivered to the handler (file
            # wrapper vs. path) matches what ``read_pdf`` accepts.
            file_input = gr.File(label="Select a PDF File")

        with gr.Row():
            model_name_dropdown = gr.Dropdown(label="Model", choices=model_files)
            speaker_dropdown = gr.Dropdown(label="Speaker", choices=[])

        submit_button = gr.Button("Submit")
        audio = gr.Audio(label="Generated Audio")

        model_name_dropdown.change(
            set_model,
            inputs=model_name_dropdown,
            outputs=speaker_dropdown,
        )
        submit_button.click(
            synthesize_audio,
            inputs=[file_input, model_name_dropdown, speaker_dropdown],
            outputs=audio,
        )

    demo.launch(debug=True)


if __name__ == "__main__":
    main()