Spaces:

Sambhavnoobcoder
/

PDF-text-extractor_sd_1

Runtime error

File size: 2,426 Bytes

4e01295
 
c764bfb
 
 
 
 
6fa7850
c764bfb
4e01295
 
6fa7850
 
4e01295
 
c764bfb
 
d1d085b
 
1c30e88
d1d085b
6fa7850
d1d085b
 
 
 
 
 
 
 
 
 
 
 
 
c764bfb
cda7031
 
 
 
 
c764bfb
d1d085b
 
c764bfb
d1d085b
 
 
 
 
 
 
 
 
6fa7850
d1d085b
 
 
6fa7850
d1d085b
 
 
c764bfb
18b6735
c764bfb
c5fd4f3
 
 
 
 
 
 
 
d1d085b
c764bfb

import gradio as gr
from pdfminer.high_level import extract_text
import logging
from typing import cast
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

# Process-wide TTS engine shared by the request handlers defined in main().
# It stays None until set_model() downloads a model file and rebinds it to a
# balacoon_tts.TTS instance.
tts = None

def read_pdf(file):
    """Return the text that pdfminer extracts from an uploaded PDF.

    `file` must expose the path of the PDF on disk through its `name`
    attribute; the file is opened in binary mode and handed to
    `extract_text` as a stream.
    """
    pdf_path = file.name
    with open(pdf_path, "rb") as pdf_stream:
        return extract_text(pdf_stream)

def main():
    """Build and launch the "PDF TO SPEECH CONVERTER" Gradio interface.

    Lists the `*_cpu.addon` model files published in the `balacoon/tts`
    Hugging Face repository, offers them in a dropdown together with a PDF
    upload and a speaker dropdown, and synthesizes speech from the first
    characters of the text extracted from the uploaded PDF.

    The TTS model is loaded lazily on the first request (and reloaded when a
    different model is selected), because nothing else in this interface
    triggers `set_model` before synthesis is requested.
    """
    logging.basicConfig(level=logging.INFO)

    # Upper bound on the number of characters sent to the synthesizer.
    max_chars = 1024

    repo_files = list_repo_files(repo_id="balacoon/tts")
    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
    model_name_dropdown = gr.inputs.Dropdown(label="Model", choices=model_files)
    speaker = gr.inputs.Dropdown(label="Speaker", choices=[])

    # Name of the model file the global `tts` was built from, so that the
    # model is only downloaded/re-created when the selection changes.
    loaded_model_name = None

    def set_model(model_name_str):
        """
        Downloads the model file `model_name_str`, re-initializes the global
        tts object from it, and returns the list of speakers that the model
        supports.
        """
        nonlocal loaded_model_name
        global tts
        model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
        tts = TTS(model_path)
        loaded_model_name = model_name_str
        speakers = tts.get_speakers()
        # Best effort: mirror the speakers onto the dropdown object.
        # NOTE(review): an already rendered page presumably does not pick
        # this up, which is why synthesize_audio falls back to a default
        # speaker on its own.
        speaker.choices = speakers
        if speakers:
            speaker.value = speakers[-1]
        return speakers

    file_input = gr.inputs.File(label="Select a PDF File", type="file")

    def synthesize_audio(file, model_name_str, speaker_str):
        """
        Gets the selected PDF `file`, the model file name `model_name_str`,
        and the speaker name `speaker_str`. Synthesizes the audio waveform
        from the text extracted from the PDF and returns it as a
        `(sampling_rate, samples)` tuple, or None when there is nothing to
        synthesize (no file, no model selected, or no extractable text).
        """
        if file is None or file.name == "":
            logging.info("No file selected.")
            return None
        if not model_name_str:
            logging.info("No model selected.")
            return None

        # Only the leading part of the document is synthesized.
        text_str = read_pdf(file)[:max_chars]
        if not text_str.strip():
            logging.info("No text could be extracted from the PDF.")
            return None

        # Load the model on first use, or when another model was selected.
        if tts is None or model_name_str != loaded_model_name:
            set_model(model_name_str)
        engine = cast(TTS, tts)

        # The speaker dropdown starts without choices, so the submitted
        # value may be empty or belong to another model; default to the
        # last speaker of the loaded model, as set_model does.
        speakers = engine.get_speakers()
        if speakers and speaker_str not in speakers:
            speaker_str = speakers[-1]

        samples = engine.synthesize(text_str, speaker_str)
        return (engine.get_sampling_rate(), samples)

    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    # The model dropdown component itself is the second input. Wrapping
    # set_model in a separate gr.Interface and passing that object here is
    # not valid: an Interface is not an input component.
    iface = gr.Interface(
        fn=synthesize_audio,
        inputs=[file_input, model_name_dropdown, speaker],
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
        layout="rows",
    )
    # `debug` is an option of launch(), not of the Interface constructor.
    iface.launch(debug=True)


# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()