import gradio as gr
import pdfminer
from pdfminer.high_level import extract_text
import logging
from typing import cast
import gradio as gr
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
# Global TTS engine. Starts unset and is (re)initialized by `set_model`
# inside `main` whenever the user picks a model from the dropdown.
tts = None
def read_pdf(file):
    """Return the plain text extracted from an uploaded PDF.

    `file` is a file-like object exposing a `.name` path attribute
    (e.g. the value produced by a Gradio File component).
    """
    return extract_text(file.name)
# iface = gr.Interface(
# read_pdf,
# gr.inputs.File(),
# # gr.outputs.Textbox()
# )
# iface.launch()
def main():
    """Build and launch the PDF-to-speech Gradio demo.

    UI flow: upload a PDF, pick a Balacoon TTS model (downloaded on first
    selection from the `balacoon/tts` HF repo), pick a speaker, then hit
    "Generate" to synthesize the extracted text into audio.
    """
    logging.basicConfig(level=logging.INFO)
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            PDF TO SPEECH CONVERTER
            1. insert a pdf
            2. Select the model to synthesize with
            3. Select speaker
            4. Hit "Generate" and listen to the result!
            When you select model for the first time,
            it will take a little time to download it.
            this project is designed to take the love
            of reading without the hassle of looking over.
            if you want an audio book , you now got it .
            """
        )
        with gr.Row(variant="panel"):
            # BUG FIX: the original did `f = gr.inputs.File()` (the
            # `gr.inputs` namespace was removed in Gradio 3.x) and then
            # called `read_pdf(f)` at UI-build time, passing the component
            # object itself instead of an uploaded file; the resulting
            # string was also wired as a click input. The component is now
            # passed to the click handler so the PDF is read at
            # generation time.
            pdf_file = gr.File(label="PDF")
        with gr.Row():
            with gr.Column(variant="panel"):
                # Offer only the CPU add-on models from the balacoon/tts repo.
                repo_files = list_repo_files(repo_id="balacoon/tts")
                model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
                model_name = gr.Dropdown(
                    label="Model",
                    choices=model_files,
                )
            with gr.Column(variant="panel"):
                # Populated by `set_model` once a model has been chosen.
                speaker = gr.Dropdown(label="Speaker", choices=[])

                def set_model(model_name_str: str):
                    """
                    gets value from `model_name`, loads model,
                    re-initializes tts object, gets list of
                    speakers that model supports and set them to `speaker`
                    """
                    model_path = hf_hub_download(
                        repo_id="balacoon/tts", filename=model_name_str
                    )
                    global tts
                    tts = TTS(model_path)
                    speakers = tts.get_speakers()
                    # Default the dropdown to the last speaker reported.
                    value = speakers[-1]
                    return gr.Dropdown.update(
                        choices=speakers, value=value, visible=True
                    )

                model_name.change(set_model, inputs=model_name, outputs=speaker)
        with gr.Row(variant="panel"):
            generate = gr.Button("Generate")
        with gr.Row(variant="panel"):
            audio = gr.Audio()

        def synthesize_audio(pdf, speaker_str: str = ""):
            """
            Reads the uploaded PDF from `pdf_file`, extracts its text and
            synthesizes it with the currently loaded model and the speaker
            chosen in the `speaker` dropdown (may be empty for
            single-speaker models). Returns an update for `audio`, or
            None when no PDF was uploaded, no model has been loaded yet,
            or the PDF yields no text.
            """
            if pdf is None:
                logging.info("pdf file is not provided")
                return None
            global tts
            # Guard: the user may hit "Generate" before selecting a model.
            if tts is None:
                logging.info("no model is loaded yet")
                return None
            text_str = read_pdf(pdf)
            if not text_str:
                logging.info("no text could be extracted from the pdf")
                return None
            # Keep the synthesis request bounded in size.
            if len(text_str) > 1024:
                text_str = text_str[:1024]
            samples = cast(TTS, tts).synthesize(text_str, speaker_str)
            return gr.Audio.update(
                value=(cast(TTS, tts).get_sampling_rate(), samples)
            )

        generate.click(synthesize_audio, inputs=[pdf_file, speaker], outputs=audio)
    demo.launch()
# Script entry point: build the Gradio UI and start serving it.
if __name__ == "__main__":
    main()