Spaces:

Sambhavnoobcoder
/

PDF-text-extractor_sd_1

Runtime error

App Files Community

Sambhavnoobcoder committed on Jun 19, 2023

Commit

527f66c

1 Parent(s): 25e677a

new version

Browse files

Files changed (1) hide show

app.py +17 -96

app.py CHANGED Viewed

@@ -1,106 +1,27 @@
 import gradio as gr
-import pdfminer
-from pdfminer.high_level import extract_text
-import logging
-from typing import cast
-import gradio as gr
-from balacoon_tts import TTS
-from huggingface_hub import hf_hub_download, list_repo_files
-# global tts module, initialized from a model selected
-tts = None
-def read_pdf(file):
-    with open(file.name, "rb") as f:
-        text = extract_text(f)
-    return text
-# iface = gr.Interface(
-#     read_pdf,
-#     gr.inputs.File(),
-#     # gr.outputs.Textbox()
-# )
-# iface.launch()
-def main():
-    logging.basicConfig(level=logging.INFO)
-    with gr.Blocks() as demo:
-        gr.Markdown(
-            """
-            <h1 align="center">PDF TO SPEECH CONVERTER</h1>
-            1. insert a pdf
-            2. Select the model to synthesize with
-            3. Select speaker
-            4. Hit "Generate" and listen to the result!
-            When you select model for the first time,
-            it will take a little time to download it.
-            this project is designed to take the love
-            of reading without the hassle of looking over.
-            if you want an audio book , you now got it .
-            """
-        )
-        with gr.Row(variant="panel"):
-            f=gr.inputs.File("enter the file")
-            text = read_pdf(f)
-        with gr.Row():
-            with gr.Column(variant="panel"):
-                repo_files = list_repo_files(repo_id="balacoon/tts")
-                model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
-                model_name = gr.Dropdown(
-                    label="Model",
-                    choices=model_files,
-                )
-            with gr.Column(variant="panel"):
-                speaker = gr.Dropdown(label="Speaker", choices=[])
-            def set_model(model_name_str: str):
-                """
-                gets value from `model_name`, loads model,
-                re-initializes tts object, gets list of
-                speakers that model supports and set them to `speaker`
-                """
-                model_path = hf_hub_download(
-                    repo_id="balacoon/tts", filename=model_name_str
-                )
-                global tts
-                tts = TTS(model_path)
-                speakers = tts.get_speakers()
-                value = speakers[-1]
-                return gr.Dropdown.update(
-                    choices=speakers, value=value, visible=True
-                )
-            model_name.change(set_model, inputs=model_name, outputs=speaker)
-        with gr.Row(variant="panel"):
-            generate = gr.Button("Generate")
-        with gr.Row(variant="panel"):
-            audio = gr.Audio()
-        def synthesize_audio(text_str: str, speaker_str: str = ""):
-            """
-            gets utterance to synthesize from `text` Textbox
-            and speaker name from `speaker` dropdown list.
-            speaker name might be empty for single-speaker models.
-            Synthesizes the waveform and updates `audio` with it.
-            """
-            if not text_str:
-                logging.info("text or speaker are not provided")
-                return None
-            global tts
-            if len(text_str) > 1024:
-                text_str = text_str[:1024]
-            samples = cast(TTS, tts).synthesize(text_str, speaker_str)
-            return gr.Audio.update(value=(cast(TTS, tts).get_sampling_rate(), samples))
-        generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)
-    demo.launch()
-if __name__ == "__main__":
-    main()

 import gradio as gr
+import pyttsx3
+import PyPDF2
+def pdf_to_audio(pdf_file):
+    pdf_reader = PyPDF2.PdfFileReader(pdf_file)
+    text = ""
+    for page in range(pdf_reader.numPages):
+        text += pdf_reader.getPage(page).extractText()
+    engine = pyttsx3.init()
+    engine.say(text)
+    engine.runAndWait()
+demo = gr.Blocks()
+with demo:
+    pdf_file = gr.File(type="filepath")
+    text = gr.Textbox()
+    b1 = gr.Button("Convert PDF to Audio")
+    b1.click(pdf_to_audio, inputs=pdf_file, outputs=text)
+demo.launch()