Sambhavnoobcoder committed
Commit 9404020 · 1 Parent(s): ab7a1ab

Reverted to the commit before moving to ChatGPT

Files changed (1)
  1. app.py +98 -20
app.py CHANGED
@@ -1,27 +1,105 @@
import gradio as gr
- import pdf2speech
- from gtts import gTTS
- from tempfile import NamedTemporaryFile
- import os
-
- def convert_pdf_to_speech(pdf_file):
-     text = pdf2speech.extract_text_from_pdf(pdf_file.name)
-     tts = gTTS(text=text, lang='en')
-     audio_file = NamedTemporaryFile(suffix=".mp3", delete=False)
-     tts.save(audio_file.name)
-     audio_file.close()
-     return audio_file.name
-
- def pdf_to_speech(pdf_file):
-     audio_file_path = convert_pdf_to_speech(pdf_file)
-     return audio_file_path
+ import pdfminer
+ from pdfminer.high_level import extract_text
+ import logging
+ from typing import cast
+
+ import gradio as gr
+ from balacoon_tts import TTS
+ from huggingface_hub import hf_hub_download, list_repo_files
+
+ # global tts module, initialized from the selected model
+ tts = None
+
+ def read_pdf(file):
+     text = extract_text(file.name)
+     return text
+
+ # iface = gr.Interface(
+ #     read_pdf,
+ #     gr.inputs.File(),
+ #     # gr.outputs.Textbox()
+ # )
+ # iface.launch()
+

def main():
-     pdf_input = gr.inputs.File(label="Upload PDF", type="file")
-     audio_output = gr.outputs.Audio(label="Generated Audio")
-
-     iface = gr.Interface(fn=pdf_to_speech, inputs=pdf_input, outputs=audio_output, title="PDF to Speech Converter")
-     iface.launch()
+     logging.basicConfig(level=logging.INFO)
+     with gr.Blocks() as demo:
+         gr.Markdown(
+             """
+             <h1 align="center">PDF TO SPEECH CONVERTER</h1>
+             1. Insert a PDF.
+             2. Select the model to synthesize with.
+             3. Select a speaker.
+             4. Hit "Generate" and listen to the result!
+             When you select a model for the first time,
+             it will take a little time to download.
+             This project is designed to bring you the love
+             of reading without the hassle of poring over the page.
+             If you want an audiobook, you now have one.
+             """
+         )
+
+         with gr.Row(variant="panel"):
+             f = gr.inputs.File("enter the file")
+             text = read_pdf(f)
+
+         with gr.Row():
+             with gr.Column(variant="panel"):
+                 repo_files = list_repo_files(repo_id="balacoon/tts")
+                 model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
+                 model_name = gr.Dropdown(
+                     label="Model",
+                     choices=model_files,
+                 )
+             with gr.Column(variant="panel"):
+                 speaker = gr.Dropdown(label="Speaker", choices=[])
+
+         def set_model(model_name_str: str):
+             """
+             gets the value from `model_name`, loads the model,
+             re-initializes the tts object, gets the list of
+             speakers that the model supports and sets them on `speaker`
+             """
+             model_path = hf_hub_download(
+                 repo_id="balacoon/tts", filename=model_name_str
+             )
+             global tts
+             tts = TTS(model_path)
+             speakers = tts.get_speakers()
+             value = speakers[-1]
+             return gr.Dropdown.update(
+                 choices=speakers, value=value, visible=True
+             )
+
+         model_name.change(set_model, inputs=model_name, outputs=speaker)
+
+         with gr.Row(variant="panel"):
+             generate = gr.Button("Generate")
+         with gr.Row(variant="panel"):
+             audio = gr.Audio()
+
+         def synthesize_audio(text_str: str, speaker_str: str = ""):
+             """
+             gets the utterance to synthesize from `text`
+             and the speaker name from the `speaker` dropdown.
+             The speaker name may be empty for single-speaker models.
+             Synthesizes the waveform and updates `audio` with it.
+             """
+             if not text_str:
+                 logging.info("text or speaker are not provided")
+                 return None
+             global tts
+             if len(text_str) > 1024:
+                 text_str = text_str[:1024]
+             samples = cast(TTS, tts).synthesize(text_str, speaker_str)
+             return gr.Audio.update(value=(cast(TTS, tts).get_sampling_rate(), samples))
+
+         generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)
+
+     demo.launch()


if __name__ == "__main__":
    main()
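
For reference, a minimal standalone sketch of the pipeline this revert restores: extract text from a PDF with pdfminer, fetch a Balacoon model from the balacoon/tts repo, and synthesize speech. It reuses only the calls that appear in app.py above; the PDF path and the choice of the first listed model are placeholder assumptions.

```python
import logging

from pdfminer.high_level import extract_text
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

logging.basicConfig(level=logging.INFO)

# Same filter app.py uses to list CPU models in the balacoon/tts repo.
model_files = [f for f in list_repo_files(repo_id="balacoon/tts") if f.endswith("_cpu.addon")]
model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_files[0])  # placeholder: first model

tts = TTS(model_path)
speaker = tts.get_speakers()[-1]  # app.py defaults to the last listed speaker

# "example.pdf" is a placeholder path; app.py likewise truncates the text to 1024 characters.
text = extract_text("example.pdf")[:1024]
samples = tts.synthesize(text, speaker)
logging.info("synthesized %d samples at %d Hz", len(samples), tts.get_sampling_rate())
```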