Sambhavnoobcoder committed
Commit 9404020 · 1 Parent(s): ab7a1ab

Reverted to the commit before moving to ChatGPT

Files changed (1)
  1. app.py +98 -20
app.py CHANGED
@@ -1,27 +1,105 @@
import gradio as gr
- import pdf2speech
- from gtts import gTTS
- from tempfile import NamedTemporaryFile
- import os
-
- def convert_pdf_to_speech(pdf_file):
-     text = pdf2speech.extract_text_from_pdf(pdf_file.name)
-     tts = gTTS(text=text, lang='en')
-     audio_file = NamedTemporaryFile(suffix=".mp3", delete=False)
-     tts.save(audio_file.name)
-     audio_file.close()
-     return audio_file.name
-
- def pdf_to_speech(pdf_file):
-     audio_file_path = convert_pdf_to_speech(pdf_file)
-     return audio_file_path
+ import pdfminer
+ from pdfminer.high_level import extract_text
+ import logging
+ from typing import cast
+
+ import gradio as gr
+ from balacoon_tts import TTS
+ from huggingface_hub import hf_hub_download, list_repo_files
+
+ # global tts module, initialized from the selected model
+ tts = None
+
+ def read_pdf(file):
+     text = extract_text(file.name)
+     return text
+
+ # iface = gr.Interface(
+ #     read_pdf,
+ #     gr.inputs.File(),
+ #     # gr.outputs.Textbox()
+ # )
+ # iface.launch()
+

def main():
-     pdf_input = gr.inputs.File(label="Upload PDF", type="file")
-     audio_output = gr.outputs.Audio(label="Generated Audio")
-
-     iface = gr.Interface(fn=pdf_to_speech, inputs=pdf_input, outputs=audio_output, title="PDF to Speech Converter")
-     iface.launch()
+     logging.basicConfig(level=logging.INFO)
+     with gr.Blocks() as demo:
+         gr.Markdown(
+             """
+             <h1 align="center">PDF TO SPEECH CONVERTER</h1>
+             1. Insert a PDF.
+             2. Select the model to synthesize with.
+             3. Select a speaker.
+             4. Hit "Generate" and listen to the result!
+             When you select a model for the first time,
+             it will take a little time to download.
+             This project is designed to bring you the love
+             of reading without the hassle of poring over the page.
+             If you want an audiobook, you now have one.
+             """
+         )
+
+         with gr.Row(variant="panel"):
+             f = gr.inputs.File("enter the file")
+             text = read_pdf(f)
+
+         with gr.Row():
+             with gr.Column(variant="panel"):
+                 repo_files = list_repo_files(repo_id="balacoon/tts")
+                 model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
+                 model_name = gr.Dropdown(
+                     label="Model",
+                     choices=model_files,
+                 )
+             with gr.Column(variant="panel"):
+                 speaker = gr.Dropdown(label="Speaker", choices=[])
+
+         def set_model(model_name_str: str):
+             """
+             gets the value from `model_name`, loads the model,
+             re-initializes the tts object, gets the list of
+             speakers that the model supports and sets them on `speaker`
+             """
+             model_path = hf_hub_download(
+                 repo_id="balacoon/tts", filename=model_name_str
+             )
+             global tts
+             tts = TTS(model_path)
+             speakers = tts.get_speakers()
+             value = speakers[-1]
+             return gr.Dropdown.update(
+                 choices=speakers, value=value, visible=True
+             )
+
+         model_name.change(set_model, inputs=model_name, outputs=speaker)
+
+         with gr.Row(variant="panel"):
+             generate = gr.Button("Generate")
+         with gr.Row(variant="panel"):
+             audio = gr.Audio()
+
+         def synthesize_audio(text_str: str, speaker_str: str = ""):
+             """
+             gets the utterance to synthesize from `text`
+             and the speaker name from the `speaker` dropdown.
+             The speaker name may be empty for single-speaker models.
+             Synthesizes the waveform and updates `audio` with it.
+             """
+             if not text_str:
+                 logging.info("text or speaker are not provided")
+                 return None
+             global tts
+             if len(text_str) > 1024:
+                 text_str = text_str[:1024]
+             samples = cast(TTS, tts).synthesize(text_str, speaker_str)
+             return gr.Audio.update(value=(cast(TTS, tts).get_sampling_rate(), samples))
+
+         generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)
+
+     demo.launch()


if __name__ == "__main__":
    main()
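
For reference, a minimal standalone sketch of the pipeline this revert restores: extract text from a PDF with pdfminer, fetch a Balacoon model from the balacoon/tts repo, and synthesize speech. It reuses only the calls that appear in app.py above; the PDF path and the choice of the first listed model are placeholder assumptions.

```python
import logging

from pdfminer.high_level import extract_text
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

logging.basicConfig(level=logging.INFO)

# Same filter app.py uses to list CPU models in the balacoon/tts repo.
model_files = [f for f in list_repo_files(repo_id="balacoon/tts") if f.endswith("_cpu.addon")]
model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_files[0])  # placeholder: first model

tts = TTS(model_path)
speaker = tts.get_speakers()[-1]  # app.py defaults to the last listed speaker

# "example.pdf" is a placeholder path; app.py likewise truncates the text to 1024 characters.
text = extract_text("example.pdf")[:1024]
samples = tts.synthesize(text, speaker)
logging.info("synthesized %d samples at %d Hz", len(samples), tts.get_sampling_rate())
```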