File size: 1,165 Bytes
33051d3
4e01295
6568e62
33051d3
6568e62
33051d3
 
 
c764bfb
4e01295
33051d3
4e01295
 
6568e62
9a433d2
 
 
6568e62
33051d3
bed109a
c764bfb
33051d3
7917b4e
33051d3
6568e62
 
 
 
c764bfb
7917b4e
c764bfb
6568e62
 
7337c95
7917b4e
 
6568e62
7917b4e
 
6568e62
7917b4e
 
e780472
7917b4e
c764bfb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import logging
from pdfminer.high_level import extract_text
from huggingface_hub import hf_hub_download
from balacoon_tts import TTS
import gradio as gr

# Global TTS engine shared across requests. It stays None until main() loads a
# model and assigns it; synthesize_audio() reads it to produce audio.
tts = None

def read_pdf(file):
    """Extract the text content of a PDF.

    Args:
        file: Either an uploaded-file object exposing the on-disk location
            through a ``name`` attribute, or a plain filesystem path.

    Returns:
        The text returned by pdfminer's ``extract_text`` for that file.

    Raises:
        ValueError: If ``file`` is None, i.e. nothing was uploaded.
    """
    if file is None:
        # Fail with a clear message instead of an AttributeError on `.name`.
        raise ValueError("No PDF file was provided")
    # Objects with a `.name` are resolved to their path; anything else is
    # assumed to already be a path.
    pdf_path = getattr(file, "name", file)
    return extract_text(pdf_path)

def synthesize_audio(file):
    """Synthesize speech for the leading text of an uploaded PDF.

    Args:
        file: Uploaded file as delivered by the Gradio file input; it is
            forwarded unchanged to ``read_pdf``.

    Returns:
        A ``(sample_rate, samples)`` tuple. Gradio's numpy audio output expects
        the sampling rate first and the sample array second.

    Raises:
        RuntimeError: If the global ``tts`` engine has not been initialized.
    """
    if tts is None:
        raise RuntimeError("TTS engine is not initialized; call main() first")

    # Only the first 1024 characters are synthesized; slicing is a no-op for
    # shorter texts, so no explicit length check is needed.
    text_str = read_pdf(file)[:1024]

    # NOTE(review): Balacoon's examples pass a speaker name as a second
    # argument to synthesize(); confirm whether it is required for this model.
    samples = tts.synthesize(text_str)
    return (tts.get_sampling_rate(), samples)

def main():
    """Load the TTS model and serve the PDF-to-speech Gradio interface.

    Downloads a Balacoon TTS addon from the Hugging Face Hub, stores the
    resulting engine in the module-level ``tts`` global, then builds and
    launches a Gradio interface that maps an uploaded PDF to synthesized audio.
    """
    global tts

    logging.basicConfig(level=logging.INFO)

    # hf_hub_download() requires the name of the file to fetch in addition to
    # the repository id; calling it with only repo_id raises TypeError.
    # NOTE(review): confirm this addon file name exists in the repository.
    model_name = "balacoon/tts"  # Set the desired model repository here
    model_file = "en_us_hifi_jets_cpu.addon"  # Set the desired addon file here
    model_path = hf_hub_download(repo_id=model_name, filename=model_file)
    tts = TTS(model_path)

    file_input = gr.inputs.File(label="Upload PDF")
    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    iface = gr.Interface(
        fn=synthesize_audio,
        inputs=file_input,
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
        layout="vertical",
    )

    # `debug` is an option of launch(), not of the Interface constructor.
    iface.launch(debug=True)

# Start the app only when executed as a script, so importing this module does
# not download a model or launch the web interface.
if __name__ == "__main__":
    main()