File size: 2,017 Bytes
1a3d685
a5855f7
de6fe87
95b78a2
 
 
 
 
de6fe87
0cbe68c
9f382fd
 
 
8d100c5
 
 
 
 
 
 
9f382fd
 
8d100c5
9f382fd
 
 
 
 
 
0360bdf
885aec7
2459ab5
9f382fd
0360bdf
 
82cfdd6
8d100c5
9f382fd
 
 
 
 
a2008ac
 
 
9f382fd
a2008ac
9f382fd
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
from gradio_client import Client, handle_file
import os

# Read the Hugging Face access token from the environment. The lookup key is
# the literal variable name "your_huggingface_token"; the value is None when
# that variable is not set.
hf_token = os.environ.get("your_huggingface_token")

# Build one shared client for "ResembleAI/Chatterbox" at import time, handing
# it the token (possibly None) as the second positional argument.
client = Client(
    "ResembleAI/Chatterbox",
    hf_token,
)

# Define the function to call the model and return the generated audio file
def generate_tts_audio(text_input, audio_prompt_url, exaggeration_input, temperature_input, seed_num_input, cfgw_input):
    """Request synthesized speech from the remote model and return the result.

    The arguments are forwarded as same-named keyword arguments to
    ``client.predict`` on the ``/generate_tts_audio`` API route.

    Args:
        text_input: Text to synthesize.
        audio_prompt_url: Optional reference audio location. ``None`` or a
            blank string means no reference audio is sent.
        exaggeration_input: Forwarded unchanged to the remote endpoint.
        temperature_input: Forwarded unchanged to the remote endpoint.
        seed_num_input: Forwarded unchanged to the remote endpoint.
        cfgw_input: Forwarded unchanged to the remote endpoint.

    Returns:
        Whatever ``client.predict`` returns, or ``None`` if anything in the
        request failed. Failures never propagate to the caller.
    """
    print("sending request")
    try:
        # Only wrap the reference audio when one was actually supplied;
        # otherwise the endpoint must receive None rather than an empty value.
        if audio_prompt_url and audio_prompt_url.strip():
            audio_prompt = handle_file(audio_prompt_url)
        else:
            audio_prompt = None

        result = client.predict(
            text_input=text_input,
            audio_prompt_path_input=audio_prompt,
            exaggeration_input=exaggeration_input,
            temperature_input=temperature_input,
            seed_num_input=seed_num_input,
            cfgw_input=cfgw_input,
            api_name="/generate_tts_audio"
        )
    except Exception as e:
        # Best-effort by design: keep the UI alive and return an empty output.
        # The console print alone is invisible to the person using the app,
        # so also raise a non-fatal toast explaining why no audio came back.
        print(f"An error occurred: {e}")
        gr.Warning(f"Speech generation failed: {e}")
        return None

    print("API Result:", result)
    return result


# Create the Gradio interface
# Input widgets, listed in the same order as the parameters of
# generate_tts_audio (Gradio passes their values positionally).
_tts_inputs = [
    # text_input
    gr.Textbox(
        label="Text to Synthesize",
        placeholder="Enter your text here...",
    ),
    # audio_prompt_url: delivered to the handler as a file path
    gr.Audio(
        label="Reference Audio File (Optional)",
        type="filepath",
        sources=["upload", "microphone"],
    ),
    # exaggeration_input
    gr.Slider(0, 1, value=0.5, label="Exaggeration"),
    # temperature_input
    gr.Slider(0, 1, value=0.8, label="Temperature"),
    # seed_num_input
    gr.Number(value=0, label="Seed Number"),
    # cfgw_input
    gr.Slider(0, 1, value=0.5, label="CFG/Pace"),
]

# Single-function UI: the handler's return value feeds one audio output.
interface = gr.Interface(
    fn=generate_tts_audio,
    inputs=_tts_inputs,
    outputs=gr.Audio(label="Generated Audio"),
)

# Start the web UI only when this file is executed as a script; importing the
# module builds `interface` without launching it.
if __name__ == "__main__":
    interface.launch()