Spaces: Running on Zero
Willing to explore Chatterbox #10 by MStrange11 - opened
app.py CHANGED
@@ -45,29 +45,25 @@ def set_seed(seed: int):
 @spaces.GPU
 def generate_tts_audio(
     text_input: str,
-    audio_prompt_path_input: str
-    exaggeration_input: float
-    temperature_input: float
-    seed_num_input: int
-    cfgw_input: float
+    audio_prompt_path_input: str,
+    exaggeration_input: float,
+    temperature_input: float,
+    seed_num_input: int,
+    cfgw_input: float
 ) -> tuple[int, np.ndarray]:
     """
-
-
-    This tool synthesizes natural-sounding speech from input text. When a reference audio file
-    is provided, it captures the speaker's voice characteristics and speaking style. The generated audio
-    maintains the prosody, tone, and vocal qualities of the reference speaker, or uses default voice if no reference is provided.
+    Generates TTS audio using the ChatterboxTTS model.

     Args:
-        text_input
-        audio_prompt_path_input
-        exaggeration_input
-        temperature_input
-        seed_num_input
-        cfgw_input
+        text_input: The text to synthesize (max 300 characters).
+        audio_prompt_path_input: Path to the reference audio file.
+        exaggeration_input: Exaggeration parameter for the model.
+        temperature_input: Temperature parameter for the model.
+        seed_num_input: Random seed (0 for random).
+        cfgw_input: CFG/Pace weight.

     Returns:
-
+        A tuple containing the sample rate (int) and the audio waveform (numpy.ndarray).
     """
     current_model = get_or_load_model()

@@ -78,20 +74,12 @@ def generate_tts_audio(
     set_seed(int(seed_num_input))

     print(f"Generating audio for text: '{text_input[:50]}...'")
-
-    # Handle optional audio prompt
-    generate_kwargs = {
-        "exaggeration": exaggeration_input,
-        "temperature": temperature_input,
-        "cfg_weight": cfgw_input,
-    }
-
-    if audio_prompt_path_input:
-        generate_kwargs["audio_prompt_path"] = audio_prompt_path_input
-
     wav = current_model.generate(
         text_input[:300], # Truncate text to max chars
-        **generate_kwargs
+        audio_prompt_path=audio_prompt_path_input,
+        exaggeration=exaggeration_input,
+        temperature=temperature_input,
+        cfg_weight=cfgw_input,
     )
     print("Audio generation complete.")
     return (current_model.sr, wav.squeeze(0).numpy())
@@ -145,4 +133,4 @@ with gr.Blocks() as demo:
         outputs=[audio_output],
     )

-demo.launch(
+demo.launch()
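
Note on behavior: the removed block only set audio_prompt_path when a reference clip was actually supplied, whereas the new call passes audio_prompt_path_input to current_model.generate() unconditionally. If the default voice should still work without a reference file, a minimal sketch that keeps the prompt optional while using the new explicit keywords could look like the fragment below. It is meant as the body of the generation step inside generate_tts_audio, reuses only the names already shown in the diff, and is not part of the committed change.

    # Sketch only: keep the reference audio optional, as the removed code did,
    # while still passing the generation settings explicitly.
    generate_kwargs = {
        "exaggeration": exaggeration_input,
        "temperature": temperature_input,
        "cfg_weight": cfgw_input,
    }
    if audio_prompt_path_input:
        # Forward the reference clip only when one was provided; otherwise the
        # model falls back to its default voice.
        generate_kwargs["audio_prompt_path"] = audio_prompt_path_input

    wav = current_model.generate(
        text_input[:300],  # Truncate text to max chars
        **generate_kwargs,
    )

Either way, the 300-character truncation and the returned (current_model.sr, wav.squeeze(0).numpy()) tuple stay the same.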