Abhijit Bhattacharya
Update with latest code structure - Simple script (app.py) + optional Gradio interface (app_gradio.py) - Updated README with accurate compatibility info - Fixed MPS issues with CPU mode approach - Ready for download and local usage
6231313
#!/usr/bin/env python3
"""
Chatterbox-TTS Apple Silicon Gradio Interface

Full web interface for local usage with Apple Silicon compatibility.

Install gradio first: pip install gradio
Then run: python app_gradio.py
"""
import gradio as gr | |
from app import ( | |
get_or_load_model, | |
generate_audio, | |
DEVICE, | |
split_text_into_chunks, | |
logger | |
) | |
import torch | |
import tempfile | |
import os | |
def _discard_file(path):
    """Best-effort removal of a temporary file; filesystem errors are ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the error (or result) being reported.
        pass


def _write_prompt_wav(sample_rate, samples):
    """Save an in-memory ``(sample_rate, samples)`` recording as a 16-bit WAV.

    Float samples are treated as normalised to [-1.0, 1.0]; signed integer
    samples that are not already 16-bit are rescaled to 16-bit full scale.
    A 2-D array is assumed to be laid out as (frames, channels) -- TODO
    confirm against the Gradio version in use.

    Returns:
        Path of the temporary file written. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: if the array is empty, is not 1-D/2-D, or has an
            unsupported dtype.
    """
    # Function-scope imports: only this rarely used conversion needs them.
    import wave

    import numpy as np

    pcm = np.asarray(samples)
    if pcm.size == 0 or pcm.ndim not in (1, 2):
        raise ValueError(
            "reference audio must be a non-empty 1-D or 2-D sample array"
        )

    if np.issubdtype(pcm.dtype, np.floating):
        pcm = np.rint(np.clip(pcm, -1.0, 1.0) * 32767.0)
    elif np.issubdtype(pcm.dtype, np.signedinteger):
        if pcm.dtype != np.int16:
            scale = 32767.0 / np.iinfo(pcm.dtype).max
            # Clip because the most negative value can land just outside
            # the int16 range after rescaling (e.g. int8 -128).
            pcm = np.clip(
                np.rint(pcm.astype(np.float64) * scale), -32768, 32767
            )
    else:
        raise ValueError(f"unsupported reference audio dtype: {pcm.dtype}")

    pcm = pcm.astype("<i2")  # WAV PCM data is little-endian
    channels = 1 if pcm.ndim == 1 else pcm.shape[1]

    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # reopen by name below; keeps this portable to Windows
    try:
        with wave.open(wav_path, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())
    except Exception:
        _discard_file(wav_path)
        raise
    return wav_path


def gradio_generate_audio(
    text_input: str,
    audio_prompt_input,
    exaggeration_input: float,
    temperature_input: float,
    seed_input: int,
    cfg_weight_input: float,
    chunk_size_input: int = 250,
):
    """Gradio callback: synthesize speech for ``text_input``.

    Args:
        text_input: Text to synthesize; must contain non-whitespace characters.
        audio_prompt_input: Optional reference voice. Either a file path
            (``str``) or a raw ``(sample_rate, samples)`` tuple, which is
            written to a temporary WAV file for the duration of the call.
            Any other value (including ``None``) means "no reference audio".
        exaggeration_input: Forwarded to ``generate_audio`` as ``exaggeration``.
        temperature_input: Forwarded as ``temperature``.
        seed_input: Random seed; ``0`` or ``None`` is forwarded as ``None``.
        cfg_weight_input: Forwarded as ``cfg_weight``.
        chunk_size_input: Forwarded as ``chunk_size`` (coerced to ``int``).

    Returns:
        Whatever ``generate_audio`` returns -- presumably the path of the
        generated WAV file (verify against ``app.generate_audio``).

    Raises:
        gr.Error: if the text is blank or generation fails; the original
            exception is chained as the cause.
    """
    if not text_input or not text_input.strip():
        raise gr.Error("Please enter some text to synthesize.")

    prompt_tmp = None  # temp WAV created from a raw tuple prompt, if any
    output_tmp = None  # destination file handed to generate_audio
    try:
        if isinstance(audio_prompt_input, tuple):
            # A raw tuple is not a path: persist it so the generator can load it.
            prompt_tmp = _write_prompt_wav(*audio_prompt_input)
            audio_prompt_path = prompt_tmp
        elif isinstance(audio_prompt_input, str) and audio_prompt_input:
            audio_prompt_path = audio_prompt_input
        else:
            audio_prompt_path = None

        # Reserve a unique output name and close the handle immediately so
        # generate_audio can open the path itself without it being held open.
        fd, output_tmp = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        return generate_audio(
            text=text_input,
            audio_prompt_path=audio_prompt_path,
            exaggeration=exaggeration_input,
            temperature=temperature_input,
            seed=int(seed_input) if seed_input else None,
            cfg_weight=cfg_weight_input,
            chunk_size=int(chunk_size_input),
            output_path=output_tmp,
        )
    except Exception as e:
        logger.exception("Audio generation failed")
        # The output file is only useful on success; do not leak it.
        _discard_file(output_tmp)
        raise gr.Error(f"Generation failed: {str(e)}") from e
    finally:
        _discard_file(prompt_tmp)
# Build the Gradio Blocks application. Components are created in the same
# order as before; only the call style (explicit keyword arguments) differs.
# NOTE(review): the row/column nesting below follows the component order of
# the original listing -- confirm it matches the intended layout.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="ποΈ Chatterbox-TTS (Apple Silicon)",
    css="""
    .gradio-container { max-width: 1200px; margin: auto; }
    .gr-button { background: linear-gradient(45deg, #FF6B6B, #4ECDC4); color: white; }
    .info-box {
        padding: 15px;
        border-radius: 10px;
        margin-top: 20px;
        border: 1px solid #ddd;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .info-box h4 {
        margin-top: 0;
        color: #333;
        font-weight: bold;
    }
    .info-box p {
        margin: 8px 0;
        color: #555;
        line-height: 1.4;
    }
    .chunking-info { background: linear-gradient(135deg, #e8f5e8, #f0f8f0); }
    .system-info { background: linear-gradient(135deg, #f0f4f8, #e6f2ff); }
    """,
) as demo:
    # Page banner.
    gr.HTML(
        """
        <div style="text-align: center; padding: 20px;">
            <h1>ποΈ Chatterbox-TTS Apple Silicon</h1>
            <p style="font-size: 18px; color: #666;">
                Generate high-quality speech from text with voice cloning<br>
                <strong>Optimized for Apple Silicon compatibility!</strong>
            </p>
            <p style="font-size: 14px; color: #888;">
                Based on <a href="https://huggingface.co/spaces/ResembleAI/Chatterbox">official ResembleAI implementation</a><br>
                β¨ <strong>Enhanced with smart text chunking and Apple Silicon support!</strong>
            </p>
        </div>
        """
    )

    with gr.Row():
        # Left column: every input control plus the trigger button.
        with gr.Column():
            text = gr.Textbox(
                label="Text to synthesize (supports long text with automatic chunking)",
                value="Hello! This is a test of the Chatterbox-TTS voice cloning system running locally on Apple Silicon.",
                lines=5,
                max_lines=10,
            )

            ref_wav = gr.Audio(
                label="Reference Audio File (Optional - 6+ seconds recommended)",
                sources=["upload", "microphone"],
                type="filepath",
            )

            with gr.Row():
                exaggeration = gr.Slider(
                    minimum=0.25,
                    maximum=2,
                    value=0.5,
                    step=0.05,
                    label="Exaggeration (Neutral = 0.5)",
                )
                cfg_weight = gr.Slider(
                    minimum=0.2,
                    maximum=1,
                    value=0.5,
                    step=0.05,
                    label="CFG/Pace",
                )

            with gr.Accordion(label="βοΈ Advanced Options", open=False):
                chunk_size = gr.Slider(
                    minimum=100,
                    maximum=400,
                    value=250,
                    step=25,
                    label="Chunk Size (characters per chunk for long text)",
                )
                seed_num = gr.Number(
                    label="Random seed (0 for random)",
                    value=0,
                    precision=0,
                )
                temp = gr.Slider(
                    minimum=0.05,
                    maximum=5,
                    value=0.8,
                    step=0.05,
                    label="Temperature",
                )

            run_btn = gr.Button(
                value="π΅ Generate Speech",
                variant="primary",
                size="lg",
            )

        # Right column: the generated audio and two static information panels.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")

            gr.HTML(
                """
                <div class="info-box chunking-info">
                    <h4>π Text Chunking Info</h4>
                    <p><strong>Smart Chunking:</strong> Long text is automatically split at sentence boundaries</p>
                    <p><strong>Chunk Processing:</strong> Each chunk generates separate audio, then concatenated</p>
                    <p><strong>Silence Gaps:</strong> 0.3s silence added between chunks for natural flow</p>
                </div>
                """
            )

            # Rendered once, when the module is imported.
            gr.HTML(
                f"""
                <div class="info-box system-info">
                    <h4>π» System Status</h4>
                    <p><strong>Device:</strong> {DEVICE.upper()} {'π' if torch.backends.mps.is_available() else 'π»'}</p>
                    <p><strong>PyTorch:</strong> {torch.__version__}</p>
                    <p><strong>MPS Available:</strong> {'β Yes' if torch.backends.mps.is_available() else 'β No'}</p>
                    <p><strong>Compatibility:</strong> CPU mode for stability</p>
                </div>
                """
            )

    # The order of `inputs` must match gradio_generate_audio's parameters:
    # text, prompt, exaggeration, temperature, seed, cfg weight, chunk size.
    run_btn.click(
        fn=gradio_generate_audio,
        inputs=[text, ref_wav, exaggeration, temp, seed_num, cfg_weight, chunk_size],
        outputs=[audio_output],
        show_progress=True,
    )

    # Clickable sample sentences that fill the text box.
    gr.Examples(
        label="π Example Texts",
        inputs=[text],
        examples=[
            ["Hello! This is a test of voice cloning running on Apple Silicon."],
            ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet."],
            ["Welcome to the future of voice synthesis! With Chatterbox, you can clone any voice in seconds."],
        ],
    )
def main():
    """Pre-load the TTS model and serve the Gradio interface locally.

    The model is loaded before the server starts so the first request does
    not pay the load cost. The server binds to 127.0.0.1:7861 with public
    sharing disabled and debug/error display enabled.

    Failures are reported on stdout rather than raised: an ``ImportError``
    prints the failing import together with the install hint; any other
    exception prints its message.
    """
    try:
        print("π Starting Chatterbox-TTS Gradio Interface")
        print(f"Device: {DEVICE}")

        # Pre-load model
        print("Loading model...")
        get_or_load_model()
        print("β Model loaded!")

        # Launch interface
        demo.launch(
            server_name="127.0.0.1",
            server_port=7861,
            share=False,
            debug=True,
            show_error=True,
        )
    except ImportError as e:
        print("β Missing dependency!")
        # gradio itself is imported at module top, so an ImportError raised
        # here may come from another package; show which import failed.
        print(f"Import error: {e}")
        print("Install with: pip install gradio")
        print("Then run: python app_gradio.py")
    except Exception as e:
        print(f"β Error: {e}")


if __name__ == "__main__":
    main()