ramimu committed on
Commit
91d6893
·
verified ·
1 Parent(s): 7be21d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -276
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import os
3
  import traceback
4
  import torch
 
5
  from huggingface_hub import hf_hub_download
6
  import shutil
7
  import spaces
@@ -17,47 +18,109 @@ try:
17
  from chatterbox.tts import ChatterboxTTS
18
  chatterbox_available = True
19
  print("Chatterbox TTS imported successfully")
 
 
 
20
 
21
- import inspect
22
- print(f"ChatterboxTTS methods: {[method for method in dir(ChatterboxTTS) if not method.startswith('_')]}")
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  try:
25
- sig = inspect.signature(ChatterboxTTS.__init__)
26
- print(f"ChatterboxTTS.__init__ signature: {sig}")
27
- except:
28
- pass
29
-
30
- if hasattr(ChatterboxTTS, 'from_local'):
 
31
  try:
32
- sig = inspect.signature(ChatterboxTTS.from_local)
33
- print(f"ChatterboxTTS.from_local signature: {sig}")
34
- except:
35
- pass
36
-
37
- if hasattr(ChatterboxTTS, 'from_pretrained'):
38
- try:
39
- sig = inspect.signature(ChatterboxTTS.from_pretrained)
40
- print(f"ChatterboxTTS.from_pretrained signature: {sig}")
41
- except:
42
- pass
43
-
44
- except ImportError as e:
45
- print(f"Failed to import ChatterboxTTS: {e}")
46
- print("Trying alternative import...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  try:
48
- import chatterbox
49
- from chatterbox import ChatterboxTTS
50
- chatterbox_available = True
51
- print("Chatterbox TTS imported with alternative method")
52
- except ImportError as e2:
53
- print(f"Alternative import also failed: {e2}")
54
- chatterbox_available = False
55
-
56
- model = None
 
 
 
 
 
 
 
57
 
58
  def download_model_files():
 
59
  print(f"Checking for model files in {LOCAL_MODEL_PATH}...")
60
  os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)
 
61
  for filename in MODEL_FILES:
62
  local_path = os.path.join(LOCAL_MODEL_PATH, filename)
63
  if not os.path.exists(local_path):
@@ -78,97 +141,19 @@ def download_model_files():
78
  print(f"✓ {filename} already exists locally")
79
  print("All model files are ready!")
80
 
 
81
  if chatterbox_available:
82
- print("Downloading model files from Hugging Face Hub...")
83
  try:
84
  download_model_files()
 
85
  except Exception as e:
86
- print(f"ERROR: Failed to download model files: {e}")
87
- print("Model loading will fail without these files.")
88
-
89
- print(f"Attempting to load Chatterbox model from local directory: {LOCAL_MODEL_PATH}")
90
- if not os.path.exists(LOCAL_MODEL_PATH):
91
- print(f"ERROR: Local model directory not found at {LOCAL_MODEL_PATH}")
92
- print("Please ensure the model files were downloaded successfully.")
93
- else:
94
- print(f"Contents of {LOCAL_MODEL_PATH}: {os.listdir(LOCAL_MODEL_PATH)}")
95
- try:
96
- device = "cuda" if torch.cuda.is_available() else "cpu"
97
- print(f"Using device: {device}")
98
-
99
- try:
100
- model = ChatterboxTTS.from_local(LOCAL_MODEL_PATH, device)
101
- print("Chatterbox model loaded successfully using from_local method.")
102
- except Exception as e1:
103
- print(f"from_local attempt failed: {e1}")
104
- try:
105
- model = ChatterboxTTS.from_pretrained(device)
106
- print("Chatterbox model loaded successfully with from_pretrained.")
107
- except Exception as e2:
108
- print(f"from_pretrained failed: {e2}")
109
- try:
110
- import pathlib
111
- import json
112
-
113
- model_path = pathlib.Path(LOCAL_MODEL_PATH)
114
- print(f"Manual loading with correct constructor signature...")
115
-
116
- s3gen_path = model_path / "s3gen.pt"
117
- ve_path = model_path / "ve.pt"
118
- tokenizer_path = model_path / "tokenizer.json"
119
- t3_cfg_path = model_path / "t3_cfg.pt"
120
-
121
- print(f" Loading s3gen from: {s3gen_path}")
122
- s3gen = torch.load(s3gen_path, map_location=torch.device('cpu'))
123
- print(f" Loading ve from: {ve_path}")
124
- ve = torch.load(ve_path, map_location=torch.device('cpu'))
125
- print(f" Loading t3_cfg from: {t3_cfg_path}")
126
- t3_cfg = torch.load(t3_cfg_path, map_location=torch.device('cpu'))
127
- print(f" Loading tokenizer from: {tokenizer_path}")
128
- with open(tokenizer_path, 'r') as f:
129
- tokenizer_data = json.load(f)
130
-
131
- try:
132
- from chatterbox.models.tokenizers.tokenizer import EnTokenizer
133
- tokenizer = EnTokenizer.from_dict(tokenizer_data)
134
- print(" Created EnTokenizer from JSON data")
135
- except Exception as tok_error:
136
- print(f" Could not create EnTokenizer: {tok_error}")
137
- tokenizer = tokenizer_data
138
-
139
- print(" Creating ChatterboxTTS instance with correct signature...")
140
- model = ChatterboxTTS(
141
- t3=t3_cfg,
142
- s3gen=s3gen,
143
- ve=ve,
144
- tokenizer=tokenizer,
145
- device=device
146
- )
147
- print("Chatterbox model loaded successfully with manual constructor.")
148
-
149
- except Exception as e3:
150
- print(f"Manual loading failed: {e3}")
151
- print(f"Detailed error: {str(e3)}")
152
- try:
153
- print("Trying alternative parameter order...")
154
- model = ChatterboxTTS(
155
- s3gen, ve, tokenizer, t3_cfg, device
156
- )
157
- print("Chatterbox model loaded with alternative parameter order.")
158
- except Exception as e4:
159
- print(f"Alternative parameter order failed: {e4}")
160
- raise e3
161
-
162
- except Exception as e:
163
- print(f"ERROR: Failed to load Chatterbox model from local directory: {e}")
164
- print("Detailed error trace:")
165
- traceback.print_exc()
166
- model = None
167
- else:
168
- print("ERROR: Chatterbox TTS library not available")
169
 
170
  @spaces.GPU
171
  def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
 
 
 
172
  if not chatterbox_available:
173
  return None, "Error: Chatterbox TTS library not available. Please check installation."
174
  if model is None:
@@ -179,52 +164,99 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
179
  return None, "Error: Please upload a reference audio file (.wav or .mp3)."
180
 
181
  try:
182
- print(f"Received request:")
183
- print(f" Text: '{text_to_speak}'")
184
  print(f" Audio: '{reference_audio_path}'")
185
- print(f" Exaggeration: {exaggeration}")
186
- print(f" CFG/Pace: {cfg_pace}")
187
- print(f" Random Seed: {random_seed}")
188
- print(f" Temperature: {temperature}")
189
-
 
190
  if random_seed > 0:
191
- import torch
192
  torch.manual_seed(random_seed)
193
  if torch.cuda.is_available():
194
  torch.cuda.manual_seed(random_seed)
195
-
196
- output_wav_data = model.generate(
197
- text=text_to_speak,
198
- audio_prompt_path=reference_audio_path,
199
- exaggeration=exaggeration,
200
- cfg_weight=cfg_pace,
201
- temperature=temperature
202
- )
203
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  try:
205
  sample_rate = model.sr
206
  except:
207
  sample_rate = 24000
208
-
209
- print(f"Audio generated successfully. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
210
-
211
  if isinstance(output_wav_data, str):
212
- return output_wav_data, "Success: Audio generated successfully!"
213
  else:
214
  import numpy as np
215
  if hasattr(output_wav_data, 'cpu'):
216
  output_wav_data = output_wav_data.cpu().numpy()
217
  if output_wav_data.ndim > 1:
218
  output_wav_data = output_wav_data.squeeze()
219
- return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
220
-
 
 
 
 
 
 
 
 
 
221
  except Exception as e:
222
- print(f"ERROR: Failed during audio generation: {e}")
223
- print("Detailed error trace for audio generation:")
224
  traceback.print_exc()
225
- return None, f"Error during audio generation: {str(e)}. Check logs for more details."
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
 
228
  import requests
229
  import tempfile
230
  import os
@@ -232,164 +264,45 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
232
 
233
  temp_audio_path = None
234
  try:
 
235
  if reference_audio_url.startswith('data:audio'):
236
  header, encoded = reference_audio_url.split(',', 1)
237
  audio_data = base64.b64decode(encoded)
238
- if 'mp3' in header:
239
- ext = '.mp3'
240
- elif 'wav' in header:
241
- ext = '.wav'
242
- else:
243
- ext = '.wav'
244
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
245
  temp_file.write(audio_data)
246
  temp_audio_path = temp_file.name
247
  elif reference_audio_url.startswith('http'):
248
- response = requests.get(reference_audio_url)
249
  response.raise_for_status()
250
- if reference_audio_url.endswith('.mp3'):
251
- ext = '.mp3'
252
- elif reference_audio_url.endswith('.wav'):
253
- ext = '.wav'
254
- else:
255
- ext = '.wav'
256
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
257
  temp_file.write(response.content)
258
  temp_audio_path = temp_file.name
259
  else:
260
  temp_audio_path = reference_audio_url
261
 
 
262
  audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
263
-
264
- if temp_audio_path and temp_audio_path != reference_audio_url:
265
- try:
266
- os.unlink(temp_audio_path)
267
- except:
268
- pass
269
  return audio_output, status
 
270
  except Exception as e:
 
 
 
 
271
  if temp_audio_path and temp_audio_path != reference_audio_url:
272
  try:
273
  os.unlink(temp_audio_path)
274
  except:
275
  pass
276
- return None, f"API Error: {str(e)}"
277
 
 
278
  def main():
279
  print("Starting Advanced Gradio interface...")
280
-
281
- # Create a Blocks interface with multiple functions
282
- with gr.Blocks(title="🎙️ Advanced Chatterbox Voice Cloning") as demo:
283
- gr.Markdown("# 🎙️ Advanced Chatterbox Voice Cloning")
284
- gr.Markdown("Clone any voice using advanced AI technology with fine-tuned controls.")
285
-
286
- with gr.Row():
287
- with gr.Column(scale=2):
288
- # Main interface inputs
289
- text_input = gr.Textbox(
290
- label="Text to Speak",
291
- placeholder="Enter the text you want the cloned voice to say...",
292
- lines=3
293
- )
294
- audio_input = gr.Audio(
295
- type="filepath",
296
- label="Reference Audio (Upload a short .wav or .mp3 clip)",
297
- sources=["upload", "microphone"]
298
- )
299
-
300
- with gr.Accordion("🔧 Advanced Settings", open=False):
301
- with gr.Row():
302
- exaggeration_input = gr.Slider(
303
- minimum=0.25,
304
- maximum=1.0,
305
- value=0.6,
306
- step=0.05,
307
- label="Exaggeration",
308
- info="Controls voice characteristic emphasis"
309
- )
310
- cfg_pace_input = gr.Slider(
311
- minimum=0.2,
312
- maximum=1.0,
313
- value=0.3,
314
- step=0.05,
315
- label="CFG/Pace",
316
- info="Classifier-free guidance weight"
317
- )
318
- with gr.Row():
319
- seed_input = gr.Number(
320
- value=0,
321
- label="Random Seed",
322
- info="Set to 0 for random results",
323
- precision=0
324
- )
325
- temperature_input = gr.Slider(
326
- minimum=0.05,
327
- maximum=2.0,
328
- value=0.6,
329
- step=0.05,
330
- label="Temperature",
331
- info="Controls randomness in generation"
332
- )
333
-
334
- generate_btn = gr.Button("🎵 Generate Voice Clone", variant="primary", size="lg")
335
-
336
- with gr.Column(scale=1):
337
- # Outputs
338
- audio_output = gr.Audio(label="Generated Audio", type="numpy")
339
- status_output = gr.Textbox(label="Status", lines=2)
340
-
341
- with gr.Accordion("📝 Examples", open=False):
342
- gr.Examples(
343
- examples=[
344
- ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
345
- ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
346
- ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
347
- ],
348
- inputs=[text_input, audio_input, exaggeration_input, cfg_pace_input, seed_input, temperature_input]
349
- )
350
-
351
- # Main interface function (for file uploads)
352
- generate_btn.click(
353
- fn=clone_voice_api,
354
- inputs=[text_input, audio_input, exaggeration_input, cfg_pace_input, seed_input, temperature_input],
355
- outputs=[audio_output, status_output],
356
- api_name="predict"
357
- )
358
-
359
- # API function for base64 data (for external API calls)
360
- def clone_voice_base64_api(text_to_speak, reference_audio_b64, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
361
- """API function that accepts base64 audio data directly."""
362
- return clone_voice_api(text_to_speak, reference_audio_b64, exaggeration, cfg_pace, random_seed, temperature)
363
-
364
- # Hidden inputs/outputs for the base64 API
365
- with gr.Row(visible=False):
366
- api_text_input = gr.Textbox()
367
- api_audio_input = gr.Textbox() # This will receive base64 data URL
368
- api_exaggeration_input = gr.Slider(minimum=0.25, maximum=1.0, value=0.6)
369
- api_cfg_pace_input = gr.Slider(minimum=0.2, maximum=1.0, value=0.3)
370
- api_seed_input = gr.Number(value=0, precision=0)
371
- api_temperature_input = gr.Slider(minimum=0.05, maximum=2.0, value=0.6)
372
- api_audio_output = gr.Audio(type="numpy")
373
- api_status_output = gr.Textbox()
374
- api_btn = gr.Button()
375
-
376
- # API endpoint for base64 data
377
- api_btn.click(
378
- fn=clone_voice_base64_api,
379
- inputs=[api_text_input, api_audio_input, api_exaggeration_input, api_cfg_pace_input, api_seed_input, api_temperature_input],
380
- outputs=[api_audio_output, api_status_output],
381
- api_name="clone_voice"
382
- )
383
-
384
- demo.launch(
385
- server_name="0.0.0.0",
386
- server_port=7860,
387
- show_error=True,
388
- quiet=False,
389
- favicon_path=None,
390
- share=False,
391
- auth=None
392
- )
393
 
394
  if __name__ == "__main__":
395
- main()
 
2
  import os
3
  import traceback
4
  import torch
5
+ import gc
6
  from huggingface_hub import hf_hub_download
7
  import shutil
8
  import spaces
 
18
  from chatterbox.tts import ChatterboxTTS
19
  chatterbox_available = True
20
  print("Chatterbox TTS imported successfully")
21
+ except ImportError as e:
22
+ print(f"Failed to import ChatterboxTTS: {e}")
23
+ chatterbox_available = False
24
 
25
+ model = None
 
26
 
27
def cleanup_gpu_memory():
    """Release unreachable Python objects and cached CUDA memory.

    Garbage collection runs first so that tensors kept alive only by
    reference cycles are freed before the CUDA caching allocator is asked to
    give back its unoccupied blocks. Without CUDA only the garbage-collection
    step runs.
    """
    # Collect before emptying the cache: empty_cache() can only return blocks
    # that no live tensor occupies any more.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
33
+
34
def safe_load_model():
    """Load the Chatterbox model into the module-level ``model`` variable.

    Loaders are tried in order: ``ChatterboxTTS.from_local`` on
    ``LOCAL_MODEL_PATH``, then ``ChatterboxTTS.from_pretrained``, then
    ``load_model_manually``. The first loader that yields a model wins.

    Returns:
        bool: True when ``model`` holds a loaded instance afterwards; False
        when the library is unavailable or loading failed, in which case
        ``model`` is reset to None.
    """
    global model

    if not chatterbox_available:
        print("ERROR: Chatterbox TTS library not available")
        return False

    try:
        # Clean up any existing GPU memory
        cleanup_gpu_memory()

        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading model on device: {device}")

        loaded = None

        try:
            loaded = ChatterboxTTS.from_local(LOCAL_MODEL_PATH, device)
            print("✓ Model loaded successfully using from_local method.")
        except Exception as local_error:
            print(f"from_local failed: {local_error}")

        if loaded is None:
            try:
                loaded = ChatterboxTTS.from_pretrained(device)
                print("✓ Model loaded successfully with from_pretrained.")
            except Exception as pretrained_error:
                print(f"from_pretrained failed: {pretrained_error}")

        if loaded is None:
            # Last resort; an exception here propagates to the outer handler.
            loaded = load_model_manually(device)

        if loaded is None:
            raise RuntimeError("No loader returned a model instance")

        # Move model to device and set to eval mode when the object supports it
        if hasattr(loaded, 'to'):
            moved = loaded.to(device)
            # Keep the existing object if .to() works in place and returns None
            if moved is not None:
                loaded = moved
        if hasattr(loaded, 'eval'):
            loaded.eval()

        # Publish only a fully prepared model
        model = loaded

        # Clean up after loading
        cleanup_gpu_memory()
        return True

    except Exception as e:
        print(f"ERROR: Failed to load model: {e}")
        traceback.print_exc()
        model = None
        cleanup_gpu_memory()
        return False
79
+
80
def load_model_manually(device):
    """Build a ChatterboxTTS instance from the files in ``LOCAL_MODEL_PATH``.

    Reads ``s3gen.pt``, ``ve.pt`` and ``t3_cfg.pt`` with ``torch.load``
    (mapped to CPU), parses ``tokenizer.json`` and passes all four to the
    ``ChatterboxTTS`` constructor.

    Args:
        device: Device value forwarded to the ``ChatterboxTTS`` constructor.

    Returns:
        The constructed ``ChatterboxTTS`` instance.

    Raises:
        FileNotFoundError: If any of the four files is missing.
        Exception: Anything raised by ``torch.load``, ``json.load`` or the
            ``ChatterboxTTS`` constructor.
    """
    import pathlib
    import json

    model_path = pathlib.Path(LOCAL_MODEL_PATH)
    print("Manual loading with correct constructor signature...")

    s3gen_path = model_path / "s3gen.pt"
    ve_path = model_path / "ve.pt"
    tokenizer_path = model_path / "tokenizer.json"
    t3_cfg_path = model_path / "t3_cfg.pt"

    # Fail early with one message naming every missing file instead of
    # stopping at whichever load happens to come first.
    missing = [
        path.name
        for path in (s3gen_path, ve_path, tokenizer_path, t3_cfg_path)
        if not path.is_file()
    ]
    if missing:
        raise FileNotFoundError(
            f"Missing model files in {model_path}: {', '.join(missing)}"
        )

    # Load components to CPU first.
    # SECURITY NOTE: torch.load unpickles the file contents; only load
    # checkpoints that come from a trusted source.
    # NOTE(review): the loaded objects are handed to the constructor as-is.
    # If these files contain state dicts rather than module objects, the
    # modules must be built first -- confirm against ChatterboxTTS.from_local.
    s3gen = torch.load(s3gen_path, map_location='cpu')
    ve = torch.load(ve_path, map_location='cpu')
    t3_cfg = torch.load(t3_cfg_path, map_location='cpu')

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(tokenizer_path, 'r', encoding='utf-8') as f:
        tokenizer_data = json.load(f)

    try:
        from chatterbox.models.tokenizers.tokenizer import EnTokenizer
        tokenizer = EnTokenizer.from_dict(tokenizer_data)
    except Exception as tok_error:
        # Best-effort fallback to the raw JSON data, but say so in the logs
        print(f"  Could not create EnTokenizer: {tok_error}")
        tokenizer = tokenizer_data

    # Create model instance
    instance = ChatterboxTTS(
        t3=t3_cfg,
        s3gen=s3gen,
        ve=ve,
        tokenizer=tokenizer,
        device=device
    )

    print("✓ Model loaded successfully with manual constructor.")
    return instance
118
 
119
  def download_model_files():
120
+ """Download model files with error handling."""
121
  print(f"Checking for model files in {LOCAL_MODEL_PATH}...")
122
  os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)
123
+
124
  for filename in MODEL_FILES:
125
  local_path = os.path.join(LOCAL_MODEL_PATH, filename)
126
  if not os.path.exists(local_path):
 
141
  print(f"✓ {filename} already exists locally")
142
  print("All model files are ready!")
143
 
144
# Initialize model: fetch the model files, then load the model at import time.
if chatterbox_available:
    try:
        download_model_files()
        # safe_load_model() reports failure through its return value
        if not safe_load_model():
            print("ERROR during initialization: model could not be loaded")
    except Exception as e:
        print(f"ERROR during initialization: {e}")
        # Keep the stack trace in the logs, like the other handlers in this file
        traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  @spaces.GPU
153
  def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
154
+ """Main voice cloning function with improved error handling."""
155
+
156
+ # Input validation
157
  if not chatterbox_available:
158
  return None, "Error: Chatterbox TTS library not available. Please check installation."
159
  if model is None:
 
164
  return None, "Error: Please upload a reference audio file (.wav or .mp3)."
165
 
166
  try:
167
+ print(f"Processing request:")
168
+ print(f" Text length: {len(text_to_speak)} characters")
169
  print(f" Audio: '{reference_audio_path}'")
170
+ print(f" Parameters: exag={exaggeration}, cfg={cfg_pace}, seed={random_seed}, temp={temperature}")
171
+
172
+ # Clean GPU memory before generation
173
+ cleanup_gpu_memory()
174
+
175
+ # Set random seed if specified
176
  if random_seed > 0:
 
177
  torch.manual_seed(random_seed)
178
  if torch.cuda.is_available():
179
  torch.cuda.manual_seed(random_seed)
180
+
181
+ # Check CUDA availability before generation
182
+ if torch.cuda.is_available():
183
+ print(f"CUDA memory before generation: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
184
+
185
+ # Generate audio with error handling
186
+ try:
187
+ with torch.no_grad(): # Disable gradient computation
188
+ output_wav_data = model.generate(
189
+ text=text_to_speak,
190
+ audio_prompt_path=reference_audio_path,
191
+ exaggeration=exaggeration,
192
+ cfg_weight=cfg_pace,
193
+ temperature=temperature
194
+ )
195
+ except RuntimeError as e:
196
+ if "CUDA" in str(e) or "out of memory" in str(e):
197
+ print(f"CUDA error during generation: {e}")
198
+ # Try to recover by cleaning memory and retrying
199
+ cleanup_gpu_memory()
200
+ try:
201
+ with torch.no_grad():
202
+ output_wav_data = model.generate(
203
+ text=text_to_speak,
204
+ audio_prompt_path=reference_audio_path,
205
+ exaggeration=exaggeration,
206
+ cfg_weight=cfg_pace,
207
+ temperature=temperature
208
+ )
209
+ print("✓ Recovery successful after memory cleanup")
210
+ except Exception as retry_error:
211
+ print(f"✗ Recovery failed: {retry_error}")
212
+ return None, f"CUDA error: {str(e)}. GPU memory issue - please try again in a moment."
213
+ else:
214
+ raise e
215
+
216
+ # Get sample rate
217
  try:
218
  sample_rate = model.sr
219
  except:
220
  sample_rate = 24000
221
+
222
+ # Process output
 
223
  if isinstance(output_wav_data, str):
224
+ result = output_wav_data
225
  else:
226
  import numpy as np
227
  if hasattr(output_wav_data, 'cpu'):
228
  output_wav_data = output_wav_data.cpu().numpy()
229
  if output_wav_data.ndim > 1:
230
  output_wav_data = output_wav_data.squeeze()
231
+ result = (sample_rate, output_wav_data)
232
+
233
+ # Clean up GPU memory after generation
234
+ cleanup_gpu_memory()
235
+
236
+ if torch.cuda.is_available():
237
+ print(f"CUDA memory after generation: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
238
+
239
+ print("✓ Audio generated successfully")
240
+ return result, "Success: Audio generated successfully!"
241
+
242
  except Exception as e:
243
+ print(f"ERROR during audio generation: {e}")
 
244
  traceback.print_exc()
245
+
246
+ # Clean up on error
247
+ cleanup_gpu_memory()
248
+
249
+ # Provide specific error messages
250
+ error_msg = str(e)
251
+ if "CUDA" in error_msg or "device-side assert" in error_msg:
252
+ return None, f"CUDA error: {error_msg}. This is usually a temporary GPU issue. Please try again in a moment."
253
+ elif "out of memory" in error_msg:
254
+ return None, f"GPU memory error: {error_msg}. Please try with shorter text or try again later."
255
+ else:
256
+ return None, f"Error during audio generation: {error_msg}. Check logs for more details."
257
 
258
def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
    """Resolve the reference audio to a local file and run ``clone_voice``.

    Args:
        text_to_speak: Text forwarded to ``clone_voice``.
        reference_audio_url: One of
            * a ``data:audio...`` URL whose payload after the first comma is
              base64 encoded,
            * an ``http://`` / ``https://`` URL that is downloaded,
            * anything else (local path, or a missing value such as None),
              which is passed to ``clone_voice`` unchanged.
        exaggeration, cfg_pace, random_seed, temperature: Forwarded to
            ``clone_voice`` unchanged.

    Returns:
        tuple: ``(audio, status)`` as returned by ``clone_voice``, or
        ``(None, "API Error: ...")`` when preparing the input or the call
        itself raises.
    """
    import base64
    import os
    import tempfile
    from urllib.parse import urlparse

    import requests

    # Path of a temp file created here; stays None when the caller's own
    # path is used, so cleanup never deletes a caller-owned file.
    temp_audio_path = None

    def _save_temp(payload, suffix):
        """Write payload to a new temp file and return its path."""
        nonlocal temp_audio_path
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing so a failed write is cleaned up
            temp_audio_path = temp_file.name
            temp_file.write(payload)
        return temp_audio_path

    try:
        # Handle different audio input formats
        if not isinstance(reference_audio_url, str):
            # No usable reference (e.g. nothing uploaded): let clone_voice
            # produce its own validation message instead of failing here.
            audio_path = reference_audio_url
        elif reference_audio_url.startswith('data:audio'):
            header, encoded = reference_audio_url.split(',', 1)
            audio_data = base64.b64decode(encoded)
            header = header.lower()
            # MP3 data URLs normally carry the MIME type audio/mpeg
            ext = '.mp3' if ('mp3' in header or 'mpeg' in header) else '.wav'
            audio_path = _save_temp(audio_data, ext)
        elif reference_audio_url.startswith(('http://', 'https://')):
            # NOTE(security): this fetches a caller-supplied URL; restrict the
            # allowed hosts if this endpoint is exposed to untrusted callers.
            response = requests.get(reference_audio_url, timeout=30)
            response.raise_for_status()
            # Look at the URL path only, so query strings do not hide the extension
            url_path = urlparse(reference_audio_url).path.lower()
            ext = '.mp3' if url_path.endswith('.mp3') else '.wav'
            audio_path = _save_temp(response.content, ext)
        else:
            audio_path = reference_audio_url

        # Generate audio
        audio_output, status = clone_voice(text_to_speak, audio_path, exaggeration, cfg_pace, random_seed, temperature)

        return audio_output, status

    except Exception as e:
        print(f"API Error: {e}")
        return None, f"API Error: {str(e)}"
    finally:
        # Clean up temporary file
        if temp_audio_path:
            try:
                os.unlink(temp_audio_path)
            except OSError:
                # Best effort: a leftover temp file must not mask the result
                pass
 
300
 
301
def main():
    """Build the Gradio Blocks interface and start the web server.

    Two endpoints are registered: ``predict`` (the visible form) and
    ``clone_voice`` (hidden components that accept the reference audio as
    text, e.g. a base64 data URL). Both run ``clone_voice_api``.
    """
    print("Starting Advanced Gradio interface...")

    # Create a Blocks interface with multiple functions
    with gr.Blocks(title="🎙️ Advanced Chatterbox Voice Cloning") as demo:
        gr.Markdown("# 🎙️ Advanced Chatterbox Voice Cloning")
        gr.Markdown("Clone any voice using advanced AI technology with fine-tuned controls.")

        with gr.Row():
            with gr.Column(scale=2):
                # Main interface inputs
                text_input = gr.Textbox(
                    label="Text to Speak",
                    placeholder="Enter the text you want the cloned voice to say...",
                    lines=3
                )
                audio_input = gr.Audio(
                    type="filepath",
                    label="Reference Audio (Upload a short .wav or .mp3 clip)",
                    sources=["upload", "microphone"]
                )

                with gr.Accordion("🔧 Advanced Settings", open=False):
                    with gr.Row():
                        exaggeration_input = gr.Slider(
                            minimum=0.25,
                            maximum=1.0,
                            value=0.6,
                            step=0.05,
                            label="Exaggeration",
                            info="Controls voice characteristic emphasis"
                        )
                        cfg_pace_input = gr.Slider(
                            minimum=0.2,
                            maximum=1.0,
                            value=0.3,
                            step=0.05,
                            label="CFG/Pace",
                            info="Classifier-free guidance weight"
                        )
                    with gr.Row():
                        seed_input = gr.Number(
                            value=0,
                            label="Random Seed",
                            info="Set to 0 for random results",
                            precision=0
                        )
                        temperature_input = gr.Slider(
                            minimum=0.05,
                            maximum=2.0,
                            value=0.6,
                            step=0.05,
                            label="Temperature",
                            info="Controls randomness in generation"
                        )

                generate_btn = gr.Button("🎵 Generate Voice Clone", variant="primary", size="lg")

            with gr.Column(scale=1):
                # Outputs
                audio_output = gr.Audio(label="Generated Audio", type="numpy")
                status_output = gr.Textbox(label="Status", lines=2)

        with gr.Accordion("📝 Examples", open=False):
            gr.Examples(
                examples=[
                    ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
                    ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
                    ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
                ],
                inputs=[text_input, audio_input, exaggeration_input, cfg_pace_input, seed_input, temperature_input]
            )

        # Main interface function (for file uploads)
        generate_btn.click(
            fn=clone_voice_api,
            inputs=[text_input, audio_input, exaggeration_input, cfg_pace_input, seed_input, temperature_input],
            outputs=[audio_output, status_output],
            api_name="predict"
        )

        # API function for base64 data (for external API calls)
        def clone_voice_base64_api(text_to_speak, reference_audio_b64, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
            """API function that accepts base64 audio data directly."""
            return clone_voice_api(text_to_speak, reference_audio_b64, exaggeration, cfg_pace, random_seed, temperature)

        # Hidden inputs/outputs for the base64 API
        with gr.Row(visible=False):
            api_text_input = gr.Textbox()
            api_audio_input = gr.Textbox()  # This will receive base64 data URL
            api_exaggeration_input = gr.Slider(minimum=0.25, maximum=1.0, value=0.6)
            api_cfg_pace_input = gr.Slider(minimum=0.2, maximum=1.0, value=0.3)
            api_seed_input = gr.Number(value=0, precision=0)
            api_temperature_input = gr.Slider(minimum=0.05, maximum=2.0, value=0.6)
            api_audio_output = gr.Audio(type="numpy")
            api_status_output = gr.Textbox()
            api_btn = gr.Button()

        # API endpoint for base64 data
        api_btn.click(
            fn=clone_voice_base64_api,
            inputs=[api_text_input, api_audio_input, api_exaggeration_input, api_cfg_pace_input, api_seed_input, api_temperature_input],
            outputs=[api_audio_output, api_status_output],
            api_name="clone_voice"
        )

    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        quiet=False,
        favicon_path=None,
        share=False,
        auth=None
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  if __name__ == "__main__":
308
+ main()