Spaces:

ramimu
/

voice_cloning

Running on Zero

App Files Files Community

ramimu commited on May 31

Commit

deb04b6

1 Parent(s): 652c965

Changes to be committed:

Browse files

Files changed (1) hide show

app.py +89 -243

app.py CHANGED Viewed

@@ -1,53 +1,46 @@
 import gradio as gr
 import os
-import traceback # For detailed error logging
 import torch
 from huggingface_hub import hf_hub_download
 import shutil
 import spaces
-# Import configuration
 try:
     from config import MODEL_REPO_ID, MODEL_FILES, LOCAL_MODEL_PATH
 except ImportError:
-    # Fallback configuration if config.py is not found
     MODEL_REPO_ID = "ramimu/chatterbox-voice-cloning-model"
     LOCAL_MODEL_PATH = "./chatterbox_model_files"
     MODEL_FILES = ["s3gen.pt", "t3_cfg.pt", "ve.pt", "tokenizer.json"]
-# Try importing chatterbox with better error handling
 try:
     from chatterbox.tts import ChatterboxTTS
     chatterbox_available = True
     print("Chatterbox TTS imported successfully")
-    # Inspect the ChatterboxTTS class to understand its API
     import inspect
     print(f"ChatterboxTTS methods: {[method for method in dir(ChatterboxTTS) if not method.startswith('_')]}")
-    # Check constructor signature
     try:
         sig = inspect.signature(ChatterboxTTS.__init__)
         print(f"ChatterboxTTS.__init__ signature: {sig}")
     except:
         pass
-    # Check from_local signature if it exists
     if hasattr(ChatterboxTTS, 'from_local'):
         try:
             sig = inspect.signature(ChatterboxTTS.from_local)
             print(f"ChatterboxTTS.from_local signature: {sig}")
         except:
             pass
-    # Check from_pretrained signature if it exists
     if hasattr(ChatterboxTTS, 'from_pretrained'):
         try:
             sig = inspect.signature(ChatterboxTTS.from_pretrained)
             print(f"ChatterboxTTS.from_pretrained signature: {sig}")
         except:
             pass
 except ImportError as e:
     print(f"Failed to import ChatterboxTTS: {e}")
     print("Trying alternative import...")
@@ -60,16 +53,11 @@ except ImportError as e:
         print(f"Alternative import also failed: {e2}")
         chatterbox_available = False
-# --- Global Model Variable ---
 model = None
 def download_model_files():
-    """Download model files from Hugging Face Hub if they don't exist locally"""
     print(f"Checking for model files in {LOCAL_MODEL_PATH}...")
-    # Create model directory if it doesn't exist
     os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)
     for filename in MODEL_FILES:
         local_path = os.path.join(LOCAL_MODEL_PATH, filename)
         if not os.path.exists(local_path):
@@ -79,9 +67,8 @@ def download_model_files():
                     repo_id=MODEL_REPO_ID,
                     filename=filename,
                     cache_dir="./cache",
-                    force_download=False  # Use cache if available
                 )
-                # Copy to our local model path
                 shutil.copy2(downloaded_path, local_path)
                 print(f"✓ Downloaded and copied {filename}")
             except Exception as e:
@@ -89,10 +76,8 @@ def download_model_files():
                 raise e
         else:
             print(f"✓ {filename} already exists locally")
     print("All model files are ready!")
-# --- Load the Model ---
 if chatterbox_available:
     print("Downloading model files from Hugging Face Hub...")
     try:
@@ -100,7 +85,7 @@ if chatterbox_available:
     except Exception as e:
         print(f"ERROR: Failed to download model files: {e}")
         print("Model loading will fail without these files.")
     print(f"Attempting to load Chatterbox model from local directory: {LOCAL_MODEL_PATH}")
     if not os.path.exists(LOCAL_MODEL_PATH):
         print(f"ERROR: Local model directory not found at {LOCAL_MODEL_PATH}")
@@ -108,83 +93,62 @@ if chatterbox_available:
     else:
         print(f"Contents of {LOCAL_MODEL_PATH}: {os.listdir(LOCAL_MODEL_PATH)}")
         try:
-            # Load the model from the specified local directory
-            # Set device to CPU or CUDA if available
             device = "cuda" if torch.cuda.is_available() else "cpu"
             print(f"Using device: {device}")
-            # Based on API inspection:
-            # ChatterboxTTS.from_local signature: (ckpt_dir, device) -> 'ChatterboxTTS'
-            # ChatterboxTTS.from_pretrained signature: (device) -> 'ChatterboxTTS'
             try:
-                # Method 1: Use from_local with correct signature (ckpt_dir, device)
                 model = ChatterboxTTS.from_local(LOCAL_MODEL_PATH, device)
                 print("Chatterbox model loaded successfully using from_local method.")
             except Exception as e1:
                 print(f"from_local attempt failed: {e1}")
                 try:
-                    # Method 2: Use from_pretrained with device only
                     model = ChatterboxTTS.from_pretrained(device)
                     print("Chatterbox model loaded successfully with from_pretrained.")
                 except Exception as e2:
                     print(f"from_pretrained failed: {e2}")
                     try:
-                        # Method 3: Manual loading with correct constructor signature
-                        # ChatterboxTTS.__init__ signature: (self, t3, s3gen, ve, tokenizer, device, conds=None)
                         import pathlib
                         import json
                         model_path = pathlib.Path(LOCAL_MODEL_PATH)
                         print(f"Manual loading with correct constructor signature...")
-                        # Load all components
                         s3gen_path = model_path / "s3gen.pt"
                         ve_path = model_path / "ve.pt"
                         tokenizer_path = model_path / "tokenizer.json"
                         t3_cfg_path = model_path / "t3_cfg.pt"
                         print(f"  Loading s3gen from: {s3gen_path}")
                         s3gen = torch.load(s3gen_path, map_location=torch.device('cpu'))
                         print(f"  Loading ve from: {ve_path}")
                         ve = torch.load(ve_path, map_location=torch.device('cpu'))
                         print(f"  Loading t3_cfg from: {t3_cfg_path}")
                         t3_cfg = torch.load(t3_cfg_path, map_location=torch.device('cpu'))
                         print(f"  Loading tokenizer from: {tokenizer_path}")
                         with open(tokenizer_path, 'r') as f:
                             tokenizer_data = json.load(f)
-                        # The tokenizer might need to be instantiated as a proper object
-                        # Let's try to use the ChatterboxTTS internal tokenizer class
                         try:
                             from chatterbox.models.tokenizers.tokenizer import EnTokenizer
                             tokenizer = EnTokenizer.from_dict(tokenizer_data)
                             print("  Created EnTokenizer from JSON data")
                         except Exception as tok_error:
                             print(f"  Could not create EnTokenizer: {tok_error}")
-                            tokenizer = tokenizer_data  # Use raw data as fallback
                         print("  Creating ChatterboxTTS instance with correct signature...")
-                        # Constructor signature: (self, t3, s3gen, ve, tokenizer, device, conds=None)
                         model = ChatterboxTTS(
                             t3=t3_cfg,
-                            s3gen=s3gen,
                             ve=ve,
                             tokenizer=tokenizer,
                             device=device
                         )
                         print("Chatterbox model loaded successfully with manual constructor.")
                     except Exception as e3:
                         print(f"Manual loading failed: {e3}")
                         print(f"Detailed error: {str(e3)}")
-                        # Last resort: try with different parameter orders
                         try:
                             print("Trying alternative parameter order...")
                             model = ChatterboxTTS(
@@ -194,12 +158,12 @@ if chatterbox_available:
                         except Exception as e4:
                             print(f"Alternative parameter order failed: {e4}")
                             raise e3
         except Exception as e:
             print(f"ERROR: Failed to load Chatterbox model from local directory: {e}")
             print("Detailed error trace:")
-            traceback.print_exc() # Prints the full traceback to the Hugging Face Space logs
-            model = None # Ensure model is None if loading fails
 else:
     print("ERROR: Chatterbox TTS library not available")
@@ -223,116 +187,86 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
         print(f"  Random Seed: {random_seed}")
         print(f"  Temperature: {temperature}")
-        # Set random seed if specified
         if random_seed > 0:
             import torch
             torch.manual_seed(random_seed)
             if torch.cuda.is_available():
                 torch.cuda.manual_seed(random_seed)
-        # Use the correct ChatterboxTTS generate method signature with advanced parameters
         output_wav_data = model.generate(
             text=text_to_speak,
             audio_prompt_path=reference_audio_path,
-            exaggeration=exaggeration,    # Controls how much the voice characteristics are emphasized
-            cfg_weight=cfg_pace,          # Classifier-free guidance weight (pace)
-            temperature=temperature       # Controls randomness in generation
         )
-        # Get the sample rate from the model
         try:
-            sample_rate = model.sr  # ChatterboxTTS uses 'sr' attribute
         except:
-            sample_rate = 24000  # Default fallback
         print(f"Audio generated successfully. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
-        # Handle different output formats
         if isinstance(output_wav_data, str):
-            # If it's a file path, return the path
             return output_wav_data, "Success: Audio generated successfully!"
         else:
-            # If it's numpy array or tensor, return with sample rate
             import numpy as np
             if hasattr(output_wav_data, 'cpu'):
-                # Convert tensor to numpy if needed
                 output_wav_data = output_wav_data.cpu().numpy()
-            # Ensure it's the right shape for Gradio (1D array)
             if output_wav_data.ndim > 1:
                 output_wav_data = output_wav_data.squeeze()
             return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
     except Exception as e:
         print(f"ERROR: Failed during audio generation: {e}")
         print("Detailed error trace for audio generation:")
-        traceback.print_exc() # Prints the full traceback
         return None, f"Error during audio generation: {str(e)}. Check logs for more details."
-# --- API Endpoint Function ---
 def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
-    """
-    API version of clone_voice that accepts URL or base64 audio data
-    """
     import requests
     import tempfile
     import os
     import base64
-    # Handle different audio input formats
     temp_audio_path = None
     try:
         if reference_audio_url.startswith('data:audio'):
-            # Handle base64 encoded audio
             header, encoded = reference_audio_url.split(',', 1)
             audio_data = base64.b64decode(encoded)
-            # Determine file extension from MIME type
             if 'mp3' in header:
                 ext = '.mp3'
             elif 'wav' in header:
                 ext = '.wav'
             else:
-                ext = '.wav'  # Default
-            # Save to temporary file
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(audio_data)
                 temp_audio_path = temp_file.name
         elif reference_audio_url.startswith('http'):
-            # Download audio from URL
             response = requests.get(reference_audio_url)
             response.raise_for_status()
-            # Determine extension from URL or content type
             if reference_audio_url.endswith('.mp3'):
                 ext = '.mp3'
             elif reference_audio_url.endswith('.wav'):
                 ext = '.wav'
             else:
-                ext = '.wav'  # Default
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(response.content)
                 temp_audio_path = temp_file.name
         else:
-            # Assume it's a local file path
             temp_audio_path = reference_audio_url
-        # Call the main clone_voice function
         audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
-        # Clean up temporary file if we created one
         if temp_audio_path and temp_audio_path != reference_audio_url:
             try:
                 os.unlink(temp_audio_path)
             except:
                 pass
         return audio_output, status
     except Exception as e:
         if temp_audio_path and temp_audio_path != reference_audio_url:
             try:
@@ -341,160 +275,72 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
                 pass
         return None, f"API Error: {str(e)}"
-# --- Define Gradio Interface ---
-# --- Define Gradio Interface ---
-with gr.Blocks(title="Advanced Chatterbox Voice Cloning", theme=gr.themes.Soft()) as iface:
-    gr.Markdown("# 🎙️ Advanced Chatterbox Voice Cloning")
-    gr.Markdown("Clone any voice using advanced AI technology with fine-tuned controls.")
-    with gr.Row():
-        with gr.Column(scale=2):
-            # Main inputs
-            text_input = gr.Textbox(
                 label="Text to Speak",
                 placeholder="Enter the text you want the cloned voice to say...",
                 lines=3
-            )
-            audio_input = gr.Audio(
                 type="filepath",
                 label="Reference Audio (Upload a short .wav or .mp3 clip)",
                 sources=["upload", "microphone"]
             )
-            # Advanced controls in an accordion
-            with gr.Accordion("🔧 Advanced Settings", open=False):
-                with gr.Row():
-                    exaggeration = gr.Slider(
-                        minimum=0.25,
-                        maximum=1.0,
-                        value=0.6,
-                        step=0.05,
-                        label="Exaggeration",
-                        info="Controls voice characteristic emphasis (0.5 = neutral, higher = more exaggerated)"
-                    )
-                    cfg_pace = gr.Slider(
-                        minimum=0.2,
-                        maximum=1.0,
-                        value=0.3,
-                        step=0.05,
-                        label="CFG/Pace",
-                        info="Classifier-free guidance weight (affects generation quality and pace)"
-                    )
-                with gr.Row():
-                    random_seed = gr.Number(
-                        value=0,
-                        label="Random Seed",
-                        info="Set to 0 for random results, or use a specific number for reproducible outputs",
-                        precision=0
-                    )
-                    temperature = gr.Slider(
-                        minimum=0.05,
-                        maximum=2.0,
-                        value=0.6,
-                        step=0.05,
-                        label="Temperature",
-                        info="Controls randomness in generation (lower = more consistent, higher = more varied)"
-                    )
-            # Generate button
-            generate_btn = gr.Button("🎵 Generate Voice Clone", variant="primary", size="lg")
-        with gr.Column(scale=1):
-            # Outputs
-            audio_output = gr.Audio(
-                label="Generated Audio",
-                type="numpy",
-                interactive=False
-            )
-            status_output = gr.Textbox(
-                label="Status",
-                interactive=False,
-                lines=2
-            )
-    # API Information
-    with gr.Accordion("🔌 API Usage", open=False):
-        gr.Markdown("""
-        ### Using this as an API endpoint
-        You can use this Hugging Face Space as an API endpoint in your applications:
-        **Endpoint URL:** `https://your-username-voice-cloning.hf.space/api/predict`
-        **Example Python code:**
-        ```python
-        import requests
-        import base64
-        # Encode your audio file
-        with open("reference_audio.wav", "rb") as f:
-            audio_data = base64.b64encode(f.read()).decode()
-            audio_url = f"data:audio/wav;base64,{audio_data}"
-        # API request
-        response = requests.post(
-            "https://your-username-voice-cloning.hf.space/api/predict",
-            json={
-                "data": [
-                    "Hello, this is my cloned voice!",  # text
-                    audio_url,                          # reference audio (base64 or URL)
-                    0.6,                               # exaggeration
-                    0.3,                               # cfg_pace
-                    0,                                 # random_seed
-                    0.6                                # temperature
-                ]
-            }
-        )
-        ```
-        **Parameters:**
-        - `text_to_speak`: Text to synthesize
-        - `reference_audio`: Base64 encoded audio or URL
-        - `exaggeration`: Voice emphasis (0.25-1.0, default: 0.6)
-        - `cfg_pace`: Generation guidance (0.2-1.0, default: 0.3)
-        - `random_seed`: Reproducibility seed (0 for random, default: 0)
-        - `temperature`: Generation randomness (0.05-2.0, default: 0.6)
-        """)
-    # Examples
-    with gr.Accordion("📝 Examples", open=False):
-        gr.Examples(
-            examples=[
-                ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
-                ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
-                ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
-            ],
-            inputs=[text_input, audio_input, exaggeration, cfg_pace, random_seed, temperature],
-            outputs=[audio_output, status_output],
-            fn=clone_voice,
-            cache_examples=False
-        )
-    # Connect the generate button
-    generate_btn.click(
-        fn=clone_voice,
-        inputs=[text_input, audio_input, exaggeration, cfg_pace, random_seed, temperature],
-        outputs=[audio_output, status_output],
-        api_name="clone_voice"  # This enables API access
     )
-# --- Launch the Gradio App ---
-def main():
-    print("Starting Advanced Gradio interface...")
-    # Launch with specific configuration for API access and avoid manifest issues
     iface.launch(
-        server_name="0.0.0.0",  # Allow external connections
-        server_port=7860,       # Explicit port
-        show_error=True,        # Show detailed errors
-        quiet=False,            # Show startup logs
-        favicon_path=None,      # Disable favicon to avoid 404
-        share=False,            # Set to True if you want a public link
-        auth=None,              # Add authentication if needed: ("username", "password")
-        app_kwargs={
-            "docs_url": "/docs",     # Enable API docs at /docs
-            "redoc_url": "/redoc"    # Enable alternative docs at /redoc
-        }
     )
 if __name__ == "__main__":

 import gradio as gr
 import os
+import traceback
 import torch
 from huggingface_hub import hf_hub_download
 import shutil
 import spaces
 try:
     from config import MODEL_REPO_ID, MODEL_FILES, LOCAL_MODEL_PATH
 except ImportError:
     MODEL_REPO_ID = "ramimu/chatterbox-voice-cloning-model"
     LOCAL_MODEL_PATH = "./chatterbox_model_files"
     MODEL_FILES = ["s3gen.pt", "t3_cfg.pt", "ve.pt", "tokenizer.json"]
 try:
     from chatterbox.tts import ChatterboxTTS
     chatterbox_available = True
     print("Chatterbox TTS imported successfully")
     import inspect
     print(f"ChatterboxTTS methods: {[method for method in dir(ChatterboxTTS) if not method.startswith('_')]}")
     try:
         sig = inspect.signature(ChatterboxTTS.__init__)
         print(f"ChatterboxTTS.__init__ signature: {sig}")
     except:
         pass
     if hasattr(ChatterboxTTS, 'from_local'):
         try:
             sig = inspect.signature(ChatterboxTTS.from_local)
             print(f"ChatterboxTTS.from_local signature: {sig}")
         except:
             pass
     if hasattr(ChatterboxTTS, 'from_pretrained'):
         try:
             sig = inspect.signature(ChatterboxTTS.from_pretrained)
             print(f"ChatterboxTTS.from_pretrained signature: {sig}")
         except:
             pass
 except ImportError as e:
     print(f"Failed to import ChatterboxTTS: {e}")
     print("Trying alternative import...")
         print(f"Alternative import also failed: {e2}")
         chatterbox_available = False
 model = None
 def download_model_files():
     print(f"Checking for model files in {LOCAL_MODEL_PATH}...")
     os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)
     for filename in MODEL_FILES:
         local_path = os.path.join(LOCAL_MODEL_PATH, filename)
         if not os.path.exists(local_path):
                     repo_id=MODEL_REPO_ID,
                     filename=filename,
                     cache_dir="./cache",
+                    force_download=False
                 )
                 shutil.copy2(downloaded_path, local_path)
                 print(f"✓ Downloaded and copied {filename}")
             except Exception as e:
                 raise e
         else:
             print(f"✓ {filename} already exists locally")
     print("All model files are ready!")
 if chatterbox_available:
     print("Downloading model files from Hugging Face Hub...")
     try:
     except Exception as e:
         print(f"ERROR: Failed to download model files: {e}")
         print("Model loading will fail without these files.")
     print(f"Attempting to load Chatterbox model from local directory: {LOCAL_MODEL_PATH}")
     if not os.path.exists(LOCAL_MODEL_PATH):
         print(f"ERROR: Local model directory not found at {LOCAL_MODEL_PATH}")
     else:
         print(f"Contents of {LOCAL_MODEL_PATH}: {os.listdir(LOCAL_MODEL_PATH)}")
         try:
             device = "cuda" if torch.cuda.is_available() else "cpu"
             print(f"Using device: {device}")
             try:
                 model = ChatterboxTTS.from_local(LOCAL_MODEL_PATH, device)
                 print("Chatterbox model loaded successfully using from_local method.")
             except Exception as e1:
                 print(f"from_local attempt failed: {e1}")
                 try:
                     model = ChatterboxTTS.from_pretrained(device)
                     print("Chatterbox model loaded successfully with from_pretrained.")
                 except Exception as e2:
                     print(f"from_pretrained failed: {e2}")
                     try:
                         import pathlib
                         import json
                         model_path = pathlib.Path(LOCAL_MODEL_PATH)
                         print(f"Manual loading with correct constructor signature...")
                         s3gen_path = model_path / "s3gen.pt"
                         ve_path = model_path / "ve.pt"
                         tokenizer_path = model_path / "tokenizer.json"
                         t3_cfg_path = model_path / "t3_cfg.pt"
                         print(f"  Loading s3gen from: {s3gen_path}")
                         s3gen = torch.load(s3gen_path, map_location=torch.device('cpu'))
                         print(f"  Loading ve from: {ve_path}")
                         ve = torch.load(ve_path, map_location=torch.device('cpu'))
                         print(f"  Loading t3_cfg from: {t3_cfg_path}")
                         t3_cfg = torch.load(t3_cfg_path, map_location=torch.device('cpu'))
                         print(f"  Loading tokenizer from: {tokenizer_path}")
                         with open(tokenizer_path, 'r') as f:
                             tokenizer_data = json.load(f)
                         try:
                             from chatterbox.models.tokenizers.tokenizer import EnTokenizer
                             tokenizer = EnTokenizer.from_dict(tokenizer_data)
                             print("  Created EnTokenizer from JSON data")
                         except Exception as tok_error:
                             print(f"  Could not create EnTokenizer: {tok_error}")
+                            tokenizer = tokenizer_data
                         print("  Creating ChatterboxTTS instance with correct signature...")
                         model = ChatterboxTTS(
                             t3=t3_cfg,
+                            s3gen=s3gen,
                             ve=ve,
                             tokenizer=tokenizer,
                             device=device
                         )
                         print("Chatterbox model loaded successfully with manual constructor.")
                     except Exception as e3:
                         print(f"Manual loading failed: {e3}")
                         print(f"Detailed error: {str(e3)}")
                         try:
                             print("Trying alternative parameter order...")
                             model = ChatterboxTTS(
                         except Exception as e4:
                             print(f"Alternative parameter order failed: {e4}")
                             raise e3
         except Exception as e:
             print(f"ERROR: Failed to load Chatterbox model from local directory: {e}")
             print("Detailed error trace:")
+            traceback.print_exc()
+            model = None
 else:
     print("ERROR: Chatterbox TTS library not available")
         print(f"  Random Seed: {random_seed}")
         print(f"  Temperature: {temperature}")
         if random_seed > 0:
             import torch
             torch.manual_seed(random_seed)
             if torch.cuda.is_available():
                 torch.cuda.manual_seed(random_seed)
         output_wav_data = model.generate(
             text=text_to_speak,
             audio_prompt_path=reference_audio_path,
+            exaggeration=exaggeration,
+            cfg_weight=cfg_pace,
+            temperature=temperature
         )
         try:
+            sample_rate = model.sr
         except:
+            sample_rate = 24000
         print(f"Audio generated successfully. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
         if isinstance(output_wav_data, str):
             return output_wav_data, "Success: Audio generated successfully!"
         else:
             import numpy as np
             if hasattr(output_wav_data, 'cpu'):
                 output_wav_data = output_wav_data.cpu().numpy()
             if output_wav_data.ndim > 1:
                 output_wav_data = output_wav_data.squeeze()
             return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
     except Exception as e:
         print(f"ERROR: Failed during audio generation: {e}")
         print("Detailed error trace for audio generation:")
+        traceback.print_exc()
         return None, f"Error during audio generation: {str(e)}. Check logs for more details."
 def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
     import requests
     import tempfile
     import os
     import base64
     temp_audio_path = None
     try:
         if reference_audio_url.startswith('data:audio'):
             header, encoded = reference_audio_url.split(',', 1)
             audio_data = base64.b64decode(encoded)
             if 'mp3' in header:
                 ext = '.mp3'
             elif 'wav' in header:
                 ext = '.wav'
             else:
+                ext = '.wav'
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(audio_data)
                 temp_audio_path = temp_file.name
         elif reference_audio_url.startswith('http'):
             response = requests.get(reference_audio_url)
             response.raise_for_status()
             if reference_audio_url.endswith('.mp3'):
                 ext = '.mp3'
             elif reference_audio_url.endswith('.wav'):
                 ext = '.wav'
             else:
+                ext = '.wav'
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(response.content)
                 temp_audio_path = temp_file.name
         else:
             temp_audio_path = reference_audio_url
         audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
         if temp_audio_path and temp_audio_path != reference_audio_url:
             try:
                 os.unlink(temp_audio_path)
             except:
                 pass
         return audio_output, status
     except Exception as e:
         if temp_audio_path and temp_audio_path != reference_audio_url:
             try:
                 pass
         return None, f"API Error: {str(e)}"
+def main():
+    print("Starting Advanced Gradio interface...")
+    iface = gr.Interface(
+        fn=clone_voice_api,
+        inputs=[
+            gr.Textbox(
                 label="Text to Speak",
                 placeholder="Enter the text you want the cloned voice to say...",
                 lines=3
+            ),
+            gr.Audio(
                 type="filepath",
                 label="Reference Audio (Upload a short .wav or .mp3 clip)",
                 sources=["upload", "microphone"]
+            ),
+            gr.Slider(
+                minimum=0.25,
+                maximum=1.0,
+                value=0.6,
+                step=0.05,
+                label="Exaggeration",
+                info="Controls voice characteristic emphasis (0.5 = neutral, higher = more exaggerated)"
+            ),
+            gr.Slider(
+                minimum=0.2,
+                maximum=1.0,
+                value=0.3,
+                step=0.05,
+                label="CFG/Pace",
+                info="Classifier-free guidance weight (affects generation quality and pace)"
+            ),
+            gr.Number(
+                value=0,
+                label="Random Seed",
+                info="Set to 0 for random results, or use a specific number for reproducible outputs",
+                precision=0
+            ),
+            gr.Slider(
+                minimum=0.05,
+                maximum=2.0,
+                value=0.6,
+                step=0.05,
+                label="Temperature",
+                info="Controls randomness in generation (lower = more consistent, higher = more varied)"
             )
+        ],
+        outputs=[
+            gr.Audio(label="Generated Audio", type="numpy"),
+            gr.Textbox(label="Status", lines=2)
+        ],
+        title="🎙️ Advanced Chatterbox Voice Cloning",
+        description="Clone any voice using advanced AI technology with fine-tuned controls.",
+        examples=[
+            ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
+            ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
+            ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
+        ]
     )
     iface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        quiet=False,
+        favicon_path=None,
+        share=False,
+        auth=None
     )
 if __name__ == "__main__":