"""Gradio front end for the hosted ResembleAI/Chatterbox text-to-speech Space.

The UI collects the text and generation settings, forwards them to the
Space's ``/generate_tts_audio`` endpoint through ``gradio_client``, and hands
whatever the endpoint returns to an audio output component.
"""

import os
from typing import Optional

import gradio as gr
from gradio_client import Client, handle_file

# Hugging Face token read from the environment. The originally configured
# variable name is checked first so existing setups keep working; ``HF_TOKEN``
# is accepted as a fallback. The result may be ``None`` if neither is set.
hf_token = os.getenv("your_huggingface_token") or os.getenv("HF_TOKEN")

# Client for the remote Space. NOTE: this is created at import time, so
# importing this module contacts the Space.
client = Client("ResembleAI/Chatterbox", hf_token)


def generate_tts_audio(
    text_input: str,
    audio_prompt_url: Optional[str],
    exaggeration_input: float,
    temperature_input: float,
    seed_num_input: float,
    cfgw_input: float,
):
    """Request synthesized speech from the remote Chatterbox endpoint.

    Args:
        text_input: Text to synthesize.
        audio_prompt_url: Path or URL of an optional reference audio clip.
            ``None``, an empty string, or whitespace means "no reference".
        exaggeration_input: Forwarded unchanged as ``exaggeration_input``.
        temperature_input: Forwarded unchanged as ``temperature_input``.
        seed_num_input: Forwarded unchanged as ``seed_num_input``.
        cfgw_input: Forwarded unchanged as ``cfgw_input``.

    Returns:
        The value returned by ``client.predict`` (presumably a path to the
        generated audio file -- confirm against the Space's API page), or
        ``None`` if anything went wrong.
    """
    try:
        print("sending request")

        # Only wrap the reference clip when one was actually supplied;
        # otherwise send None so the endpoint receives no audio prompt.
        if audio_prompt_url and audio_prompt_url.strip():
            audio_prompt = handle_file(audio_prompt_url)
        else:
            audio_prompt = None

        result = client.predict(
            text_input=text_input,
            audio_prompt_path_input=audio_prompt,
            exaggeration_input=exaggeration_input,
            temperature_input=temperature_input,
            seed_num_input=seed_num_input,
            cfgw_input=cfgw_input,
            api_name="/generate_tts_audio",
        )
        print("API Result:", result)
        return result
    except Exception as e:
        # Top-level UI boundary: keep the app alive and keep returning None,
        # but also surface the failure in the browser instead of leaving the
        # user with an unexplained empty output.
        print(f"An error occurred: {e}")
        gr.Warning(f"Speech generation failed: {e}")
        return None


# UI definition: one input component per parameter of generate_tts_audio,
# listed in the same order as the function signature.
interface = gr.Interface(
    fn=generate_tts_audio,
    inputs=[
        gr.Textbox(
            label="Text to Synthesize",
            placeholder="Enter your text here...",
        ),
        gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Reference Audio File (Optional)",
        ),
        gr.Slider(minimum=0, maximum=1, value=0.5, label="Exaggeration"),
        gr.Slider(minimum=0, maximum=1, value=0.8, label="Temperature"),
        gr.Number(label="Seed Number", value=0),
        gr.Slider(minimum=0, maximum=1, value=0.5, label="CFG/Pace"),
    ],
    outputs=gr.Audio(label="Generated Audio"),
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()