import gradio as gr
from gradio_client import Client, handle_file
import os

# Hugging Face access token, looked up in the process environment.
# The lookup yields None when the variable is not set.
hf_token = os.environ.get("your_huggingface_token")

# Module-level Gradio client for "ResembleAI/Chatterbox", built once at import
# time; the token is passed as the second positional argument.
client = Client("ResembleAI/Chatterbox", hf_token)

def generate_tts_audio(text_input, audio_prompt_url, exaggeration_input, temperature_input, seed_num_input, cfgw_input):
    """Call the remote ``/generate_tts_audio`` endpoint and return its result.

    The arguments are forwarded through the module-level ``client`` under the
    keyword names the endpoint is called with below.

    Args:
        text_input: Text to synthesize.
        audio_prompt_url: Optional reference audio location. ``None`` or a
            blank string means no reference audio is sent; any other value is
            wrapped with ``handle_file`` before being forwarded.
        exaggeration_input: Forwarded unchanged as ``exaggeration_input``.
        temperature_input: Forwarded unchanged as ``temperature_input``.
        seed_num_input: Forwarded unchanged as ``seed_num_input``.
        cfgw_input: Forwarded unchanged as ``cfgw_input``.

    Returns:
        Whatever ``client.predict`` returns, or ``None`` when the request
        fails for any reason.
    """
    try:
        print("sending request")
        # Only wrap the reference audio when one was actually supplied;
        # otherwise send None so the endpoint receives no audio prompt.
        if audio_prompt_url and audio_prompt_url.strip():
            audio_prompt = handle_file(audio_prompt_url)
        else:
            audio_prompt = None

        result = client.predict(
            text_input=text_input,
            audio_prompt_path_input=audio_prompt,
            exaggeration_input=exaggeration_input,
            temperature_input=temperature_input,
            seed_num_input=seed_num_input,
            cfgw_input=cfgw_input,
            api_name="/generate_tts_audio"
        )
        print("API Result:", result)
        return result

    except Exception as e:
        # Top-level boundary for the UI callback: keep the server-side log and
        # the None return value, but also raise a visible alert so the user is
        # not left with a silently empty audio output.
        print(f"An error occurred: {e}")
        gr.Warning(f"Speech generation failed: {e}")
        return None


# Input widgets, declared in the same order as the parameters of
# generate_tts_audio.
_tts_inputs = [
    gr.Textbox(placeholder="Enter your text here...", label="Text to Synthesize"),
    gr.Audio(
        label="Reference Audio File (Optional)",
        type="filepath",
        sources=["upload", "microphone"],
    ),
    gr.Slider(label="Exaggeration", value=0.5, minimum=0, maximum=1),
    gr.Slider(label="Temperature", value=0.8, minimum=0, maximum=1),
    gr.Number(value=0, label="Seed Number"),
    gr.Slider(label="CFG/Pace", value=0.5, minimum=0, maximum=1),
]

# Single audio widget that receives the value returned by generate_tts_audio.
_tts_output = gr.Audio(label="Generated Audio")

# Wire the callback to its widgets.
interface = gr.Interface(
    fn=generate_tts_audio,
    inputs=_tts_inputs,
    outputs=_tts_output,
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()