Spaces:

Artificial-superintelligence
/

ATHAI

Running

File size: 2,640 Bytes

fe12fb0
 
 
 
 
 
1129b43
fe12fb0
 
 
d436d1d
 
 
 
 
fe12fb0
 
 
d436d1d
 
 
fe12fb0
 
 
 
 
1129b43
fe12fb0
 
d436d1d
fe12fb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1129b43
fe12fb0
 
 
d436d1d
fe12fb0
d436d1d
 
 
 
fe12fb0
 
d436d1d
6bc8e69
d436d1d
 
 
 
 
 
 
 
 
2f5c55f
d436d1d
 
 
 
fe12fb0
d436d1d

import os
import requests
import google.generativeai as genai
import gradio as gr

# Authenticate the Google Generative AI client with the key read from the environment
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Generation settings passed to the model constructor below
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# Build the model; the system instruction defines the "Ath" persona
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config=generation_config,
    system_instruction=(
        "You are a human, living on Earth. "
        "You speak friendly and use casual language. "
        "You never say you're an AI or program. "
        "Your name is Ath, and you always speak like a 100% human."
    ),
)

# One chat session created at import time, starting with an empty history
chat_session = model.start_chat(history=[])

# Eleven Labs text-to-speech settings: API key from the environment and the
# endpoint URL (the last path segment is the voice identifier)
eleven_labs_api_key = os.environ.get("ELEVEN_LABS_API_KEY")
eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"

def chat_and_tts(user_input, history):
    """Send a user message to the chat session and synthesize the reply as speech.

    Args:
        user_input: Text entered by the user. ``None``, empty, or
            whitespace-only input is ignored.
        history: List of ``(user_message, reply_text)`` tuples accumulated so
            far. It is appended to in place; ``None`` is treated as an empty
            list.

    Returns:
        A ``(history, history, audio_path)`` tuple. The history appears twice
        so it can feed both a chat display and a state holder. ``audio_path``
        is the path of an MP3 file containing the spoken reply, or ``None``
        when no audio was produced (blank input, a failed request, or a
        non-200 response from the text-to-speech API).
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    if history is None:
        history = []

    # Nothing to send: return before contacting any remote service.
    if not user_input or not user_input.strip():
        return history, history, None

    # Send the user's input to the chat session
    response = chat_session.send_message(user_input)
    response_text = response.text

    # Eleven Labs text-to-speech request payload
    payload = {
        "text": response_text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }
    headers = {
        "xi-api-key": eleven_labs_api_key,
        "Content-Type": "application/json"
    }

    audio_path = None
    try:
        # A timeout keeps a stalled TTS service from hanging the handler forever.
        tts_response = requests.post(
            eleven_labs_url, json=payload, headers=headers, timeout=30
        )
    except requests.RequestException:
        # Audio is best-effort: the text reply is still returned without it.
        tts_response = None

    if tts_response is not None and tts_response.status_code == 200:
        # A unique file per reply, so overlapping requests cannot overwrite
        # each other's audio. delete=False leaves the file on disk for the
        # caller to serve; nothing here removes it afterwards.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file.write(tts_response.content)
            audio_path = audio_file.name

    # Update the chat history
    history.append((user_input, response_text))

    return history, history, audio_path

# Assemble the Gradio interface: chat controls in one column, audio player in the other
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
    gr.Markdown(
        "Ask any question and get a friendly response from Ath. "
        "The response will also be converted to speech."
    )

    with gr.Row():
        # First column (scale=2): conversation display, text input, send button
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(label="Your Question", placeholder="Ask me anything...")
            submit_btn = gr.Button("Send")

        # Second column (scale=1): player for the synthesized reply
        with gr.Column(scale=1):
            audio_output = gr.Audio(type="filepath", label="Response Audio")

    # Conversation history carried between clicks, initially empty
    state = gr.State([])

    # Clicking "Send" runs chat_and_tts; its three return values map, in order,
    # onto the chatbot display, the stored history, and the audio player.
    submit_btn.click(
        fn=chat_and_tts,
        inputs=[user_input, state],
        outputs=[chatbot, state, audio_output],
    )

demo.launch()