File size: 5,977 Bytes
411901c
ecce878
411901c
 
 
 
 
 
0d46b61
 
dffc0e7
411901c
 
 
dffc0e7
 
 
411901c
 
 
 
 
 
 
 
dffc0e7
411901c
 
 
 
 
 
 
 
 
 
 
dffc0e7
411901c
 
dffc0e7
665b958
411901c
 
 
 
 
 
 
 
 
 
 
dffc0e7
665b958
411901c
 
dffc0e7
 
 
 
 
 
411901c
 
 
 
 
 
dffc0e7
411901c
 
 
 
 
665b958
 
dffc0e7
411901c
665b958
 
 
 
 
 
 
 
 
 
411901c
665b958
 
 
 
 
411901c
665b958
411901c
 
dffc0e7
665b958
411901c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dffc0e7
411901c
 
 
dffc0e7
411901c
 
665b958
411901c
 
 
 
 
dffc0e7
411901c
 
 
 
 
665b958
411901c
 
 
 
665b958
411901c
 
665b958
411901c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
665b958
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
import time
from huggingface_hub import HfApi

# Logging: module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Environment variables (read consistently via os.getenv).
# HF_TOKEN      : Hugging Face token used by the TTS inference call.
# GROQ_API_KEY  : bearer token for the Groq chat-completions API.
# GROQ_MODEL    : model id; defaults to mixtral-8x7b-32768 when unset.
HF_TOKEN = os.getenv("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

# Default headers for every Groq request.
headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}

# Emotion descriptions shown in the UI and injected into the system prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}

# Module-level conversation history ("User: ..." / "AI: ..." strings).
conversation_history = []

# Transcribe audio

def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* using Google's recognizer.

    Returns the recognized text, or an empty string when recognition or
    file handling fails (the failure is logged, never raised).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded)
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""

# Generate Groq response

def get_groq_response(prompt, history):
    """Send the system *prompt* plus the chat *history* to the Groq API.

    *history* is a list of strings prefixed with "User: " or "AI: "; each
    entry is converted to the corresponding OpenAI-style message role.
    Returns the assistant's reply text, or a fixed error string on any
    failure (network, HTTP error, or unexpected payload shape).
    """
    messages = [{"role": "system", "content": prompt}]
    for msg in history:
        if msg.startswith("User: "):
            messages.append({"role": "user", "content": msg[6:]})
        elif msg.startswith("AI: "):
            messages.append({"role": "assistant", "content": msg[4:]})
    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024
    }
    try:
        # timeout prevents the UI from hanging forever on a dead endpoint;
        # raise_for_status turns HTTP error payloads into a clear exception
        # instead of a confusing KeyError on the missing "choices" field.
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Error contacting AI."

# Generate TTS using Yarngpt

def generate_speech_and_upload(text):
    """Synthesize *text* to speech via the Yarngpt model on the HF Inference API.

    Returns the path of a temporary .wav file containing the audio, or
    None when the API call or file write fails (errors are logged).
    """
    try:
        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        # Local headers: this call authenticates with HF_TOKEN, not the
        # module-level Groq headers.
        hf_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}

        # timeout keeps a stalled inference endpoint from blocking the UI.
        response = requests.post(inference_url, headers=hf_headers, json=payload, timeout=60)
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None

        # Write through the already-open handle and close it via the context
        # manager; the original leaked the handle and reopened the file by name.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file.write(response.content)

        return temp_file.name

    except Exception as e:
        logger.error(f"Hugging Face TTS error: {e}")
        return None

# Main handler

def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: transcribe audio (if any), query Groq, synthesize TTS.

    Parameters
    ----------
    audio : str | None      -- filepath of a recorded clip (takes precedence).
    text_input : str | None -- typed message, used when no audio is given.
    emotion : str           -- key into emotion_options for the system prompt.
    history : list | None   -- Gradio Chatbot state ([[user, ai], ...]).

    Returns (status_or_reply, audio_path_or_None, updated_history).
    """
    global conversation_history
    # Gradio may pass None for an uninitialized Chatbot state; the original
    # `history + [...]` would then raise TypeError.
    history = history or []
    user_text = text_input or ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_text = transcription
        else:
            return "Couldn't understand the audio.", None, history
    if not user_text.strip():
        return "No input provided.", None, history

    conversation_history.append(f"User: {user_text}")
    # Cap the prompt context at the last 20 entries (10 exchanges).
    recent_messages = conversation_history[-20:]

    # .get() tolerates an unexpected dropdown value instead of raising KeyError.
    emotion_desc = emotion_options.get(emotion, "Unknown mood")
    prompt = f"You are an empathetic AI assistant. The user is feeling {emotion} ({emotion_desc}). Respond supportively."
    ai_response = get_groq_response(prompt, recent_messages)
    conversation_history.append(f"AI: {ai_response}")

    audio_path = generate_speech_and_upload(ai_response)
    return ai_response, audio_path, history + [[user_text, ai_response]]


def clear_conversation():
    """Reset the stored conversation and blank out the chat UI widgets.

    Returns values for (chat_history, audio_input, text_input, status).
    """
    global conversation_history
    conversation_history = []
    status_message = "Conversation cleared."
    return [], None, None, status_message

# Gradio UI
iface = gr.Blocks()

with iface:
    gr.Markdown("# Mind AID AI Assistant")
    gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")

    with gr.Row():
        with gr.Column(scale=3):
            # Emotion selector feeds the system prompt in chat_with_ai.
            emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
            emotion_description = gr.Markdown("**Current mood:** Neutral")

            def update_emotion_desc(em):
                """Return the markdown caption for the selected emotion key."""
                return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"

            # Live-update the mood caption whenever the dropdown changes.
            emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status = gr.Textbox(label="Status")

    chat_history = gr.Chatbot(label="Chat History", height=300)

    with gr.Row():
        text_input = gr.Textbox(label="Type your message", lines=2)
        # Microphone input is saved to a filepath and passed to transcribe_audio.
        audio_input = gr.Audio(label="Or speak", type="filepath", sources=["microphone"])

    output_audio = gr.Audio(label="AI Voice Response")
    submit_btn = gr.Button("Send", variant="primary")

    # Button click and textbox Enter both trigger the same chat handler.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )

    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )

    # Clears both the module-level history and the visible widgets.
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status]
    )

iface.launch()


# NOTE: This script integrates Yarngpt for text-to-speech output via the
# Hugging Face Inference API. Ensure HF_TOKEN is set in the environment and
# has access to the model saheedniyi/Yarngpt before deploying.