File size: 5,977 Bytes
411901c
ecce878
411901c
 
 
 
 
 
0d46b61
 
dffc0e7
411901c
 
 
dffc0e7
 
 
411901c
 
 
 
 
 
 
 
dffc0e7
411901c
 
 
 
 
 
 
 
 
 
 
dffc0e7
411901c
 
dffc0e7
665b958
411901c
 
 
 
 
 
 
 
 
 
 
dffc0e7
665b958
411901c
 
dffc0e7
 
 
 
 
 
411901c
 
 
 
 
 
dffc0e7
411901c
 
 
 
 
665b958
 
dffc0e7
411901c
665b958
 
 
 
 
 
 
 
 
 
411901c
665b958
 
 
 
 
411901c
665b958
411901c
 
dffc0e7
665b958
411901c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dffc0e7
411901c
 
 
dffc0e7
411901c
 
665b958
411901c
 
 
 
 
dffc0e7
411901c
 
 
 
 
665b958
411901c
 
 
 
665b958
411901c
 
665b958
411901c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
665b958
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
import time
from huggingface_hub import HfApi

# Logging: module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Environment variables (read consistently via os.getenv).
# HF_TOKEN      : Hugging Face token used by the TTS inference call.
# GROQ_API_KEY  : bearer token for the Groq chat-completions API.
# GROQ_MODEL    : model id; defaults to mixtral-8x7b-32768 when unset.
HF_TOKEN = os.getenv("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

# Default headers for every Groq request.
headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}

# Emotion descriptions shown in the UI and injected into the system prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}

# Module-level conversation history ("User: ..." / "AI: ..." strings).
conversation_history = []

# Transcribe audio

def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* using Google's recognizer.

    Returns the recognized text, or an empty string when recognition or
    file handling fails (the failure is logged, never raised).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded)
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""

# Generate Groq response

def get_groq_response(prompt, history):
    """Send the system *prompt* plus the chat *history* to the Groq API.

    *history* is a list of strings prefixed with "User: " or "AI: "; each
    entry is converted to the corresponding OpenAI-style message role.
    Returns the assistant's reply text, or a fixed error string on any
    failure (network, HTTP error, or unexpected payload shape).
    """
    messages = [{"role": "system", "content": prompt}]
    for msg in history:
        if msg.startswith("User: "):
            messages.append({"role": "user", "content": msg[6:]})
        elif msg.startswith("AI: "):
            messages.append({"role": "assistant", "content": msg[4:]})
    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024
    }
    try:
        # timeout prevents the UI from hanging forever on a dead endpoint;
        # raise_for_status turns HTTP error payloads into a clear exception
        # instead of a confusing KeyError on the missing "choices" field.
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Error contacting AI."

# Generate TTS using Yarngpt

def generate_speech_and_upload(text):
    """Synthesize *text* to speech via the Yarngpt model on the HF Inference API.

    Returns the path of a temporary .wav file containing the audio, or
    None when the API call or file write fails (errors are logged).
    """
    try:
        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        # Local headers: this call authenticates with HF_TOKEN, not the
        # module-level Groq headers.
        hf_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}

        # timeout keeps a stalled inference endpoint from blocking the UI.
        response = requests.post(inference_url, headers=hf_headers, json=payload, timeout=60)
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None

        # Write through the already-open handle and close it via the context
        # manager; the original leaked the handle and reopened the file by name.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file.write(response.content)

        return temp_file.name

    except Exception as e:
        logger.error(f"Hugging Face TTS error: {e}")
        return None

# Main handler

def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: transcribe audio (if any), query Groq, synthesize TTS.

    Parameters
    ----------
    audio : str | None      -- filepath of a recorded clip (takes precedence).
    text_input : str | None -- typed message, used when no audio is given.
    emotion : str           -- key into emotion_options for the system prompt.
    history : list | None   -- Gradio Chatbot state ([[user, ai], ...]).

    Returns (status_or_reply, audio_path_or_None, updated_history).
    """
    global conversation_history
    # Gradio may pass None for an uninitialized Chatbot state; the original
    # `history + [...]` would then raise TypeError.
    history = history or []
    user_text = text_input or ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_text = transcription
        else:
            return "Couldn't understand the audio.", None, history
    if not user_text.strip():
        return "No input provided.", None, history

    conversation_history.append(f"User: {user_text}")
    # Cap the prompt context at the last 20 entries (10 exchanges).
    recent_messages = conversation_history[-20:]

    # .get() tolerates an unexpected dropdown value instead of raising KeyError.
    emotion_desc = emotion_options.get(emotion, "Unknown mood")
    prompt = f"You are an empathetic AI assistant. The user is feeling {emotion} ({emotion_desc}). Respond supportively."
    ai_response = get_groq_response(prompt, recent_messages)
    conversation_history.append(f"AI: {ai_response}")

    audio_path = generate_speech_and_upload(ai_response)
    return ai_response, audio_path, history + [[user_text, ai_response]]


def clear_conversation():
    """Reset the stored conversation and blank out the chat UI widgets.

    Returns values for (chat_history, audio_input, text_input, status).
    """
    global conversation_history
    conversation_history = []
    status_message = "Conversation cleared."
    return [], None, None, status_message

# Gradio UI
iface = gr.Blocks()

with iface:
    gr.Markdown("# Mind AID AI Assistant")
    gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")

    with gr.Row():
        with gr.Column(scale=3):
            # Emotion selector feeds the system prompt in chat_with_ai.
            emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
            emotion_description = gr.Markdown("**Current mood:** Neutral")

            def update_emotion_desc(em):
                """Return the markdown caption for the selected emotion key."""
                return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"

            # Live-update the mood caption whenever the dropdown changes.
            emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status = gr.Textbox(label="Status")

    chat_history = gr.Chatbot(label="Chat History", height=300)

    with gr.Row():
        text_input = gr.Textbox(label="Type your message", lines=2)
        # Microphone input is saved to a filepath and passed to transcribe_audio.
        audio_input = gr.Audio(label="Or speak", type="filepath", sources=["microphone"])

    output_audio = gr.Audio(label="AI Voice Response")
    submit_btn = gr.Button("Send", variant="primary")

    # Button click and textbox Enter both trigger the same chat handler.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )

    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )

    # Clears both the module-level history and the visible widgets.
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status]
    )

iface.launch()


# NOTE: This script integrates Yarngpt for text-to-speech output via the
# Hugging Face Inference API. Ensure HF_TOKEN is set in the environment and
# has access to the model saheedniyi/Yarngpt before deploying.