import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
import time
from huggingface_hub import HfApi
# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Environment Variables
HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
# Emotion descriptions
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}
# History
conversation_history = []
# Transcribe audio
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        transcription = recognizer.recognize_google(audio_data)
        return transcription
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""
# Generate Groq response
def get_groq_response(prompt, history):
    messages = [{"role": "system", "content": prompt}]
    for msg in history:
        if msg.startswith("User: "):
            messages.append({"role": "user", "content": msg[6:]})
        elif msg.startswith("AI: "):
            messages.append({"role": "assistant", "content": msg[4:]})
    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024
    }
    try:
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()  # surface HTTP errors instead of a KeyError below
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Error contacting AI."
# Generate TTS using Yarngpt
def generate_speech_and_upload(text):
    try:
        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}
        # TTS inference can be slow on a cold start, so allow a generous timeout
        response = requests.post(inference_url, headers=headers, json=payload, timeout=120)
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None
        temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
        with open(temp_file.name, "wb") as f:
            f.write(response.content)
        return temp_file.name
    except Exception as e:
        logger.error(f"Hugging Face TTS error: {e}")
        return None
# Main handler
def chat_with_ai(audio, text_input, emotion, history):
    global conversation_history
    user_text = text_input or ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_text = transcription
        else:
            return "Couldn't understand the audio.", None, history
    if not user_text.strip():
        return "No input provided.", None, history
    conversation_history.append(f"User: {user_text}")
    recent_messages = conversation_history[-20:]
    prompt = f"You are an empathetic AI assistant. The user is feeling {emotion} ({emotion_options[emotion]}). Respond supportively."
    ai_response = get_groq_response(prompt, recent_messages)
    conversation_history.append(f"AI: {ai_response}")
    audio_path = generate_speech_and_upload(ai_response)
    return ai_response, audio_path, history + [[user_text, ai_response]]
def clear_conversation():
    global conversation_history
    conversation_history = []
    return [], None, None, "Conversation cleared."
# Gradio UI
iface = gr.Blocks()
with iface:
    gr.Markdown("# Mind AID AI Assistant")
    gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")
    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
            emotion_description = gr.Markdown("**Current mood:** Neutral")

            def update_emotion_desc(em):
                return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"

            emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status = gr.Textbox(label="Status")
    chat_history = gr.Chatbot(label="Chat History", height=300)
    with gr.Row():
        text_input = gr.Textbox(label="Type your message", lines=2)
        audio_input = gr.Audio(label="Or speak", type="filepath", sources=["microphone"])
    output_audio = gr.Audio(label="AI Voice Response")
    submit_btn = gr.Button("Send", variant="primary")
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status]
    )
iface.launch()
Here is the complete revised code with Yarngpt integrated for text-to-speech output via the Hugging Face Inference API. Make sure both HF_TOKEN and GROQ_API_KEY are set in the Space's environment (ideally as secrets), and that the token has access to the model saheedniyi/Yarngpt. Let me know if you need help deploying this.
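If you want to verify the credentials before deploying, here is a minimal sanity-check sketch (assuming the standard huggingface_hub client and Groq's OpenAI-compatible /openai/v1/models endpoint; it is a standalone check, not part of the app itself):

import os
import requests
from huggingface_hub import HfApi

# Confirm HF_TOKEN is valid and the TTS model repo is reachable
api = HfApi(token=os.environ["HF_TOKEN"])
print("Logged in as:", api.whoami()["name"])
print("TTS model found:", api.model_info("saheedniyi/Yarngpt").id)

# Confirm GROQ_API_KEY is accepted by listing the available models
groq_check = requests.get(
    "https://api.groq.com/openai/v1/models",
    headers={"Authorization": f"Bearer {os.environ['GROQ_API_KEY']}"},
    timeout=30,
)
groq_check.raise_for_status()
print("Groq API key OK")

The Space will also need a requirements.txt listing gradio, requests, SpeechRecognition, and huggingface_hub (package names inferred from the imports above).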