```python
import os
import logging
from tempfile import NamedTemporaryFile

import gradio as gr
import requests
import speech_recognition as sr

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}

# Emotion descriptions
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous",
}

# Conversation history as plain strings ("User: ..." / "AI: ...")
conversation_history = []


# Transcribe audio using Google's free speech recognition endpoint
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""


# Build the message list from recent history and get a Groq chat completion
def get_groq_response(prompt, history):
    messages = [{"role": "system", "content": prompt}]
    for msg in history:
        if msg.startswith("User: "):
            messages.append({"role": "user", "content": msg[6:]})
        elif msg.startswith("AI: "):
            messages.append({"role": "assistant", "content": msg[4:]})

    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024,
    }
    try:
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Error contacting AI."


# Generate TTS audio with Yarngpt via the Hugging Face Inference API
def generate_speech_and_upload(text):
    try:
        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        hf_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}

        response = requests.post(inference_url, headers=hf_headers, json=payload, timeout=120)
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None

        # Persist the returned audio bytes to a temp .wav file for Gradio
        temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
        temp_file.write(response.content)
        temp_file.close()
        return temp_file.name
    except Exception as e:
        logger.error(f"Hugging Face TTS error: {e}")
        return None


# Main handler: transcribe (if audio given), query Groq, synthesize the reply
def chat_with_ai(audio, text_input, emotion, history):
    global conversation_history
    history = history or []
    user_text = text_input or ""

    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_text = transcription
        else:
            return "Couldn't understand the audio.", None, history

    if not user_text.strip():
        return "No input provided.", None, history

    conversation_history.append(f"User: {user_text}")
    recent_messages = conversation_history[-20:]

    prompt = (
        f"You are an empathetic AI assistant. The user is feeling {emotion} "
        f"({emotion_options[emotion]}). Respond supportively."
    )
    ai_response = get_groq_response(prompt, recent_messages)
    conversation_history.append(f"AI: {ai_response}")
    audio_path = generate_speech_and_upload(ai_response)
    return ai_response, audio_path, history + [[user_text, ai_response]]


def clear_conversation():
    global conversation_history
    conversation_history = []
    return [], None, "", "Conversation cleared."


# Gradio UI
iface = gr.Blocks()
with iface:
    gr.Markdown("# Mind AID AI Assistant")
    gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")

    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(
                label="Your emotion?",
                choices=list(emotion_options.keys()),
                value="neutral",
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")

            def update_emotion_desc(em):
                return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"

            emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status = gr.Textbox(label="Status")

    chat_history = gr.Chatbot(label="Chat History", height=300)

    with gr.Row():
        text_input = gr.Textbox(label="Type your message", lines=2)
        audio_input = gr.Audio(label="Or speak", type="filepath", sources=["microphone"])

    output_audio = gr.Audio(label="AI Voice Response")
    submit_btn = gr.Button("Send", variant="primary")

    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history],
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history],
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status],
    )

iface.launch()
```

Here is the complete revised code with Yarngpt integrated for text-to-speech output via the Hugging Face Inference API. Make sure your HF_TOKEN is correctly set in your environment and has access to the model saheedniyi/Yarngpt. Let me know if you need help deploying this.
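If you want to fail fast on misconfiguration, here's a minimal pre-flight sketch you could run before `iface.launch()`. The `check_environment` helper and the short test utterance are illustrative assumptions, not part of the app; only the env var names and the Yarngpt endpoint URL come from the code above.

```python
# Hypothetical pre-flight check: verify credentials exist and the Yarngpt
# Inference API endpoint is reachable before starting the Gradio app.
import os
import sys

import requests


def check_environment():
    # Both tokens are read by the app at startup; exit early if either is unset.
    missing = [name for name in ("HF_TOKEN", "GROQ_API_KEY") if not os.environ.get(name)]
    if missing:
        sys.exit(f"Missing environment variables: {', '.join(missing)}")

    # Probe the TTS endpoint with a short test utterance. A 503 usually means
    # the model is still loading on Hugging Face and resolves on retry.
    resp = requests.post(
        "https://api-inference.huggingface.co/models/saheedniyi/Yarngpt",
        headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"},
        json={"inputs": "Hello"},
        timeout=60,
    )
    print(f"Yarngpt endpoint responded with HTTP {resp.status_code}")


check_environment()
```

Running this once at startup surfaces a missing token or an inaccessible model immediately, instead of only discovering the problem after the first chat message.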