Spaces:

case-llm-traversaal
/

fawad-mahdi

Runtime error

File size: 2,494 Bytes

ffbc069
8f78710
 
 
 
087b1c7
d6aeeda
 
087b1c7
d6aeeda
087b1c7
 
 
8f78710
ffbc069
d6aeeda
8f78710
 
 
 
 
d6aeeda
 
 
 
 
 
 
 
 
 
 
 
 
 
8f78710
ffbc069
d6aeeda
8f78710
d6aeeda
 
 
 
8f78710
 
 
 
 
 
 
 
 
 
 
d6aeeda
8f78710
d6aeeda
 
 
 
 
8f78710
 
d6aeeda
 
 
8f78710
 
d6aeeda
8f78710

import logging
import os
import tempfile

import gradio as gr
import nltk
import speech_recognition as sr
import torch
from gtts import gTTS
from pydub import AudioSegment
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Make sure the NLTK "punkt" tokenizer data is present, downloading it only
# when the local lookup fails.
def _has_nltk_resource(resource):
    """Return True when *resource* can be located by ``nltk.data.find``."""
    try:
        nltk.data.find(resource)
    except LookupError:
        return False
    return True


if not _has_nltk_resource('tokenizers/punkt'):
    nltk.download('punkt')

# Sentiment models, loaded once at import time.
# "Simple Model" is a ready-made pipeline; "LLM Model" is assembled from an
# explicitly loaded tokenizer and sequence-classification model.
_SIMPLE_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
_LLM_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment"

simple_model = pipeline("sentiment-analysis", model=_SIMPLE_CHECKPOINT)

llm_tokenizer = AutoTokenizer.from_pretrained(_LLM_CHECKPOINT)
llm_model = AutoModelForSequenceClassification.from_pretrained(_LLM_CHECKPOINT)
llm_pipe = pipeline(
    "sentiment-analysis",
    model=llm_model,
    tokenizer=llm_tokenizer,
)

def transcribe_audio(audio_path):
    """Transcribe a speech recording to text via ``recognize_google``.

    The recording is decoded with pydub and re-encoded as WAV before it is
    opened with ``sr.AudioFile``.

    Args:
        audio_path: Path to an audio file that ``AudioSegment.from_file``
            can read.

    Returns:
        The recognized text, or ``""`` when the speech could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    sound = AudioSegment.from_file(audio_path)

    # Use a private temporary directory instead of a fixed "temp.wav" in the
    # working directory: concurrent requests no longer overwrite each other's
    # audio, and the intermediate file is removed when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "audio.wav")
        # export() hands back the open output file; close it so the
        # directory can be cleaned up on every platform.
        sound.export(wav_path, format="wav").close()
        with sr.AudioFile(wav_path) as source:
            # record() reads the whole clip into memory, so the WAV file is
            # not needed once this returns.
            audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        # Speech was unintelligible; callers treat "" as "nothing heard".
        return ""
    except sr.RequestError as exc:
        # Keep the best-effort contract (return "") but leave a trace so a
        # service outage is distinguishable from unintelligible audio.
        logging.getLogger(__name__).warning(
            "Speech recognition request failed: %s", exc
        )
        return ""

# Display text for every label the two sentiment pipelines can emit.
# The SST-2 DistilBERT checkpoint reports POSITIVE / NEGATIVE, while
# cardiffnlp/twitter-roberta-base-sentiment reports the generic ids
# LABEL_0 / LABEL_1 / LABEL_2 (negative / neutral / positive per its model
# card), which previously leaked through to the UI untranslated.
_MOOD_BY_LABEL = {
    "POSITIVE": "😊 Happy / Positive",
    "NEGATIVE": "😞 Sad / Negative",
    "NEUTRAL": "😐 Neutral",
    "LABEL_0": "😞 Sad / Negative",
    "LABEL_1": "😐 Neutral",
    "LABEL_2": "😊 Happy / Positive",
}


def classify_sentiment(text, model_choice):
    """Classify the sentiment of *text* and synthesize a spoken result.

    Args:
        text: Transcribed speech. ``None``, empty and whitespace-only values
            are treated as "nothing understood".
        model_choice: ``"Simple Model"`` selects ``simple_model``; any other
            value selects ``llm_pipe``.

    Returns:
        A ``(mood, audio_path)`` tuple. ``mood`` is the display string for
        the predicted label (or the raw label when it is not in the mapping)
        and ``audio_path`` is an MP3 file of the mood read aloud by gTTS.
        When there is no usable text the tuple is
        ``("🤔 Could not understand audio.", None)``.
    """
    if not text or not text.strip():
        return "🤔 Could not understand audio.", None

    classifier = simple_model if model_choice == "Simple Model" else llm_pipe
    result = classifier(text)[0]

    label = result['label']
    mood = _MOOD_BY_LABEL.get(label.upper(), label)

    tts = gTTS(text=mood)
    # Reserve a unique path, then close the handle before gTTS writes to it;
    # saving while the handle is still open fails on platforms that forbid
    # opening the same file twice. delete=False keeps the file around for
    # the caller to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        speech_path = tmp.name
    tts.save(speech_path)
    return mood, speech_path

def predict(audio_file, model_choice):
    """Run the full pipeline for one recording: transcribe, then classify.

    Args:
        audio_file: Path to the recording, or ``None`` when no audio was
            provided.
        model_choice: Model name forwarded to ``classify_sentiment``.

    Returns:
        ``("No input", None)`` when there is no recording; otherwise the
        ``(mood, audio_path)`` tuple produced by ``classify_sentiment``.
    """
    if audio_file is not None:
        transcript = transcribe_audio(audio_file)
        return classify_sentiment(transcript, model_choice)
    return "No input", None

# Gradio 4 replaced Audio's ``source="upload"`` keyword with
# ``sources=["upload"]`` and rejects the old spelling with a TypeError while
# the UI is being built. Pick the keyword that matches the installed major
# version so the app starts on either line of releases.
_gradio_major = int(gr.__version__.split(".", 1)[0])
_upload_only = {"sources": ["upload"]} if _gradio_major >= 4 else {"source": "upload"}

# UI: choose a model, upload a recording, and get the predicted mood back as
# both text and synthesized speech.
with gr.Blocks() as demo:
    model_choice = gr.Dropdown(["Simple Model", "LLM Model"], value="Simple Model", label="Choose Model")
    gr.Markdown("## 🎙️ Mood from Voice (via Upload)")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice", **_upload_only)
    output_text = gr.Textbox(label="Predicted Mood")
    output_audio = gr.Audio(label="Spoken Mood")
    # Re-run the prediction whenever the uploaded audio changes.
    audio_input.change(predict, inputs=[audio_input, model_choice], outputs=[output_text, output_audio])

demo.launch()