# Hugging Face Spaces status when this file was captured: Runtime error
import gradio as gr | |
from gtts import gTTS | |
import torch | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
import tempfile | |
import nltk | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
# Make sure the NLTK "punkt" tokenizer data is installed; fetch it if missing.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    # The lookup failed, so the resource is not available locally yet.
    nltk.download("punkt")
# Sentiment back-ends, loaded once at import time.

# Used when the UI choice is "Simple Model".
simple_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Used for any other UI choice: tokenizer and model are loaded explicitly
# from the same checkpoint and then wrapped in a pipeline.
_LLM_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment"
llm_tokenizer = AutoTokenizer.from_pretrained(_LLM_CHECKPOINT)
llm_model = AutoModelForSequenceClassification.from_pretrained(_LLM_CHECKPOINT)
llm_pipe = pipeline(
    "sentiment-analysis",
    model=llm_model,
    tokenizer=llm_tokenizer,
)
def transcribe_audio(audio_path):
    """Transcribe the speech in an audio file to text.

    The input is decoded with pydub, re-exported as WAV, read with
    ``speech_recognition`` and passed to ``recognize_google``.

    Args:
        audio_path: Path to an audio file that ``AudioSegment.from_file``
            can read.

    Returns:
        The recognized text, or ``""`` when the speech could not be
        understood or the recognition request failed.
    """
    import os

    recognizer = sr.Recognizer()
    sound = AudioSegment.from_file(audio_path)

    # Convert inside a private temporary directory. A fixed "temp.wav" in the
    # working directory would be overwritten by concurrent requests and was
    # never cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "audio.wav")
        # export() hands back the open output file; close it right away.
        sound.export(wav_path, format="wav").close()
        with sr.AudioFile(wav_path) as source:
            audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # Best effort: unintelligible speech and service/network failures
        # are both reported to the caller as an empty transcript.
        return ""
def classify_sentiment(text, model_choice):
    """Classify the sentiment of ``text`` and synthesize the result as speech.

    Args:
        text: Transcript to classify. ``None``, empty, or whitespace-only
            text is treated as "nothing understood".
        model_choice: ``"Simple Model"`` selects ``simple_model``; any other
            value selects ``llm_pipe``.

    Returns:
        A ``(mood, audio_path)`` tuple. ``mood`` is a display string and
        ``audio_path`` is the path of an MP3 file speaking it. For unusable
        text the tuple is ``("🤔 Could not understand audio.", None)``.
    """
    if not text or not text.strip():
        return "🤔 Could not understand audio.", None

    classifier = simple_model if model_choice == "Simple Model" else llm_pipe
    # truncation=True keeps long transcripts within the model's input limit
    # instead of failing on over-length sequences.
    result = classifier(text, truncation=True)[0]
    label = result["label"]

    mood = {
        "POSITIVE": "😊 Happy / Positive",
        "NEGATIVE": "😞 Sad / Negative",
        "NEUTRAL": "😐 Neutral",
        # The cardiffnlp/twitter-roberta-base-sentiment checkpoint reports
        # generic ids: 0 = negative, 1 = neutral, 2 = positive.
        "LABEL_0": "😞 Sad / Negative",
        "LABEL_1": "😐 Neutral",
        "LABEL_2": "😊 Happy / Positive",
    }.get(label.upper(), label)

    # Reserve a unique path and close the handle before gTTS writes to it.
    # delete=False is deliberate: the file must outlive this call so the UI
    # can serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        speech_path = tmp.name
    gTTS(text=mood).save(speech_path)

    return mood, speech_path
def predict(audio_file, model_choice):
    """Run the voice-to-mood flow for one audio input.

    Args:
        audio_file: Path of the audio file, or ``None`` when nothing was
            provided.
        model_choice: Model name forwarded to ``classify_sentiment``.

    Returns:
        ``("No input", None)`` when ``audio_file`` is ``None``; otherwise
        whatever ``classify_sentiment`` returns for the transcript.
    """
    if audio_file is not None:
        transcript = transcribe_audio(audio_file)
        return classify_sentiment(transcript, model_choice)
    return "No input", None
# Build the UI: a model selector, an audio input, and text + audio outputs.
with gr.Blocks() as demo:
    model_choice = gr.Dropdown(
        ["Simple Model", "LLM Model"],
        value="Simple Model",
        label="Choose Model",
    )
    gr.Markdown("## 🎙️ Mood from Voice (via Upload)")
    # The input source is left at the library default on purpose. The
    # `source=` keyword was replaced by `sources=` in Gradio 4, so passing
    # `source="upload"` raises TypeError there. Omitting it works on both
    # major versions.
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    output_text = gr.Textbox(label="Predicted Mood")
    output_audio = gr.Audio(label="Spoken Mood")
    # Re-run the prediction whenever the audio input changes.
    audio_input.change(
        predict,
        inputs=[audio_input, model_choice],
        outputs=[output_text, output_audio],
    )

demo.launch()