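"""Mood-from-Voice demo: upload a voice clip, transcribe it with Google Speech
Recognition, classify the sentiment with one of two Hugging Face models, and
speak the predicted mood back with gTTS."""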

import tempfile

import gradio as gr
import nltk
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
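
# Assumed runtime dependencies (not pinned in this file): gradio, gTTS, transformers,
# torch, nltk, SpeechRecognition, pydub, and the ffmpeg binary that pydub calls into.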

# Make sure the NLTK "punkt" tokenizer data is available (downloaded on first run)
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Load the sentiment models:
#  - "Simple Model": DistilBERT fine-tuned on SST-2 (labels POSITIVE / NEGATIVE)
#  - "LLM Model": CardiffNLP Twitter RoBERTa (labels LABEL_0 / LABEL_1 / LABEL_2)
simple_model = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

llm_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
llm_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
llm_pipe = pipeline("sentiment-analysis", model=llm_model, tokenizer=llm_tokenizer)

def transcribe_audio(audio_path):
    """Convert the uploaded audio to WAV and transcribe it with Google's free speech API."""
    recognizer = sr.Recognizer()
    # pydub accepts arbitrary input formats; SpeechRecognition needs a WAV file
    sound = AudioSegment.from_file(audio_path)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_tmp:
        sound.export(wav_tmp.name, format="wav")
    with sr.AudioFile(wav_tmp.name) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # Unintelligible audio or a recognition-service error: return empty text
        return ""

def classify_sentiment(text, model_choice):
    """Classify `text` with the chosen model and speak the predicted mood via gTTS."""
    if not text.strip():
        return "🤔 Could not understand audio.", None
    result = (
        simple_model(text)[0] if model_choice == "Simple Model"
        else llm_pipe(text)[0]
    )
    # The CardiffNLP model reports LABEL_0/1/2; normalise to the DistilBERT label names
    label = {
        "LABEL_0": "NEGATIVE",
        "LABEL_1": "NEUTRAL",
        "LABEL_2": "POSITIVE",
    }.get(result['label'], result['label'])
    mood = {
        "POSITIVE": "😊 Happy / Positive",
        "NEGATIVE": "😞 Sad / Negative",
        "NEUTRAL": "😐 Neutral"
    }.get(label.upper(), label)
    # Synthesise the mood as speech and return the temporary MP3 path
    tts = gTTS(text=mood)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tts.save(tmp.name)
    return mood, tmp.name

def predict(audio_file, model_choice):
    """Gradio callback: transcribe the uploaded audio, then classify its mood."""
    if audio_file is None:
        return "No input", None
    text = transcribe_audio(audio_file)
    return classify_sentiment(text, model_choice)

with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Mood from Voice (via Upload)")
    model_choice = gr.Dropdown(["Simple Model", "LLM Model"], value="Simple Model", label="Choose Model")
    # Gradio 3.x API; under Gradio 4.x this becomes sources=["upload", "microphone"]
    audio_input = gr.Audio(source="upload", type="filepath", label="Upload or Record Your Voice")
    output_text = gr.Textbox(label="Predicted Mood")
    output_audio = gr.Audio(label="Spoken Mood")
    # Re-run the prediction whenever a new file is provided
    audio_input.change(predict, inputs=[audio_input, model_choice], outputs=[output_text, output_audio])

demo.launch()
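
# Run with `python app.py`; Gradio serves the UI at http://localhost:7860 by default.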