RiteshAkhade committed on
Commit 466d120 · verified
1 Parent(s): 24a1975

Update app.py

Files changed (1):
  1. app.py +58 -54
app.py CHANGED
@@ -1,7 +1,3 @@
-import os
-os.system("pip install --upgrade openai-whisper torch")
-os.system("pip install --upgrade transformers")
-
 import whisper
 import gradio as gr
 import torch
@@ -50,19 +46,19 @@ def predict_relevance(question, answer):
     context_model.eval()
     with torch.no_grad():
         outputs = context_model(**inputs)
-        probabilities = torch.softmax(outputs.logits, dim=-1)
-        return "Relevant" if probabilities[0, 1] > 0.5 else "Irrelevant"
+        probs = torch.softmax(outputs.logits, dim=-1)
+        return "Relevant" if probs[0, 1] > 0.5 else "Irrelevant"
 
 # Confidence prediction
 def predict_confidence(question, answer, threshold=0.4):
-    if not isinstance(answer, str) or not answer.strip():
+    if not answer.strip():
         return "Not Confident"
     inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
     confidence_model.eval()
     with torch.no_grad():
         outputs = confidence_model(**inputs)
-        probabilities = torch.softmax(outputs.logits, dim=-1)
-        return "Confident" if probabilities[0, 1].item() > threshold else "Not Confident"
+        probs = torch.softmax(outputs.logits, dim=-1)
+        return "Confident" if probs[0, 1].item() > threshold else "Not Confident"
 
 # Emotion detection
 def detect_emotion(answer):
@@ -70,8 +66,7 @@ def detect_emotion(answer):
         return "No Answer", ""
     result = emotion_pipe(answer)
     label = result[0][0]["label"].lower()
-    emotion_text, emoji = interview_emotion_map.get(label, ("Unknown", "❓"))
-    return emotion_text, emoji
+    return interview_emotion_map.get(label, ("Unknown", "❓"))
 
 # Question navigation (non-tech)
 def show_non_tech_question():
@@ -81,6 +76,7 @@ def show_non_tech_question():
 def next_non_tech_question():
     global current_non_tech_index
     current_non_tech_index = (current_non_tech_index + 1) % len(non_technical_questions)
+    # return: question, cleared transcribed_text, cleared emotion
     return non_technical_questions[current_non_tech_index], "", ""
 
 # Question navigation (tech)
@@ -91,34 +87,33 @@ def show_tech_question():
 def next_tech_question():
     global current_tech_index
     current_tech_index = (current_tech_index + 1) % len(technical_questions)
+    # return: question, cleared transcribed_text, cleared context, cleared confidence
     return technical_questions[current_tech_index], "", "", ""
 
 # Transcribe + analyze (non-technical)
 def transcribe_and_analyze_non_tech(audio, question):
     try:
-        audio = whisper.load_audio(audio)
-        audio = whisper.pad_or_trim(audio)
-        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+        audio_data = whisper.load_audio(audio)
+        audio_data = whisper.pad_or_trim(audio_data)
+        mel = whisper.log_mel_spectrogram(audio_data).to(whisper_model.device)
         result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
-        transcribed_text = result.text
-        emotion_text, emoji = detect_emotion(transcribed_text)
-        return transcribed_text, f"{emotion_text} {emoji}"
+        text = result.text
+        emotion_text, emoji = detect_emotion(text)
+        return text, f"{emotion_text} {emoji}"
     except Exception as e:
-        return f"Error: {str(e)}", "❓"
+        return f"Error: {e}", "❓"
 
 # Transcribe + analyze (technical)
 def transcribe_and_analyze_tech(audio, question):
     try:
-        audio = whisper.load_audio(audio)
-        audio = whisper.pad_or_trim(audio)
-        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+        audio_data = whisper.load_audio(audio)
+        audio_data = whisper.pad_or_trim(audio_data)
+        mel = whisper.log_mel_spectrogram(audio_data).to(whisper_model.device)
         result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
-        transcribed_text = result.text
-        context_result = predict_relevance(question, transcribed_text)
-        confidence_result = predict_confidence(question, transcribed_text)
-        return transcribed_text, context_result, confidence_result
+        text = result.text
+        return text, predict_relevance(question, text), predict_confidence(question, text)
     except Exception as e:
-        return f"Error: {str(e)}", "", ""
+        return f"Error: {e}", "", ""
 
 # UI layout
 with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
@@ -129,35 +124,44 @@ with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
     # NON-TECHNICAL TAB
     with gr.Tab("Non-Technical"):
         gr.Markdown("### Emotional Context Analysis (🧠 + 😊)")
-        question_display_1 = gr.Textbox(label="Interview Question", value=show_non_tech_question(), interactive=False)
-        audio_input_1 = gr.Audio(type="filepath", label="Record Your Answer")
-        transcribed_text_1 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
-        emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)
-
-        audio_input_1.change(fn=transcribe_and_analyze_non_tech,
-                             inputs=[audio_input_1, question_display_1],
-                             outputs=[transcribed_text_1, emotion_output])
-
-        next_button_1 = gr.Button("Next Question")
-        next_button_1.click(fn=next_non_tech_question,
-                            outputs=[question_display_1, audio_input_1, transcribed_text_1, emotion_output])
+        q1 = gr.Textbox(label="Interview Question", value=show_non_tech_question(), interactive=False)
+        a1 = gr.Audio(type="filepath", label="Record Your Answer")
+        t1 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
+        e1 = gr.Textbox(label="Detected Emotion", interactive=False)
+
+        a1.change(
+            fn=transcribe_and_analyze_non_tech,
+            inputs=[a1, q1],
+            outputs=[t1, e1]
+        )
+
+        btn1 = gr.Button("Next Question")
+        btn1.click(
+            fn=next_non_tech_question,
+            inputs=[],
+            outputs=[q1, t1, e1]
+        )
 
     # TECHNICAL TAB
     with gr.Tab("Technical"):
         gr.Markdown("### Technical Question Analysis (🎓 + 🤖)")
-        question_display_2 = gr.Textbox(label="Interview Question", value=show_tech_question(), interactive=False)
-        audio_input_2 = gr.Audio(type="filepath", label="Record Your Answer")
-        transcribed_text_2 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
-        context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False)
-        confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False)
-
-        audio_input_2.change(fn=transcribe_and_analyze_tech,
-                             inputs=[audio_input_2, question_display_2],
-                             outputs=[transcribed_text_2, context_analysis_result, confidence_analysis_result])
-
-        next_button_2 = gr.Button("Next Question")
-        next_button_2.click(fn=next_tech_question,
-                            outputs=[question_display_2, audio_input_2, transcribed_text_2,
-                                     context_analysis_result, confidence_analysis_result])
-
-demo.launch(share=True)
+        q2 = gr.Textbox(label="Interview Question", value=show_tech_question(), interactive=False)
+        a2 = gr.Audio(type="filepath", label="Record Your Answer")
+        t2 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
+        c2 = gr.Textbox(label="Context Analysis", interactive=False)
+        f2 = gr.Textbox(label="Confidence Analysis", interactive=False)
+
+        a2.change(
+            fn=transcribe_and_analyze_tech,
+            inputs=[a2, q2],
+            outputs=[t2, c2, f2]
+        )
+
+        btn2 = gr.Button("Next Question")
+        btn2.click(
+            fn=next_tech_question,
+            inputs=[],
+            outputs=[q2, t2, c2, f2]
+        )
+
+demo.launch(share=True)
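
Note: for reference, a minimal standalone sketch of the Whisper decode path that both transcribe_and_analyze_* helpers use after this change. The model size ("base") and the audio file name are assumptions for illustration; the Space's actual model loading happens outside the hunks shown above.

import whisper

# Assumption: a base-size Whisper checkpoint; adjust to match what app.py loads.
whisper_model = whisper.load_model("base")

def transcribe(audio_path):
    audio_data = whisper.load_audio(audio_path)    # decode to 16 kHz mono float32
    audio_data = whisper.pad_or_trim(audio_data)   # fit Whisper's 30-second window
    mel = whisper.log_mel_spectrogram(audio_data).to(whisper_model.device)
    result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
    return result.text

print(transcribe("answer.wav"))  # hypothetical recording path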