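"""Interview preparation demo.

A Gradio app with two practice tabs: recorded answers are transcribed with
Whisper; the technical tab scores each answer for relevance and confidence
with BERT classifiers, while the non-technical tab runs a DistilBERT emotion
classifier on the transcript.
"""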
import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from app.questions import get_question

# Load models
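# NOTE: all weights are downloaded on the first run and cached locally, so the first start is slow.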
whisper_model = whisper.load_model("small")
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/final_confidence')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/final_confidence')
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
emotion_pipe = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=1)

# Emotion map with labels and emojis
interview_emotion_map = {
    "joy": ("Confident", "๐Ÿ™‚"),
    "fear": ("Nervous", "๐Ÿ˜จ"),
    "sadness": ("Uncertain", "๐Ÿ™"),
    "anger": ("Frustrated", "๐Ÿ˜ "),
    "surprise": ("Curious", "๐Ÿ˜ฎ"),
    "neutral": ("Calm", "๐Ÿ˜"),
    "disgust": ("Disengaged", "๐Ÿ˜’"),
}

# Static question sets
technical_questions = [get_question(i) for i in range(6)]
non_technical_questions = [
    "Tell me about yourself.",
    "What are your strengths and weaknesses?",
    "Where do you see yourself in 5 years?",
    "How do you handle stress or pressure?",
    "Describe a time you faced a conflict and how you resolved it.",
    "What motivates you to do your best?"
]

# Index trackers
current_tech_index = 0
current_non_tech_index = 0
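# NOTE: these module-level indices are shared by every browser session; per-user progress would need gr.State.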

# Relevance prediction
def predict_relevance(question, answer):
    if not answer.strip():
        return "Irrelevant"
    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()
    with torch.no_grad():
        outputs = context_model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=-1)
        return "Relevant" if probabilities[0, 1] > 0.5 else "Irrelevant"

# Confidence prediction
def predict_confidence(question, answer, threshold=0.4):
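    # threshold=0.4 (rather than 0.5) leans toward "Confident" for borderline answers.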
    if not isinstance(answer, str) or not answer.strip():
        return "Not Confident"
    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()
    with torch.no_grad():
        outputs = confidence_model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=-1)
        return "Confident" if probabilities[0, 1].item() > threshold else "Not Confident"

# Emotion detection
def detect_emotion(answer):
    if not answer.strip():
        return "No Answer", ""
    result = emotion_pipe(answer)
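    # With top_k=1 the pipeline returns a nested list ([[{"label": ..., "score": ...}]]), hence the double index below.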
    label = result[0][0]["label"].lower()
    emotion_text, emoji = interview_emotion_map.get(label, ("Unknown", "❓"))
    return emotion_text, emoji

# Question navigation (non-tech)
def show_non_tech_question():
    global current_non_tech_index
    return non_technical_questions[current_non_tech_index]

def next_non_tech_question():
    global current_non_tech_index
    current_non_tech_index = (current_non_tech_index + 1) % len(non_technical_questions)
    return non_technical_questions[current_non_tech_index], None, "", ""

# Question navigation (tech)
def show_tech_question():
    global current_tech_index
    return technical_questions[current_tech_index]

def next_tech_question():
    global current_tech_index
    current_tech_index = (current_tech_index + 1) % len(technical_questions)
    return technical_questions[current_tech_index], None, "", "", ""

# Transcribe + analyze (non-technical)
def transcribe_and_analyze_non_tech(audio, question):
    try:
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
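        # pad_or_trim fits the clip to Whisper's fixed 30-second window, so longer answers get truncated.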
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
        result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
        transcribed_text = result.text
        emotion_text, emoji = detect_emotion(transcribed_text)
        return transcribed_text, f"{emotion_text} {emoji}"
    except Exception as e:
        return f"Error: {str(e)}", "โ“"

# Transcribe + analyze (technical)
def transcribe_and_analyze_tech(audio, question):
    try:
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
        result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
        transcribed_text = result.text
        context_result = predict_relevance(question, transcribed_text)
        confidence_result = predict_confidence(question, transcribed_text)
        return transcribed_text, context_result, confidence_result
    except Exception as e:
        return f"Error: {str(e)}", "", ""

# UI layout
with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
    gr.HTML("<h1 style='text-align: center; font-size: 32px;'>INTERVIEW PREPARATION MODEL</h1>")

    with gr.Tabs():
        
        # NON-TECHNICAL TAB
        with gr.Tab("Non-Technical"):
            gr.Markdown("### Emotional Context Analysis (๐Ÿง  + ๐Ÿ˜Š)")
            question_display_1 = gr.Textbox(label="Interview Question", value=show_non_tech_question(), interactive=False)
            audio_input_1 = gr.Audio(type="filepath", label="Record Your Answer")
            transcribed_text_1 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
            emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)

            audio_input_1.change(fn=transcribe_and_analyze_non_tech,
                                 inputs=[audio_input_1, question_display_1],
                                 outputs=[transcribed_text_1, emotion_output])

            next_button_1 = gr.Button("Next Question")
            next_button_1.click(fn=next_non_tech_question,
                                outputs=[question_display_1, audio_input_1, transcribed_text_1, emotion_output])

        # TECHNICAL TAB
        with gr.Tab("Technical"):
            gr.Markdown("### Technical Question Analysis (๐ŸŽ“ + ๐Ÿค–)")
            question_display_2 = gr.Textbox(label="Interview Question", value=show_tech_question(), interactive=False)
            audio_input_2 = gr.Audio(type="filepath", label="Record Your Answer")
            transcribed_text_2 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
            context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False)
            confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False)

            audio_input_2.change(fn=transcribe_and_analyze_tech,
                                 inputs=[audio_input_2, question_display_2],
                                 outputs=[transcribed_text_2, context_analysis_result, confidence_analysis_result])

            next_button_2 = gr.Button("Next Question")
            next_button_2.click(fn=next_tech_question,
                                outputs=[question_display_2, audio_input_2, transcribed_text_2,
                                         context_analysis_result, confidence_analysis_result])

if __name__ == "__main__":
    import os
    
    # Hugging Face Spaces sets SPACE_ID; there we skip share links and hide the API docs
    if "SPACE_ID" in os.environ:
        demo.launch(share=False, show_api=False)
    else:
        demo.launch(share=True, show_api=False)