naa142 committed
Commit 90b9df2 · verified · 1 Parent(s): 2891ca3

Update app.py

Files changed (1):
  1. app.py +29 -34
app.py CHANGED

@@ -5,13 +5,12 @@ import torch
 import torch.nn as nn
 from transformers import AutoTokenizer, AutoModel
 
-# ✅ Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# ✅ Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("final_deberta_model")
+# ✅ 1. Load the tokenizer from current directory
+tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
 
-# ✅ Define Scoring Model
+# ✅ 2. Define your ScoringModel
 class ScoringModel(nn.Module):
     def __init__(self, base_model_name="microsoft/deberta-v3-small", dropout_rate=0.242):
         super().__init__()
@@ -29,50 +28,46 @@ class ScoringModel(nn.Module):
                   self.classifier(self.dropout3(hidden))) / 3
         return logits
 
-# ✅ Load model
+# ✅ 3. Instantiate model and load weights
 model = ScoringModel().to(device)
-model.load_state_dict(torch.load("final_deberta_model/scoring_model.pt", map_location=device))
+model.load_state_dict(torch.load("scoring_model.pt", map_location=device))
 model.eval()
 
-# ✅ Streamlit App
-st.set_page_config(page_title="LLM Response Quality Evaluator", page_icon="🤖")
+# ✅ 4. Streamlit App
+st.set_page_config(page_title="🧠 Response Evaluator", page_icon="🚀", layout="centered")
+st.title("🚀 Response Quality Predictor")
 
-st.title("🤖 LLM Fine-Tuned Response Evaluator")
-st.markdown("""
-Welcome to the LLM Response Evaluator App!
-Enter a prompt and two model responses — the system will predict which one is **better** based on fine-tuning results.
-""")
+prompt = st.text_area("Enter the prompt", height=150)
+response_a = st.text_area("Response A", height=100)
+response_b = st.text_area("Response B", height=100)
 
-# ✅ User Inputs
-prompt = st.text_area("Enter your prompt here:")
-response_a = st.text_area("Response A:")
-response_b = st.text_area("Response B:")
-
-if st.button("Predict Better Response"):
+if st.button("Evaluate Responses"):
     if prompt and response_a and response_b:
-        with torch.no_grad():
-            text_a = f"Prompt: {prompt} [SEP] {response_a}"
-            text_b = f"Prompt: {prompt} [SEP] {response_b}"
+        text_a = f"Prompt: {prompt} [SEP] {response_a}"
+        text_b = f"Prompt: {prompt} [SEP] {response_b}"
 
-            encoded_a = tokenizer(text_a, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
-            encoded_b = tokenizer(text_b, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
+        encoded_a = tokenizer(text_a, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
+        encoded_b = tokenizer(text_b, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
 
-            # ✅ Fix: only pass input_ids and attention_mask
-            encoded_a = {k: v.to(device) for k, v in encoded_a.items() if k in ["input_ids", "attention_mask"]}
-            encoded_b = {k: v.to(device) for k, v in encoded_b.items() if k in ["input_ids", "attention_mask"]}
+        encoded_a = {k: v.to(device) for k, v in encoded_a.items() if k in ["input_ids", "attention_mask"]}
+        encoded_b = {k: v.to(device) for k, v in encoded_b.items() if k in ["input_ids", "attention_mask"]}
 
+        with torch.no_grad():
             score_a = model(**encoded_a).squeeze()
             score_b = model(**encoded_b).squeeze()
 
-            prob_a = torch.sigmoid(score_a).item()
-            prob_b = torch.sigmoid(score_b).item()
+        prob_a = torch.sigmoid(score_a).item()
+        prob_b = torch.sigmoid(score_b).item()
+
+        if prob_b > prob_a:
+            st.success(f"✅ Model predicts: **Response B** is better! (Confidence: {prob_b:.4f})")
+        else:
+            st.success(f"✅ Model predicts: **Response A** is better! (Confidence: {prob_a:.4f})")
 
-        if prob_b > prob_a:
-            st.success(f"✅ Response B is better! (Score B: {prob_b:.4f} vs Score A: {prob_a:.4f})")
-        else:
-            st.success(f"✅ Response A is better! (Score A: {prob_a:.4f} vs Score B: {prob_b:.4f})")
+        st.metric("Probability A", f"{prob_a:.4f}")
+        st.metric("Probability B", f"{prob_b:.4f}")
     else:
-        st.warning("⚠️ Please fill all fields (prompt, response A, response B)!")
+        st.warning("⚠️ Please fill in all the fields first!")
 
 
 
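For orientation: the hunk context elides most of the ScoringModel body (its dropout layers and classifier head). Below is a minimal runnable sketch consistent with the visible lines. The constructor signature, the three-way dropout average over one shared classifier head, and the AutoModel dependency come straight from the diff; the CLS-token pooling, the hidden-size lookup, and the single-logit output are assumptions.

import torch
import torch.nn as nn
from transformers import AutoModel

class ScoringModel(nn.Module):
    def __init__(self, base_model_name="microsoft/deberta-v3-small", dropout_rate=0.242):
        super().__init__()
        self.base = AutoModel.from_pretrained(base_model_name)
        hidden_size = self.base.config.hidden_size  # assumption: head sized from the encoder config
        # Multi-sample dropout: three independent masks over the same pooled state,
        # scored by one shared linear head and averaged (matches the visible forward).
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.dropout3 = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        out = self.base(input_ids=input_ids, attention_mask=attention_mask)
        hidden = out.last_hidden_state[:, 0]  # assumed CLS-token pooling
        logits = (self.classifier(self.dropout1(hidden)) +
                  self.classifier(self.dropout2(hidden)) +
                  self.classifier(self.dropout3(hidden))) / 3
        return logits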
 
 
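Functionally, the commit points the tokenizer and state-dict loads at the repository root instead of a final_deberta_model/ subdirectory, and narrows torch.no_grad() so that only the two forward passes run without gradient tracking. A hypothetical smoke test (not part of the commit) for checking those load paths before deploying, assuming the ScoringModel sketch above is saved as scoring_model.py next to scoring_model.pt and the tokenizer files:

import torch
from transformers import AutoTokenizer
from scoring_model import ScoringModel  # the sketch above, saved locally (hypothetical module)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Same load paths as the updated app.py: tokenizer and weights from the current directory.
tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
model = ScoringModel().to(device)
model.load_state_dict(torch.load("scoring_model.pt", map_location=device))
model.eval()

# Made-up example input, formatted the way app.py builds its prompt/response pairs.
text = "Prompt: What is the capital of France? [SEP] Paris."
encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=186)
encoded = {k: v.to(device) for k, v in encoded.items() if k in ["input_ids", "attention_mask"]}

with torch.no_grad():
    prob = torch.sigmoid(model(**encoded).squeeze()).item()
print(f"quality score: {prob:.4f}")

If this prints a score, the app itself should start with Streamlit's standard launch command: streamlit run app.py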