Update app.py
app.py CHANGED
@@ -5,13 +5,12 @@ import torch
 import torch.nn as nn
 from transformers import AutoTokenizer, AutoModel
 
-# ✅ Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# ✅ Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("…
+# ✅ 1. Load the tokenizer from current directory
+tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
 
-# ✅ Define …
+# ✅ 2. Define your ScoringModel
 class ScoringModel(nn.Module):
     def __init__(self, base_model_name="microsoft/deberta-v3-small", dropout_rate=0.242):
         super().__init__()
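The hunk above loads the tokenizer from the repository root instead of pulling it from the Hub at startup, so the tokenizer files have to be committed next to app.py. A minimal sketch of the one-off export step that would produce those files, assuming the same microsoft/deberta-v3-small checkpoint the model class defaults to (the exact file list is illustrative):

    # Run once in an environment with Hub access: fetch the tokenizer and
    # write its files (tokenizer_config.json, spm.model, ...) into the repo
    # root, where from_pretrained(".", local_files_only=True) can find them.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")
    tokenizer.save_pretrained(".")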
@@ -29,50 +28,46 @@ class ScoringModel(nn.Module):
                   self.classifier(self.dropout3(hidden))) / 3
         return logits
 
-# ✅ …
+# ✅ 3. Instantiate model and load weights
 model = ScoringModel().to(device)
-model.load_state_dict(torch.load("…
+model.load_state_dict(torch.load("scoring_model.pt", map_location=device))
 model.eval()
 
-# ✅ Streamlit App
-st.set_page_config(page_title="…
+# ✅ 4. Streamlit App
+st.set_page_config(page_title="🧠 Response Evaluator", page_icon="📊", layout="centered")
+st.title("📊 Response Quality Predictor")
 
-st.…
-st.…
-…
-Enter a prompt and two model responses – the system will predict which one is **better** based on fine-tuning results.
-""")
+prompt = st.text_area("Enter the prompt", height=150)
+response_a = st.text_area("Response A", height=100)
+response_b = st.text_area("Response B", height=100)
 
-…
-prompt = st.text_area("Enter your prompt here:")
-response_a = st.text_area("Response A:")
-response_b = st.text_area("Response B:")
-
-if st.button("Predict Better Response"):
+if st.button("Evaluate Responses"):
     if prompt and response_a and response_b:
-        …
-        …
-        text_b = f"Prompt: {prompt} [SEP] {response_b}"
+        text_a = f"Prompt: {prompt} [SEP] {response_a}"
+        text_b = f"Prompt: {prompt} [SEP] {response_b}"
 
-        …
-        …
+        encoded_a = tokenizer(text_a, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
+        encoded_b = tokenizer(text_b, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
 
-        …
-        …
-        encoded_b = {k: v.to(device) for k, v in encoded_b.items() if k in ["input_ids", "attention_mask"]}
+        encoded_a = {k: v.to(device) for k, v in encoded_a.items() if k in ["input_ids", "attention_mask"]}
+        encoded_b = {k: v.to(device) for k, v in encoded_b.items() if k in ["input_ids", "attention_mask"]}
 
+        with torch.no_grad():
         score_a = model(**encoded_a).squeeze()
         score_b = model(**encoded_b).squeeze()
 
-        …
-        …
+        prob_a = torch.sigmoid(score_a).item()
+        prob_b = torch.sigmoid(score_b).item()
+
+        if prob_b > prob_a:
+            st.success(f"✅ Model predicts: **Response B** is better! (Confidence: {prob_b:.4f})")
+        else:
+            st.success(f"✅ Model predicts: **Response A** is better! (Confidence: {prob_a:.4f})")
 
-        …
-        …
-        else:
-            st.success(f"✅ Response A is better! (Score A: {prob_a:.4f} vs Score B: {prob_b:.4f})")
+        st.metric("Probability A", f"{prob_a:.4f}")
+        st.metric("Probability B", f"{prob_b:.4f}")
     else:
-        st.warning("⚠️ Please fill all fields…
+        st.warning("⚠️ Please fill in all the fields first!")
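The diff only shows the edges of ScoringModel: the constructor signature and the final line of forward(), where the classifier head is applied to what appears to be three dropout samples and averaged ("/ 3"). A hedged reconstruction of the whole class under that multi-sample-dropout assumption; everything not visible in the diff (self.base, the hidden size, the [CLS] pooling, the dropout1/dropout2 names) is inferred, not confirmed:

    import torch
    import torch.nn as nn
    from transformers import AutoModel

    class ScoringModel(nn.Module):
        def __init__(self, base_model_name="microsoft/deberta-v3-small", dropout_rate=0.242):
            super().__init__()
            self.base = AutoModel.from_pretrained(base_model_name)
            hidden_size = self.base.config.hidden_size
            # Three parallel dropout masks over the same pooled representation.
            self.dropout1 = nn.Dropout(dropout_rate)
            self.dropout2 = nn.Dropout(dropout_rate)
            self.dropout3 = nn.Dropout(dropout_rate)
            self.classifier = nn.Linear(hidden_size, 1)

        def forward(self, input_ids, attention_mask):
            out = self.base(input_ids=input_ids, attention_mask=attention_mask)
            hidden = out.last_hidden_state[:, 0]  # [CLS]-position embedding
            # Average the head over three dropout samples, matching the
            # "self.classifier(self.dropout3(hidden))) / 3" tail in the diff.
            logits = (self.classifier(self.dropout1(hidden)) +
                      self.classifier(self.dropout2(hidden)) +
                      self.classifier(self.dropout3(hidden))) / 3
            return logits

Averaging several dropout views of the same pooled state is a common stabilizer for fine-tuned rankers; in eval() mode the dropouts become identities, so the three branches collapse to a single head pass repeated three times.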
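The weights file referenced by the new load_state_dict call would have been produced at training time with torch.save(model.state_dict(), "scoring_model.pt") and committed alongside app.py. To exercise the updated inference path outside Streamlit, a small smoke test can mirror the app; the import from app is a hypothetical convenience (importing app.py also executes its Streamlit calls, in bare mode), and the example prompt and answers are made up:

    import torch
    from transformers import AutoTokenizer
    from app import ScoringModel  # hypothetical: assumes app.py is importable

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
    model = ScoringModel().to(device)
    model.load_state_dict(torch.load("scoring_model.pt", map_location=device))
    model.eval()

    def better_response(prompt, a, b):
        """Return "A" or "B", mirroring the app's sigmoid comparison."""
        probs = []
        for resp in (a, b):
            enc = tokenizer(f"Prompt: {prompt} [SEP] {resp}", return_tensors="pt",
                            padding="max_length", truncation=True, max_length=186)
            enc = {k: v.to(device) for k, v in enc.items()
                   if k in ["input_ids", "attention_mask"]}
            with torch.no_grad():
                probs.append(torch.sigmoid(model(**enc).squeeze()).item())
        # Ties go to A, matching the app's `if prob_b > prob_a` branch.
        return "B" if probs[1] > probs[0] else "A"

    print(better_response("What is 2+2?", "5", "4"))

Locally the Space runs with "streamlit run app.py"; besides streamlit, torch, and transformers, the DeBERTa v3 tokenizer also needs the sentencepiece package installed.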