naa142 committed
Commit 90b9df2 · verified · 1 Parent(s): 2891ca3

Update app.py

Files changed (1):
  1. app.py +29 -34
app.py CHANGED

@@ -5,13 +5,12 @@ import torch
 import torch.nn as nn
 from transformers import AutoTokenizer, AutoModel
 
-# ✅ Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# ✅ Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("final_deberta_model")
+# ✅ 1. Load the tokenizer from current directory
+tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
 
-# ✅ Define Scoring Model
+# ✅ 2. Define your ScoringModel
 class ScoringModel(nn.Module):
     def __init__(self, base_model_name="microsoft/deberta-v3-small", dropout_rate=0.242):
         super().__init__()
@@ -29,50 +28,46 @@ class ScoringModel(nn.Module):
                   self.classifier(self.dropout3(hidden))) / 3
         return logits
 
-# ✅ Load model
+# ✅ 3. Instantiate model and load weights
 model = ScoringModel().to(device)
-model.load_state_dict(torch.load("final_deberta_model/scoring_model.pt", map_location=device))
+model.load_state_dict(torch.load("scoring_model.pt", map_location=device))
 model.eval()
 
-# ✅ Streamlit App
-st.set_page_config(page_title="LLM Response Quality Evaluator", page_icon="🤖")
+# ✅ 4. Streamlit App
+st.set_page_config(page_title="🧠 Response Evaluator", page_icon="🚀", layout="centered")
+st.title("🚀 Response Quality Predictor")
 
-st.title("🤖 LLM Fine-Tuned Response Evaluator")
-st.markdown("""
-Welcome to the LLM Response Evaluator App!
-Enter a prompt and two model responses — the system will predict which one is **better** based on fine-tuning results.
-""")
+prompt = st.text_area("Enter the prompt", height=150)
+response_a = st.text_area("Response A", height=100)
+response_b = st.text_area("Response B", height=100)
 
-# ✅ User Inputs
-prompt = st.text_area("Enter your prompt here:")
-response_a = st.text_area("Response A:")
-response_b = st.text_area("Response B:")
-
-if st.button("Predict Better Response"):
+if st.button("Evaluate Responses"):
     if prompt and response_a and response_b:
-        with torch.no_grad():
-            text_a = f"Prompt: {prompt} [SEP] {response_a}"
-            text_b = f"Prompt: {prompt} [SEP] {response_b}"
+        text_a = f"Prompt: {prompt} [SEP] {response_a}"
+        text_b = f"Prompt: {prompt} [SEP] {response_b}"
 
-            encoded_a = tokenizer(text_a, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
-            encoded_b = tokenizer(text_b, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
+        encoded_a = tokenizer(text_a, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
+        encoded_b = tokenizer(text_b, return_tensors='pt', padding='max_length', truncation=True, max_length=186)
 
-            # ✅ Fix: only pass input_ids and attention_mask
-            encoded_a = {k: v.to(device) for k, v in encoded_a.items() if k in ["input_ids", "attention_mask"]}
-            encoded_b = {k: v.to(device) for k, v in encoded_b.items() if k in ["input_ids", "attention_mask"]}
+        encoded_a = {k: v.to(device) for k, v in encoded_a.items() if k in ["input_ids", "attention_mask"]}
+        encoded_b = {k: v.to(device) for k, v in encoded_b.items() if k in ["input_ids", "attention_mask"]}
 
+        with torch.no_grad():
             score_a = model(**encoded_a).squeeze()
             score_b = model(**encoded_b).squeeze()
 
-            prob_a = torch.sigmoid(score_a).item()
-            prob_b = torch.sigmoid(score_b).item()
+        prob_a = torch.sigmoid(score_a).item()
+        prob_b = torch.sigmoid(score_b).item()
+
+        if prob_b > prob_a:
+            st.success(f"✅ Model predicts: **Response B** is better! (Confidence: {prob_b:.4f})")
+        else:
+            st.success(f"✅ Model predicts: **Response A** is better! (Confidence: {prob_a:.4f})")
 
-        if prob_b > prob_a:
-            st.success(f"✅ Response B is better! (Score B: {prob_b:.4f} vs Score A: {prob_a:.4f})")
-        else:
-            st.success(f"✅ Response A is better! (Score A: {prob_a:.4f} vs Score B: {prob_b:.4f})")
+        st.metric("Probability A", f"{prob_a:.4f}")
+        st.metric("Probability B", f"{prob_b:.4f}")
     else:
-        st.warning("⚠️ Please fill all fields (prompt, response A, response B)!")
+        st.warning("⚠️ Please fill in all the fields first!")
 
 
 
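For orientation: the hunk context elides most of the ScoringModel body (its dropout layers and classifier head). Below is a minimal runnable sketch consistent with the visible lines. The constructor signature, the three-way dropout average over one shared classifier head, and the AutoModel dependency come straight from the diff; the CLS-token pooling, the hidden-size lookup, and the single-logit output are assumptions.

import torch
import torch.nn as nn
from transformers import AutoModel

class ScoringModel(nn.Module):
    def __init__(self, base_model_name="microsoft/deberta-v3-small", dropout_rate=0.242):
        super().__init__()
        self.base = AutoModel.from_pretrained(base_model_name)
        hidden_size = self.base.config.hidden_size  # assumption: head sized from the encoder config
        # Multi-sample dropout: three independent masks over the same pooled state,
        # scored by one shared linear head and averaged (matches the visible forward).
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.dropout3 = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        out = self.base(input_ids=input_ids, attention_mask=attention_mask)
        hidden = out.last_hidden_state[:, 0]  # assumed CLS-token pooling
        logits = (self.classifier(self.dropout1(hidden)) +
                  self.classifier(self.dropout2(hidden)) +
                  self.classifier(self.dropout3(hidden))) / 3
        return logits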
 
 
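Functionally, the commit points the tokenizer and state-dict loads at the repository root instead of a final_deberta_model/ subdirectory, and narrows torch.no_grad() so that only the two forward passes run without gradient tracking. A hypothetical smoke test (not part of the commit) for checking those load paths before deploying, assuming the ScoringModel sketch above is saved as scoring_model.py next to scoring_model.pt and the tokenizer files:

import torch
from transformers import AutoTokenizer
from scoring_model import ScoringModel  # the sketch above, saved locally (hypothetical module)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Same load paths as the updated app.py: tokenizer and weights from the current directory.
tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
model = ScoringModel().to(device)
model.load_state_dict(torch.load("scoring_model.pt", map_location=device))
model.eval()

# Made-up example input, formatted the way app.py builds its prompt/response pairs.
text = "Prompt: What is the capital of France? [SEP] Paris."
encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=186)
encoded = {k: v.to(device) for k, v in encoded.items() if k in ["input_ids", "attention_mask"]}

with torch.no_grad():
    prob = torch.sigmoid(model(**encoded).squeeze()).item()
print(f"quality score: {prob:.4f}")

If this prints a score, the app itself should start with Streamlit's standard launch command: streamlit run app.py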