Spaces:

yeswanthvarma
/

answer-evaluation-app

Sleeping

App Files Files Community

yeswanthvarma committed on Jun 26

Commit

cd37260

verified ·

1 Parent(s): ff7d72e

Update utils/xlnet_model.py

Browse files

Files changed (1) hide show

utils/xlnet_model.py +25 -45

utils/xlnet_model.py CHANGED Viewed

@@ -1,41 +1,34 @@
 import os
 import torch
-import numpy as np
 from torch import nn
 from transformers import XLNetModel, XLNetTokenizer
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
 from huggingface_hub import hf_hub_download
-from torch.nn.functional import cosine_similarity
 # Set Hugging Face cache directory
 # NOTE(review): this assignment runs after transformers and huggingface_hub
 # have already been imported above; if those libraries read HF_HOME at import
 # time, the override may not apply to the download below — confirm.
 os.environ["HF_HOME"] = "/tmp/huggingface"
-# Download model weights from Hugging Face Hub
 # Executed at module import: the value returned by hf_hub_download is kept in
 # MODEL_PATH (presumably the local path of the fetched checkpoint — confirm).
 MODEL_PATH = hf_hub_download(
     repo_id="yeswanthvarma/xlnet-evaluator-model",
     filename="xlnet_answer_assessment_model.pt"
 )
-# Define your custom model
 class XLNetAnswerAssessmentModel(nn.Module):
     # XLNet encoder ("xlnet-base-cased") followed by three linear layers
     # (768 -> 256 -> 64 -> 1) with ReLU between them and a sigmoid at the end.
     def __init__(self):
         super().__init__()
         self.xlnet = XLNetModel.from_pretrained("xlnet-base-cased")
         # 768 is presumably the encoder's hidden size — confirm.
-        hidden = 768
-        self.fc1 = nn.Linear(hidden, 256)
         self.fc2 = nn.Linear(256, 64)
-        self.output = nn.Linear(64, 1)  # ← Change from `self.out` to `self.output`
     def forward(self, input_ids, attention_mask=None):
         # attention_mask is handed to the encoder, but the mean over dim 1 is
         # taken across every position (assumes dim 1 is the sequence axis —
         # confirm); padded positions are not excluded from the average.
-        pooled = self.xlnet(input_ids, attention_mask).last_hidden_state.mean(1)
         x = torch.relu(self.fc1(pooled))
         x = torch.relu(self.fc2(x))
         # Sigmoid keeps the returned values between 0 and 1.
-        return torch.sigmoid(self.output(x))  # ← And change here too
-# Initialize model and tokenizer
 xlnet_available = False
 # Flag checked by the scoring functions; it stays False unless the try body
 # below runs to completion without raising.
 try:
     tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
@@ -45,50 +38,37 @@ try:
     # NOTE(review): the hunk header above marks lines hidden by the diff; the
     # creation of `model` used further down is presumably among them — confirm.
     xlnet_available = True
     print("✅ Custom XLNet model loaded.")
 except Exception as e:
     # The exception is printed and not re-raised, so xlnet_available is left
     # as False when loading fails.
-    print("⚠️  Could not load XLNet model → fallback to TF‑IDF\n", e)
 # -------------------------------
-# Scoring logic
 # -------------------------------
 def get_model_prediction(q, s, r):
     # Score one (q, s, r) triple with the custom model and return the result
     # as a rounded integer on a 0-100 scale. Presumably q = question,
     # s = student answer, r = reference answer — confirm against callers.
     # Raises ValueError when the load step did not set xlnet_available.
     if not xlnet_available:
-        raise ValueError("XLNet unavailable")
     # The three texts are joined into one string with a literal " [SEP] "
     # between them before tokenization.
     combined = f"{q} [SEP] {s} [SEP] {r}"
     inputs = tokenizer(combined, return_tensors="pt", truncation=True, max_length=512, padding=True)
     with torch.no_grad():
         # NOTE(review): `**inputs` forwards every key the tokenizer returns,
         # while forward() accepts only input_ids and attention_mask; any
         # extra key (e.g. token_type_ids, if emitted) would raise TypeError
         # — confirm.
-        score = float(model(**inputs).squeeze()) * 100
-    return round(score)
-def tfidf_similarity(t1, t2):
     # Fits a TfidfVectorizer on the two texts and returns their cosine
     # similarity multiplied by 100 and rounded.
     # NOTE(review): `cosine_similarity` is imported from
     # sklearn.metrics.pairwise and then again from torch.nn.functional in this
     # file; the later import rebinds the name, so this call presumably no
     # longer reaches the scikit-learn function that the `[0][0]` indexing
     # expects — confirm.
-    vec = TfidfVectorizer()
-    mat = vec.fit_transform([t1, t2])
-    return round(cosine_similarity(mat[0], mat[1])[0][0] * 100)
 def fallback_similarity(t1, t2):
     """Return the word-overlap similarity of two texts as an integer 0-100.

     Both texts are lower-cased and split on whitespace; the score is the
     size of the shared word set divided by the size of the combined word
     set, scaled to a percentage and rounded. If either text contains no
     words the result is 0.
     """
     words_a = set(t1.lower().split())
     words_b = set(t2.lower().split())
     if not words_a or not words_b:
         return 0
     common = words_a & words_b
     everything = words_a | words_b
     return round(len(common) / len(everything) * 100)
-from torch.nn.functional import cosine_similarity
-def get_similarity_score(question, student, reference):
     # Compares the student and reference answers by the cosine similarity of
     # their mean-pooled XLNet encoder outputs and returns an integer 0-100.
     # `question` is accepted but not used anywhere in the body.
     try:
-        if not xlnet_available:
-            raise RuntimeError("XLNet not loaded")
         # encode() runs only the encoder (model.xlnet) and averages
         # last_hidden_state over dim 1; the fc layers are not applied.
-        def encode(text):
-            inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-            with torch.no_grad():
-                output = model.xlnet(**inputs).last_hidden_state.mean(dim=1)
-            return output
-        student_embed = encode(student)
-        reference_embed = encode(reference)
-        sim = cosine_similarity(student_embed, reference_embed).item()
-        score = round((sim + 1) / 2 * 100)  # Normalize [-1, 1] → [0, 100]
-        return score
     except Exception as e:
         # Any error, including the RuntimeError raised above, is printed and
         # reported to the caller as a score of 0.
-        print("❌ Similarity error:", e)
-        return 0

 import os
 import torch
 from torch import nn
 from transformers import XLNetModel, XLNetTokenizer
 from huggingface_hub import hf_hub_download
 # Set Hugging Face cache directory
 # NOTE(review): this assignment runs after transformers and huggingface_hub
 # have already been imported above; if those libraries read HF_HOME at import
 # time, the override may not apply to the download below — confirm.
 os.environ["HF_HOME"] = "/tmp/huggingface"
+# Download trained model weights from Hugging Face Hub
 # Executed at module import: the value returned by hf_hub_download is kept in
 # MODEL_PATH (presumably the local path of the fetched checkpoint — confirm).
 MODEL_PATH = hf_hub_download(
     repo_id="yeswanthvarma/xlnet-evaluator-model",
     filename="xlnet_answer_assessment_model.pt"
 )
+# Define your trained model architecture
 class XLNetAnswerAssessmentModel(nn.Module):
     # XLNet encoder ("xlnet-base-cased") followed by three linear layers
     # (768 -> 256 -> 64 -> 1) with ReLU between them and a sigmoid at the end.
     def __init__(self):
         super().__init__()
         self.xlnet = XLNetModel.from_pretrained("xlnet-base-cased")
         # 768 is presumably the encoder's hidden size — confirm.
+        self.fc1 = nn.Linear(768, 256)
         self.fc2 = nn.Linear(256, 64)
+        self.output = nn.Linear(64, 1)
     def forward(self, input_ids, attention_mask=None):
         # attention_mask is handed to the encoder, but the mean over dim 1 is
         # taken across every position (assumes dim 1 is the sequence axis —
         # confirm); padded positions are not excluded from the average.
+        pooled = self.xlnet(input_ids, attention_mask).last_hidden_state.mean(dim=1)
         x = torch.relu(self.fc1(pooled))
         x = torch.relu(self.fc2(x))
+        return torch.sigmoid(self.output(x))  # Output: score in range [0, 1]
+# Load tokenizer and model
 xlnet_available = False
 # Flag checked by get_model_prediction; it stays False unless the try body
 # below runs to completion without raising.
 try:
     tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
     # NOTE(review): no line visible here assigns `model`, which
     # get_model_prediction calls; the statements that build and load it are
     # presumably elided by the diff at this point — confirm against the file.
     xlnet_available = True
     print("✅ Custom XLNet model loaded.")
 except Exception as e:
     # The exception is printed and not re-raised, so xlnet_available is left
     # as False when loading fails.
+    print("⚠️  Could not load XLNet model → fallback will be used\n", e)
 # -------------------------------
+# Main prediction function
 # -------------------------------
 def get_model_prediction(q, s, r):
     # Score one (q, s, r) triple with the custom model and return the result
     # as a rounded integer on a 0-100 scale. Presumably q = question,
     # s = student answer, r = reference answer — confirm against callers.
     # Raises RuntimeError when the load step did not set xlnet_available.
     if not xlnet_available:
+        raise RuntimeError("XLNet model not available")
+    # Combine input text as during training
     combined = f"{q} [SEP] {s} [SEP] {r}"
     inputs = tokenizer(combined, return_tensors="pt", truncation=True, max_length=512, padding=True)
     with torch.no_grad():
         # Only input_ids and attention_mask are picked out of the tokenizer
         # output, matching the two parameters forward() accepts.
+        output = model(
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"]
+        )
         # .item() needs the squeezed output to hold exactly one value, which
         # fits the single combined string tokenized above.
+        score = output.squeeze().item() * 100  # Convert from [0,1] → [0,100]
+    return round(score)
+# Optional: Fallback similarity using word overlap
 def fallback_similarity(t1, t2):
     """Score the word overlap between two texts as an integer percentage.

     Each text is lower-cased and split on whitespace into a set of words.
     The result is len(intersection) / len(union) * 100, rounded; it is 0
     when either text has no words at all.
     """
     tokens_a = set(t1.lower().split())
     tokens_b = set(t2.lower().split())
     if not (tokens_a and tokens_b):
         return 0
     overlap = len(tokens_a & tokens_b)
     total = len(tokens_a | tokens_b)
     return round(overlap / total * 100)
+# Final score API (use in app.py)
+def get_similarity_score(q, s, r):
     # Returns get_model_prediction(q, s, r) when it succeeds. On any
     # exception (including the RuntimeError it raises when the model is
     # unavailable) the error is printed and the word-overlap score of s and r
     # is returned instead; q plays no part in that fallback.
     try:
+        return get_model_prediction(q, s, r)
     except Exception as e:
+        print("❌ XLNet failed, using fallback:", e)
+        return fallback_similarity(s, r)