Spaces:

Sleepyriizi
/

Orify-text-api

Sleeping

App Files Community

Sleepyriizi committed on Jun 10

Commit

12aa198

verified ·

1 Parent(s): 6c3647a

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -44

app.py CHANGED Viewed

@@ -1,30 +1,36 @@
-"""Orify Text Detector API – FastAPI (CPU‑only)"""
 from __future__ import annotations
 import os, re, html
 from datetime import datetime, timedelta
 from typing import List
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from huggingface_hub import hf_hub_download
 from fastapi import FastAPI, HTTPException, Depends
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from fastapi.middleware.cors import CORSMiddleware
 from jose import jwt, JWTError
 from pydantic import BaseModel, Field
-# ----------------------------------------------------------------------------
 # Torch compile shim (CPU runtime)
-# ----------------------------------------------------------------------------
 if hasattr(torch, "compile"):
     torch.compile = (lambda m=None, *_, **__: m if callable(m) else (lambda f: f))  # type: ignore
     os.environ["TORCHINDUCTOR_DISABLED"] = "1"
-# ----------------------------------------------------------------------------
-# Model config
-# ----------------------------------------------------------------------------
 DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
 FILE_MAP    = {"ensamble_1": "ensamble_1", "ensamble_2.bin": "ensamble_2.bin", "ensamble_3": "ensamble_3"}
@@ -39,49 +45,48 @@ LABELS = {i: n for i, n in enumerate([
     "gpt-35", "gpt-4", "gpt-4o", "gpt-j", "gpt-neox", "human", "llama3-70b",
     "llama3-8b", "mixtral-8x7b", "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b",
     "opt-30b", "opt-350m", "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b",
-    "t0-11b", "t0-3b", "text-davinci-002", "text-davinci-003"
 ])}
-# ----------------------------------------------------------------------------
 # JWT helpers
-# ----------------------------------------------------------------------------
 SECRET_KEY = os.getenv("SECRET_KEY")
 if not SECRET_KEY:
     raise RuntimeError("SECRET_KEY env‑var not set")
-ALGORITHM = "HS256"
-EXP_HOURS = 24
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
-def _jwt_create(data: dict) -> str:
-    payload = data | {"exp": datetime.utcnow() + timedelta(hours=EXP_HOURS)}
-    return jwt.encode(payload, SECRET_KEY, algorithm=ALGORITHM)
-def _jwt_verify(token: str = Depends(oauth2_scheme)) -> str:
     try:
-        return jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])["sub"]
     except JWTError:
         raise HTTPException(401, "Invalid or expired token")
-# ----------------------------------------------------------------------------
-# Load ensemble once
-# ----------------------------------------------------------------------------
 print("🔄 Downloading weights…", flush=True)
-local = {k: hf_hub_download(WEIGHT_REPO, v, resume_download=True) for k, v in FILE_MAP.items()}
-print("🧩 Initialising models…", flush=True)
 _tok = AutoTokenizer.from_pretrained(BASE_MODEL, **TOKEN_KW)
 _models: List[AutoModelForSequenceClassification] = []
-for p in local.values():
-    m = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=NUM_LABELS, **TOKEN_KW)
     m.load_state_dict(torch.load(p, map_location=DEVICE))
     m.to(DEVICE).eval()
     _models.append(m)
 print("✅ Ensemble ready")
-# ----------------------------------------------------------------------------
 # Helpers
-# ----------------------------------------------------------------------------
 def _tidy(t: str) -> str:
     t = t.replace("\r\n", "\n").replace("\r", "\n")
@@ -96,14 +101,14 @@ def _infer(seg: str):
     with torch.no_grad():
         probs = torch.stack([torch.softmax(m(**inp).logits, dim=1) for m in _models]).mean(0)[0]
     ai_probs = probs.clone(); ai_probs[24] = 0
-    ai, human = ai_probs.sum().item() * 100, 0.0
     human = 100 - ai
     top3 = [LABELS[i] for i in torch.topk(ai_probs, 3).indices.tolist()]
     return human, ai, top3
-# ----------------------------------------------------------------------------
 # Schemas
-# ----------------------------------------------------------------------------
 class Token(BaseModel):
     access_token: str
     token_type: str = "bearer"
@@ -126,44 +131,43 @@ class AnalyseOut(BaseModel):
     per_line: List[Line]
     highlight_html: str
-# ----------------------------------------------------------------------------
 # FastAPI
-# ----------------------------------------------------------------------------
 app = FastAPI(title="Orify Text Detector API", version="1.0.0")
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 @app.post("/token", response_model=Token)
 async def login(form: OAuth2PasswordRequestForm = Depends()):
-    return Token(access_token=_jwt_create({"sub": form.username}))
 @app.post("/analyse", response_model=AnalyseOut)
-async def analyse(data: AnalyseIn, _user=Depends(_jwt_verify)):
     lines = _tidy(data.text).split("\n")
     per_line, html_parts = [], []
     h_sum = ai_sum = n = 0.0
     for ln in lines:
         if not ln.strip():
-            html_parts.append("<br>"); continue
         n += 1
         human, ai, top3 = _infer(ln)
         h_sum += human; ai_sum += ai
         cls = "ai-line" if ai > human else "human-line"
         tip = f"AI {ai:.2f}% – Top-3: {', '.join(top3)}" if ai > human else f"Human {human:.2f}%"
         html_parts.append(f"<span class='{cls} prob-tooltip' title='{tip}'>{html.escape(ln)}</span>")
-        reason = (f"High AI likelihood ({ai:.1f}%) – fingerprint ≈ {top3[0]}" if ai > human
-                  else f"Lexical variety suggests human ({human:.1f}%)")
         per_line.append(Line(text=ln, ai=ai, human=human, top3=top3, reason=reason))
     human_avg = h_sum / n if n else 0
     ai_avg    = ai_sum / n if n else 0
     verdict   = "AI-generated" if ai_avg > human_avg else "Human-written"
     confidence = max(ai_avg, human_avg)
-    badge = (f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>AI-generated {ai_avg:.2f}%</span>"
-             if verdict == "AI-generated" else
              f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>Human-written {human_avg:.2f}%</span>")
     highlight_html = f"<h3>{badge}</h3><hr>" + "<br>".join(html_parts)
-    return AnalyseOut(verdict=verdict, confidence=confidence, ai_avg=ai_avg,
-                      human_avg=human_avg, per_line=per_line, highlight_html=highlight_html)

 from __future__ import annotations
 import os, re, html
 from datetime import datetime, timedelta
 from typing import List
 import torch
+from transformers import (
+    AutoTokenizer,
+    AutoConfig,
+    AutoModelForSequenceClassification,
+)
 from huggingface_hub import hf_hub_download
 from fastapi import FastAPI, HTTPException, Depends
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from fastapi.middleware.cors import CORSMiddleware
 from jose import jwt, JWTError
 from pydantic import BaseModel, Field
+# ---------------------------------------------------------------------
 # Torch compile shim (CPU runtime)
+# ---------------------------------------------------------------------
 if hasattr(torch, "compile"):
     torch.compile = (lambda m=None, *_, **__: m if callable(m) else (lambda f: f))  # type: ignore
     os.environ["TORCHINDUCTOR_DISABLED"] = "1"
+# ---------------------------------------------------------------------
+# Environment flags – enable remote code in Transformers
+# ---------------------------------------------------------------------
+os.environ.setdefault("HF_ALLOW_CODE_IMPORT", "1")  # allow custom ModernBERT classes
+# ---------------------------------------------------------------------
+# Model / weight config
+# ---------------------------------------------------------------------
 DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
 FILE_MAP    = {"ensamble_1": "ensamble_1", "ensamble_2.bin": "ensamble_2.bin", "ensamble_3": "ensamble_3"}
     "gpt-35", "gpt-4", "gpt-4o", "gpt-j", "gpt-neox", "human", "llama3-70b",
     "llama3-8b", "mixtral-8x7b", "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b",
     "opt-30b", "opt-350m", "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b",
+    "t0-11b", "t0-3b", "text-davinci-002", "text-davinci-003",
 ])}
+# ---------------------------------------------------------------------
 # JWT helpers
+# ---------------------------------------------------------------------
 SECRET_KEY = os.getenv("SECRET_KEY")
 if not SECRET_KEY:
     raise RuntimeError("SECRET_KEY env‑var not set")
+ALGO = "HS256"
+EXP_H = 24
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+def _jwt_create(sub: str) -> str:
+    return jwt.encode({"sub": sub, "exp": datetime.utcnow() + timedelta(hours=EXP_H)}, SECRET_KEY, algorithm=ALGO)
+def _jwt_verify(tok: str = Depends(oauth2_scheme)) -> str:
     try:
+        return jwt.decode(tok, SECRET_KEY, algorithms=[ALGO])["sub"]
     except JWTError:
         raise HTTPException(401, "Invalid or expired token")
+# ---------------------------------------------------------------------
+# Load tokenizer + config + ensemble
+# ---------------------------------------------------------------------
 print("🔄 Downloading weights…", flush=True)
+local_paths = {k: hf_hub_download(WEIGHT_REPO, v, resume_download=True) for k, v in FILE_MAP.items()}
+print("🧩 Loading ModernBERT remote code…", flush=True)
+_cfg = AutoConfig.from_pretrained(BASE_MODEL, **TOKEN_KW)
 _tok = AutoTokenizer.from_pretrained(BASE_MODEL, **TOKEN_KW)
 _models: List[AutoModelForSequenceClassification] = []
+for p in local_paths.values():
+    m = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, config=_cfg, **TOKEN_KW)
     m.load_state_dict(torch.load(p, map_location=DEVICE))
     m.to(DEVICE).eval()
     _models.append(m)
 print("✅ Ensemble ready")
+# ---------------------------------------------------------------------
 # Helpers
+# ---------------------------------------------------------------------
 def _tidy(t: str) -> str:
     t = t.replace("\r\n", "\n").replace("\r", "\n")
     with torch.no_grad():
         probs = torch.stack([torch.softmax(m(**inp).logits, dim=1) for m in _models]).mean(0)[0]
     ai_probs = probs.clone(); ai_probs[24] = 0
+    ai = ai_probs.sum().item() * 100
     human = 100 - ai
     top3 = [LABELS[i] for i in torch.topk(ai_probs, 3).indices.tolist()]
     return human, ai, top3
+# ---------------------------------------------------------------------
 # Schemas
+# ---------------------------------------------------------------------
 class Token(BaseModel):
     access_token: str
     token_type: str = "bearer"
     per_line: List[Line]
     highlight_html: str
+# ---------------------------------------------------------------------
 # FastAPI
+# ---------------------------------------------------------------------
 app = FastAPI(title="Orify Text Detector API", version="1.0.0")
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 @app.post("/token", response_model=Token)
 async def login(form: OAuth2PasswordRequestForm = Depends()):
+    return Token(access_token=_jwt_create(form.username))
 @app.post("/analyse", response_model=AnalyseOut)
+async def analyse(data: AnalyseIn, _=Depends(_jwt_verify)):
     lines = _tidy(data.text).split("\n")
     per_line, html_parts = [], []
     h_sum = ai_sum = n = 0.0
     for ln in lines:
         if not ln.strip():
+            html_parts.append("<br>")
+            continue
         n += 1
         human, ai, top3 = _infer(ln)
         h_sum += human; ai_sum += ai
         cls = "ai-line" if ai > human else "human-line"
         tip = f"AI {ai:.2f}% – Top-3: {', '.join(top3)}" if ai > human else f"Human {human:.2f}%"
         html_parts.append(f"<span class='{cls} prob-tooltip' title='{tip}'>{html.escape(ln)}</span>")
+        reason = (f"High AI likelihood ({ai:.1f}%) – fingerprint ≈ {top3[0]}" if ai > human else
+                  f"Lexical variety suggests human ({human:.1f}%)")
         per_line.append(Line(text=ln, ai=ai, human=human, top3=top3, reason=reason))
     human_avg = h_sum / n if n else 0
     ai_avg    = ai_sum / n if n else 0
     verdict   = "AI-generated" if ai_avg > human_avg else "Human-written"
     confidence = max(ai_avg, human_avg)
+    badge = (f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>AI-generated {ai_avg:.2f}%</span>" if verdict == "AI-generated" else
              f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>Human-written {human_avg:.2f}%</span>")
     highlight_html = f"<h3>{badge}</h3><hr>" + "<br>".join(html_parts)
+    return AnalyseOut(verdict=verdict, confidence=confidence, ai_avg=ai_avg, human_avg=human_avg,
+                      per_line=per_line, highlight_html=highlight_html)