imnim committed
Commit 011aa0f · verified · 1 Parent(s): b9b5220

Updated with 16-bit instead of 32-bit params
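(For context on the commit message: parameter memory scales with dtype width, so bfloat16 halves the footprint relative to float32. A rough sketch, with a hypothetical 7B parameter count that is illustrative only, not the repo's actual base model size:

# Sketch only: parameter memory at 32-bit vs 16-bit precision.
# n_params is hypothetical, for illustration.
import torch

n_params = 7_000_000_000
for dtype in (torch.float32, torch.bfloat16):
    gib = n_params * torch.finfo(dtype).bits / 8 / 2**30
    print(f"{dtype}: ~{gib:.1f} GiB")
# torch.float32: ~26.1 GiB
# torch.bfloat16: ~13.0 GiB
)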

Files changed (1)
1. app.py +34 -21
app.py CHANGED
@@ -4,10 +4,11 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel, PeftConfig
 from fastapi.middleware.cors import CORSMiddleware
 import torch
+import os
 
 app = FastAPI()
 
-# Allow CORS for all origins (adjust this in production)
+# Allow CORS (customize in production)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -16,44 +17,56 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Path to your HF Hub repo with full model + adapter
+# Hugging Face access token (from env)
+hf_token = os.getenv("HF_TOKEN")
+
+# HF model repo (includes adapter + full model)
 adapter_path = "imnim/multi-label-email-classifier"
 
 try:
-    # Load PEFT config to get base model path
-    peft_config = PeftConfig.from_pretrained(adapter_path, use_auth_token=True)
-
-    # Load base model and tokenizer with HF auth token
-    base_model = AutoModelForCausalLM.from_pretrained(
-        peft_config.base_model_name_or_path,
-        torch_dtype=torch.bfloat16,
-        device_map={"": "cpu"},
-        use_auth_token=True
-    )
+    # Load PEFT adapter config
+    peft_config = PeftConfig.from_pretrained(adapter_path, token=hf_token)
+
+    # Try loading in bfloat16, fall back to float32
+    try:
+        base_model = AutoModelForCausalLM.from_pretrained(
+            peft_config.base_model_name_or_path,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            token=hf_token
+        )
+    except Exception:
+        base_model = AutoModelForCausalLM.from_pretrained(
+            peft_config.base_model_name_or_path,
+            torch_dtype=torch.float32,
+            device_map="auto",
+            token=hf_token
+        )
+
     tokenizer = AutoTokenizer.from_pretrained(
         peft_config.base_model_name_or_path,
-        use_auth_token=True
+        token=hf_token
     )
 
-    # Load adapter with HF auth token
+    # Load the adapter
     model = PeftModel.from_pretrained(
-        base_model, adapter_path,
-        device_map={"": "cpu"},
-        use_auth_token=True
+        base_model,
+        adapter_path,
+        token=hf_token
     )
 
-    # Setup text-generation pipeline
-    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
+    # Create the pipeline (device placement is handled by accelerate)
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 except Exception as e:
     raise RuntimeError(f"❌ Failed to load model + adapter: {str(e)}")
 
-# Request schema
+# === Request Schema ===
 class EmailInput(BaseModel):
     subject: str
     body: str
 
-# POST /classify endpoint
+# === Endpoint ===
 @app.post("/classify")
 async def classify_email(data: EmailInput):
     prompt = f"""### Subject:\n{data.subject}\n\n### Body:\n{data.body}\n\n### Labels:"""
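
(For reference, a minimal client sketch for the /classify endpoint above. It assumes the app is served locally, e.g. with "uvicorn app:app --port 8000" and HF_TOKEN set in the environment; the URL, port, and sample email are illustrative, not part of the commit.)

# Minimal client sketch for POST /classify (illustrative; assumes the app
# runs locally via `uvicorn app:app --port 8000` with HF_TOKEN exported).
import requests

payload = {
    "subject": "Invoice #4521 overdue",
    "body": "Hi team, the attached invoice is two weeks past due. Please advise.",
}

resp = requests.post("http://localhost:8000/classify", json=payload)
resp.raise_for_status()
print(resp.json())  # model-generated labels for the email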