Jeff Myers II committed
Commit 32b2ab9 · 1 Parent(s): 7ba0657

Attempting to enable 8-bit quantization

Files changed (2)
  1. Gemma.py +4 -4
  2. requirements.txt +1 -0
Gemma.py CHANGED
@@ -1,4 +1,4 @@
-from transformers import pipeline
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from huggingface_hub import login
 import spaces
 import torch
@@ -8,13 +8,13 @@ import os
 __export__ = ["GemmaLLM"]
 
 class GemmaLLM:
-
     def __init__(self):
         login(token=os.environ.get("GEMMA_TOKEN"))
 
         model_id = "google/gemma-3-4b-it"
-
-        self.model = pipeline("text-generation", model=model_id, torch_dtype=torch.bfloat16, device="cuda")
+        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, pad_token_id=0)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.model = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device="auto")
 
     @spaces.GPU
     def generate(self, message) -> str:
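
Note: the committed code passes load_in_8bit directly to from_pretrained and device="auto" to pipeline. In transformers 4.50 the bare load_in_8bit kwarg is deprecated in favour of an explicit BitsAndBytesConfig passed as quantization_config, and pipeline expects device_map rather than device for "auto" placement; an 8-bit model placed by accelerate should not be given a device argument again. A minimal sketch of the equivalent setup under those assumptions (untested on this Space, CUDA assumed available for bitsandbytes):

import os
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

login(token=os.environ.get("GEMMA_TOKEN"))
model_id = "google/gemma-3-4b-it"

# Explicit 8-bit config instead of the deprecated load_in_8bit kwarg.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",  # let accelerate place the quantized weights
    pad_token_id=0,     # kept from the commit; forwarded to the model config
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# No device or torch_dtype here: the model is already quantized and placed.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)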
requirements.txt CHANGED
@@ -5,4 +5,5 @@ newspaper3k==0.2.8
 transformers==4.50.0
 lxml_html_clean==0.4.1
 accelerate==1.5.2
+bitsandbytes==0.45.3
 spaces