Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,9 @@ from pydantic import BaseModel
|
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
5 |
import os
|
6 |
|
7 |
-
#
|
8 |
-
|
|
|
9 |
|
10 |
app = FastAPI()
|
11 |
|
@@ -18,19 +19,19 @@ app.add_middleware(
|
|
18 |
allow_headers=["*"]
|
19 |
)
|
20 |
|
21 |
-
# Model name
|
22 |
model_name = "ethzanalytics/RedPajama-INCITE-7B-Base-sharded-bf16"
|
23 |
|
24 |
# Load tokenizer
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
26 |
|
27 |
-
# Load model with offload folder
|
28 |
model = AutoModelForCausalLM.from_pretrained(
|
29 |
model_name,
|
30 |
torch_dtype="bfloat16",
|
31 |
device_map="auto",
|
32 |
low_cpu_mem_usage=True,
|
33 |
-
offload_folder=
|
34 |
)
|
35 |
|
36 |
# Request body schema
|
|
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
5 |
import os
|
6 |
|
7 |
+
# Use a writable folder for offloading weights (Hugging Face Spaces restricts /app)
|
8 |
+
offload_dir = "/tmp/offload"
|
9 |
+
os.makedirs(offload_dir, exist_ok=True)
|
10 |
|
11 |
app = FastAPI()
|
12 |
|
|
|
19 |
allow_headers=["*"]
|
20 |
)
|
21 |
|
22 |
+
# Model name (7B model - large, will offload to /tmp)
|
23 |
model_name = "ethzanalytics/RedPajama-INCITE-7B-Base-sharded-bf16"
|
24 |
|
25 |
# Load tokenizer
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
27 |
|
28 |
+
# Load model with /tmp offload folder
|
29 |
model = AutoModelForCausalLM.from_pretrained(
|
30 |
model_name,
|
31 |
torch_dtype="bfloat16",
|
32 |
device_map="auto",
|
33 |
low_cpu_mem_usage=True,
|
34 |
+
offload_folder=offload_dir
|
35 |
)
|
36 |
|
37 |
# Request body schema
|