sarthak501 committed on
Commit
ec50ee8
·
verified ·
1 Parent(s): e0f91c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -4,8 +4,9 @@ from pydantic import BaseModel
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import os
6
 
7
- # Create a folder for offloading weights if needed
8
- os.makedirs("offload", exist_ok=True)
 
9
 
10
  app = FastAPI()
11
 
@@ -18,19 +19,19 @@ app.add_middleware(
18
  allow_headers=["*"]
19
  )
20
 
21
- # Model name
22
  model_name = "ethzanalytics/RedPajama-INCITE-7B-Base-sharded-bf16"
23
 
24
  # Load tokenizer
25
  tokenizer = AutoTokenizer.from_pretrained(model_name)
26
 
27
- # Load model with offload folder
28
  model = AutoModelForCausalLM.from_pretrained(
29
  model_name,
30
  torch_dtype="bfloat16",
31
  device_map="auto",
32
  low_cpu_mem_usage=True,
33
- offload_folder="offload" # Required after HF update
34
  )
35
 
36
  # Request body schema
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import os
6
 
7
+ # Use a writable folder for offloading weights (Hugging Face Spaces restricts /app)
8
+ offload_dir = "/tmp/offload"
9
+ os.makedirs(offload_dir, exist_ok=True)
10
 
11
  app = FastAPI()
12
 
 
19
  allow_headers=["*"]
20
  )
21
 
22
+ # Model name (7B model - large, will offload to /tmp)
23
  model_name = "ethzanalytics/RedPajama-INCITE-7B-Base-sharded-bf16"
24
 
25
  # Load tokenizer
26
  tokenizer = AutoTokenizer.from_pretrained(model_name)
27
 
28
+ # Load model with /tmp offload folder
29
  model = AutoModelForCausalLM.from_pretrained(
30
  model_name,
31
  torch_dtype="bfloat16",
32
  device_map="auto",
33
  low_cpu_mem_usage=True,
34
+ offload_folder=offload_dir
35
  )
36
 
37
  # Request body schema