Update app.py
Update the use of HF_TOKEN when loading the weights of the Llama-3-8B-Instruct model.
app.py
CHANGED
@@ -12,15 +12,17 @@ from huggingface_hub import InferenceClient, hf_hub_download
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+hf_token = os.environ["HF_TOKEN"]
 
 @spaces.GPU
 def load_llm():
     model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-    tok = AutoTokenizer.from_pretrained(model_id)
+    tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
     mod = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
-        device_map="auto"  # auto-distributes to GPU
+        device_map="auto",  # auto-distributes to GPU
+        token=hf_token
     )
     return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
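For reference, after this commit app.py reads the token once at module load and passes it to both `from_pretrained` calls. A minimal self-contained sketch of the resulting code is below; the imports are inferred from the hunk header and the call sites (the diff itself does not show them), so treat them as assumptions about the rest of app.py:

```python
import os

import spaces  # Hugging Face Spaces ZeroGPU decorator (assumed imported in app.py)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Read the token once at startup; raises KeyError if the Space secret is unset.
hf_token = os.environ["HF_TOKEN"]

@spaces.GPU
def load_llm():
    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
    # token= authenticates the downloads; the account behind HF_TOKEN must
    # have been granted access to this gated repo.
    tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    mod = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",  # auto-distributes to GPU
        token=hf_token,
    )
    return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
```

Using `os.environ["HF_TOKEN"]` rather than `os.getenv("HF_TOKEN")` is a fail-fast choice: a missing secret crashes at import time instead of surfacing later as a 401 error on the first gated download.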