Update app.py
Update the use of HF_TOKEN when loading the weights of the Llama-3-8B-Instruct model.
app.py
CHANGED
@@ -12,15 +12,17 @@ from huggingface_hub import InferenceClient, hf_hub_download
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+hf_token = os.environ["HF_TOKEN"]
 
 @spaces.GPU
 def load_llm():
     model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-    tok = AutoTokenizer.from_pretrained(model_id)
+    tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
     mod = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
-        device_map="auto"  # auto-distributes to GPU
+        device_map="auto",  # auto-distributes to GPU
+        token=hf_token
     )
     return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
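For reference, after this commit app.py reads the token once at module load and passes it to both `from_pretrained` calls. A minimal self-contained sketch of the resulting code is below; the imports are inferred from the hunk header and the call sites (the diff itself does not show them), so treat them as assumptions about the rest of app.py:

```python
import os

import spaces  # Hugging Face Spaces ZeroGPU decorator (assumed imported in app.py)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Read the token once at startup; raises KeyError if the Space secret is unset.
hf_token = os.environ["HF_TOKEN"]

@spaces.GPU
def load_llm():
    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
    # token= authenticates the downloads; the account behind HF_TOKEN must
    # have been granted access to this gated repo.
    tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    mod = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",  # auto-distributes to GPU
        token=hf_token,
    )
    return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
```

Using `os.environ["HF_TOKEN"]` rather than `os.getenv("HF_TOKEN")` is a fail-fast choice: a missing secret crashes at import time instead of surfacing later as a 401 error on the first gated download.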