keynes42 committed on
Commit 4cb1cfd · verified · 1 Parent(s): f0e1fa8

Update app.py


Use HF_TOKEN when loading the weights of the Llama-3-8B-Instruct model.

Files changed (1)
  1. app.py +4 -2
app.py CHANGED
@@ -12,15 +12,17 @@ from huggingface_hub import InferenceClient, hf_hub_download
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+hf_token = os.environ["HF_TOKEN"]
 
 @spaces.GPU
 def load_llm():
     model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-    tok = AutoTokenizer.from_pretrained(model_id)
+    tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
     mod = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
-        device_map="auto"  # auto-distributes to GPU
+        device_map="auto",  # auto-distributes to GPU
+        token=hf_token
     )
     return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
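For reference, here is a minimal self-contained sketch of what load_llm amounts to after this change. It is an illustration, not the full file: the @spaces.GPU decorator is omitted because it only applies inside a Space, and the import lines are assumptions about what app.py already pulls in elsewhere.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Assumes HF_TOKEN is set in the environment (on a Space, typically via a
# repository secret). Meta-Llama-3-8B-Instruct is a gated repo, so each
# download must be authenticated with a token that has been granted access.
hf_token = os.environ["HF_TOKEN"]

def load_llm():
    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
    # The token goes to both calls: the tokenizer files and the model weights
    # live in the same gated repo, and each fetch authenticates separately.
    tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    mod = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",  # auto-distributes layers to the GPU (requires accelerate)
        token=hf_token,
    )
    return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)

One design note: os.environ["HF_TOKEN"] fails fast with a KeyError at import time when the secret is missing, whereas os.environ.get("HF_TOKEN") would defer the failure to the first gated download, which can be preferable if the same app.py is also run against non-gated checkpoints.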