keynes42 committed
Commit f0e1fa8 · verified · 1 Parent(s): 80c9841

Update app.py

Update to load model weights to the local machine.

Files changed (1)
  1. app.py +27 -11
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
+import torch, spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import smolagents
 from smolagents import CodeAgent, HfApiModel
 from huggingface_hub import InferenceClient, hf_hub_download
@@ -11,6 +13,17 @@ from huggingface_hub import InferenceClient, hf_hub_download
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+@spaces.GPU
+def load_llm():
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    tok = AutoTokenizer.from_pretrained(model_id)
+    mod = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,
+        device_map="auto"  # auto-distributes to GPU
+    )
+    return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
+
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
@@ -28,22 +41,25 @@ class BasicAgent:
         # print(client.text_generation("Hello, my name is", max_new_tokens=20))
 
         # Initialize the model
-        model = HfApiModel(model_id="meta-llama/Llama-3.1-8B-Instruct",
-                           # format="text-generation",
-                           token=os.environ["HF_TOKEN"],
-                           max_tokens=2048,
-                           temperature=0.0
-                           )
+        # model = HfApiModel(model_id="meta-llama/Llama-3.1-8B-Instruct",
+        #                    # format="text-generation",
+        #                    token=os.environ["HF_TOKEN"],
+        #                    max_tokens=2048,
+        #                    temperature=0.0
+        #                    )
 
         # Initialize the tools other than the base tools
         # See list of base tools in https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools.py
 
         # Initialize the agent
-        self.agent = CodeAgent(
-            model=model,
-            tools=[],
-            add_base_tools=True
-        )
+        self.pipe = load_llm()
+        self.agent = CodeAgent(llm=self.pipe, tools=[], add_base_tools=True)
+
+        # self.agent = CodeAgent(
+        #     model=model,
+        #     tools=[],
+        #     add_base_tools=True
+        # )
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
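A note on GPU placement: on ZeroGPU Spaces, the @spaces.GPU decorator leases a GPU only for the duration of the decorated call, so decorating the one-time loader means the lease can lapse once load_llm() returns. The pattern in the Spaces ZeroGPU docs is the reverse: load weights at import time and decorate the function that actually generates. A minimal sketch of that pattern, assuming this app runs on a ZeroGPU Space; the generate helper is illustrative, not part of the commit:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load once at import time. Meta-Llama-3-8B-Instruct is a gated repo,
# so the environment needs an accepted license and an HF token.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tok = AutoTokenizer.from_pretrained(model_id)
mod = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
mod.to("cuda")  # ZeroGPU intercepts CUDA placement done at startup

@spaces.GPU  # a GPU is attached only while this call runs
def generate(prompt: str) -> str:  # hypothetical helper, not in the commit
    inputs = tok(prompt, return_tensors="pt").to("cuda")
    out = mod.generate(**inputs, max_new_tokens=512)  # mirrors the pipeline budget
    return tok.decode(out[0], skip_special_tokens=True)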
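A second note, on the agent wiring: CodeAgent(llm=self.pipe, ...) hands a raw transformers pipeline to smolagents, but in current smolagents releases CodeAgent takes its language model through the model= argument as a smolagents model object, and the library ships a TransformersModel wrapper for exactly this run-the-weights-locally case. A minimal sketch, assuming TransformersModel is available in the installed smolagents version and accepts these parameters:

from smolagents import CodeAgent, TransformersModel

# TransformersModel loads the checkpoint locally via transformers,
# so it would replace both load_llm() and the raw pipeline object.
model = TransformersModel(
    model_id="meta-llama/Meta-Llama-3-8B-Instruct",
    device_map="auto",    # same automatic GPU placement as load_llm()
    max_new_tokens=512,   # mirrors the pipeline budget in the commit
)
agent = CodeAgent(model=model, tools=[], add_base_tools=True)

On a ZeroGPU Space this construction would likely still need to follow the loading pattern sketched above, since the wrapper places weights on the GPU when it is built.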