Update app.py
Update to load model weights on the local machine.
app.py
CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
+import torch, spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import smolagents
 from smolagents import CodeAgent, HfApiModel
 from huggingface_hub import InferenceClient, hf_hub_download
@@ -11,6 +13,17 @@ from huggingface_hub import InferenceClient, hf_hub_download
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+@spaces.GPU
+def load_llm():
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    tok = AutoTokenizer.from_pretrained(model_id)
+    mod = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,
+        device_map="auto"  # auto-distributes to GPU
+    )
+    return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
+
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
@@ -28,22 +41,25 @@ class BasicAgent:
         # print(client.text_generation("Hello, my name is", max_new_tokens=20))
 
         # Initialize the model
-        model = HfApiModel(model_id="meta-llama/Llama-3.1-8B-Instruct",
-                           # format="text-generation",
-                           token=os.environ["HF_TOKEN"],
-                           max_tokens=2048,
-                           temperature=0.0
-                           )
+        # model = HfApiModel(model_id="meta-llama/Llama-3.1-8B-Instruct",
+        #                    # format="text-generation",
+        #                    token=os.environ["HF_TOKEN"],
+        #                    max_tokens=2048,
+        #                    temperature=0.0
+        #                    )
 
         # Initialize the tools other than the base tools
         # See list of base tools in https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools.py
 
         # Initialize the agent
-        self.agent = CodeAgent(
-            model=model,
-            tools=[],
-            add_base_tools=True
-        )
+        self.pipe = load_llm()
+        self.agent = CodeAgent(llm=self.pipe, tools=[], add_base_tools=True)
+
+        # self.agent = CodeAgent(
+        #     model=model,
+        #     tools=[],
+        #     add_base_tools=True
+        # )
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
|