keynes42 committed
Commit f0e1fa8 · verified · 1 Parent(s): 80c9841

Update app.py

Update to load model weights to the local machine.

Files changed (1)
  1. app.py +27 -11
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
+import torch, spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import smolagents
 from smolagents import CodeAgent, HfApiModel
 from huggingface_hub import InferenceClient, hf_hub_download
@@ -11,6 +13,17 @@ from huggingface_hub import InferenceClient, hf_hub_download
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+@spaces.GPU
+def load_llm():
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    tok = AutoTokenizer.from_pretrained(model_id)
+    mod = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,
+        device_map="auto"  # auto-distributes to GPU
+    )
+    return pipeline("text-generation", model=mod, tokenizer=tok, max_new_tokens=512)
+
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
@@ -28,22 +41,25 @@ class BasicAgent:
         # print(client.text_generation("Hello, my name is", max_new_tokens=20))
 
         # Initialize the model
-        model = HfApiModel(model_id="meta-llama/Llama-3.1-8B-Instruct",
-                           # format="text-generation",
-                           token=os.environ["HF_TOKEN"],
-                           max_tokens=2048,
-                           temperature=0.0
-                           )
+        # model = HfApiModel(model_id="meta-llama/Llama-3.1-8B-Instruct",
+        #                    # format="text-generation",
+        #                    token=os.environ["HF_TOKEN"],
+        #                    max_tokens=2048,
+        #                    temperature=0.0
+        #                    )
 
         # Initialize the tools other than the base tools
         # See list of base tools in https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools.py
 
         # Initialize the agent
-        self.agent = CodeAgent(
-            model=model,
-            tools=[],
-            add_base_tools=True
-        )
+        self.pipe = load_llm()
+        self.agent = CodeAgent(llm=self.pipe, tools=[], add_base_tools=True)
+
+        # self.agent = CodeAgent(
+        #     model=model,
+        #     tools=[],
+        #     add_base_tools=True
+        # )
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
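A note on GPU placement: on ZeroGPU Spaces, the @spaces.GPU decorator leases a GPU only for the duration of the decorated call, so decorating the one-time loader means the lease can lapse once load_llm() returns. The pattern in the Spaces ZeroGPU docs is the reverse: load weights at import time and decorate the function that actually generates. A minimal sketch of that pattern, assuming this app runs on a ZeroGPU Space; the generate helper is illustrative, not part of the commit:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load once at import time. Meta-Llama-3-8B-Instruct is a gated repo,
# so the environment needs an accepted license and an HF token.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tok = AutoTokenizer.from_pretrained(model_id)
mod = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
mod.to("cuda")  # ZeroGPU intercepts CUDA placement done at startup

@spaces.GPU  # a GPU is attached only while this call runs
def generate(prompt: str) -> str:  # hypothetical helper, not in the commit
    inputs = tok(prompt, return_tensors="pt").to("cuda")
    out = mod.generate(**inputs, max_new_tokens=512)  # mirrors the pipeline budget
    return tok.decode(out[0], skip_special_tokens=True)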
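A second note, on the agent wiring: CodeAgent(llm=self.pipe, ...) hands a raw transformers pipeline to smolagents, but in current smolagents releases CodeAgent takes its language model through the model= argument as a smolagents model object, and the library ships a TransformersModel wrapper for exactly this run-the-weights-locally case. A minimal sketch, assuming TransformersModel is available in the installed smolagents version and accepts these parameters:

from smolagents import CodeAgent, TransformersModel

# TransformersModel loads the checkpoint locally via transformers,
# so it would replace both load_llm() and the raw pipeline object.
model = TransformersModel(
    model_id="meta-llama/Meta-Llama-3-8B-Instruct",
    device_map="auto",    # same automatic GPU placement as load_llm()
    max_new_tokens=512,   # mirrors the pipeline budget in the commit
)
agent = CodeAgent(model=model, tools=[], add_base_tools=True)

On a ZeroGPU Space this construction would likely still need to follow the loading pattern sketched above, since the wrapper places weights on the GPU when it is built.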