Spaces:
Running
Running
lezaf
committed on
Commit
·
799013a
1
Parent(s):
b9b64d4
Perform some cleanup
Browse files- .env.example +3 -2
- .gitignore +0 -1
- README.md +44 -12
- agent.py +5 -7
- app.py +5 -14
- rate_limiters.py +20 -0
- tools/web_search.py +1 -1
.env.example
CHANGED
@@ -10,5 +10,6 @@ LANGFUSE_HOST=<YOUR_LANGFUSE_HOST>
|
|
10 |
TAVILY_API_KEY=<YOUR_TAVILY_KEY>
|
11 |
|
12 |
# Run configurations
|
13 |
-
USE_DDGS=
|
14 |
-
USE_RATE_LIMITER=True
|
|
|
|
10 |
TAVILY_API_KEY=<YOUR_TAVILY_KEY>
|
11 |
|
12 |
# Run configurations
|
13 |
+
USE_DDGS=True
|
14 |
+
USE_RATE_LIMITER=True
|
15 |
+
RECURSION_LIMIT=<YOUR_RECURSION_LIMIT>
|
.gitignore
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
.venv/
|
2 |
.env
|
3 |
-
# Python cache files
|
4 |
__pycache__/
|
5 |
.dist/
|
6 |
test_*
|
|
|
1 |
.venv/
|
2 |
.env
|
|
|
3 |
__pycache__/
|
4 |
.dist/
|
5 |
test_*
|
README.md
CHANGED
@@ -1,15 +1,47 @@
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
---
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🕵️‍♂️ Super Agent: The AI Answer Machine 🚀
|
2 |
+
|
3 |
+
Welcome to **Super Agent** – the AI agent built to be evaluated on the [GAIA](https://huggingface.co/papers/2311.12983) benchmark to complete the [AI agents](https://huggingface.co/learn/agents-course/unit0/introduction) course by [Hugging Face](https://huggingface.co/)!
|
4 |
+
|
5 |
---
|
6 |
+
|
7 |
+
## ✨ Features
|
8 |
+
|
9 |
+
- 🔍 **Web Search**: Finds up-to-date info using DuckDuckGo or Tavily.
|
10 |
+
- 📊 **Excel & Data Extraction**: Reads and analyzes Excel files.
|
11 |
+
- 🎧 **Audio & YouTube Transcripts**: Extracts transcripts from audio files and YouTube videos.
|
12 |
+
- 🧮 **Math Tools**: Adds numbers, checks commutativity, and more.
|
13 |
+
- 🧠 **RAG-Ready**: Retrieval-Augmented Generation for smarter, more accurate answers.
|
14 |
+
- 🤖 **LLM Integration**: Works with Google Gemini, HuggingFace, and OpenAI models.
|
15 |
+
- 🛡️ **Exact Match Mode**: Answers are evaluated for exactness.
|
16 |
+
- 🛠️ **Extensible Tooling**: Easily add new tools for more superpowers.
|
17 |
+
- ⏳ **Gemini Rate Limiter**: Includes a rate limiter implementation for Gemini, perfect for handling free tier API limits.
|
18 |
+
|
19 |
+
---
|
20 |
+
|
21 |
+
## 🚀 Quickstart
|
22 |
+
|
23 |
+
1. **Clone the repo**
|
24 |
+
`git clone https://github.com/yourusername/super_agent.git`
|
25 |
+
|
26 |
+
2. **Install dependencies**
|
27 |
+
`pip install -r requirements.txt`
|
28 |
+
|
29 |
+
3. **Set up your `.env` file**
|
30 |
+
Copy `.env.example` to `.env` and fill in your API keys.
|
31 |
+
|
32 |
+
4. **Run the app**
|
33 |
+
`python app.py`
|
34 |
+
|
35 |
+
5. **Test the agent**
|
36 |
+
Try the test scripts or use the Gradio interface!
|
37 |
+
|
38 |
---
|
39 |
|
40 |
+
## 🧩 Supported Tools
|
41 |
+
|
42 |
+
- `web_search` – Search the web for up-to-date info
|
43 |
+
- `extract_data_from_excel` – Download and parse Excel files
|
44 |
+
- `extract_transcript_from_youtube` – Get YouTube video transcripts
|
45 |
+
- `extract_transcript_from_audio` – Get transcripts from audio files
|
46 |
+
- `add_numbers_in_list` – Add up numbers
|
47 |
+
- `check_commutativity` – Test if a binary operation is commutative
|
agent.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
import os
|
2 |
-
import getpass
|
3 |
import requests
|
4 |
-
import datetime
|
5 |
from dotenv import load_dotenv
|
6 |
from langgraph.graph import StateGraph, MessagesState, START
|
7 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
@@ -150,14 +148,14 @@ if __name__ == "__main__":
|
|
150 |
# task_id = "cabe07ed-9eca-40ea-8ead-410ef5e83f91" # Louvrier ?
|
151 |
# task_id = "f918266a-b3e0-4914-865d-4faa564f1aef" # Code example
|
152 |
# task_id = "3f57289b-8c60-48be-bd80-01f8099ca449" # at bats ?
|
153 |
-
task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733" # Excel file
|
154 |
-
# task_id = "5a0c1adf-205e-4841-a666-7c3ef95def9d" # Malko competition
|
155 |
-
# task_id = "305ac316-eef6-4446-960a-92d80d542f82" # Poland film
|
156 |
-
# task_id = "bda648d7-d618-4883-88f4-3466eabd860e" # Vietnamese
|
157 |
# task_id = "cf106601-ab4f-4af9-b045-5295fe67b37d" # Olympics
|
158 |
# task_id = "a0c07678-e491-4bbc-8f0b-07405144218f" # pitchers
|
159 |
# task_id = "3cef3a44-215e-4aed-8e3b-b1e3f08063b7" # grocery list
|
160 |
-
# task_id = "840bfca7-4f7b-481a-8794-c560c340185d" # Carolyn Collins Petersen
|
161 |
# task_id = "1f975693-876d-457b-a649-393859e79bf3" # Audio (pages)
|
162 |
# task_id = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3" # Audio (recipe)
|
163 |
|
|
|
1 |
import os
|
|
|
2 |
import requests
|
|
|
3 |
from dotenv import load_dotenv
|
4 |
from langgraph.graph import StateGraph, MessagesState, START
|
5 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
|
|
148 |
# task_id = "cabe07ed-9eca-40ea-8ead-410ef5e83f91" # Louvrier ?
|
149 |
# task_id = "f918266a-b3e0-4914-865d-4faa564f1aef" # Code example
|
150 |
# task_id = "3f57289b-8c60-48be-bd80-01f8099ca449" # at bats ?
|
151 |
+
task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733" # Excel file
|
152 |
+
# task_id = "5a0c1adf-205e-4841-a666-7c3ef95def9d" # Malko competition
|
153 |
+
# task_id = "305ac316-eef6-4446-960a-92d80d542f82" # Poland film
|
154 |
+
# task_id = "bda648d7-d618-4883-88f4-3466eabd860e" # Vietnamese
|
155 |
# task_id = "cf106601-ab4f-4af9-b045-5295fe67b37d" # Olympics
|
156 |
# task_id = "a0c07678-e491-4bbc-8f0b-07405144218f" # pitchers
|
157 |
# task_id = "3cef3a44-215e-4aed-8e3b-b1e3f08063b7" # grocery list
|
158 |
+
# task_id = "840bfca7-4f7b-481a-8794-c560c340185d" # Carolyn Collins Petersen
|
159 |
# task_id = "1f975693-876d-457b-a649-393859e79bf3" # Audio (pages)
|
160 |
# task_id = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3" # Audio (recipe)
|
161 |
|
app.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1 |
"""
|
2 |
NOTE:
|
3 |
-
-
|
4 |
-
cannot handle right now. The task ids to exclude are in the `excluded_tasks.txt` file.
|
5 |
-
- There is a 30 sec delay after each question is answered to avoid rate limiting issues.
|
6 |
"""
|
7 |
|
8 |
import os
|
@@ -11,7 +9,6 @@ import requests
|
|
11 |
import inspect
|
12 |
import pandas as pd
|
13 |
import datetime
|
14 |
-
import time
|
15 |
from agent import build_agent
|
16 |
from langchain_core.messages import HumanMessage
|
17 |
from langfuse.langchain import CallbackHandler
|
@@ -30,6 +27,7 @@ class SuperAgent:
|
|
30 |
def __init__(self):
|
31 |
print("SuperAgent initialized.")
|
32 |
self.agent = build_agent(provider="google") # Change to "hf" for HuggingFace
|
|
|
33 |
|
34 |
def __call__(self, data: dict) -> str:
|
35 |
"""
|
@@ -86,7 +84,7 @@ class SuperAgent:
|
|
86 |
try:
|
87 |
answer = self.agent.invoke(
|
88 |
{"messages": [human_msg]},
|
89 |
-
config={"callbacks": [langfuse_handler], "recursion_limit":
|
90 |
)
|
91 |
|
92 |
# for message in answer["messages"]:
|
@@ -184,11 +182,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
184 |
except Exception as e:
|
185 |
print(f"Error running agent on task {task_id}: {e}")
|
186 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
187 |
-
|
188 |
-
# If it's not the last question sleep
|
189 |
-
# if idx < len(questions_data) - 1:
|
190 |
-
# print(f"[{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] Sleeping for 60 seconds to avoid rate limiting issues...")
|
191 |
-
# time.sleep(60) # Sleep to avoid rate limiting issues
|
192 |
|
193 |
if not answers_payload:
|
194 |
print("Agent did not produce any answers to submit.")
|
@@ -248,11 +241,9 @@ with gr.Blocks() as demo:
|
|
248 |
gr.Markdown("# Super Agent Evaluation Runner")
|
249 |
gr.Markdown(
|
250 |
"""
|
251 |
-
Welcome to my **Super Agent** Runner!
|
252 |
|
253 |
-
Please, sit back and relax while the evaluation runs!
|
254 |
-
|
255 |
-
There is a 60 second delay after each question is answered to avoid rate limiting issues.
|
256 |
"""
|
257 |
)
|
258 |
|
|
|
1 |
"""
|
2 |
NOTE:
|
3 |
+
- If USE_RATE_LIMITER env variable is True, the agent will use a rate limiter to avoid hitting API limits.
|
|
|
|
|
4 |
"""
|
5 |
|
6 |
import os
|
|
|
9 |
import inspect
|
10 |
import pandas as pd
|
11 |
import datetime
|
|
|
12 |
from agent import build_agent
|
13 |
from langchain_core.messages import HumanMessage
|
14 |
from langfuse.langchain import CallbackHandler
|
|
|
27 |
def __init__(self):
|
28 |
print("SuperAgent initialized.")
|
29 |
self.agent = build_agent(provider="google") # Change to "hf" for HuggingFace
|
30 |
+
self.recursion_limit = os.getenv("RECURSION_LIMIT", "25")
|
31 |
|
32 |
def __call__(self, data: dict) -> str:
|
33 |
"""
|
|
|
84 |
try:
|
85 |
answer = self.agent.invoke(
|
86 |
{"messages": [human_msg]},
|
87 |
+
config={"callbacks": [langfuse_handler], "recursion_limit": self.recursion_limit}
|
88 |
)
|
89 |
|
90 |
# for message in answer["messages"]:
|
|
|
182 |
except Exception as e:
|
183 |
print(f"Error running agent on task {task_id}: {e}")
|
184 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
if not answers_payload:
|
187 |
print("Agent did not produce any answers to submit.")
|
|
|
241 |
gr.Markdown("# Super Agent Evaluation Runner")
|
242 |
gr.Markdown(
|
243 |
"""
|
244 |
+
## Welcome to my **Super Agent** Runner!
|
245 |
|
246 |
+
Please, sit back and relax while the evaluation runs on the GAAI benchmark!
|
|
|
|
|
247 |
"""
|
248 |
)
|
249 |
|
rate_limiters.py
CHANGED
@@ -3,6 +3,26 @@ import datetime
|
|
3 |
from google.api_core.exceptions import ResourceExhausted
|
4 |
|
5 |
def safe_invoke_with_retry_gemini(llm_with_tools, messages, max_retries=3, wait_seconds=60):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
for attempt in range(1, max_retries + 1):
|
7 |
try:
|
8 |
return llm_with_tools.invoke(messages)
|
|
|
3 |
from google.api_core.exceptions import ResourceExhausted
|
4 |
|
5 |
def safe_invoke_with_retry_gemini(llm_with_tools, messages, max_retries=3, wait_seconds=60):
|
6 |
+
"""
|
7 |
+
Safely invokes a Gemini LLM with automatic retries on rate limit errors.
|
8 |
+
|
9 |
+
This function attempts to call the provided LLM with the given messages. If a ResourceExhausted
|
10 |
+
(rate limit) error occurs, it waits for a specified number of seconds and retries, up to a maximum
|
11 |
+
number of retries. Other exceptions are raised immediately.
|
12 |
+
|
13 |
+
Args:
|
14 |
+
llm_with_tools: The Gemini LLM instance with tools bound.
|
15 |
+
messages (list): List of messages to send to the LLM.
|
16 |
+
max_retries (int): Maximum number of retry attempts on rate limit errors.
|
17 |
+
wait_seconds (int): Seconds to wait between retries.
|
18 |
+
|
19 |
+
Returns:
|
20 |
+
The result of llm_with_tools.invoke(messages) if successful.
|
21 |
+
|
22 |
+
Raises:
|
23 |
+
ResourceExhausted: If the maximum number of retries is reached due to rate limiting.
|
24 |
+
Exception: Any other exception encountered during invocation.
|
25 |
+
"""
|
26 |
for attempt in range(1, max_retries + 1):
|
27 |
try:
|
28 |
return llm_with_tools.invoke(messages)
|
tools/web_search.py
CHANGED
@@ -138,7 +138,7 @@ def web_search(query: str) -> str:
|
|
138 |
chunks (str): Concatenated string of most relevant chunks.
|
139 |
"""
|
140 |
|
141 |
-
USE_DDGS = os.getenv("USE_DDGS").lower() == "true"
|
142 |
# ----- STEP 1: Find the most relevant webpages
|
143 |
if USE_DDGS:
|
144 |
results = DDGS(timeout=30).text(query, max_results=MAX_RESULTS)
|
|
|
138 |
chunks (str): Concatenated string of most relevant chunks.
|
139 |
"""
|
140 |
|
141 |
+
USE_DDGS = os.getenv("USE_DDGS", "false").lower() == "true"
|
142 |
# ----- STEP 1: Find the most relevant webpages
|
143 |
if USE_DDGS:
|
144 |
results = DDGS(timeout=30).text(query, max_results=MAX_RESULTS)
|