lezaf committed on
Commit
799013a
·
1 Parent(s): b9b64d4

Perform some cleanup

Browse files
Files changed (7) hide show
  1. .env.example +3 -2
  2. .gitignore +0 -1
  3. README.md +44 -12
  4. agent.py +5 -7
  5. app.py +5 -14
  6. rate_limiters.py +20 -0
  7. tools/web_search.py +1 -1
.env.example CHANGED
@@ -10,5 +10,6 @@ LANGFUSE_HOST=<YOUR_LANGFUSE_HOST>
10
  TAVILY_API_KEY=<YOUR_TAVILY_KEY>
11
 
12
  # Run configurations
13
- USE_DDGS=False
14
- USE_RATE_LIMITER=True
 
 
10
  TAVILY_API_KEY=<YOUR_TAVILY_KEY>
11
 
12
  # Run configurations
13
+ USE_DDGS=True
14
+ USE_RATE_LIMITER=True
15
+ RECURSION_LIMIT=<YOUR_RECURSION_LIMIT>
.gitignore CHANGED
@@ -1,6 +1,5 @@
1
  .venv/
2
  .env
3
- # Python cache files
4
  __pycache__/
5
  .dist/
6
  test_*
 
1
  .venv/
2
  .env
 
3
  __pycache__/
4
  .dist/
5
  test_*
README.md CHANGED
@@ -1,15 +1,47 @@
 
 
 
 
1
  ---
2
- title: Template Final Assignment
3
- emoji: 🕵🏻‍♂️
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.25.2
8
- app_file: app.py
9
- pinned: false
10
- hf_oauth: true
11
- # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
12
- hf_oauth_expiration_minutes: 480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ---
14
 
15
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
+ # 🕵️‍♂️ Super Agent: The AI Answer Machine 🚀
2
+
3
+ Welcome to **Super Agent** – an AI agent built to be evaluated on the [GAIA](https://huggingface.co/papers/2311.12983) benchmark in order to complete the [AI agents](https://huggingface.co/learn/agents-course/unit0/introduction) course by [Hugging Face](https://huggingface.co/)!
4
+
5
  ---
6
+
7
+ ## ✨ Features
8
+
9
+ - 🔍 **Web Search**: Finds up-to-date info using DuckDuckGo or Tavily.
10
+ - 📊 **Excel & Data Extraction**: Reads and analyzes Excel files.
11
+ - 🎧 **Audio & YouTube Transcripts**: Extracts transcripts from audio files and YouTube videos.
12
+ - 🧮 **Math Tools**: Adds numbers, checks commutativity, and more.
13
+ - 🧠 **RAG-Ready**: Retrieval-Augmented Generation for smarter, more accurate answers.
14
+ - 🤖 **LLM Integration**: Works with Google Gemini, HuggingFace, and OpenAI models.
15
+ - 🛡️ **Exact Match Mode**: Answers are evaluated for exactness.
16
+ - 🛠️ **Extensible Tooling**: Easily add new tools for more superpowers.
17
+ - ⏳ **Gemini Rate Limiter**: Includes a rate limiter implementation for Gemini, perfect for handling free tier API limits.
18
+
19
+ ---
20
+
21
+ ## 🚀 Quickstart
22
+
23
+ 1. **Clone the repo**
24
+ `git clone https://github.com/yourusername/super_agent.git`
25
+
26
+ 2. **Install dependencies**
27
+ `pip install -r requirements.txt`
28
+
29
+ 3. **Set up your `.env` file**
30
+ Copy `.env.example` to `.env` and fill in your API keys.
31
+
32
+ 4. **Run the app**
33
+ `python app.py`
34
+
35
+ 5. **Test the agent**
36
+ Try the test scripts or use the Gradio interface!
37
+
38
  ---
39
 
40
+ ## 🧩 Supported Tools
41
+
42
+ - `web_search` – Search the web for up-to-date info
43
+ - `extract_data_from_excel` – Download and parse Excel files
44
+ - `extract_transcript_from_youtube` – Get YouTube video transcripts
45
+ - `extract_transcript_from_audio` – Get transcripts from audio files
46
+ - `add_numbers_in_list` – Add up numbers
47
+ - `check_commutativity` – Test if a binary operation is commutative
agent.py CHANGED
@@ -1,7 +1,5 @@
1
  import os
2
- import getpass
3
  import requests
4
- import datetime
5
  from dotenv import load_dotenv
6
  from langgraph.graph import StateGraph, MessagesState, START
7
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
@@ -150,14 +148,14 @@ if __name__ == "__main__":
150
  # task_id = "cabe07ed-9eca-40ea-8ead-410ef5e83f91" # Louvrier ?
151
  # task_id = "f918266a-b3e0-4914-865d-4faa564f1aef" # Code example
152
  # task_id = "3f57289b-8c60-48be-bd80-01f8099ca449" # at bats ?
153
- task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733" # Excel file (passed)
154
- # task_id = "5a0c1adf-205e-4841-a666-7c3ef95def9d" # Malko competition (PASS)
155
- # task_id = "305ac316-eef6-4446-960a-92d80d542f82" # Poland film (FAIL)
156
- # task_id = "bda648d7-d618-4883-88f4-3466eabd860e" # Vietnamese (FAIL)
157
  # task_id = "cf106601-ab4f-4af9-b045-5295fe67b37d" # Olympics
158
  # task_id = "a0c07678-e491-4bbc-8f0b-07405144218f" # pitchers
159
  # task_id = "3cef3a44-215e-4aed-8e3b-b1e3f08063b7" # grocery list
160
- # task_id = "840bfca7-4f7b-481a-8794-c560c340185d" # Carolyn Collins Petersen (FAIL)
161
  # task_id = "1f975693-876d-457b-a649-393859e79bf3" # Audio (pages)
162
  # task_id = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3" # Audio (recipe)
163
 
 
1
  import os
 
2
  import requests
 
3
  from dotenv import load_dotenv
4
  from langgraph.graph import StateGraph, MessagesState, START
5
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 
148
  # task_id = "cabe07ed-9eca-40ea-8ead-410ef5e83f91" # Louvrier ?
149
  # task_id = "f918266a-b3e0-4914-865d-4faa564f1aef" # Code example
150
  # task_id = "3f57289b-8c60-48be-bd80-01f8099ca449" # at bats ?
151
+ task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733" # Excel file
152
+ # task_id = "5a0c1adf-205e-4841-a666-7c3ef95def9d" # Malko competition
153
+ # task_id = "305ac316-eef6-4446-960a-92d80d542f82" # Poland film
154
+ # task_id = "bda648d7-d618-4883-88f4-3466eabd860e" # Vietnamese
155
  # task_id = "cf106601-ab4f-4af9-b045-5295fe67b37d" # Olympics
156
  # task_id = "a0c07678-e491-4bbc-8f0b-07405144218f" # pitchers
157
  # task_id = "3cef3a44-215e-4aed-8e3b-b1e3f08063b7" # grocery list
158
+ # task_id = "840bfca7-4f7b-481a-8794-c560c340185d" # Carolyn Collins Petersen
159
  # task_id = "1f975693-876d-457b-a649-393859e79bf3" # Audio (pages)
160
  # task_id = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3" # Audio (recipe)
161
 
app.py CHANGED
@@ -1,8 +1,6 @@
1
  """
2
  NOTE:
3
- - The agent only runs on a subset of tasks to avoid unnecessary token/api usage for questions that the agent
4
- cannot handle right now. The task ids to exclude are in the `excluded_tasks.txt` file.
5
- - There is a 30 sec delay after each question is answered to avoid rate limiting issues.
6
  """
7
 
8
  import os
@@ -11,7 +9,6 @@ import requests
11
  import inspect
12
  import pandas as pd
13
  import datetime
14
- import time
15
  from agent import build_agent
16
  from langchain_core.messages import HumanMessage
17
  from langfuse.langchain import CallbackHandler
@@ -30,6 +27,7 @@ class SuperAgent:
30
  def __init__(self):
31
  print("SuperAgent initialized.")
32
  self.agent = build_agent(provider="google") # Change to "hf" for HuggingFace
 
33
 
34
  def __call__(self, data: dict) -> str:
35
  """
@@ -86,7 +84,7 @@ class SuperAgent:
86
  try:
87
  answer = self.agent.invoke(
88
  {"messages": [human_msg]},
89
- config={"callbacks": [langfuse_handler], "recursion_limit": 15}
90
  )
91
 
92
  # for message in answer["messages"]:
@@ -184,11 +182,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
184
  except Exception as e:
185
  print(f"Error running agent on task {task_id}: {e}")
186
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
187
-
188
- # If it's not the last question sleep
189
- # if idx < len(questions_data) - 1:
190
- # print(f"[{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] Sleeping for 60 seconds to avoid rate limiting issues...")
191
- # time.sleep(60) # Sleep to avoid rate limiting issues
192
 
193
  if not answers_payload:
194
  print("Agent did not produce any answers to submit.")
@@ -248,11 +241,9 @@ with gr.Blocks() as demo:
248
  gr.Markdown("# Super Agent Evaluation Runner")
249
  gr.Markdown(
250
  """
251
- Welcome to my **Super Agent** Runner!
252
 
253
- Please, sit back and relax while the evaluation runs!
254
-
255
- There is a 60 second delay after each question is answered to avoid rate limiting issues.
256
  """
257
  )
258
 
 
1
  """
2
  NOTE:
3
+ - If USE_RATE_LIMITER env variable is True, the agent will use a rate limiter to avoid hitting API limits.
 
 
4
  """
5
 
6
  import os
 
9
  import inspect
10
  import pandas as pd
11
  import datetime
 
12
  from agent import build_agent
13
  from langchain_core.messages import HumanMessage
14
  from langfuse.langchain import CallbackHandler
 
27
  def __init__(self):
28
  print("SuperAgent initialized.")
29
  self.agent = build_agent(provider="google") # Change to "hf" for HuggingFace
30
+ self.recursion_limit = os.getenv("RECURSION_LIMIT", "25")
31
 
32
  def __call__(self, data: dict) -> str:
33
  """
 
84
  try:
85
  answer = self.agent.invoke(
86
  {"messages": [human_msg]},
87
+ config={"callbacks": [langfuse_handler], "recursion_limit": self.recursion_limit}
88
  )
89
 
90
  # for message in answer["messages"]:
 
182
  except Exception as e:
183
  print(f"Error running agent on task {task_id}: {e}")
184
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
185
 
186
  if not answers_payload:
187
  print("Agent did not produce any answers to submit.")
 
241
  gr.Markdown("# Super Agent Evaluation Runner")
242
  gr.Markdown(
243
  """
244
+ ## Welcome to my **Super Agent** Runner!
245
 
246
+ Please, sit back and relax while the evaluation runs on the GAAI benchmark!
 
 
247
  """
248
  )
249
 
rate_limiters.py CHANGED
@@ -3,6 +3,26 @@ import datetime
3
  from google.api_core.exceptions import ResourceExhausted
4
 
5
  def safe_invoke_with_retry_gemini(llm_with_tools, messages, max_retries=3, wait_seconds=60):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  for attempt in range(1, max_retries + 1):
7
  try:
8
  return llm_with_tools.invoke(messages)
 
3
  from google.api_core.exceptions import ResourceExhausted
4
 
5
  def safe_invoke_with_retry_gemini(llm_with_tools, messages, max_retries=3, wait_seconds=60):
6
+ """
7
+ Safely invokes a Gemini LLM with automatic retries on rate limit errors.
8
+
9
+ This function attempts to call the provided LLM with the given messages. If a ResourceExhausted
10
+ (rate limit) error occurs, it waits for a specified number of seconds and retries, up to a maximum
11
+ number of retries. Other exceptions are raised immediately.
12
+
13
+ Args:
14
+ llm_with_tools: The Gemini LLM instance with tools bound.
15
+ messages (list): List of messages to send to the LLM.
16
+ max_retries (int): Maximum number of retry attempts on rate limit errors.
17
+ wait_seconds (int): Seconds to wait between retries.
18
+
19
+ Returns:
20
+ The result of llm_with_tools.invoke(messages) if successful.
21
+
22
+ Raises:
23
+ ResourceExhausted: If the maximum number of retries is reached due to rate limiting.
24
+ Exception: Any other exception encountered during invocation.
25
+ """
26
  for attempt in range(1, max_retries + 1):
27
  try:
28
  return llm_with_tools.invoke(messages)
tools/web_search.py CHANGED
@@ -138,7 +138,7 @@ def web_search(query: str) -> str:
138
  chunks (str): Concatenated string of most relevant chunks.
139
  """
140
 
141
- USE_DDGS = os.getenv("USE_DDGS").lower() == "true"
142
  # ----- STEP 1: Find the most relevant webpages
143
  if USE_DDGS:
144
  results = DDGS(timeout=30).text(query, max_results=MAX_RESULTS)
 
138
  chunks (str): Concatenated string of most relevant chunks.
139
  """
140
 
141
+ USE_DDGS = os.getenv("USE_DDGS", "false").lower() == "true"
142
  # ----- STEP 1: Find the most relevant webpages
143
  if USE_DDGS:
144
  results = DDGS(timeout=30).text(query, max_results=MAX_RESULTS)