Spaces
Commit dfad45c · 1 Parent(s): 08283c8
lezaf committed

Add bunch of updates in agent

Files changed:
- agent.py +18 -13
- app.py +13 -9
- excluded_tasks.txt +3 -0
- requirements.txt +0 -0
- subset_task_ids.txt +0 -11
- system_prompt.txt +34 -37
- tools.py +0 -267
- tools/extraction.py +83 -0
- tools/math.py +58 -0
- tools/retrievers.py +62 -0
- tools/utils.py +88 -0
- tools/web_search.py +197 -0
agent.py CHANGED

@@ -1,4 +1,3 @@
-from io import BytesIO
 import os
 import getpass
 import requests
@@ -10,8 +9,10 @@ from langchain_core.messages import HumanMessage, SystemMessage
 from langgraph.prebuilt import ToolNode, tools_condition
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langfuse.langchain import CallbackHandler
-
-from tools import
+from tools.web_search import web_search
+from tools.math import add_numbers_in_list, check_commutativity
+from tools.extraction import extract_data_from_excel, extract_transcript_from_youtube
+from tools.retrievers import arxiv_search, wikipedia_search


 load_dotenv(override=True)
@@ -25,9 +26,9 @@ tools = [
     add_numbers_in_list,
     web_search,
     # wikipedia_search,
-    arxiv_search,
+    # arxiv_search,
     check_commutativity,
-
+    extract_data_from_excel,
     extract_transcript_from_youtube
 ]

@@ -47,11 +48,13 @@ def build_agent(provider: str = "hf"):
     elif provider == "google":
         # Google Gemini
         llm = ChatGoogleGenerativeAI(
-            model="gemini-2.0-flash",
+            # model="gemini-2.0-flash",
+            model="gemini-2.5-flash-preview-05-20",
             # temperature=0,
             max_tokens=512,
             # timeout=None,
             max_retries=2,
+            # temperature=0.6
         )

     elif provider == "openai":
@@ -101,6 +104,7 @@ def build_agent(provider: str = "hf"):
     return graph_builder.compile()


+# --------------- For manual testing ---------------- #
 if __name__ == "__main__":
     print("\n" + "-"*30 + " Agent Starting " + "-"*30)
     agent = build_agent(provider=PROVIDER) # Change to "hf" for HuggingFace
@@ -126,22 +130,23 @@ if __name__ == "__main__":
         print(f"An unexpected error occurred fetching questions: {e}")

     # 3. Get specific question by task_id
-    task_id = "
-    # task_id = "6f37996b-2ac7-44b0-8e68-6d28256631b4" # Commutativity check
+    # task_id = "8e867cd7-cff9-4e6c-867a-ff5ddc2550be" # Sosa albums
     # task_id = "2d83110e-a098-4ebb-9987-066c06fa42d0" # Reverse text example
+    # task_id = "cca530fc-4052-43b2-b130-b30968d8aa44" # Chess image
+    # task_id = "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8" # Dinosaur ?
+    # task_id = "6f37996b-2ac7-44b0-8e68-6d28256631b4" # Commutativity check
+    task_id = "9d191bce-651d-4746-be2d-7ef8ecadb9c2" # Youtube video
+    # task_id = "cabe07ed-9eca-40ea-8ead-410ef5e83f91" # Louvrier ?
     # task_id = "f918266a-b3e0-4914-865d-4faa564f1aef" # Code example
+    # task_id = "3f57289b-8c60-48be-bd80-01f8099ca449" # at bats ?
     # task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733" # Excel file (passed)
-    # task_id = "
+    # task_id = "5a0c1adf-205e-4841-a666-7c3ef95def9d" # Malko competition (PASS)
     # task_id = "305ac316-eef6-4446-960a-92d80d542f82" # Poland film (FAIL)
-    # task_id = "3f57289b-8c60-48be-bd80-01f8099ca449" # at bats (PASS)
     # task_id = "bda648d7-d618-4883-88f4-3466eabd860e" # Vietnamese (FAIL)
     # task_id = "cf106601-ab4f-4af9-b045-5295fe67b37d" # Olympics
     # task_id = "a0c07678-e491-4bbc-8f0b-07405144218f"
     # task_id = "3cef3a44-215e-4aed-8e3b-b1e3f08063b7" # grocery list
-    # task_id = "8e867cd7-cff9-4e6c-867a-ff5ddc2550be" # Sosa albums
-    # task_id = "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8" # Dinosaur
     # task_id = "840bfca7-4f7b-481a-8794-c560c340185d" # Carolyn Collins Petersen (FAIL)
-    # task_id = "5a0c1adf-205e-4841-a666-7c3ef95def9d" # Malko competition (PASS)

     # get question with task_id
     q_data = next((item for item in questions_data if item["task_id"] == task_id), None)
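For context, a minimal sketch of how the reorganized tools package typically gets wired into a LangGraph tool-calling loop. This is an assumption based only on the imports visible in the diff (ToolNode, tools_condition, ChatGoogleGenerativeAI); it is not the repo's actual build_agent() body.

# Hedged sketch, not from agent.py: assumed wiring of the flattened tool
# modules into a ToolNode / tools_condition loop.
from langgraph.graph import StateGraph, START, MessagesState
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_google_genai import ChatGoogleGenerativeAI

from tools.web_search import web_search
from tools.math import add_numbers_in_list, check_commutativity
from tools.extraction import extract_data_from_excel, extract_transcript_from_youtube

tools = [web_search, add_numbers_in_list, check_commutativity,
         extract_data_from_excel, extract_transcript_from_youtube]

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-preview-05-20",
                             max_tokens=512, max_retries=2)
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState):
    # One LLM step; any tool calls in the response are routed to the ToolNode.
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")
graph = builder.compile()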
app.py CHANGED

@@ -1,7 +1,7 @@
 """
 NOTE:
-  - The agent only runs on a subset of tasks
-
+  - The agent only runs on a subset of tasks to avoid unnecessary token/api usage for questions that the agent
+    cannot handle right now. The task ids to exclude are in the `excluded_tasks.txt` file.
   - There is a 30 sec delay after each question is answered to avoid rate limiting issues.
 """

@@ -138,6 +138,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None

+    # Read excluded task IDs from file
+    excluded_tasks = set()
+    with open("excluded_tasks.txt", "r") as f:
+        for line in f:
+            task_id = line.strip()
+            if task_id:
+                excluded_tasks.add(task_id)
+
     # 3. Run your Agent
     results_log = []
     answers_payload = []
@@ -148,14 +156,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-
-        # Only run on subset of tasks that is capable of being run so that
-        # token usage is not wasted on tasks that the agent cannot handle.
-        with open("subset_task_ids.txt", "r") as f:
-            subset_task_ids = [line.strip() for line in f if line.strip()]

-
-
+        # Skip excluded tasks
+        if task_id in excluded_tasks:
+            print(f"Skipping excluded task: {task_id}")
             continue

         try:
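The new filter above reads excluded_tasks.txt once and then skips matching task_ids inside the question loop, replacing the old per-item read of subset_task_ids.txt. A small standalone sketch of the same idea (the helper name and sample data are illustrative, not from app.py):

def load_excluded(path: str = "excluded_tasks.txt") -> set[str]:
    # One task id per line; blank lines are ignored.
    with open(path, "r") as f:
        return {line.strip() for line in f if line.strip()}

excluded_tasks = load_excluded()
questions_data = [
    {"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6", "question": "..."},
    {"task_id": "some-other-task-id", "question": "..."},
]
runnable = [q for q in questions_data if q["task_id"] not in excluded_tasks]
print(f"Running {len(runnable)} of {len(questions_data)} questions")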
excluded_tasks.txt ADDED

@@ -0,0 +1,3 @@
a1e91b78-d3d8-4675-bb8d-62741b4b68a6
99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3
1f975693-876d-457b-a649-393859e79bf3
requirements.txt CHANGED

Binary files a/requirements.txt and b/requirements.txt differ
subset_task_ids.txt DELETED

@@ -1,11 +0,0 @@
8e867cd7-cff9-4e6c-867a-ff5ddc2550be
2d83110e-a098-4ebb-9987-066c06fa42d0
cca530fc-4052-43b2-b130-b30968d8aa44
4fc2f1ae-8625-45b5-ab34-ad4433bc21f8
6f37996b-2ac7-44b0-8e68-6d28256631b4
9d191bce-651d-4746-be2d-7ef8ecadb9c2
cabe07ed-9eca-40ea-8ead-410ef5e83f91
f918266a-b3e0-4914-865d-4faa564f1aef
3f57289b-8c60-48be-bd80-01f8099ca449
7bd855d8-463d-4ed5-93ca-5fe35145f733
5a0c1adf-205e-4841-a666-7c3ef95def9d
system_prompt.txt CHANGED

@@ -1,4 +1,7 @@
-You are a
+You are a world class expert at answering questions. The answers you will provide will be evaluated in an exact match manner to obtain a certificate in AI agents.
+So answer the questions with precision to get the certificate.
+Use this template for your answers: YOUR_FINAL_ANSWER
+Always output ONLY the answer and nothing else.

 For YOUR_FINAL_ANSWER follow strictly the instructions below:
 * YOUR_FINAL_ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
@@ -6,54 +9,48 @@ For YOUR_FINAL_ANSWER follow strictly the instructions below:
   or percent sign unless specified otherwise.
 * If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
 * If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+* If you are provided with code file, examine the code without running it and output only the values you are asked for.

-You are provided with tools that you can use to answer questions accurately.
-
+You are provided with tools that you can use to answer questions accurately.
+In the query you make with the web_search tool, use all the useful information from the user's question, including mentions in specific sources, websites, papers, etc.
+If you cannot answer the question directly, examine the list of available tools and choose the suitable tool for your case.
+You may need to use more than one tool to conclude to an answer.
+If the question is complex, divide it in small seperate parts and resolve them one by one until you reach the to the final answer.
+Always use available tools to perform mathematical operations.

-
+IMPORTANT INSTRUCTION: To pass the certificate the answers you provide should match exactly the ground truth. So, do NOT explain your or your planning steps. Just output the requested information.

-
+Below are some examples to guide you with the question answering process:

-
+<Example_1>
+INPUT: What is the height of statue of liberty?

-
-
-
-[P]: The result of web_search is "The height of the statue of liberty is 93 m"
-A: 93
+PLANNING_STEP: I should use web_search tool.
+PLANNING_STEP: Tool call: web_search("height of statue of liberty").
+PLANNING_STEP: The result of web_search is "The height of the statue of liberty is 93 m".

-
+OUTPUT: 93
+<Example_1>

-
-
-[P]: web_search("circumference of earth in miles")
-[P]: The result of web_search is "The circumference of earth is 24,901 miles"
-A: 24901 miles
+<Example_2>
+INPUT: What is the capital of France?

-
+PLANNING_STEP: This is a factual question I know, so I don't need to use tools.

-
-
-A: Paris
+OUTPUT: Paris
+<Example_2>

-
-
-Q: What is the total cost with two decimal places of the items in the table, excluding drinks?
+<Example_3>
+INPUT: What is the total cost with two decimal places of the items in the table, excluding drinks?
 Table:
 | Burgers | Salads | Soda | Ice Cream |
 | 10.0 | 5.0 | 3.0 | 4.0 |
-[P]: Soda is a drink. The rest are food.
-[P]: I should use add_numbers_in_list([10.0, 5.0, 4.0])
-[P]: The result is 19.0
-A: 19.00
-
-Example 5:
-
-Q: What was the name of the director that won the Oscar in 2009?
-A: Boyle

-
+PLANNING_STEP: I need to seperate foods from drinks.
+PLANNING_STEP: Foods: Burgers, Salads, Ice Cream. Drinks: Soda. User asked me to calculate the cost without the drinks, so I will skip Soda.
+PLANNING_STEP: I should use add_numbers_in_list tool.
+PLANNING_STEP: Tool call: add_numbers_in_list([10.0, 5.0, 4.0])
+PLANNING_STEP: The result is 19.0

-
-
-Never display intermediate math like “X + Y + Z = …” unless specifically requested. Only show the final answer after using the tool.
+OUTPUT: 19.00
+<Example_3>
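A hedged sketch of how a prompt file like this is commonly injected as the agent's system message. Only the file name comes from the repo; the loading code below is an assumption, not shown in the hunks above.

# Assumed prompt loading, for illustration only.
from langchain_core.messages import HumanMessage, SystemMessage

with open("system_prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

messages = [
    SystemMessage(content=system_prompt),
    HumanMessage(content="What is the capital of France?"),
]
# result = agent.invoke({"messages": messages})  # agent = build_agent(provider="google")
# Under the rules above, the expected final output is just: Paris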
tools.py DELETED

@@ -1,267 +0,0 @@
import pandas as pd
import requests
from io import BytesIO
from io import StringIO
from langchain_core.tools import tool
from langchain_community.retrievers import WikipediaRetriever
from langchain_community.document_loaders import ArxivLoader
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from duckduckgo_search import DDGS
from markitdown import MarkItDown

# --------------- Math Tools ---------------- #
@tool
def add_numbers(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a (int): The first number.
        b (int): The second number.
    """
    return a + b

@tool
def add_numbers_in_list(numbers: list[float]) -> float:
    """Add all numbers in a list.
    Always use this tool for summing numerical values, instead of doing math directly in the response.

    Args:
        numbers (list[float]): A list of numbers to add.
    """
    return sum(numbers)

# @tool
# def web_search(query: str) -> str:
#     """Perform a web search using DuckDuckGo.

#     Args:
#         query (str): The search query.

#     Returns:
#         str: The search results.
#     """
#     search_tool = DuckDuckGoSearchRun()
#     return search_tool.invoke(query)

@tool
def web_search(query: str) -> str:
    """
    Perform a web search using DuckDuckGo. Visit the top ranked page,
    apply chunking in page results, perform similarity search, and return
    the top results content.

    Args:
        query (str): The search query.
    Returns:
        Document: The top results from the ranking, in langchain_core.documents.Document
        objects having fields 'page_content' with the chunk content and 'metadata'.
    """
    def _chunk_text(text, chunk_size_words=1000, overlap_words=100):
        """
        Split text into chunks of specified size with overlap.
        Args:
            text (str): The text to be chunked.
            chunk_size (int): The size of each chunk.
            overlap (int): The number of overlapping characters between chunks.
        Returns:
            list: A list of text chunks.
        """
        words = text.split()
        chunks = []
        for i in range(0, len(words), chunk_size_words - overlap_words):
            chunk = " ".join(words[i:i + chunk_size_words])
            chunks.append(chunk)
        return chunks

    # STEP 1: Find the most relevant webpage
    results = DDGS().text(query, max_results=1)
    top_rank_page = results[0] if results else None
    if not top_rank_page:
        return "No relevant results found for the query."

    # STEP 2: Extract the content of the webpage
    md = MarkItDown(enable_plugins=True)
    md_result = md.convert(top_rank_page['href'])

    page_content = md_result.text_content

    # STEP 3: Apply chunking
    chunks = _chunk_text(page_content)

    # STEP 4: Apply ranking in chunks
    list_of_docs = [
        Document(page_content = chunk, metadata = {"source": top_rank_page['href'], "title": top_rank_page['title']})
        for chunk in chunks
    ]

    retriever = BM25Retriever.from_documents(list_of_docs)
    matched = retriever.invoke(query)

    return matched[0]

# TODO:
# Maybe don't return the summary, but the full document?
@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return a summary of the top result.

    Args:
        query (str): The search term.

    Returns:
        str: A summary of the most relevant Wikipedia entry.
    """
    wikipedia_retriever = WikipediaRetriever(load_max_docs=1)

    documents = wikipedia_retriever.get_relevant_documents(query)
    if not documents:
        return "No relevant Wikipedia articles found."

    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" title="{doc.metadata.get("title", "")}"/>\n{doc.metadata["summary"]}\n</Document>'
            for doc in documents
        ])

    # Return the content of the top document
    return formatted_search_docs

@tool
def arxiv_search(query: str) -> str:
    """
    Search Arxiv for academic papers based on a query and return summaries of top results.

    Args:
        query (str): The search query for Arxiv.

    Returns:
        str: Summary of the top few relevant papers from Arxiv.
    """
    try:
        loader = ArxivLoader(query=query, load_max_docs=2)
        documents = loader.load()

        if not documents:
            return "No relevant papers found on Arxiv."

        # Format and return top paper summaries
        results = []
        for doc in documents:
            title = doc.metadata.get("Title", "No Title")
            published = doc.metadata.get("Published", "Unknown date")
            url = doc.metadata.get("entry_id", "No URL")
            summary = doc.page_content[:500] # limit summary length

            results.append(f"Title: {title}\nPublished: {published}\nURL: {url}\nSummary: {summary}\n")

        return "\n---\n".join(results)

    except Exception as e:
        return f"An error occurred while searching Arxiv: {str(e)}"

@tool
def check_commutativity(table_str: str) -> str:
    """
    Given a binary operation table (in markdown format), returns the subset of elements
    involved in counter-examples to commutativity, sorted alphabetically.

    Args:
        table_str (str): Markdown table defining the operation * on a finite set.

    Returns:
        str: Comma-separated list of elements in the counter-example set, alphabetically sorted.
    """
    # Read the table using pandas
    df = pd.read_csv(StringIO(table_str), sep="|", skipinitialspace=True, engine='python')

    # Drop empty columns due to leading/trailing pipes
    df = df.dropna(axis=1, how="all")
    df.columns = [c.strip() for c in df.columns]
    df = df.dropna(axis=0, how="all")

    # Extract header and values
    elements = df.columns[1:]
    df.index = df[df.columns[0]]
    df = df.drop(df.columns[0], axis=1)

    # Check commutativity: a*b == b*a
    counterexample_elements = set()
    for x in elements:
        for y in elements:
            if df.loc[x, y] != df.loc[y, x]:
                counterexample_elements.add(x)
                counterexample_elements.add(y)

    return ", ".join(sorted(counterexample_elements))

@tool
def extract_sales_data_from_excel(url: str) -> str:
    """
    Downloads and extracts sales data from an Excel file at the given URL.
    Returns the contents of the first sheet as a markdown-formatted string.
    """
    try:
        response = requests.get(url)
        response.raise_for_status()

        excel_file = BytesIO(response.content)
        df = pd.read_excel(excel_file)

        # Optional: Remove unnamed columns often created by Excel
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # Convert all numeric columns to float
        for col in df.select_dtypes(include=["number"]).columns:
            df[col] = df[col].astype(float)

        return df.to_string(index=False)

    except Exception as e:
        return f"Failed to process Excel file from URL: {str(e)}"

@tool
def extract_transcript_from_youtube(url: str) -> str:
    """
    Extracts the transcript from a YouTube video given its URL.

    Args:
        url (str): The YouTube video URL.
    Returns:
        str: The transcript of the video, or an error message if extraction fails.
    """
    transcript_str = "### Transcript"
    md = MarkItDown(enable_plugins=True)

    try:
        result = md.convert(url)
    except Exception as e:
        return f"Failed to extract transcript from YouTube video: {str(e)}"

    parts = result.text_content.split(transcript_str)
    if len(parts) < 2:
        return result.text_content

    transcript = transcript_str + "\n" + parts[1]
    return transcript.strip()

# @tool
# def extract_transcript_from_audio(url: str) -> str:
#     """
#     Extracts the transcript from an audio file given its URL.
#     Supported formats: mp3, wav.

#     Args:
#         url (str): The URL of the audio file.
#     Returns:
#         str: The transcript of the audio file, or an error message if extraction fails.
#     """
#     md = MarkItDown(enable_plugins=True)

#     try:
#         result = md.convert(url)
#     except Exception as e:
#         return f"Failed to extract transcript from audio: {str(e)}"

#     return result.text_content
tools/extraction.py ADDED

@@ -0,0 +1,83 @@
import requests
import pandas as pd
from io import BytesIO
from markitdown import MarkItDown
from langchain_core.tools import tool

@tool
def extract_transcript_from_youtube(url: str) -> str:
    """
    Extracts the transcript from a YouTube video given its URL.

    Args:
        url (str): The YouTube video URL.
    Returns:
        transcript (str): The transcript of the video, or an error message if extraction fails.
    """
    transcript_str = "### Transcript"
    md = MarkItDown(enable_plugins=True)

    try:
        result = md.convert(url)
    except Exception as e:
        return f"Failed to extract transcript from YouTube video: {str(e)}"

    parts = result.text_content.split(transcript_str)
    if len(parts) < 2:
        return result.text_content

    transcript = (transcript_str + "\n" + parts[1]).strip()

    return transcript


@tool
def extract_data_from_excel(url: str) -> str:
    """
    Downloads and extracts data from an Excel file at the given URL.

    Args:
        url (str): The URL of the Excel file.

    Returns:
        str: A string representation of the data in the first sheet of the Excel file.
    """
    try:
        response = requests.get(url)
        response.raise_for_status()

        excel_file = BytesIO(response.content)
        df = pd.read_excel(excel_file)

        # Optional: Remove unnamed columns often created by Excel
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # Convert all numeric columns to float
        for col in df.select_dtypes(include=["number"]).columns:
            df[col] = df[col].astype(float)

        return df.to_string(index=False)

    except Exception as e:
        return f"Failed to process Excel file from URL: {str(e)}"


# @tool
# def extract_transcript_from_audio(url: str) -> str:
#     """
#     Extracts the transcript from an audio file given its URL.
#     Supported formats: mp3, wav.

#     Args:
#         url (str): The URL of the audio file.
#     Returns:
#         str: The transcript of the audio file, or an error message if extraction fails.
#     """
#     md = MarkItDown(enable_plugins=True)

#     try:
#         result = md.convert(url)
#     except Exception as e:
#         return f"Failed to extract transcript from audio: {str(e)}"

#     return result.text_content
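Both functions are LangChain @tool objects, so outside the agent they are called through .invoke() rather than as plain Python functions. A quick usage sketch, not part of the commit; the URLs below are placeholders:

from tools.extraction import extract_data_from_excel, extract_transcript_from_youtube

# Placeholder URLs for illustration only.
table_text = extract_data_from_excel.invoke({"url": "https://example.com/sales.xlsx"})
transcript = extract_transcript_from_youtube.invoke({"url": "https://www.youtube.com/watch?v=VIDEO_ID"})

print(table_text[:200])
print(transcript[:200])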
tools/math.py ADDED

@@ -0,0 +1,58 @@
from langchain_core.tools import tool
from io import StringIO
import pandas as pd

@tool
def add_numbers(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a (int): The first number.
        b (int): The second number.
    """
    return a + b

@tool
def add_numbers_in_list(numbers: list[float]) -> float:
    """Add all numbers in a list.
    Always use this tool for summing numerical values, instead of doing math directly in the response.

    Args:
        numbers (list[float]): A list of numbers to add.
    """
    return sum(numbers)

@tool
def check_commutativity(table_str: str) -> str:
    """
    Given a binary operation table (in markdown format), returns the subset of elements
    involved in counter-examples to commutativity, sorted alphabetically.

    Args:
        table_str (str): Markdown table defining the operation * on a finite set.

    Returns:
        str: Comma-separated list of elements in the counter-example set, alphabetically sorted.
    """
    # Read the table using pandas
    df = pd.read_csv(StringIO(table_str), sep="|", skipinitialspace=True, engine='python')

    # Drop empty columns due to leading/trailing pipes
    df = df.dropna(axis=1, how="all")
    df.columns = [c.strip() for c in df.columns]
    df = df.dropna(axis=0, how="all")

    # Extract header and values
    elements = df.columns[1:]
    df.index = df[df.columns[0]]
    df = df.drop(df.columns[0], axis=1)

    # Check commutativity: a*b == b*a
    counterexample_elements = set()
    for x in elements:
        for y in elements:
            if df.loc[x, y] != df.loc[y, x]:
                counterexample_elements.add(x)
                counterexample_elements.add(y)

    return ", ".join(sorted(counterexample_elements))

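An illustrative call of the two tools above (not from the repo). The operation table is made up and deliberately has no markdown separator row, since the parser expects only header and value rows; whether separator rows need to be stripped beforehand is an assumption about the intended input.

from tools.math import add_numbers_in_list, check_commutativity

print(add_numbers_in_list.invoke({"numbers": [10.0, 5.0, 4.0]}))  # 19.0

# The '*' column names the row element; here b*c != c*b, so b and c are reported.
table = (
    "|*|a|b|c|\n"
    "|a|a|b|c|\n"
    "|b|b|c|a|\n"
    "|c|c|b|b|\n"
)
print(check_commutativity.invoke({"table_str": table}))  # expected: "b, c"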
tools/retrievers.py ADDED

@@ -0,0 +1,62 @@
from langchain_core.tools import tool
from langchain_community.document_loaders import ArxivLoader
from langchain_community.retrievers import WikipediaRetriever

@tool
def arxiv_search(query: str) -> str:
    """
    Search Arxiv for academic papers based on a query and return summaries of top results.

    Args:
        query (str): The search query for Arxiv.

    Returns:
        str: Summary of the top few relevant papers from Arxiv.
    """
    try:
        loader = ArxivLoader(query=query, load_max_docs=2)
        documents = loader.load()

        if not documents:
            return "No relevant papers found on Arxiv."

        # Format and return top paper summaries
        results = []
        for doc in documents:
            title = doc.metadata.get("Title", "No Title")
            published = doc.metadata.get("Published", "Unknown date")
            url = doc.metadata.get("entry_id", "No URL")
            summary = doc.page_content[:500] # limit summary length

            results.append(f"Title: {title}\nPublished: {published}\nURL: {url}\nSummary: {summary}\n")

        return "\n---\n".join(results)

    except Exception as e:
        return f"An error occurred while searching Arxiv: {str(e)}"

@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return a summary of the top result.

    Args:
        query (str): The search term.

    Returns:
        str: A summary of the most relevant Wikipedia entry.
    """
    wikipedia_retriever = WikipediaRetriever(load_max_docs=1)

    documents = wikipedia_retriever.get_relevant_documents(query)
    if not documents:
        return "No relevant Wikipedia articles found."

    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" title="{doc.metadata.get("title", "")}"/>\n{doc.metadata["summary"]}\n</Document>'
            for doc in documents
        ])

    # Return the content of the top document
    return formatted_search_docs
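Usage sketch (queries are illustrative, not from the repo). Note that retriever.get_relevant_documents() used above still works but is deprecated in recent LangChain releases in favour of retriever.invoke(); the committed code keeps the older call.

from tools.retrievers import arxiv_search, wikipedia_search

print(arxiv_search.invoke({"query": "retrieval augmented generation survey"})[:300])
print(wikipedia_search.invoke({"query": "Malko Competition"})[:300])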
tools/utils.py ADDED

@@ -0,0 +1,88 @@
from langchain.text_splitter import TextSplitter
from langchain.schema import Document

class StructureAwareTextSplitter(TextSplitter):
    """
    A custom text splitter that creates context-aware document chunks from structured HTML content.

    This splitter buffers paragraphs, lists, and tables together into chunks up to a specified size,
    preserving section headers and content structure. Tables are combined with surrounding content
    when possible, but split into their own chunk if too large. Useful for web page or wiki-style
    content where structure and context are important for downstream retrieval or LLM tasks.

    Args:
        chunk_size (int): Maximum number of words per chunk.
        chunk_overlap (int): Number of words to overlap between chunks (not currently used).

    Methods:
        split_text(text): Dummy implementation to satisfy the abstract base class.
        split_documents(structured_blocks, metadata=None): Splits structured content blocks into
            Document objects with preserved section headers and types.
    """
    def __init__(self, chunk_size=500, chunk_overlap=50):
        super().__init__(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    #TODO: To be implemented
    def split_text(self, text):
        # Dummy implementation to satisfy the abstract base class
        return [text]

    def split_documents(self, structured_blocks, metadata=None):
        current_chunk = ""
        current_words_cnt = 0
        current_header = ""
        documents = []

        def add_document(content, header, type_):
            documents.append(Document(
                page_content=content.strip(),
                metadata={
                    "section_header": header,
                    "type": type_,
                    **(metadata or {})
                }
            ))

        for block in structured_blocks:
            type_ = block['type']
            if type_ == 'header':
                current_header = block['text']

            elif type_ in ['paragraph', 'list']:
                if type_ == 'paragraph':
                    text = block['text']
                else:  # list
                    text = "\n".join(block['items']) + "\n"
                words_cnt = len(text.split())
                if current_words_cnt + words_cnt <= self._chunk_size:
                    current_chunk += text + "\n"
                    current_words_cnt += words_cnt
                else:
                    add_document(f"{current_header}\n\n{current_chunk}", current_header, type_)
                    current_chunk = text + "\n"
                    current_words_cnt = words_cnt

            elif type_ == 'table':
                table_text = f"{current_header} [Table]\n\n{block['text']}\n"
                words_cnt = len(table_text.split())
                # Try to buffer table with current chunk if possible
                if current_words_cnt + words_cnt <= self._chunk_size:
                    current_chunk += table_text
                    current_words_cnt += words_cnt
                else:
                    # If current_chunk is not empty, flush it first
                    if current_chunk.strip():
                        add_document(f"{current_header}\n\n{current_chunk}", current_header, 'mixed')
                    # If table itself is too big, split it alone
                    if words_cnt > self._chunk_size:
                        add_document(table_text, current_header, 'table')
                        current_chunk = ""
                        current_words_cnt = 0
                    else:
                        current_chunk = table_text
                        current_words_cnt = words_cnt

        if current_chunk.strip():
            add_document(f"{current_header}\n\n{current_chunk}", current_header, 'mixed')

        return documents
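A small illustrative input for the splitter above; the block dicts mirror what _parse_structured_content() in tools/web_search.py emits (header/paragraph/list/table blocks), while the sample values and source URL are made up for this sketch.

from tools.utils import StructureAwareTextSplitter

blocks = [
    {"type": "header", "level": "h2", "text": "Statue of Liberty"},
    {"type": "paragraph", "text": "The statue was dedicated in 1886 on Liberty Island."},
    {"type": "list", "items": ["- Height: 93 m", "- Material: copper"]},
]

splitter = StructureAwareTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(blocks, metadata={"source": "https://example.org/liberty"})

for doc in docs:
    # Everything fits in one chunk here, so a single 'mixed' Document is produced.
    print(doc.metadata["section_header"], "|", doc.metadata["type"])
    print(doc.page_content[:120])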
tools/web_search.py ADDED

@@ -0,0 +1,197 @@
import requests
import numpy as np
import pandas as pd
from io import StringIO
from bs4 import BeautifulSoup
from langchain_core.tools import tool
from duckduckgo_search import DDGS
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tools.utils import StructureAwareTextSplitter

TOP_K = 5
MAX_RESULTS = 2
UNWANTED_TAGS = ['nav', 'header', 'footer', 'aside', 'form', 'script', 'style']
TAGS_TO_KEEP = ['h1', 'h2', 'h3', 'p', 'ul', 'ol', 'table']


def _format_table_to_string(table_html):
    """
    Convert an HTML table to a markdown-style string representation.

    Args:
        table_html (str): HTML string of the table.

    Returns:
        str: Table formatted as a markdown-style string, or a message if parsing fails.
    """
    try:
        df = pd.read_html(StringIO(table_html))[0]
    except:
        return ["[Table could not be parsed]"]

    if df.empty:
        return None

    table_str = "|"
    # Put column headers
    for col in df.columns:
        table_str += f" {col} |"
    table_str += "\n"

    # Put rows
    for _, row in df.iterrows():
        table_str += "|"
        for col, val in row.items():
            table_str += f" {val} |"
        table_str += "\n"

    return table_str

def _extract_list(tag, level=0):
    """
    Recursively extract nested HTML lists (<ul> or <ol>) into a formatted text list.

    Args:
        tag (bs4.element.Tag): The <ul> or <ol> BeautifulSoup tag to extract.
        level (int): The current nesting level (used for indentation and prefixing).

    Returns:
        list[str]: List of formatted strings representing the list items, preserving nesting.
    """
    items = []
    if tag.name not in ["ul", "ol"]:
        return items

    is_ordered = tag.name == "ol"
    # Determine prefix style
    if is_ordered:
        # Use numbers for top-level, letters for nested
        if level == 0:
            item_prefix = lambda idx: f"{idx+1}."
        else:
            # a., b., c., ...
            item_prefix = lambda idx: f"{chr(97+idx)}."
    else:
        item_prefix = lambda idx: "-"

    for idx, li in enumerate(tag.find_all("li", recursive=False)):
        # Get the text before any nested list
        text = li.find(text=True, recursive=False)
        text = text.strip() if text else ""
        # Check for nested lists
        nested = li.find(["ul", "ol"], recursive=False)
        if nested:
            nested_items = _extract_list(nested, level+1)
            if text:
                items.append(f"{' '*level}{item_prefix(idx)} {text}")
            items.extend([f"{' '*(level+1)}{line}" for line in nested_items])
        else:
            items.append(f"{' '*level}{item_prefix(idx)} {text}")
    return items

def _parse_structured_content(soup):
    """
    Parse the main content of a BeautifulSoup HTML document into structured blocks.

    Args:
        soup (bs4.BeautifulSoup): Parsed HTML document.

    Returns:
        list[dict]: List of structured content blocks (headers, paragraphs, lists, tables).
    """
    content = []

    for tag in soup.find_all(TAGS_TO_KEEP):
        if tag.name in ['h1', 'h2', 'h3']:
            content.append({'type': 'header', 'level': tag.name, 'text': tag.get_text(strip=True)})
        elif tag.name == 'p':
            content.append({'type': 'paragraph', 'text': tag.get_text(strip=True)})
        elif tag.name in ['ul', 'ol']:
            if tag.find_parent(['ul', 'ol']) is None:
                items = _extract_list(tag)
                content.append({'type': 'list', 'items': items})
        elif tag.name == 'table':
            content.append({'type': 'table', 'html': str(tag)})

    return content

@tool
def web_search(query: str) -> str:
    """
    Perform a web search using DuckDuckGo.

    This tool is acting as live data RAG (Retrieval-Augmented Generation) tool.
    It's useful for retrieving relevant information or obtaining domain knowledge
    in a specific area, such as mathematics, science, games, etc.

    Args:
        query (str): The search query.
    Returns:
        chunks (str): Concatenated string of most relevant chunks.
    """

    # ----- STEP 1: Find the most relevant webpages
    results = DDGS(timeout=30).text(query, max_results=MAX_RESULTS)

    urls = [r['href'] for r in results if 'href' in r]

    all_chunks = []
    for url in urls:
        try:
            response = requests.get(url)
            html = response.text
        except Exception as e:
            return f"Error fetching URL {url}: {str(e)}"

        # ----- STEP 2: Parse and clean the HTML content
        soup = BeautifulSoup(html, "html.parser")

        # Remove unwanted tags before parsing structured content
        for tag in soup.find_all(UNWANTED_TAGS):
            tag.decompose()

        structured_content = _parse_structured_content(soup)

        # ----- STEP 3: Format tables to string representation
        for item in structured_content:
            if item['type'] == 'table':
                table_str = _format_table_to_string(item['html'])
                if table_str:
                    item['text'] = table_str
                else:
                    # Skip empty or unparseable tables
                    structured_content.remove(item)

        # ----- STEP 4: Split structured content into chunks
        splitter = StructureAwareTextSplitter(chunk_size=500, chunk_overlap=50)
        documents = splitter.split_documents(structured_content)

        all_chunks.extend([
            f"\n\n----- CHUNK {i} (url: {url})-----\n\n" + doc.page_content
            for i, doc in enumerate(documents)
        ])

    # ----- STEP 5: Make embeddings
    model = SentenceTransformer("all-MiniLM-L6-v2")  # Small & fast
    embeddings = model.encode(all_chunks)

    embedded_query = model.encode(query)

    # ----- STEP 6: Calculate cosine similarity
    # Reshape query for pairwise comparison
    embedded_query = np.array(embedded_query).reshape(1, -1)
    embeddings = np.array(embeddings)

    # Compute cosine similarities
    similarities = cosine_similarity(embedded_query, embeddings)[0]  # Shape: (n_chunks,)

    # Get most similar chunks
    top_indices = similarities.argsort()[-TOP_K:][::-1]

    # output in a file the top chunks
    # with open("test_output/top_chunks.txt", "w", encoding="utf-8") as f:
    #     for c in all_chunks:
    #         f.write(c)

    return "".join([all_chunks[idx] for idx in top_indices])
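End-to-end usage sketch (not part of the commit): the tool fetches the top MAX_RESULTS DuckDuckGo hits, chunks each page with StructureAwareTextSplitter, embeds all chunks with all-MiniLM-L6-v2, and returns the TOP_K chunks most similar to the query as one concatenated string. One caveat visible in the code above: the CHUNK label index i restarts for every URL, so chunk numbers repeat across sources, although the similarity ranking itself is unaffected.

from tools.web_search import web_search

# Illustrative query only.
result = web_search.invoke({"query": "height of the Statue of Liberty"})
print(result[:500])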