AIAgentFinal_Assignment

Runtime error

App Files Files Community

Celine1026 commited on May 9

Commit

75703a7

verified ·

1 Parent(s): 81917a3

test

Browse files

Files changed (11) hide show

.gitattributes +2 -0
agent.py +216 -0
app.py +11 -3
cheatsheet-transformers-large-language-models.pdf +3 -0
explore_metadata.ipynb +0 -0
metadata.jsonl +0 -0
requirements.txt +22 -1
retriever.py +57 -0
steps.txt +43 -0
system_prompt.txt +46 -0
生成特定图片.png +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+生成特定图片.png filter=lfs diff=lfs merge=lfs -text
+cheatsheet-transformers-large-language-models.pdf filter=lfs diff=lfs merge=lfs -text

agent.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import os
+from dotenv import load_dotenv
+from langgraph.graph import START, StateGraph, MessagesState
+from langgraph.prebuilt import tools_condition
+from langgraph.prebuilt import ToolNode
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_groq import ChatGroq
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.document_loaders import ArxivLoader
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.tools import tool
+from langchain.tools.retriever import create_retriever_tool
+from supabase.client import Client, create_client
+load_dotenv()
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a * b
+@tool
+def add(a: int, b: int) -> int:
+    """Add two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a + b
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a - b
+@tool
+def divide(a: int, b: int) -> int:
+    """Divide two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    if b == 0:
+        raise ValueError("Cannot divide by zero.")
+    return a / b
+@tool
+def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a % b
+@tool
+def wiki_search(query: str) -> str:
+    """Search Wikipedia for a query and return maximum 2 results.
+    Args:
+        query: The search query."""
+    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+            for doc in search_docs
+        ])
+    return {"wiki_results": formatted_search_docs}
+@tool
+def web_search(query: str) -> str:
+    """Search Tavily for a query and return maximum 3 results.
+    Args:
+        query: The search query."""
+    search_docs = TavilySearchResults(max_results=3).invoke(query=query)
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+            for doc in search_docs
+        ])
+    return {"web_results": formatted_search_docs}
+@tool
+def arvix_search(query: str) -> str:
+    """Search Arxiv for a query and return maximum 3 result.
+    Args:
+        query: The search query."""
+    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+            for doc in search_docs
+        ])
+    return {"arvix_results": formatted_search_docs}
+# load the system prompt from the file
+with open("system_prompt.txt", "r", encoding="utf-8") as f:
+    system_prompt = f.read()
+# System message
+sys_msg = SystemMessage(content=system_prompt)
+# build a retriever with existing supabase
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") #  dim=768
+supabase: Client = create_client(
+    os.environ.get("SUPABASE_URL"),
+    os.environ.get("SUPABASE_SERVICE_KEY"))
+vector_store = SupabaseVectorStore(
+    client=supabase,
+    embedding= embeddings,
+    table_name=os.getenv('TABLE_NAME'),
+    query_name=os.getenv('QUERY_NAME'),
+)
+create_retriever_tool = create_retriever_tool(
+    retriever=vector_store.as_retriever(),
+    name="Question Search",
+    description="A tool to retrieve similar questions from a vector store.",
+)
+tools = [
+    multiply,
+    add,
+    subtract,
+    divide,
+    modulus,
+    wiki_search,
+    web_search,
+    arvix_search,
+]
+# Build graph function
+def build_graph(provider: str = "groq"):
+    """Build the graph"""
+    # Load environment variables from .env file
+    if provider == "google":
+        # Google Gemini
+        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
+    elif provider == "groq":
+        print("choose groq=====================================")
+        # Groq https://console.groq.com/docs/models
+        llm = ChatGroq(model="qwen-qwq-32b", temperature=0) # optional : qwen-qwq-32b gemma2-9b-it
+    elif provider == "huggingface":
+        print("choose huggingface===============================================")
+        # TODO: Add huggingface endpoint
+        llm = ChatHuggingFace(
+            llm=HuggingFaceEndpoint(
+                model='Meta-DeepLearning/llama-2-7b-chat-hf',
+                endpoint_url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
+                temperature=0,
+            ),
+        )
+    else:
+        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
+    # Bind tools to LLM
+    llm_with_tools = llm.bind_tools(tools)
+    # Node
+    def assistant(state: MessagesState):
+        """Assistant node"""
+        return {"messages": [llm_with_tools.invoke( state["messages"])]}
+    def retriever(state: MessagesState):
+        """Retriever node"""
+        similar_question = vector_store.similarity_search(state["messages"][0].content)
+        example_msg = [HumanMessage(
+            content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
+        )]
+        return {"messages": [sys_msg] +state["messages"] + example_msg}
+    builder = StateGraph(MessagesState)
+    builder.add_node("retriever", retriever)
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    builder.add_edge(START, "retriever")
+    builder.add_edge("retriever", "assistant")
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+    )
+    builder.add_edge("tools", "assistant")
+    # Compile graph
+    return builder.compile()
+# test
+if __name__ == "__main__":
+    question = "What's the last line of the rhyme under the flavor name on the headstone visible in the background of the photo of the oldest flavor's headstone in the Ben & Jerry's online flavor graveyard as of the end of 2022?"
+    # Build the graph
+    graph = build_graph(provider="groq")
+    # Run the graph
+    messages = [HumanMessage(content=question)]
+    messages = graph.invoke({"messages": messages})
+    for m in messages["messages"]:
+        m.pretty_print()

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
@@ -13,11 +15,17 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 import requests
 import inspect
 import pandas as pd
+from agent import build_graph
+from langchain_core.messages import HumanMessage, SystemMessage
 # (Keep Constants as is)
 # --- Constants ---
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
+        self.graph = build_graph()
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        messages = [HumanMessage(content=question)]
+        messages = self.graph.invoke({"messages": messages})
+        answer = messages['messages'][-1].content.split("FINAL ANSWER: ")[-1]
+        print(f"Agent returning answer: {answer}")
+        return answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

cheatsheet-transformers-large-language-models.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5f4cba7c54bbe86caf70122b665b1b14d51abad2634bf5c6481eb62fd6a1a3f
+size 1587084

explore_metadata.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

metadata.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,2 +1,23 @@
 gradio
-requests

 gradio
+requests
+langchain
+langchain-community
+langchain-core
+langchain-google-genai
+langchain-huggingface
+langchain-groq
+langchain-tavily
+langchain-chroma
+langgraph
+huggingface_hub
+supabase
+arxiv
+pymupdf
+wikipedia
+pgvector
+python-dotenv
+gradio[oauth]>=4.25.0
+sentence-transformers
+numpy<2
+duckduckgo-search
+langchain_openai

retriever.py ADDED Viewed

	@@ -0,0 +1,57 @@

+#build retriever on supabase
+#create project, table, indexes, and functions
+#create client with url and key
+#insert data with embedding
+#
+# Load metadata.jsonl
+import json
+import os
+from dotenv import load_dotenv
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import SupabaseVectorStore
+from supabase.client import Client, create_client
+from langchain.schema import Document
+# Load the metadata.jsonl file
+with open('metadata.jsonl', 'r') as jsonl_file:
+    json_list = list(jsonl_file)
+json_QA = []
+for json_str in json_list:
+    json_data = json.loads(json_str)
+    json_QA.append(json_data)
+### build a vector database based on the metadata.jsonl
+# https://python.langchain.com/docs/integrations/vectorstores/supabase/
+load_dotenv()
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") #  dim=768
+supabase_url = os.environ.get("SUPABASE_URL")
+supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")
+supabase: Client = create_client(supabase_url, supabase_key)
+# wrap the metadata.jsonl's questions and answers into a list of document
+docs = []
+for sample in json_QA:
+    content = f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
+    doc = {
+        "content" : content,
+        "metadata" : { # meatadata的格式必须时source键，否则会报错
+            "source" : sample['task_id']
+        },
+        "embedding" : embeddings.embed_query(content),
+    }
+    docs.append(doc)
+table_name = os.environ.get('TABLE_NAME')
+# upload the documents to the vector database
+try:
+    response = (
+        supabase.table("documents")
+        .insert(docs)
+        .execute()
+    )
+except Exception as exception:
+    print("Error inserting data into Supabase:", exception)

steps.txt ADDED Viewed

	@@ -0,0 +1,43 @@

+# Give yourself more patience.
+1. Explore the metadata and check each key
+2. define retriever
+supabase?
+relational database?, embeddings, content, id, ...
+create a project, and a table + columns first emm...
+https://supabase.com/dashboard/project/ohzwldyjckkuzbybaixs/editor/17248
+enable vector in extensions under database
+create table public.documents (
+  id bigint generated by default as identity primary key,
+  content text,
+  metadata json,
+  embedding vector(768),
+  similarity float
+);
+create index for embedding!!!
+add functions, advanced settings, sql language
+create index on documents using hnsw (embedding vector_ip_ops);
+alter table documents enable row level security;
+create function match_documents_langchain (
+  query_embedding vector (768)
+)
+returns setof documents
+language plpgsql
+as $$
+begin
+  return query
+  select *
+  from documents
+  order by documents.embedding <#> query_embedding
+  limit 1;
+end;
+$$;
+3. define agent
+4. define gradio

system_prompt.txt ADDED Viewed

	@@ -0,0 +1,46 @@

+You are a helpful assistant tasked with answering questions using a set of tools.
+If the tool is not available, you can try to find the information online. You can also use your own knowledge to answer the question.
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
+Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+==========================
+Here are a few examples showing you how to answer the question step by step.
+Question 1: Compute the check digit the Tropicos ID for the Order Helotiales would have if it were an ISBN-10 number.
+Steps:
+1. Search "Tropicos ID Order Helotiales"
+2. Find the correct ID on the first result
+3. Search "isbn 10 check digit calculator" or calculate check digit by hand
+Tools:
+1. web browser
+2. search engine
+3. calculator
+FINAL ANSWER: 3
+Question 2: What's the last line of the rhyme under the flavor name on the headstone visible in the background of the photo of the oldest flavor's headstone in the Ben & Jerry's online flavor graveyard as of the end of 2022?
+Steps:
+1. Searched "ben and jerrys flavor graveyard" on Google search.
+2. Opened "Flavor Graveyard" on www.benjerry.com.
+3. Opened each flavor to find the oldest one (Dastardly Mash).
+4. Deciphered the blurry name on the headstone behind it (Miz Jelena's Sweet Potato Pie).
+5. Scrolled down to Miz Jelena's Sweet Potato Pie.
+6. Copied the last line of the rhyme.
+7. (Optional) Copied the URL.
+8. Searched "internet archive" on Google search.
+9. Opened the Wayback Machine.
+10. Entered the URL.
+11. Loaded the last 2022 page.
+12. Confirmed the information was the same.
+Tools:
+1. Image recognition tools
+2. Web browser
+3. Search engine
+FINAL ANSWER: So we had to let it die.
+==========================
+Now, please answer the following question step by step.

生成特定图片.png ADDED Viewed

Git LFS Details

SHA256: 38d59b59423140d151c4fefa3b43876f9602043b0f8674da46a7b3cfac68d194
Pointer size: 131 Bytes
Size of remote file: 922 kB