Update new files
- agent.py +240 -217
- app.py +3 -7
- prompt.txt +5 -0
- requirements.txt +8 -12

agent.py
CHANGED
@@ -1,259 +1,307 @@
-"""LangGraph Agent"""
import os
-import
-import
-import
from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
-from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
-from langchain_groq import ChatGroq
-from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.document_loaders import WikipediaLoader
-from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import SupabaseVectorStore
-from langchain_core.messages import SystemMessage, HumanMessage
-from langchain_core.tools import tool
-from langchain.tools.retriever import create_retriever_tool
from supabase.client import Client, create_client
-from

-

-
-
-
-

    Args:
-
-

    Returns:
-
    """
-

-
-
-    """
-    Add two integers.

    Args:
-
-        b (int): The second integer.

    Returns:
-
    """
-

-
-
-    """
-    Subtract one integer from another.

    Args:
-
-

    Returns:
-
    """
-

@tool
-def
    """
-

    Args:
-        a (int):
-        b (int):

    Returns:
-        float:

    Raises:
-        ValueError: If b is zero
    """
    if b == 0:
-        raise ValueError("Cannot divide by zero
-    return a / b

@tool
def modulus(a: int, b: int) -> int:
-    """
-    Compute the modulus (remainder) of two integers.

    Args:
-        a (int):
-        b (int):
-
    Returns:
-        int:
    """
    return a % b

@tool
-def power(
-    """
-    Raise a number to the power of another number.

    Args:
-
-
-
    Returns:
-        float:
    """
-    return

@tool
-def square_root(
-    """
-    Compute the square root of a number. Returns a complex number if input is negative.

    Args:
-
-
    Returns:
-        float
    """
-    if
-
-    return
-
-### =============== DOCUMENT PROCESSING TOOLS =============== ###

@tool
-def
-    """
-    Save text content to a file and return the file path.

    Args:
-
-
-
    Returns:
-
    """
-
-
-
-        filepath = temp_file.name
-    else:
-        filepath = os.path.join(temp_dir, filename)
-
-    with open(filepath, "w") as f:
-        f.write(content)
-
-    return f"File saved to {filepath}. You can read this file to process its contents."

@tool
-def
-    """
-    Analyze a CSV file and answer a question about its data.

    Args:
-
-
-
    Returns:
-
    """
-
-        df = pd.read_csv(file_path)
-        result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
-        result += f"Columns: {', '.join(df.columns)}\n\n"
-        result += "Summary statistics:\n"
-        result += str(df.describe())
-        return result
-    except Exception as e:
-        return f"Error analyzing CSV file: {str(e)}"

@tool
-def
-    """
-    Analyze an Excel file and answer a question about its data.

    Args:
-
-
-
    Returns:
-        str:
    """
    try:
-
-
-
-
-
-
-
-
    except Exception as e:
-        return f"

@tool
-def
-    """
-    Search Wikipedia for a query and return up to 2 results.

    Args:
-
-
    Returns:
-        str:
-    """
-    search_docs = WikipediaLoader(query=input, load_max_docs=2).load()
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
-        ])
-    return {"wiki_results": formatted_search_docs}
-
-@tool
-def web_search(input: str) -> str:
    """
-
-
-
-

-
-
-
-
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            (
-                f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-                if hasattr(doc, "metadata") and hasattr(doc, "page_content")
-                else
-                f'<Document source="{doc.get("source", "")}" page="{doc.get("page", "")}"/>\n{doc.get("content", doc.get("page_content", ""))}\n</Document>'
-            )
-            for doc in search_docs
-        ]
-    )
-    return {"web_results": formatted_search_docs}

@tool
-def arvix_search(
-    """
-    Search Arxiv for a query and return up to 3 results.

    Args:
-
-
    Returns:
-        str:
    """
-    search_docs = ArxivLoader(query=
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
@@ -261,24 +309,22 @@ def arvix_search(input: str) -> str:
        ])
    return {"arvix_results": formatted_search_docs}

-#
-with open("
    system_prompt = f.read()

-# System message
sys_msg = SystemMessage(content=system_prompt)

-#
-embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
-#embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")
supabase: Client = create_client(
    os.environ.get("SUPABASE_URL"),
-    os.environ.get("
vector_store = SupabaseVectorStore(
    client=supabase,
-    embedding=
    table_name="documents",
-    query_name="
)
create_retriever_tool = create_retriever_tool(
    retriever=vector_store.as_retriever(),
@@ -287,34 +333,27 @@ create_retriever_tool = create_retriever_tool(
)

tools = [
    multiply,
-    add,
    subtract,
-
-    modulus,
-    power,
-    square_root,
-    wiki_search,
-    web_search,
-    arvix_search,
-    save_and_read_file,
-    analyze_csv_file,
-    analyze_excel_file,
-    # create_retriever_tool
]

-# Build graph function
def build_graph(provider: str = "groq"):
    """Build the graph"""
-    # Load environment variables from .env file
    if provider == "google":
-        # Google Gemini
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    elif provider == "groq":
-
-        llm = ChatGroq(model="qwen-qwq-32b", temperature=0)  # optional: qwen-qwq-32b gemma2-9b-it
    elif provider == "huggingface":
-        # TODO: Add huggingface endpoint
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
@@ -323,26 +362,17 @@ def build_graph(provider: str = "groq"):
        )
    else:
        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
-
    llm_with_tools = llm.bind_tools(tools)

-    # Node
    def assistant(state: MessagesState):
-        """Assistant node"""
        return {"messages": [llm_with_tools.invoke(state["messages"])]}

    def retriever(state: MessagesState):
-        """Retriever node"""
        similar_question = vector_store.similarity_search(state["messages"][0].content)
-
-
-
-                content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
-            )
-        else:
-            example_msg = HumanMessage(
-                content="No similar questions found in the database.",
-            )
        return {"messages": [sys_msg] + state["messages"] + [example_msg]}

    builder = StateGraph(MessagesState)
@@ -357,17 +387,10 @@ def build_graph(provider: str = "groq"):
    )
    builder.add_edge("tools", "assistant")

-    # Compile graph
    return builder.compile()

-# test
if __name__ == "__main__":
-
-
-
-
-    # Run the graph
-    messages = [HumanMessage(content=question)]
-    messages = graph.invoke({"messages": messages})
-    for m in messages["messages"]:
-        m.pretty_print()
import os
+from langchain.tools import tool
+from typing import Union, List
+from decimal import Decimal, getcontext
from dotenv import load_dotenv
+from langchain_community.utilities import WikipediaAPIWrapper
+import warnings
+import wikipedia
+from bs4 import BeautifulSoup
+from langchain_community.tools import DuckDuckGoSearchRun
+import requests
+from typing import Optional
+import re
+from langchain_community.document_loaders import ArxivLoader
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_groq import ChatGroq
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client
+from langchain.tools.retriever import create_retriever_tool

+# Initialize search tool
+search_tool = DuckDuckGoSearchRun()
+getcontext().prec = 10
+# Initial configuration
+load_dotenv()  # Load environment variables

+# Fix for parser warning
+wikipedia.wikipedia._BeautifulSoup = lambda html: BeautifulSoup(html, 'html.parser')
+warnings.filterwarnings("ignore", category=UserWarning, module="wikipedia")
+
+def configure_wikipedia(language: str = 'en', top_k_results: int = 3, max_chars: int = 4000):
+    """Configure Wikipedia search settings

    Args:
+        language (str): Search language (default 'en')
+        top_k_results (int): Number of results to return
+        max_chars (int): Maximum character limit per result

    Returns:
+        WikipediaAPIWrapper: Configured WikipediaAPIWrapper instance
    """
+    wikipedia.set_lang(language)
+    return WikipediaAPIWrapper(
+        wiki_client=wikipedia,
+        top_k_results=top_k_results,
+        doc_content_chars_max=max_chars
+    )

+def format_search_result(raw_result: str) -> str:
+    """Format Wikipedia search results for better readability

    Args:
+        raw_result (str): Raw output from WikipediaAPIWrapper

    Returns:
+        str: Formatted search result
    """
+    if "Page: " in raw_result and "Summary: " in raw_result:
+        parts = raw_result.split("Summary: ")
+        page_part = parts[0].replace("Page: ", "").strip()
+        summary_part = parts[1].strip()
+        return f"📚 Page: {page_part}\n\n📝 Summary: {summary_part}"
+    return raw_result

+def search_wikipedia(query: str, language: str = 'en') -> str:
+    """Perform Wikipedia searches with error handling

    Args:
+        query (str): Search term
+        language (str): Search language (optional)

    Returns:
+        str: Formatted result or error message
    """
+    try:
+        wikipedia_tool = configure_wikipedia(language=language)
+        result = wikipedia_tool.run(query)
+        return format_search_result(result)
+    except Exception as e:
+        return f"Search error: {str(e)}"

@tool
+def add(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
+    """Add two numbers together.
+
+    Args:
+        a (Union[int, float]): First number
+        b (Union[int, float]): Second number
+
+    Returns:
+        Union[int, float]: Sum of a and b
    """
+    return float(Decimal(str(a)) + Decimal(str(b)))
+
+@tool
+def subtract(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
+    """Subtract b from a.

    Args:
+        a (Union[int, float]): Minuend
+        b (Union[int, float]): Subtrahend
+
+    Returns:
+        Union[int, float]: Difference between a and b
+    """
+    return float(Decimal(str(a)) - Decimal(str(b)))
+
+@tool
+def multiply(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
+    """Multiply two numbers.

+    Args:
+        a (Union[int, float]): First factor
+        b (Union[int, float]): Second factor
+
    Returns:
+        Union[int, float]: Product of a and b
+    """
+    return float(Decimal(str(a)) * Decimal(str(b)))
+
+@tool
+def divide(a: Union[int, float], b: Union[int, float]) -> float:
+    """Divide a by b.

+    Args:
+        a (Union[int, float]): Dividend
+        b (Union[int, float]): Divisor
+
+    Returns:
+        float: Quotient of a divided by b
+
    Raises:
+        ValueError: If b is zero
    """
    if b == 0:
+        raise ValueError("Cannot divide by zero")
+    return float(Decimal(str(a)) / Decimal(str(b)))

@tool
def modulus(a: int, b: int) -> int:
+    """Find the remainder of a divided by b.

    Args:
+        a (int): Dividend
+        b (int): Divisor
+
    Returns:
+        int: Remainder of a divided by b
+
+    Raises:
+        ValueError: If b is zero
    """
+    if b == 0:
+        raise ValueError("Cannot divide by zero for modulus")
    return a % b

@tool
+def power(base: Union[int, float], exponent: Union[int, float]) -> Union[int, float]:
+    """Raise base to the power of exponent.

    Args:
+        base (Union[int, float]): The base number
+        exponent (Union[int, float]): The exponent
+
    Returns:
+        Union[int, float]: Result of base^exponent
    """
+    return float(Decimal(str(base)) ** Decimal(str(exponent)))

@tool
+def square_root(x: Union[int, float]) -> float:
+    """Calculate the square root of a number.

    Args:
+        x (Union[int, float]): Number to find the square root of
+
    Returns:
+        float: Square root of x
+
+    Raises:
+        ValueError: If x is negative
    """
+    if x < 0:
+        raise ValueError("Cannot calculate square root of negative number")
+    return float(Decimal(str(x)).sqrt())

@tool
+def average(numbers: List[Union[int, float]]) -> float:
+    """Calculate the average of a list of numbers.

    Args:
+        numbers (List[Union[int, float]]): List of numbers
+
    Returns:
+        float: Average of the numbers
+
+    Raises:
+        ValueError: If list is empty
    """
+    if not numbers:
+        raise ValueError("Cannot calculate average of empty list")
+    return float(sum(Decimal(str(n)) for n in numbers) / Decimal(len(numbers)))

@tool
+def percentage(value: Union[int, float], percent: Union[int, float]) -> float:
+    """Calculate percentage of a value.

    Args:
+        value (Union[int, float]): Base value
+        percent (Union[int, float]): Percentage to calculate
+
    Returns:
+        float: Result of value * (percent/100)
    """
+    return float(Decimal(str(value)) * (Decimal(str(percent)) / Decimal(100)))

@tool
+def web_search(query: str, site: Optional[str] = None, max_results: int = 5) -> str:
+    """Perform internet searches. Can search the entire web or specific websites.

    Args:
+        query (str): Search terms
+        site (Optional[str]): Specific website to search (e.g., 'wikipedia.org')
+        max_results (int): Maximum number of results to return
+
    Returns:
+        str: Formatted search results
    """
    try:
+        if site and not re.match(r'^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', site.split('/')[0]):
+            return "Error: Invalid website format. Use 'domain.ext'"
+
+        search_query = f"{query} site:{site}" if site else query
+        results = search_tool.run(search_query)
+
+        formatted = []
+        for i, result in enumerate(results.split('\n\n')[:max_results]):
+            if result.strip():
+                formatted.append(f"{i+1}. {result.strip()}")
+
+        header = f"Results from {site}" if site else "Search results"
+        return f"{header}:\n\n" + '\n\n'.join(formatted) if formatted else "No results found"
+
    except Exception as e:
+        return f"Search error: {str(e)}"

@tool
+def scrape_page(url: str, search_term: Optional[str] = None, max_length: int = 3000) -> str:
+    """Extract content from a specific webpage, optionally filtering by search term.

    Args:
+        url (str): Full URL of the page to scrape
+        search_term (Optional[str]): Term to search within the page content
+        max_length (int): Maximum character length of returned content
+
    Returns:
+        str: Relevant page content with source attribution
    """
+    try:
+        if not re.match(r'^https?://[^\s/$.?#].[^\s]*$', url):
+            return "Error: Invalid URL format"
+
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        }
+        response = requests.get(url, headers=headers, timeout=15)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        for element in soup(['script', 'style', 'nav', 'footer', 'iframe', 'img']):
+            element.decompose()
+
+        text = '\n'.join(line.strip() for line in soup.get_text().split('\n') if line.strip())
+
+        if search_term:
+            lines = [line for line in text.split('\n') if search_term.lower() in line.lower()]
+            text = '\n'.join(lines[:15])
+
+        text = text[:max_length] + ('...' if len(text) > max_length else '')
+
+        return f"Content from {url}:\n\n{text}"

+    except requests.exceptions.RequestException as e:
+        return f"Network error: {str(e)}"
+    except Exception as e:
+        return f"Scraping error: {str(e)}"

@tool
+def arvix_search(query: str) -> str:
+    """Search Arxiv for a query and return maximum 3 results.

    Args:
+        query (str): The search query
+
    Returns:
+        str: Formatted search results
    """
+    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arvix_results": formatted_search_docs}

+# Load system prompt
+with open("prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

sys_msg = SystemMessage(content=system_prompt)

+# Build retriever
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
supabase: Client = create_client(
    os.environ.get("SUPABASE_URL"),
+    os.environ.get("SUPABASE_SERVICE_KEY"))
vector_store = SupabaseVectorStore(
    client=supabase,
+    embedding=embeddings,
    table_name="documents",
+    query_name="match_documents",
)
create_retriever_tool = create_retriever_tool(
    retriever=vector_store.as_retriever(),
)

tools = [
+    arvix_search,
+    scrape_page,
+    web_search,
+    percentage,
+    average,
+    square_root,
+    power,
+    modulus,
+    divide,
    multiply,
    subtract,
+    add,
]

def build_graph(provider: str = "groq"):
    """Build the graph"""
    if provider == "google":
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    elif provider == "groq":
+        llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
        )
    else:
        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
+
    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        return {"messages": [llm_with_tools.invoke(state["messages"])]}

    def retriever(state: MessagesState):
        similar_question = vector_store.similarity_search(state["messages"][0].content)
+        # Guard against an empty result set so indexing similar_question[0] cannot fail
+        if similar_question:
+            example_msg = HumanMessage(
+                content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
+            )
+        else:
+            example_msg = HumanMessage(content="No similar questions found in the database.")
        return {"messages": [sys_msg] + state["messages"] + [example_msg]}

    builder = StateGraph(MessagesState)

    )
    builder.add_edge("tools", "assistant")

    return builder.compile()

if __name__ == "__main__":
+    app = build_graph(provider="groq")
+    inputs = {"messages": [HumanMessage(content="I was reading this book last year, and it had this really cool description where it referred to something as looking like a manta ray. I’m trying to figure out when I read that part, but I can’t find what book it’s from. This file I attached has a list of the books I read last year, with the date I started and finished reading each one. I’d like for you to tell me the month in which I likely read the passage about the manta ray.")]}
+    result = app.invoke(inputs)
+    print(result)
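
agent.py configures Supabase at import time, so the environment must be populated before the module is imported. A minimal sketch for verifying that: SUPABASE_URL and SUPABASE_SERVICE_KEY are read explicitly above, while GROQ_API_KEY is an assumption about what the ChatGroq client reads from the environment, since no key handling appears in this diff.

    # Hypothetical pre-flight check, run before importing agent.py.
    import os
    from dotenv import load_dotenv

    load_dotenv()
    for var in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY", "GROQ_API_KEY"):
        if not os.environ.get(var):
            print(f"Missing environment variable: {var}")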
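A note on the Decimal-based math tools above: round-tripping each operand through str() and Decimal avoids binary floating-point artifacts in the tool output. A quick illustration in plain Python, not part of the commit:

    from decimal import Decimal, getcontext

    getcontext().prec = 10  # the same precision agent.py sets

    print(0.1 + 0.2)                                     # 0.30000000000000004
    print(float(Decimal(str(0.1)) + Decimal(str(0.2))))  # 0.3, as the add tool returns
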
app.py
CHANGED
@@ -1,24 +1,18 @@
-""" Basic Agent Evaluation Runner"""
import os
-import inspect
import gradio as gr
import requests
import pandas as pd
from langchain_core.messages import HumanMessage
from agent import build_graph

-
-
# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
-
-
class BasicAgent:
-    """A langgraph agent."""
    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()
@@ -159,9 +153,11 @@ with gr.Blocks() as demo:
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
import os
import gradio as gr
import requests
+import inspect
import pandas as pd
from langchain_core.messages import HumanMessage
from agent import build_graph

# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    gr.Markdown(
        """
        **Instructions:**
+
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
        ---
        **Disclaimers:**
        Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
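
The hunk above elides BasicAgent's answer method. For orientation, a hypothetical sketch of how such a wrapper would drive the compiled graph, mirroring the __main__ block in agent.py; the method body is an assumption, not the Space's elided code:

    from langchain_core.messages import HumanMessage
    from agent import build_graph

    class BasicAgentSketch:
        """Hypothetical stand-in for the elided BasicAgent call path."""
        def __init__(self):
            self.graph = build_graph()

        def __call__(self, question: str) -> str:
            result = self.graph.invoke({"messages": [HumanMessage(content=question)]})
            # The last message carries the model's reply, ending in "FINAL ANSWER: ..."
            return result["messages"][-1].content
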
prompt.txt
ADDED
@@ -0,0 +1,5 @@
+You are a helpful assistant tasked with answering questions using a set of tools.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number, OR as few words as possible, OR a comma-separated list of numbers and/or strings. If you are asked for a number, don't write it with commas and don't add units such as $ or percent signs unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. If you are asked for a comma-separated list, apply the above rules to each element depending on whether it is a number or a string.
+Your answer should start with "FINAL ANSWER: ", followed by the answer.
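
For illustration, a hypothetical exchange under this template; the question and answer are invented, not from the Space:

    Question: How many legs do three spiders have in total?
    Thoughts: A spider has 8 legs, so 3 x 8 = 24.
    FINAL ANSWER: 24
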
requirements.txt
CHANGED
@@ -1,18 +1,14 @@
-
requests
langchain
langchain-community
-
-
-
langchain-groq
-langchain-tavily
-langchain-chroma
langgraph
-huggingface_hub
supabase
-
-
-wikipedia
-pgvector
-python-dotenv

+gradio
requests
+python-dotenv
langchain
langchain-community
+wikipedia
+langchain_experimental
+duckduckgo-search
+langchain_google_genai
langchain-groq
langgraph
supabase
+langchain-huggingface
+itsdangerous
+pandas                  # imported by app.py
+beautifulsoup4          # provides bs4, imported in agent.py
+sentence-transformers   # backend for HuggingFaceEmbeddings
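
To reproduce the environment locally, the standard installation command applies (not part of the commit):

    pip install -r requirements.txt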