# super_agent/tools/retrievers.py
# Retriever tools (Arxiv, Wikipedia) for the agent.
from langchain_core.tools import tool
from langchain_community.document_loaders import ArxivLoader
from langchain_community.retrievers import WikipediaRetriever
@tool
def arxiv_search(query: str) -> str:
    """
    Search Arxiv for academic papers based on a query and return summaries of top results.

    Args:
        query (str): The search query for Arxiv.

    Returns:
        str: Summaries of up to 2 relevant papers from Arxiv, separated by
            "---", or a plain-text message when nothing was found or an
            error occurred.
    """
    try:
        loader = ArxivLoader(query=query, load_max_docs=2)
        documents = loader.load()
        if not documents:
            return "No relevant papers found on Arxiv."
        # Format and return top paper summaries
        results = []
        for doc in documents:
            title = doc.metadata.get("Title", "No Title")
            published = doc.metadata.get("Published", "Unknown date")
            # ArxivLoader exposes the paper link under "Entry ID" (not
            # "entry_id"), so the original lookup always fell through to
            # "No URL". Try both spellings before giving up.
            url = doc.metadata.get("entry_id") or doc.metadata.get("Entry ID", "No URL")
            summary = doc.page_content[:500]  # limit summary length
            results.append(f"Title: {title}\nPublished: {published}\nURL: {url}\nSummary: {summary}\n")
        return "\n---\n".join(results)
    except Exception as e:
        # Tool outputs are surfaced to the agent as text, so report failures
        # as a message rather than raising.
        return f"An error occurred while searching Arxiv: {str(e)}"
@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return a summary of the top result.

    Args:
        query (str): The search term.

    Returns:
        str: The top matching article wrapped in <Document> markup, or a
            plain-text message when nothing was found or an error occurred.
    """
    try:
        wikipedia_retriever = WikipediaRetriever(load_max_docs=1)
        # invoke() is the current Retriever entry point;
        # get_relevant_documents() is deprecated in recent LangChain releases.
        documents = wikipedia_retriever.invoke(query)
        if not documents:
            return "No relevant Wikipedia articles found."
        formatted_search_docs = "\n\n---\n\n".join(
            # The original opening tag was self-closing ("/>") yet was
            # followed by content and </Document>, yielding invalid XML.
            # Use .get() for metadata so a missing key cannot raise KeyError;
            # fall back to page_content when no "summary" field is present.
            f'<Document source="{doc.metadata.get("source", "")}" '
            f'title="{doc.metadata.get("title", "")}">\n'
            f'{doc.metadata.get("summary", doc.page_content)}\n</Document>'
            for doc in documents
        )
        # Return the content of the top document
        return formatted_search_docs
    except Exception as e:
        # Mirror arxiv_search: report failures as text instead of raising.
        return f"An error occurred while searching Wikipedia: {str(e)}"