Final_Assignment_Template_v2

Sleeping

App Files Community

patricksamuel committed on Jun 22

Commit

2815130

verified ·

1 Parent(s): c6b407a

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -13

app.py CHANGED Viewed

@@ -14,6 +14,8 @@ from bs4 import BeautifulSoup
 from pydantic import BaseModel, Field
 import nest_asyncio
 import requests
 #from agents.extensions.models.litellm_model import LitellmModel
@@ -29,26 +31,44 @@ nest_asyncio.apply()
 #Tools
 @function_tool
-def tavily_search(query: str) -> str:
     """
-    Perform a Tavily web search.
-    Args:
-        query (str): The search query string.
-    Returns:
-        str: Formatted search results.
     """
     try:
-        client = TavilyClient(os.getenv("TAVILY_API_KEY"))
-        results = client.search(query=query, max_results=5)
-        formatted = []
-        for result in results.get("results", []):
-            formatted.append(f"**Title**: {result['title']}\n**URL**: {result['url']}\n**Content**: {result['content']}\n")
-        return "\n\n".join(formatted) or "No results found."
     except Exception as e:
-        return f"Error using Tavily Search: {e}"
 @function_tool

 from pydantic import BaseModel, Field
 import nest_asyncio
 import requests
+from tavily import TavilyClient
+import re
 #from agents.extensions.models.litellm_model import LitellmModel
 #Tools
 @function_tool
+def visit_website(url: str) -> str:
     """
+    Extracts the main readable contents of a website at the given URL,
+    formats as markdown, and returns it as a string.
+    If there is an error, returns a concise error message.
     """
+    headers = {
+        "User-Agent": (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/91.0.4472.124 Safari/537.36"
+        )
+    }
     try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        html_content = response.text
+        soup = BeautifulSoup(html_content, "html.parser")
+        # Remove unwanted tags for clarity
+        for tag in soup(["script", "style", "nav", "header", "footer", "aside", "meta"]):
+            tag.decompose()
+        # Extract main content; fallback to all text if .body missing
+        main_content = soup.body if soup.body else soup
+        markdown_text = markdownify(
+            str(main_content),
+            strip=["img", "iframe", "script", "meta", "button", "input", "svg"]
+        )
+        max_length = 5000  # Reduce if hitting timeouts or agent tool output limits
+        markdown_text = re.sub(r"\n\s*\n", "\n\n", markdown_text[:max_length])
+        return markdown_text.strip() if markdown_text else "No readable text found on this page."
     except Exception as e:
+        return f"Error fetching the website: {e}"
 @function_tool