patricksamuel committed on
Commit
2815130
·
verified ·
1 Parent(s): c6b407a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -13
app.py CHANGED
@@ -14,6 +14,8 @@ from bs4 import BeautifulSoup
14
  from pydantic import BaseModel, Field
15
  import nest_asyncio
16
  import requests
 
 
17
 
18
  #from agents.extensions.models.litellm_model import LitellmModel
19
 
@@ -29,26 +31,44 @@ nest_asyncio.apply()
29
  #Tools
30
 
31
  @function_tool
32
- def tavily_search(query: str) -> str:
33
  """
34
- Perform a Tavily web search.
35
- Args:
36
- query (str): The search query string.
37
- Returns:
38
- str: Formatted search results.
39
  """
 
 
 
 
 
 
 
40
  try:
41
- client = TavilyClient(os.getenv("TAVILY_API_KEY"))
42
- results = client.search(query=query, max_results=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- formatted = []
45
- for result in results.get("results", []):
46
- formatted.append(f"**Title**: {result['title']}\n**URL**: {result['url']}\n**Content**: {result['content']}\n")
47
 
48
- return "\n\n".join(formatted) or "No results found."
49
 
50
  except Exception as e:
51
- return f"Error using Tavily Search: {e}"
52
 
53
 
54
  @function_tool
 
14
  from pydantic import BaseModel, Field
15
  import nest_asyncio
16
  import requests
17
+ from tavily import TavilyClient
18
+ import re
19
 
20
  #from agents.extensions.models.litellm_model import LitellmModel
21
 
 
31
  #Tools
32
 
33
  @function_tool
34
def visit_website(url: str) -> str:
    """Extract the main readable contents of a website as markdown.

    The page at ``url`` is downloaded with a browser-like User-Agent and a
    10 second timeout. Script, style, nav, header, footer, aside and meta
    elements are removed, the remainder is converted with ``markdownify``,
    runs of blank lines are collapsed, and the result is capped at 5000
    characters.

    Args:
        url: Address of the page to fetch.

    Returns:
        The cleaned markdown text; ``"No readable text found on this page."``
        when nothing readable remains after cleaning; or a concise
        ``"Error fetching the website: ..."`` message if any step fails.
    """
    max_length = 5000  # Reduce if hitting timeouts or agent tool output limits
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Remove unwanted tags for clarity
        for tag in soup(["script", "style", "nav", "header", "footer", "aside", "meta"]):
            tag.decompose()

        # Extract main content; fallback to the whole document if .body missing
        main_content = soup.body if soup.body else soup
        markdown_text = markdownify(
            str(main_content),
            strip=["img", "iframe", "script", "meta", "button", "input", "svg"],
        )

        # Collapse blank-line runs BEFORE truncating so the length budget is
        # spent on content rather than on whitespace that is removed anyway.
        markdown_text = re.sub(r"\n\s*\n", "\n\n", markdown_text).strip()
        markdown_text = markdown_text[:max_length].rstrip()

        # Test emptiness AFTER stripping: a whitespace-only page must yield
        # the fallback message, not an empty string.
        return markdown_text or "No readable text found on this page."

    except Exception as e:
        # Deliberately broad: the documented contract is to hand back an
        # error message string instead of raising.
        return f"Error fetching the website: {e}"
72
 
73
 
74
  @function_tool