import io
import os
import urllib.parse
from functools import lru_cache
from typing import Any, Dict, List, Union

import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from pydantic import BaseModel, Field
from smolagents import PythonInterpreterTool, Tool, WebSearchTool, WikipediaSearchTool
from transformers import pipeline  # You'll need: pip install transformers torch accelerate


# ------------------ Simple wrapper tools to save loading time ------------------------
class CachedWebSearchTool(WebSearchTool):
    """Web search tool whose ``run`` results are memoized per query string."""

    # NOTE(review): lru_cache on a method also keys on ``self`` and keeps every
    # instance alive for the lifetime of the cache; acceptable for long-lived tools.
    # NOTE(review): confirm that the parent class actually exposes ``run`` --
    # smolagents tools are normally invoked through ``forward``/``__call__``.
    @lru_cache(maxsize=128)
    def run(self, query: str):
        # identical queries return instantly
        return super().run(query)


class CachedWikiTool(WikipediaSearchTool):
    """Wikipedia search tool whose ``run`` results are memoized per page name."""

    # NOTE(review): same caveats as CachedWebSearchTool.run (cache holds ``self``;
    # verify the parent class defines ``run``).
    @lru_cache(maxsize=128)
    def run(self, page: str):
        return super().run(page)


class PreloadedPythonTool(PythonInterpreterTool):
    """
    A PythonInterpreterTool that automatically prepends the necessary imports
    (bs4, BeautifulSoup, regex) so you never hit NameError inside your code blocks.
    """

    def run(self, code: str) -> str:
        # The preamble is plain source text that is executed before the user's code.
        preamble = (
            "import bs4\n"
            "from bs4 import BeautifulSoup\n"
            "import regex\n"
        )
        return super().run(preamble + code)


# --------------------- Describe image file with text --------------------------- #
class ImageContentDescriberTool(Tool):
    """
    A tool that downloads an image from a URL and returns a text caption for it.
    """
    name: str = "describe_image_content"
    description: str = "Downloads an image from a URL and provides a textual description of its main content. It CANNOT solve complex puzzles like chess positions but can identify objects and scenes."
    inputs: Dict[str, Dict[str, Union[str, Any]]] = {
        "image_url": {
            "type": "string",
            "description": "The URL of the image to describe."
        }
    }
    output_type: str = "string"

    def forward(self, image_url: str) -> str:
        """Delegate to the module-level helper and return its result string."""
        return describe_image_from_url(image_url)


# Lazy-load the vision model
image_captioner = None


def describe_image_from_url(image_url: str) -> str:
    """
    Downloads an image from a URL and generates a text description.

    The captioning pipeline is created on first use and kept in the
    module-level ``image_captioner`` so later calls reuse it.

    Args:
        image_url: The URL of the image to describe.

    Returns:
        "Image description: <caption>" on success, otherwise a human-readable
        error message (this function does not raise).
    """
    global image_captioner
    if image_captioner is None:
        try:
            print("Initializing Image Captioning model for the first time...")
            # Using a smaller, faster BLIP model.
            image_captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
            print("Image Captioning model initialized.")
        except Exception as e:
            return f"Error: Could not initialize the image captioning model. Details: {e}"

    try:
        print(f"Downloading image from {image_url}...")
        # The context manager releases the connection; raise_for_status() keeps an
        # HTTP error page from being handed to PIL as if it were image data.
        with requests.get(image_url, timeout=15) as response:
            response.raise_for_status()
            image_bytes = response.content
        # response.content is already content-decoded, unlike the raw socket stream.
        image = Image.open(io.BytesIO(image_bytes))
        print("Generating image description...")
        description = image_captioner(image)[0]['generated_text']
        return f"Image description: {description}"
    except Exception as e:
        return f"An error occurred while processing the image file: {e}"


# --------------------- Transcribe audio file to text ---------------------------- #
class TranscribeAudioTool(Tool):
    """
    A tool to transcribe a local audio file to text.
    """
    name: str = "transcribe_audio_file"
    description: str = "Transcribes a local audio file (e.g., .mp3, .wav, .flac) from a file path into text."
    inputs: Dict[str, Dict[str, Union[str, Any]]] = {
        "file_path": {
            "type": "string",
            "description": "The local path to the audio file to transcribe."
        }
    }
    output_type: str = "string"

    def forward(self, file_path: str) -> str:
        """Delegate to the module-level helper and return its result string."""
        return transcribe_local_audio(file_path)


# --- Helper function for TranscribeAudioTool ---
# Note: This requires ffmpeg to be installed on your system: sudo apt-get install ffmpeg
# The first time this pipeline is created, it will download the model (e.g., Whisper).
# We lazy-load it to avoid loading it if the tool is never used.
audio_transcriber = None def transcribe_local_audio(file_path: str) -> str: """Downloads and transcribes an audio file from a URL.""" global audio_transcriber if audio_transcriber is None: try: # Using a smaller, faster Whisper model. Larger models can be used for higher accuracy. print("Initializing Speech-to-Text model...") audio_transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base") print("Speech-to-Text model initialized.") except Exception as e: return f"Error: Could not initialize the speech-to-text model. Details: {e}" if not os.path.exists(file_path): return f"Error: The audio file at path '{file_path}' was not found." try: print(f"Transcribing audio from {file_path}...") transcription = audio_transcriber(file_path) # The pipeline output is a dictionary, we want the text. return transcription['text'] if 'text' in transcription else "Transcription complete, but no text was found." except Exception as e: return f"An error occurred while processing the audio file '{file_path}': {e}" # --------------------- Read attachment file for general purpose ----------------- class ReadFileTool(Tool): """ A tool to read the content of a local text or code file. """ name: str = "read_file_content" description: str = ( "Reads the raw text content from a local file path and returns it as a string. " "Use this for .txt, .py, .md, .csv, and other plain text files." ) inputs: Dict[str, Dict[str, Any]] = { "file_path": { "type": "string", "description": "The local path to the file (e.g., 'data/my_document.txt')." } } output_type: str = "string" def forward(self, file_path: str) -> str: """Reads content from a local file.""" if not os.path.exists(file_path): return f"Error: The file at path '{file_path}' was not found." 
try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() return content except Exception as e: return f"An error occurred while reading the file '{file_path}': {e}" # --------------------- Read attached Excel file --------------------------------- class ReadExcelTool(Tool): """ A tool to read data from a local Excel file and convert it to a structured format. """ name: str = "read_excel_file" description: str = ( "Reads data from a local Excel file (.xlsx, .xls) and returns its content " "as a Markdown-formatted table. This is the primary tool for analyzing spreadsheet data." ) inputs: Dict[str, Dict[str, Any]] = { "file_path": { "type": "string", "description": "The local path to the Excel file (e.g., 'data/sales_report.xlsx')." } } output_type: str = "string" def forward(self, file_path: str) -> str: """Reads an Excel file and converts it to a Markdown table.""" if not os.path.exists(file_path): return f"Error: The file at path '{file_path}' was not found." try: # Read the first sheet of the Excel file into a pandas DataFrame df = pd.read_excel(file_path) # Convert the DataFrame to a Markdown table string markdown_table = df.to_markdown(index=False) return markdown_table except Exception as e: return f"An error occurred while reading the Excel file '{file_path}': {e}" # --------------------- Read code file from URL ---------------------------------- class ReadContentFromURLTool(Tool): """ A tool to read the content of a code file from a URL. """ name: str = "read_code_from_url" description: str = ( "Reads the content of a code file from a given URL and returns it as a string. " "Use this to analyze Python scripts or other text files available on the web." ) inputs: Dict[str, Dict[str, Union[str, Any]]] = { "url": { "type": "string", "description": "The URL of the code file to read." 
} } output_type: str = "string" def forward(self, url: str) -> str: return read_content_from_url(url) def read_content_from_url(url: str) -> str: """ Reads the raw text content of a file from a given URL. Args: url: The URL of the file to read. Returns: The content of the file as a string, or an error message. """ # Define a User-Agent to mimic a browser, reducing chances of being blocked headers = { 'User-Agent': 'MyAgent/1.0 (https://example.com; myemail@example.com)' } try: response = requests.get(url, headers=headers, timeout=15) response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx) return response.text except requests.exceptions.HTTPError as http_err: return f"HTTP Error fetching content from {url}: {http_err}" except requests.exceptions.ConnectionError as conn_err: return f"Connection Error fetching content from {url}: {conn_err}" except requests.exceptions.Timeout as timeout_err: return f"Timeout Error fetching content from {url}: {timeout_err}" except requests.exceptions.RequestException as req_err: return f"Error fetching content from {url}: {req_err}" except Exception as e: return f"An unexpected error occurred while reading from URL: {e}" # --------------------- Webpage structure analyzer ------------------------------- class WebpageStructureAnalyzerTool(Tool): """ A tool to fetch a webpage and analyze its basic HTML structure. It helps in understanding the page layout before attempting detailed parsing. """ name: str = "analyze_webpage_structure" description: str = ( "Fetches a webpage and returns a summary of its HTML structure " "(title, headings H1/H2/H3, tables found and their headers/first row, " "and counts of lists and forms). Use this tool *first* to understand " "a webpage's layout *before* trying to write specific 'bs4' code " "to extract detailed information." 
) # According to the comment: Dict[argument_name, Dict[key, Union[str, type, bool]]] # where the inner dict has 'type' and 'description' inputs: Dict[str, Dict[str, Union[str, type, bool]]] = { # Explicit type hint for clarity "url": { # Argument name "type": "string", # The actual Python type "description": "The URL of the webpage to analyze." } } output_type: str = "string" def forward(self, url) -> str: """ Executes the webpage structure analysis. Args: url: The URL of the webpage to analyze. Returns: A string containing the structure summary or an error message. """ # Ensure the core function is accessible here return analyze_webpage_structure(url) def analyze_webpage_structure(url: str) -> str: """ Fetches a webpage and returns a text summary of its key HTML structure. Args: url: The URL of the webpage to analyze. Returns: A string containing a summary of the HTML structure, or an error message. """ summary_lines: List[str] = [] # Define a User-Agent to mimic a browser, reducing chances of being blocked headers: Dict[str, str] = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } try: # Fetch the webpage content response = requests.get(url, headers=headers, timeout=15) response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx) # Parse the HTML content soup = BeautifulSoup(response.content, 'html.parser') summary_lines.append(f"--- Structure Summary for: {url} ---") # 1. Title title = soup.title.string if soup.title else "N/A" summary_lines.append(f"\n[Title]: {title.strip()}") # 2. Meta Description meta_desc = soup.find('meta', attrs={'name': 'description'}) description = meta_desc['content'] if meta_desc and meta_desc.has_attr('content') else "N/A" summary_lines.append(f"[Meta Description]: {description.strip()}") # 3. 
Headings (H1-H4) summary_lines.append("\n[Headings]:") for i in range(1, 5): headings = soup.find_all(f'h{i}') summary_lines.append(f" - H{i} Tags Found: {len(headings)}") # Show the first 5 headings for brevity for h in headings[:5]: summary_lines.append(f" - {h.get_text(strip=True)[:100]}") # Limit length # 4. Links links = soup.find_all('a') summary_lines.append(f"\n[Links]:") summary_lines.append(f" - Total Links Found: {len(links)}") # Show the first 5 links for link in links[:5]: href = link.get('href', 'N/A') text = link.get_text(strip=True)[:80] # Limit length summary_lines.append(f" - [{text}] -> {href}") # 5. Images images = soup.find_all('img') summary_lines.append(f"\n[Images]:") summary_lines.append(f" - Total Images Found: {len(images)}") # Show the first 5 image alts/srcs for img in images[:5]: alt = img.get('alt', 'No alt text')[:80] # Limit length src = img.get('src', 'N/A') summary_lines.append(f" - [Alt: {alt}] -> {src}") # 6. Tables tables = soup.find_all('table') summary_lines.append(f"\n[Tables]:") summary_lines.append(f" - Total Tables Found: {len(tables)}") for i, table in enumerate(tables[:3]): # Show info for first 3 tables headers = [th.get_text(strip=True) for th in table.find_all('th', limit=10)] rows = table.find_all('tr') if headers: summary_lines.append(f" - Table {i+1} (Rows: {len(rows)}): Headers = {headers}") else: summary_lines.append(f" - Table {i+1} (Rows: {len(rows)}): No