from smolagents import CodeAgent, InferenceClientModel
from smolagents.default_tools import PythonInterpreterTool, DuckDuckGoSearchTool
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
from tools import tavily_search_tool, read_python_file_from_path
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
from audio_tools import transcribe_audio_tool, get_audio_from_file_path, noise_reduction, audio_segmentation, speaker_diarization
from community_tools import community_tools, get_youtube_transcript_from_url, search_tools
from browser import browser_manager
import os
import logging
import yaml
from typing import List, Optional
from smolagents.tools import Tool

logging.basicConfig(level=logging.DEBUG)

MODEL_CHOICES = {
    "audio": ["Qwen/Qwen2.5-Coder-32B-Instruct"],
    "vlm": ["Qwen/Qwen2.5-Coder-32B-Instruct"],
    "math": ["Qwen/Qwen2.5-Coder-7B-Instruct"],
    "context_search": ["Qwen/Qwen2.5-Coder-32B-Instruct"],
    "master": ["Qwen/Qwen2.5-Coder-32B-Instruct"],
}

with open("prompts/prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
with open("prompts/audio_prompts.yaml", 'r') as stream:
    audio_prompt_templates = yaml.safe_load(stream)
with open("prompts/vlm_prompts.yaml", 'r') as stream:
    vlm_prompt_templates = yaml.safe_load(stream)
with open("prompts/context_search_prompts.yaml", 'r') as stream:
    context_search_prompt_templates = yaml.safe_load(stream)

PROMPT_TEMPLATE = {
    "master_agent": prompt_templates,
    "audio_agent": audio_prompt_templates,
    "vlm_agent": vlm_prompt_templates,
    "context_search_agent": context_search_prompt_templates,
}

# Consolidated authorized imports for all agents
AUTHORIZED_IMPORTS = [
    # Audio processing
    "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
    # Image/Video processing
    "cv2", "cv2.dnn", "cv2.imread", "pytesseract", "onnxruntime", "PIL", "PIL.Image", "bs4", "tesseract",
    # Data processing
    "numpy", "pandas", "sklearn", "scipy", "math", "hmmlearn",
    # File handling
    "base64", "io", "json", "os", "pickle", "openpyxl", "pyxlsb",
    # Visualization
    "pyplot", "matplotlib", "matplotlib.pyplot",
    # Utilities
    "logging", "yaml", "datetime", "typing", "markdownify", "requests", "chess",
]

audio_model = InferenceClientModel(
    model_id=MODEL_CHOICES["audio"][0],
    token=os.getenv("HUGGINGFACE_API_KEY"),
    max_tokens=18000,
)

audio_agent = CodeAgent(
    model=audio_model,
    tools=[transcribe_audio_tool, get_audio_from_file_path, noise_reduction, audio_segmentation, speaker_diarization],
    max_steps=4,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    planning_interval=4,
    name="audio_agent",
    prompt_templates=PROMPT_TEMPLATE["audio_agent"],
    description=(
        "This agent is responsible for processing audio: loading mp3 audio and converting it to base64, "
        "reducing noise, segmenting audio, and transcribing audio (in base64 format). It cannot process videos."
    ),
)

vlm_model = InferenceClientModel(
    model_id=MODEL_CHOICES["vlm"][0],
    token=os.getenv("HUGGINGFACE_API_KEY"),
    max_tokens=18000,
)

vlm_agent = CodeAgent(
    model=vlm_model,
    tools=[image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path],
    max_steps=4,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    planning_interval=4,
    name="vlm_agent",
    prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
    description=(
        "This agent is responsible for downloading and processing images or videos, detecting objects in them, "
        "and extracting text from them. It cannot process audio."
    ),
)

math_model = InferenceClientModel(
    model_id=MODEL_CHOICES["math"][0],
    token=os.getenv("HUGGINGFACE_API_KEY"),
    max_tokens=6000,
)

math_agent = CodeAgent(
    model=math_model,
    tools=[operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel, to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby],
    max_steps=4,
    planning_interval=4,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    name="math_agent",
    description=(
        "This agent is responsible for performing arithmetic operations on two numbers. It can also perform "
        "dataframe operations such as converting data to a dataframe, running calculations on the dataframe, "
        "and converting it back to a JSON or CSV file."
    ),
)

context_search_model = InferenceClientModel(
    model_id=MODEL_CHOICES["context_search"][0],
    token=os.getenv("HUGGINGFACE_API_KEY"),
    max_tokens=24000,
)

context_search_agent = CodeAgent(
    model=context_search_model,
    tools=[*search_tools],
    max_steps=4,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    planning_interval=4,
    name="context_search_agent",
    prompt_templates=PROMPT_TEMPLATE["context_search_agent"],
    description=(
        "This agent is responsible for searching the web for context, using Wikipedia for general information "
        "and arXiv for scientific information."
    ),
)

master_model = InferenceClientModel(
    model_id=MODEL_CHOICES["master"][0],
    token=os.getenv("HUGGINGFACE_API_KEY"),
    max_tokens=24000,
)


class MasterAgentWrapper:
    """Wrapper class to manage the master agent with thread-safe browser tools."""

    def __init__(self):
        self.base_tools = [
            sort_list,
            get_youtube_transcript_from_url,
            read_python_file_from_path,
            PythonInterpreterTool(),
            DuckDuckGoSearchTool(),
            tavily_search_tool,
            *community_tools,
        ]
        self.master_agent = CodeAgent(
            model=master_model,
            managed_agents=[audio_agent, vlm_agent, math_agent, context_search_agent],
            tools=self.base_tools,  # Initialize without browser tools
            add_base_tools=False,
            max_steps=20,  # With planning_interval=5: 4 planning steps + 16 action steps
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            verbosity_level=logging.INFO,
            planning_interval=5,
            prompt_templates=PROMPT_TEMPLATE["master_agent"],
            name="master_agent",
            description="This agent is responsible for managing the audio, vlm, context_search and math agents.",
        )

    def _run_with_browser_tools(self, question: str, browser_tools: List[Tool]) -> str:
        """Run the agent with browser tools temporarily added to its tool registry."""
        # agent.tools is a dict mapping tool name -> tool; copy it so it can be restored
        original_tools = self.master_agent.tools.copy()
        all_tools = original_tools.copy()
        # Add browser tools to the working copy
        for tool in browser_tools:
            all_tools[tool.name] = tool
        self.master_agent.tools = all_tools
        try:
            # Run the agent directly since we're in a sync context
            result = self.master_agent.run(question)
            return result
        finally:
            # Restore the original tools
            self.master_agent.tools = original_tools

    def run(self, question: str) -> str:
        """Run the agent with thread-safe browser tools."""
        try:
            # Get browser tools in the correct context
            with browser_manager.get_browser_tools() as browser_tools:
                # Run with browser tools
                return self._run_with_browser_tools(question, browser_tools)
            # return self.master_agent.run(question)  # Try without browser tools
        except Exception as e:
            logging.error(f"Error in master agent run: {e}")
            raise


# Create the wrapped master agent
master_agent = MasterAgentWrapper()


# For backward compatibility
def run_master_agent(question: str) -> str:
    return master_agent.run(question)
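
# Minimal usage sketch, not part of the original module's behavior. It assumes
# HUGGINGFACE_API_KEY is set, the prompts/*.yaml files exist, and browser_manager
# can start a browser in this environment; the sample question is hypothetical.
if __name__ == "__main__":
    sample_question = "What is the capital of France?"  # illustrative input only
    try:
        answer = run_master_agent(sample_question)
        print(f"Answer: {answer}")
    except Exception as exc:
        logging.error(f"Smoke test failed: {exc}")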