from dotenv import load_dotenv

load_dotenv()
import sys

sys.path.append(".")
import asyncio
import os
from pprint import pprint

from browser_use import Agent
from browser_use.agent.views import AgentHistoryList

from src.utils import utils


async def test_browser_use_agent():
    from browser_use.browser.browser import Browser, BrowserConfig
    from browser_use.browser.context import (
        BrowserContextConfig
    )
    from browser_use.agent.service import Agent

    from src.browser.custom_browser import CustomBrowser
    from src.controller.custom_controller import CustomController
    from src.utils import llm_provider
    from src.agent.browser_use.browser_use_agent import BrowserUseAgent

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.8,
    )

    # llm = llm_provider.get_llm_model(
    #     provider="google",
    #     model_name="gemini-2.0-flash",
    #     temperature=0.6,
    #     api_key=os.getenv("GOOGLE_API_KEY", "")
    # )

    # llm = utils.get_llm_model(
    #     provider="deepseek",
    #     model_name="deepseek-reasoner",
    #     temperature=0.8
    # )

    # llm = utils.get_llm_model(
    #     provider="deepseek",
    #     model_name="deepseek-chat",
    #     temperature=0.8
    # )

    # llm = utils.get_llm_model(
    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.5
    # )

    # llm = utils.get_llm_model(
    #     provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
    # )

    window_w, window_h = 1280, 1100

    # llm = llm_provider.get_llm_model(
    #     provider="azure_openai",
    #     model_name="gpt-4o",
    #     temperature=0.5,
    #     base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    # )

    mcp_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": [
            #         "run",
            #         "--rm",
            #         "-i",
            #         "markitdown-mcp:latest"
            #     ]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)

    use_own_browser = True
    use_vision = True  # Set to False when using DeepSeek

    max_actions_per_step = 10
    browser = None
    browser_context = None

    try:
        extra_browser_args = []
        if use_own_browser:
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
            )
        )
        agent = BrowserUseAgent(
            # task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
            task="Give me the current Nvidia stock price",
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            use_vision=use_vision,
            max_actions_per_step=max_actions_per_step,
            generate_gif=True
        )
        history: AgentHistoryList = await agent.run(max_steps=100)

        print("Final Result:")
        pprint(history.final_result(), indent=4)

        print("\nErrors:")
        pprint(history.errors(), indent=4)

    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()
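
# Parallel variant: builds several BrowserUseAgent instances that share one
# CustomBrowser and CustomController, then runs them concurrently with
# asyncio.gather; each agent receives its own task string.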
async def test_browser_use_parallel():
    from browser_use.browser.browser import Browser, BrowserConfig
    from browser_use.browser.context import (
        BrowserContextConfig,
    )
    from browser_use.agent.service import Agent

    from src.browser.custom_browser import CustomBrowser
    from src.controller.custom_controller import CustomController
    from src.utils import llm_provider
    from src.agent.browser_use.browser_use_agent import BrowserUseAgent

    # llm = utils.get_llm_model(
    #     provider="openai",
    #     model_name="gpt-4o",
    #     temperature=0.8,
    #     base_url=os.getenv("OPENAI_ENDPOINT", ""),
    #     api_key=os.getenv("OPENAI_API_KEY", ""),
    # )

    # llm = utils.get_llm_model(
    #     provider="google",
    #     model_name="gemini-2.0-flash",
    #     temperature=0.6,
    #     api_key=os.getenv("GOOGLE_API_KEY", "")
    # )

    # llm = utils.get_llm_model(
    #     provider="deepseek",
    #     model_name="deepseek-reasoner",
    #     temperature=0.8
    # )

    # llm = utils.get_llm_model(
    #     provider="deepseek",
    #     model_name="deepseek-chat",
    #     temperature=0.8
    # )

    # llm = utils.get_llm_model(
    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.5
    # )

    # llm = utils.get_llm_model(
    #     provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
    # )

    window_w, window_h = 1280, 1100

    llm = llm_provider.get_llm_model(
        provider="azure_openai",
        model_name="gpt-4o",
        temperature=0.5,
        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
        api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    )

    mcp_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": [
            #         "run",
            #         "--rm",
            #         "-i",
            #         "markitdown-mcp:latest"
            #     ]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
            # "filesystem": {
            #     "command": "npx",
            #     "args": [
            #         "-y",
            #         "@modelcontextprotocol/server-filesystem",
            #         "/Users/xxx/ai_workspace",
            #     ]
            # },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)

    use_own_browser = True
    use_vision = True  # Set to False when using DeepSeek

    max_actions_per_step = 10
    browser = None
    browser_context = None

    try:
        extra_browser_args = []
        if use_own_browser:
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
                force_new_context=True
            )
        )
        agents = [
            BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
            for task in [
                'Search Google for weather in Tokyo',
                # 'Check Reddit front page title',
                # 'Find NASA image of the day',
                # 'Check top story on CNN',
                # 'Search latest SpaceX launch date',
                # 'Look up population of Paris',
                'Find current time in Sydney',
                'Check who won last Super Bowl',
                # 'Search trending topics on Twitter',
            ]
        ]

        # asyncio.gather returns one AgentHistoryList per agent, so report on
        # each history individually rather than calling methods on the list.
        histories = await asyncio.gather(*[agent.run() for agent in agents])
        for i, history in enumerate(histories):
            print(f"--- Agent {i} ---")
            print("Final Result:")
            pprint(history.final_result(), indent=4)
            print("\nErrors:")
            pprint(history.errors(), indent=4)
    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()
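
# Deep-research variant: DeepResearchAgent plans a multi-step research task,
# drives browser workers, and writes its plan/report under save_dir; pass a
# previous task ID via task_id to resume an interrupted run.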
async def test_deep_research_agent():
    from src.agent.deep_research.deep_research_agent import DeepResearchAgent, PLAN_FILENAME, REPORT_FILENAME
    from src.utils import llm_provider

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.5
    )

    # llm = llm_provider.get_llm_model(
    #     provider="bedrock",
    # )

    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }

    browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
    agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
    research_topic = "Give me investment advice on Nvidia and Tesla."
    task_id_to_resume = ""  # Set this to a previous task ID to resume it

    print(f"Starting research on: {research_topic}")

    try:
        # Call run and wait for the final result dictionary
        result = await agent.run(research_topic,
                                 task_id=task_id_to_resume,
                                 save_dir="./tmp/deep_research",
                                 max_parallel_browsers=1,
                                 )

        print("\n--- Research Process Ended ---")
        print(f"Status: {result.get('status')}")
        print(f"Message: {result.get('message')}")
        print(f"Task ID: {result.get('task_id')}")

        # Check the final state for the report
        final_state = result.get('final_state', {})
        if final_state:
            completed_steps = sum(
                1 for item in final_state.get('research_plan', [])
                if item.get('status') == 'completed'
            )
            print("\n--- Final State Summary ---")
            print(f"  Plan Steps Completed: {completed_steps}")
            print(f"  Total Search Results Logged: {len(final_state.get('search_results', []))}")
            if final_state.get("final_report"):
                print("  Final Report: Generated (content omitted). You can find it in the output directory.")
                # print("\n--- Final Report ---")  # Optionally print the report
                # print(final_state["final_report"])
            else:
                print("  Final Report: Not generated.")
        else:
            print("Final state information not available.")

    except Exception as e:
        print("\n--- An unhandled error occurred outside the agent run ---")
        print(e)


if __name__ == "__main__":
    asyncio.run(test_browser_use_agent())
    # asyncio.run(test_browser_use_parallel())
    # asyncio.run(test_deep_research_agent())
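
# Pick which scenario to exercise by swapping the commented asyncio.run(...)
# lines in the __main__ block above.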