# WebCrawler / tests / test_agents.py
# Author: Carlos Gonzalez
# Commit: b1f90a5 — "Add application file"
import pdb  # interactive debugging; a breakpoint lives in test_browser_use_parallel
from dotenv import load_dotenv
# Load .env before the project imports below so modules that read
# os.environ (API keys, browser paths) at import time see the values.
load_dotenv()
import sys
# Make the repository root importable so `src.*` packages resolve when the
# script is run from the project directory.
sys.path.append(".")
import asyncio
import os
import sys  # NOTE(review): duplicate of the earlier `import sys`; harmless but redundant
from pprint import pprint
from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from src.utils import utils
async def test_browser_use_agent():
    """Run a single BrowserUseAgent end-to-end against a live browser.

    Builds an LLM client, an MCP-enabled controller, a Chromium instance
    (optionally the locally installed binary/profile from BROWSER_PATH /
    BROWSER_USER_DATA) and one browser context, then lets the agent execute
    a small task and prints the final result plus any per-step errors.
    All resources are torn down in ``finally`` even if the run fails.
    """
    from browser_use.browser.browser import BrowserConfig
    from browser_use.browser.context import (
        BrowserContextConfig
    )
    from src.browser.custom_browser import CustomBrowser
    from src.controller.custom_controller import CustomController
    from src.utils import llm_provider
    from src.agent.browser_use.browser_use_agent import BrowserUseAgent

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.8,
    )
    window_w, window_h = 1280, 1100

    # MCP servers exposed to the agent as additional tools.
    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)

    use_own_browser = True  # use the browser binary/profile from the environment
    use_vision = True  # Set to False when using DeepSeek
    max_actions_per_step = 10

    browser = None
    browser_context = None
    try:
        extra_browser_args = []
        if use_own_browser:
            # An empty BROWSER_PATH is treated the same as "not set".
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
            )
        )
        agent = BrowserUseAgent(
            task="give me nvidia stock price",
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            use_vision=use_vision,
            max_actions_per_step=max_actions_per_step,
            generate_gif=True
        )
        history: AgentHistoryList = await agent.run(max_steps=100)

        print("Final Result:")
        pprint(history.final_result(), indent=4)
        print("\nErrors:")
        pprint(history.errors(), indent=4)
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Close in reverse order of creation; each guard tolerates a
        # failure earlier in the setup sequence.
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()
async def test_browser_use_parallel():
    """Run several BrowserUseAgents concurrently against one shared browser.

    Creates one browser + controller, fans out one agent per task with
    ``asyncio.gather``, then prints each agent's final result and errors.

    Fixes vs. the original version:
    * ``asyncio.gather`` returns a *list* of histories — the old code called
      ``history.final_result()`` on that list, which always raised
      ``AttributeError``. We now iterate the results.
    * Removed a leftover ``pdb.set_trace()`` debugging breakpoint.
    """
    from browser_use.browser.browser import BrowserConfig
    from browser_use.browser.context import (
        BrowserContextConfig,
    )
    from src.browser.custom_browser import CustomBrowser
    from src.controller.custom_controller import CustomController
    from src.utils import llm_provider
    from src.agent.browser_use.browser_use_agent import BrowserUseAgent

    window_w, window_h = 1280, 1100
    llm = llm_provider.get_llm_model(
        provider="azure_openai",
        model_name="gpt-4o",
        temperature=0.5,
        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
        api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    )

    # MCP servers exposed to every agent as additional tools.
    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)

    use_own_browser = True  # use the browser binary/profile from the environment
    use_vision = True  # Set to False when using DeepSeek
    max_actions_per_step = 10

    browser = None
    browser_context = None
    try:
        extra_browser_args = []
        if use_own_browser:
            # An empty BROWSER_PATH is treated the same as "not set".
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
                force_new_context=True
            )
        )

        # One agent per task; all share the browser and controller.
        agents = [
            BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
            for task in [
                'Search Google for weather in Tokyo',
                'Find current time in Sydney',
                'Check who won last Super Bowl',
            ]
        ]
        histories = await asyncio.gather(*[agent.run() for agent in agents])

        # gather() yields one history per agent, in submission order.
        for i, history in enumerate(histories):
            print(f"\n--- Agent {i} ---")
            print("Final Result:")
            pprint(history.final_result(), indent=4)
            print("\nErrors:")
            pprint(history.errors(), indent=4)
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Close in reverse order of creation; each guard tolerates a
        # failure earlier in the setup sequence.
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()
async def test_deep_research_agent():
    """Run the DeepResearchAgent on a fixed topic and summarize the outcome.

    Builds an LLM and a DeepResearchAgent (with an MCP tool server and a
    headful browser config), runs the research task, then prints the run
    status and a short summary of the final state (plan progress, search
    results, whether a report was produced).
    """
    from src.agent.deep_research.deep_research_agent import DeepResearchAgent, PLAN_FILENAME, REPORT_FILENAME
    from src.utils import llm_provider

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.5
    )

    # MCP servers exposed to the agent as additional tools.
    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }
    browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
    agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)

    research_topic = "Give me investment advices of nvidia and tesla."
    task_id_to_resume = ""  # Set this to resume a previous task ID

    print(f"Starting research on: {research_topic}")
    try:
        # Call run and wait for the final result dictionary
        result = await agent.run(research_topic,
                                 task_id=task_id_to_resume,
                                 save_dir="./tmp/deep_research",
                                 max_parallel_browsers=1,
                                 )
        print("\n--- Research Process Ended ---")
        print(f"Status: {result.get('status')}")
        print(f"Message: {result.get('message')}")
        print(f"Task ID: {result.get('task_id')}")

        # Check the final state for the report
        final_state = result.get('final_state', {})
        if final_state:
            print("\n--- Final State Summary ---")
            print(
                f" Plan Steps Completed: {sum(1 for item in final_state.get('research_plan', []) if item.get('status') == 'completed')}")
            print(f" Total Search Results Logged: {len(final_state.get('search_results', []))}")
            if final_state.get("final_report"):
                print(" Final Report: Generated (content omitted). You can find it in the output directory.")
            else:
                print(" Final Report: Not generated.")
        else:
            print("Final state information not available.")
    except Exception:
        # Print the full traceback (not just str(e)), matching the error
        # handling of the other test functions in this file.
        import traceback
        print("\n--- An unhandled error occurred outside the agent run ---")
        traceback.print_exc()
if __name__ == "__main__":
    # Entry point: run exactly one scenario at a time — each test function
    # manages its own browser/controller lifecycle. Uncomment the one to run.
    asyncio.run(test_browser_use_agent())
    # asyncio.run(test_browser_use_parallel())
    # asyncio.run(test_deep_research_agent())