Coool2 committed on
Commit
f39446f
·
1 Parent(s): af77ca7

Update agent2.py

Browse files
Files changed (1) hide show
  1. agent2.py +102 -42
agent2.py CHANGED
@@ -50,47 +50,98 @@ class BM25RetrieverTool(Tool):
50
  ])
51
 
52
 
53
- @tool
54
- def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
55
- """
56
- Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
57
- """
58
- try:
59
- driver = helium.get_driver()
60
- elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
61
- if nth_result > len(elements):
62
- return f"Match n°{nth_result} not found (only {len(elements)} matches found)"
63
- result = f"Found {len(elements)} matches for '{text}'."
64
- elem = elements[nth_result - 1]
65
- driver.execute_script("arguments[0].scrollIntoView(true);", elem)
66
- result += f"Focused on element {nth_result} of {len(elements)}"
67
- return result
68
- except Exception as e:
69
- return f"Error searching for text: {e}"
70
-
71
-
72
- @tool
73
- def go_back() -> str:
74
- """Goes back to previous page."""
75
- try:
76
- driver = helium.get_driver()
77
- driver.back()
78
- return "Navigated back to previous page"
79
- except Exception as e:
80
- return f"Error going back: {e}"
81
-
82
-
83
- @tool
84
- def close_popups() -> str:
85
- """
86
- Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows!
87
- """
88
- try:
89
- driver = helium.get_driver()
90
- webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
91
- return "Attempted to close popups"
92
- except Exception as e:
93
- return f"Error closing popups: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  def save_screenshot_callback(memory_step: ActionStep, agent: CodeAgent) -> None:
@@ -165,7 +216,14 @@ Your final answer should be as few words as possible, a number, or a comma-separ
165
 
166
  def _create_agent(self):
167
  """Create the CodeAgent with tools"""
168
- base_tools = [self.retriever_tool, search_item_ctrl_f, go_back, close_popups]
 
 
 
 
 
 
 
169
 
170
  self.agent = CodeAgent(
171
  tools=base_tools,
@@ -187,6 +245,8 @@ Your final answer should be as few words as possible, a number, or a comma-separ
187
  chrome_options.add_argument("--window-size=1000,1350")
188
  chrome_options.add_argument("--disable-pdf-viewer")
189
  chrome_options.add_argument("--window-position=0,0")
 
 
190
 
191
  self.driver = helium.start_chrome(headless=False, options=chrome_options)
192
 
 
50
  ])
51
 
52
 
53
@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
    """Search for text on the current page via Ctrl + F and jump to the nth occurrence.

    Args:
        text: The text string to search for on the webpage
        nth_result: Which occurrence to jump to, counted from 1 (default is 1 for first occurrence)

    Returns:
        str: Result of the search operation with match count and navigation status
    """
    try:
        # Occurrences are 1-based. Without this guard, 0 or a negative value
        # would be used as a negative list index below and silently select a
        # match counted from the end of the result list.
        if nth_result < 1:
            return f"Error searching for text: nth_result must be >= 1 (got {nth_result})"

        # XPath 1.0 string literals have no escape syntax, so the search text
        # must be wrapped in whichever quote character it does not contain;
        # when it contains both, it is stitched together with concat().
        if "'" not in text:
            needle = f"'{text}'"
        elif '"' not in text:
            needle = f'"{text}"'
        else:
            pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
            needle = f"concat({pieces})"

        driver = helium.get_driver()
        elements = driver.find_elements(By.XPATH, f"//*[contains(text(), {needle})]")
        if nth_result > len(elements):
            return f"Match n°{nth_result} not found (only {len(elements)} matches found)"

        elem = elements[nth_result - 1]
        # Scroll so that the selected match is aligned to the top of the viewport.
        driver.execute_script("arguments[0].scrollIntoView(true);", elem)
        return (
            f"Found {len(elements)} matches for '{text}'. "
            f"Focused on element {nth_result} of {len(elements)}"
        )
    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising.
        return f"Error searching for text: {e}"
76
+
77
+
78
@tool
def go_back() -> str:
    """Return to the previous page in the browser's history.

    Returns:
        str: A confirmation message, or a description of the error that occurred
    """
    try:
        helium.get_driver().back()
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return f"Error going back: {exc}"
    return "Navigated back to previous page"
91
+
92
+
93
@tool
def close_popups() -> str:
    """Try to dismiss any visible modal or pop-up on the page by pressing the ESC key.

    Returns:
        str: A confirmation message, or a description of the error that occurred
    """
    try:
        browser = helium.get_driver()
        escape_press = webdriver.ActionChains(browser).send_keys(Keys.ESCAPE)
        escape_press.perform()
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return f"Error closing popups: {exc}"
    # Only the key press is confirmed; the code does not check whether a
    # pop-up was actually present or closed.
    return "Attempted to close popups"
106
+
107
+
108
@tool
def scroll_page(direction: str = "down", amount: int = 3) -> str:
    """Scroll the webpage in the specified direction.

    Args:
        direction: Direction to scroll, either 'up' or 'down' (case-insensitive)
        amount: Number of scroll actions to perform (each action moves 300 pixels)

    Returns:
        str: Confirmation message or error description
    """
    try:
        # Normalise once, outside the loop, and map the direction to a signed
        # pixel offset for window.scrollBy.
        offsets = {"down": 300, "up": -300}
        step = offsets.get(direction.lower())
        if step is None:
            # Previously an unknown direction scrolled nothing, still waited
            # between iterations, and then reported success.
            return f"Error scrolling: direction must be 'up' or 'down', got '{direction}'"

        driver = helium.get_driver()
        for _ in range(amount):
            driver.execute_script(f"window.scrollBy(0, {step});")
            # Short pause between scroll actions.
            sleep(0.5)
        return f"Scrolled {direction} {amount} times"
    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising.
        return f"Error scrolling: {e}"
130
+
131
+
132
@tool
def get_page_text() -> str:
    """Extract the visible text of the current webpage, truncated to its first 2000 characters.

    Returns:
        str: The beginning of the page's visible text, or a description of the error that occurred
    """
    try:
        body = helium.get_driver().find_element(By.TAG_NAME, "body")
        # Only the first 2000 characters of the body text are kept.
        snippet = body.text[:2000]
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return f"Error getting page text: {exc}"
    return f"Page text (first 2000 chars): {snippet}"
145
 
146
 
147
  def save_screenshot_callback(memory_step: ActionStep, agent: CodeAgent) -> None:
 
216
 
217
  def _create_agent(self):
218
  """Create the CodeAgent with tools"""
219
+ base_tools = [
220
+ self.retriever_tool,
221
+ search_item_ctrl_f,
222
+ go_back,
223
+ close_popups,
224
+ scroll_page,
225
+ get_page_text
226
+ ]
227
 
228
  self.agent = CodeAgent(
229
  tools=base_tools,
 
245
  chrome_options.add_argument("--window-size=1000,1350")
246
  chrome_options.add_argument("--disable-pdf-viewer")
247
  chrome_options.add_argument("--window-position=0,0")
248
+ chrome_options.add_argument("--no-sandbox")
249
+ chrome_options.add_argument("--disable-dev-shm-usage")
250
 
251
  self.driver = helium.start_chrome(headless=False, options=chrome_options)
252