Coool2 committed on
Commit
6eb012b
·
1 Parent(s): 2d8a87a

Update agent2.py

Browse files
Files changed (1) hide show
  1. agent2.py +109 -109
agent2.py CHANGED
@@ -50,123 +50,123 @@ class BM25RetrieverTool(Tool):
50
  ])
51
 
52
 
53
- @tool
54
- def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
55
- """Search for text on the current page via Ctrl + F and jump to the nth occurrence.
56
-
57
- Args:
58
- text: The text string to search for on the webpage
59
- nth_result: Which occurrence to jump to (default is 1 for first occurrence)
60
-
61
- Returns:
62
- str: Result of the search operation with match count and navigation status
63
- """
64
- try:
65
- driver = helium.get_driver()
66
- elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
67
- if nth_result > len(elements):
68
- return f"Match n°{nth_result} not found (only {len(elements)} matches found)"
69
- result = f"Found {len(elements)} matches for '{text}'."
70
- elem = elements[nth_result - 1]
71
- driver.execute_script("arguments[0].scrollIntoView(true);", elem)
72
- result += f"Focused on element {nth_result} of {len(elements)}"
73
- return result
74
- except Exception as e:
75
- return f"Error searching for text: {e}"
76
-
77
-
78
- @tool
79
- def go_back() -> str:
80
- """Navigate back to the previous page in browser history.
81
-
82
- Returns:
83
- str: Confirmation message or error description
84
- """
85
- try:
86
- driver = helium.get_driver()
87
- driver.back()
88
- return "Navigated back to previous page"
89
- except Exception as e:
90
- return f"Error going back: {e}"
91
-
92
 
93
- @tool
94
- def close_popups() -> str:
95
- """Close any visible modal or pop-up on the page by sending ESC key.
96
 
97
- Returns:
98
- str: Confirmation message or error description
99
- """
100
- try:
101
- driver = helium.get_driver()
102
- webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
103
- return "Attempted to close popups"
104
- except Exception as e:
105
- return f"Error closing popups: {e}"
106
-
107
-
108
- @tool
109
- def scroll_page(direction: str = "down", amount: int = 3) -> str:
110
- """Scroll the webpage in the specified direction.
111
-
112
- Args:
113
- direction: Direction to scroll, either 'up' or 'down'
114
- amount: Number of scroll actions to perform
115
-
116
- Returns:
117
- str: Confirmation message or error description
118
- """
119
- try:
120
- driver = helium.get_driver()
121
- for _ in range(amount):
122
- if direction.lower() == "down":
123
- driver.execute_script("window.scrollBy(0, 300);")
124
- elif direction.lower() == "up":
125
- driver.execute_script("window.scrollBy(0, -300);")
126
- sleep(0.5)
127
- return f"Scrolled {direction} {amount} times"
128
- except Exception as e:
129
- return f"Error scrolling: {e}"
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- @tool
133
- def get_page_text() -> str:
134
- """Extract all visible text from the current webpage.
135
-
136
- Returns:
137
- str: The visible text content of the page
138
- """
139
- try:
140
- driver = helium.get_driver()
141
- text = driver.find_element(By.TAG_NAME, "body").text
142
- return f"Page text (first 2000 chars): {text[:2000]}"
143
- except Exception as e:
144
- return f"Error getting page text: {e}"
145
 
146
 
147
- def save_screenshot_callback(memory_step: ActionStep, agent: CodeAgent) -> None:
148
- """Save screenshots for web browser automation"""
149
- try:
150
- sleep(1.0)
151
- driver = helium.get_driver()
152
- if driver is not None:
153
- # Clean up old screenshots
154
- for previous_memory_step in agent.memory.steps:
155
- if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= memory_step.step_number - 2:
156
- previous_memory_step.observations_images = None
157
 
158
- png_bytes = driver.get_screenshot_as_png()
159
- image = Image.open(BytesIO(png_bytes))
160
- memory_step.observations_images = [image.copy()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- # Update observations with current URL
163
- url_info = f"Current url: {driver.current_url}"
164
- memory_step.observations = (
165
- url_info if memory_step.observations is None
166
- else memory_step.observations + "\n" + url_info
167
- )
168
- except Exception as e:
169
- print(f"Error in screenshot callback: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
 
172
  class GAIAAgent:
 
50
  ])
51
 
52
 
53
+ @tool
54
+ def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
55
+ """Search for text on the current page via Ctrl + F and jump to the nth occurrence.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ Args:
58
+ text: The text string to search for on the webpage
59
+ nth_result: Which occurrence to jump to (default is 1 for first occurrence)
60
 
61
+ Returns:
62
+ str: Result of the search operation with match count and navigation status
63
+ """
64
+ try:
65
+ driver = helium.get_driver()
66
+ elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
67
+ if nth_result > len(elements):
68
+ return f"Match n°{nth_result} not found (only {len(elements)} matches found)"
69
+ result = f"Found {len(elements)} matches for '{text}'."
70
+ elem = elements[nth_result - 1]
71
+ driver.execute_script("arguments[0].scrollIntoView(true);", elem)
72
+ result += f"Focused on element {nth_result} of {len(elements)}"
73
+ return result
74
+ except Exception as e:
75
+ return f"Error searching for text: {e}"
76
+
77
+
78
+ @tool
79
+ def go_back() -> str:
80
+ """Navigate back to the previous page in browser history.
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ Returns:
83
+ str: Confirmation message or error description
84
+ """
85
+ try:
86
+ driver = helium.get_driver()
87
+ driver.back()
88
+ return "Navigated back to previous page"
89
+ except Exception as e:
90
+ return f"Error going back: {e}"
91
+
92
+
93
+ @tool
94
+ def close_popups() -> str:
95
+ """Close any visible modal or pop-up on the page by sending ESC key.
96
 
97
+ Returns:
98
+ str: Confirmation message or error description
99
+ """
100
+ try:
101
+ driver = helium.get_driver()
102
+ webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
103
+ return "Attempted to close popups"
104
+ except Exception as e:
105
+ return f"Error closing popups: {e}"
 
 
 
 
106
 
107
 
108
+ @tool
109
+ def scroll_page(direction: str = "down", amount: int = 3) -> str:
110
+ """Scroll the webpage in the specified direction.
 
 
 
 
 
 
 
111
 
112
+ Args:
113
+ direction: Direction to scroll, either 'up' or 'down'
114
+ amount: Number of scroll actions to perform
115
+
116
+ Returns:
117
+ str: Confirmation message or error description
118
+ """
119
+ try:
120
+ driver = helium.get_driver()
121
+ for _ in range(amount):
122
+ if direction.lower() == "down":
123
+ driver.execute_script("window.scrollBy(0, 300);")
124
+ elif direction.lower() == "up":
125
+ driver.execute_script("window.scrollBy(0, -300);")
126
+ sleep(0.5)
127
+ return f"Scrolled {direction} {amount} times"
128
+ except Exception as e:
129
+ return f"Error scrolling: {e}"
130
+
131
+
132
+ @tool
133
+ def get_page_text() -> str:
134
+ """Extract all visible text from the current webpage.
135
 
136
+ Returns:
137
+ str: The visible text content of the page
138
+ """
139
+ try:
140
+ driver = helium.get_driver()
141
+ text = driver.find_element(By.TAG_NAME, "body").text
142
+ return f"Page text (first 2000 chars): {text[:2000]}"
143
+ except Exception as e:
144
+ return f"Error getting page text: {e}"
145
+
146
+
147
+ def save_screenshot_callback(memory_step: ActionStep, agent: CodeAgent) -> None:
148
+ """Save screenshots for web browser automation"""
149
+ try:
150
+ sleep(1.0)
151
+ driver = helium.get_driver()
152
+ if driver is not None:
153
+ # Clean up old screenshots
154
+ for previous_memory_step in agent.memory.steps:
155
+ if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= memory_step.step_number - 2:
156
+ previous_memory_step.observations_images = None
157
+
158
+ png_bytes = driver.get_screenshot_as_png()
159
+ image = Image.open(BytesIO(png_bytes))
160
+ memory_step.observations_images = [image.copy()]
161
+
162
+ # Update observations with current URL
163
+ url_info = f"Current url: {driver.current_url}"
164
+ memory_step.observations = (
165
+ url_info if memory_step.observations is None
166
+ else memory_step.observations + "\n" + url_info
167
+ )
168
+ except Exception as e:
169
+ print(f"Error in screenshot callback: {e}")
170
 
171
 
172
  class GAIAAgent: