Carlos Gonzalez committed on
Commit
b1f90a5
·
1 Parent(s): 08ed45b

Add application file

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +5 -0
  2. .env.example +60 -0
  3. .gitattributes +3 -0
  4. .github/workflows/build.yml +124 -0
  5. .gitignore +193 -0
  6. .vscode/settings.json +11 -0
  7. Dockerfile +99 -0
  8. LICENSE +21 -0
  9. README.md +151 -12
  10. SECURITY.md +19 -0
  11. app.py +30 -0
  12. assets/web-ui.png +3 -0
  13. debug_imports.py +32 -0
  14. docker-compose.yml +80 -0
  15. frontend/package-lock.json +0 -0
  16. package-lock.json +532 -0
  17. package.json +13 -0
  18. pnpm-lock.yaml +53 -0
  19. requirements.txt +10 -0
  20. setup.py +11 -0
  21. src.zip +3 -0
  22. src/__init__.py +0 -0
  23. src/agent/__init__.py +0 -0
  24. src/agent/browser_use/browser_use_agent.py +169 -0
  25. src/agent/deep_research/deep_research_agent.py +1256 -0
  26. src/browser/__init__.py +0 -0
  27. src/browser/custom_browser.py +109 -0
  28. src/browser/custom_context.py +22 -0
  29. src/controller/__init__.py +0 -0
  30. src/controller/custom_controller.py +182 -0
  31. src/utils/__init__.py +0 -0
  32. src/utils/config.py +8 -0
  33. src/utils/llm_provider.py +71 -0
  34. src/utils/mcp_client.py +254 -0
  35. src/utils/utils.py +39 -0
  36. src/webui/__init__.py +0 -0
  37. src/webui/components/__init__.py +0 -0
  38. src/webui/components/agent_settings_tab.py +162 -0
  39. src/webui/components/browser_settings_tab.py +161 -0
  40. src/webui/components/browser_use_agent_tab.py +1153 -0
  41. src/webui/components/browser_use_agent_tab.py.bak +1305 -0
  42. src/webui/components/deep_research_agent_tab.py +451 -0
  43. src/webui/components/documentation_tab.py +726 -0
  44. src/webui/components/load_save_config_tab.py +50 -0
  45. src/webui/components/vayner_client_research_tab.py +1252 -0
  46. src/webui/interface.py +81 -0
  47. src/webui/webui_manager.py +168 -0
  48. supervisord.conf +80 -0
  49. tests/test_agents.py +400 -0
  50. tests/test_controller.py +131 -0
.dockerignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ data
2
+ tmp
3
+ results
4
+
5
+ .env
.env.example ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_ENDPOINT=https://api.openai.com/v1
2
+ OPENAI_API_KEY=
3
+
4
+ ANTHROPIC_API_KEY=
5
+ ANTHROPIC_ENDPOINT=https://api.anthropic.com
6
+
7
+ GOOGLE_API_KEY=
8
+
9
+ AZURE_OPENAI_ENDPOINT=
10
+ AZURE_OPENAI_API_KEY=
11
+ AZURE_OPENAI_API_VERSION=2025-01-01-preview
12
+
13
+ DEEPSEEK_ENDPOINT=https://api.deepseek.com
14
+ DEEPSEEK_API_KEY=
15
+
16
+ MISTRAL_API_KEY=
17
+ MISTRAL_ENDPOINT=https://api.mistral.ai/v1
18
+
19
+ OLLAMA_ENDPOINT=http://localhost:11434
20
+
21
+ ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
22
+ ALIBABA_API_KEY=
23
+
24
+ MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
25
+ MOONSHOT_API_KEY=
26
+
27
+ UNBOUND_ENDPOINT=https://api.getunbound.ai
28
+ UNBOUND_API_KEY=
29
+
30
+ SiliconFLOW_ENDPOINT=https://api.siliconflow.cn/v1/
31
+ SiliconFLOW_API_KEY=
32
+
33
+ IBM_ENDPOINT=https://us-south.ml.cloud.ibm.com
34
+ IBM_API_KEY=
35
+ IBM_PROJECT_ID=
36
+
37
+ # Set to false to disable anonymized telemetry
38
+ ANONYMIZED_TELEMETRY=false
39
+
40
+ # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
41
+ BROWSER_USE_LOGGING_LEVEL=info
42
+
43
+ # Browser settings
44
+ BROWSER_PATH=
45
+ BROWSER_USER_DATA=
46
+ BROWSER_DEBUGGING_PORT=9222
47
+ BROWSER_DEBUGGING_HOST=localhost
48
+ # Set to true to keep browser open between AI tasks
49
+ KEEP_BROWSER_OPEN=true
50
+ BROWSER_CDP=
51
+ # Display settings
52
+ # Format: WIDTHxHEIGHTxDEPTH
53
+ RESOLUTION=1920x1080x24
54
+ # Width in pixels
55
+ RESOLUTION_WIDTH=1920
56
+ # Height in pixels
57
+ RESOLUTION_HEIGHT=1080
58
+
59
+ # VNC settings
60
+ VNC_PASSWORD=youvncpassword
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/examples/test.png filter=lfs diff=lfs merge=lfs -text
37
+ *.png filter=lfs diff=lfs merge=lfs -text
38
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.github/workflows/build.yml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build Docker Image
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+ push:
7
+ branches: [main]
8
+
9
+ env:
10
+ GITHUB_CR_REPO: ghcr.io/${{ github.repository }}
11
+
12
+ jobs:
13
+ build:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ platform:
19
+ - linux/amd64
20
+ - linux/arm64
21
+ steps:
22
+ - name: Prepare
23
+ run: |
24
+ platform=${{ matrix.platform }}
25
+ echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
26
+
27
+ - name: Docker meta
28
+ id: meta
29
+ uses: docker/metadata-action@v5
30
+ with:
31
+ images: |
32
+ ${{ env.GITHUB_CR_REPO }}
33
+
34
+ - name: Login to GHCR
35
+ uses: docker/login-action@v3
36
+ with:
37
+ registry: ghcr.io
38
+ username: ${{ github.repository_owner }}
39
+ password: ${{ secrets.GITHUB_TOKEN }}
40
+
41
+ - name: Set up QEMU
42
+ uses: docker/setup-qemu-action@v3
43
+
44
+ - name: Set up Docker Buildx
45
+ uses: docker/setup-buildx-action@v3
46
+
47
+ - name: Build and push by digest
48
+ id: build
49
+ uses: docker/build-push-action@v6
50
+ with:
51
+ platforms: ${{ matrix.platform }}
52
+ labels: ${{ steps.meta.outputs.labels }}
53
+ tags: |
54
+ ${{ env.GITHUB_CR_REPO }}
55
+ build-args: |
56
+ TARGETPLATFORM=${{ matrix.platform }}
57
+ outputs: type=image,push-by-digest=true,name-canonical=true,push=true
58
+
59
+ - name: Export digest
60
+ run: |
61
+ mkdir -p ${{ runner.temp }}/digests
62
+ digest="${{ steps.build.outputs.digest }}"
63
+ touch "${{ runner.temp }}/digests/${digest#sha256:}"
64
+
65
+ - name: Upload digest
66
+ uses: actions/upload-artifact@v4
67
+ with:
68
+ name: digests-${{ env.PLATFORM_PAIR }}
69
+ path: ${{ runner.temp }}/digests/*
70
+ if-no-files-found: error
71
+ retention-days: 1
72
+
73
+ merge:
74
+ runs-on: ubuntu-latest
75
+ needs:
76
+ - build
77
+ steps:
78
+ - name: Download digests
79
+ uses: actions/download-artifact@v4
80
+ with:
81
+ path: ${{ runner.temp }}/digests
82
+ pattern: digests-*
83
+ merge-multiple: true
84
+
85
+ - name: Login to GHCR
86
+ uses: docker/login-action@v3
87
+ with:
88
+ registry: ghcr.io
89
+ username: ${{ github.repository_owner }}
90
+ password: ${{ secrets.GITHUB_TOKEN }}
91
+
92
+ - name: Set up Docker Buildx
93
+ uses: docker/setup-buildx-action@v3
94
+
95
+ - name: Docker meta
96
+ id: meta
97
+ uses: docker/metadata-action@v5
98
+ with:
99
+ images: |
100
+ ${{ env.GITHUB_CR_REPO }}
101
+ tags: |
102
+ type=ref,event=branch
103
+ type=ref,event=pr
104
+ type=semver,pattern={{version}}
105
+ type=semver,pattern={{major}}
106
+
107
+ - name: Docker tags
108
+ run: |
109
+ tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
110
+ if [ -z "$tags" ]; then
111
+ echo "DOCKER_METADATA_OUTPUT_VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
112
+ tags="-t ${{ env.GITHUB_CR_REPO }}:${{ github.ref_name }}"
113
+ fi
114
+ echo "DOCKER_METADATA_TAGS=$tags" >> $GITHUB_ENV
115
+
116
+ - name: Create manifest list and push
117
+ working-directory: ${{ runner.temp }}/digests
118
+ run: |
119
+ docker buildx imagetools create ${{ env.DOCKER_METADATA_TAGS }} \
120
+ $(printf '${{ env.GITHUB_CR_REPO }}@sha256:%s ' *)
121
+
122
+ - name: Inspect image
123
+ run: |
124
+ docker buildx imagetools inspect ${{ env.GITHUB_CR_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION }}
.gitignore ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+ test_env/
133
+ myenv
134
+
135
+
136
+ # Spyder project settings
137
+ .spyderproject
138
+ .spyproject
139
+
140
+ # Rope project settings
141
+ .ropeproject
142
+
143
+ # mkdocs documentation
144
+ /site
145
+
146
+ # mypy
147
+ .mypy_cache/
148
+ .dmypy.json
149
+ dmypy.json
150
+
151
+ # Pyre type checker
152
+ .pyre/
153
+
154
+ # pytype static type analyzer
155
+ .pytype/
156
+
157
+ # Cython debug symbols
158
+ cython_debug/
159
+
160
+ # PyCharm
161
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
164
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165
+ .idea/
166
+ temp
167
+ tmp
168
+
169
+
170
+ .DS_Store
171
+
172
+ private_example.py
173
+ private_example
174
+
175
+ browser_cookies.json
176
+ cookies.json
177
+ AgentHistory.json
178
+ cv_04_24.pdf
179
+ AgentHistoryList.json
180
+ *.gif
181
+
182
+ # For Sharing (.pem files)
183
+ .gradio/
184
+
185
+ # For Docker
186
+ data/
187
+
188
+ # For Config Files (Current Settings)
189
+ .config.pkl
190
+ *.pdf
191
+
192
+ workflow
193
+ node_modules
.vscode/settings.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "python.analysis.typeCheckingMode": "basic",
3
+ "[python]": {
4
+ "editor.defaultFormatter": "charliermarsh.ruff",
5
+ "editor.formatOnSave": true,
6
+ "editor.codeActionsOnSave": {
7
+ "source.fixAll.ruff": "explicit",
8
+ "source.organizeImports.ruff": "explicit"
9
+ }
10
+ }
11
+ }
Dockerfile ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set platform for multi-arch builds (Docker Buildx will set this)
4
+ ARG TARGETPLATFORM
5
+ ARG NODE_MAJOR=20
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ wget \
10
+ netcat-traditional \
11
+ gnupg \
12
+ curl \
13
+ unzip \
14
+ xvfb \
15
+ libgconf-2-4 \
16
+ libxss1 \
17
+ libnss3 \
18
+ libnspr4 \
19
+ libasound2 \
20
+ libatk1.0-0 \
21
+ libatk-bridge2.0-0 \
22
+ libcups2 \
23
+ libdbus-1-3 \
24
+ libdrm2 \
25
+ libgbm1 \
26
+ libgtk-3-0 \
27
+ libxcomposite1 \
28
+ libxdamage1 \
29
+ libxfixes3 \
30
+ libxrandr2 \
31
+ xdg-utils \
32
+ fonts-liberation \
33
+ dbus \
34
+ xauth \
35
+ x11vnc \
36
+ tigervnc-tools \
37
+ supervisor \
38
+ net-tools \
39
+ procps \
40
+ git \
41
+ python3-numpy \
42
+ fontconfig \
43
+ fonts-dejavu \
44
+ fonts-dejavu-core \
45
+ fonts-dejavu-extra \
46
+ vim \
47
+ && rm -rf /var/lib/apt/lists/*
48
+
49
+ # Install noVNC
50
+ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
51
+ && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
52
+ && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
53
+
54
+ # Install Node.js using NodeSource PPA
55
+ RUN mkdir -p /etc/apt/keyrings \
56
+ && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
57
+ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
58
+ && apt-get update \
59
+ && apt-get install nodejs -y \
60
+ && rm -rf /var/lib/apt/lists/*
61
+
62
+ # Verify Node.js and npm installation (optional, but good for debugging)
63
+ RUN node -v && npm -v && npx -v
64
+
65
+ # Set up working directory
66
+ WORKDIR /app
67
+
68
+ # Copy requirements and install Python dependencies
69
+ COPY requirements.txt .
70
+
71
+ RUN pip install --no-cache-dir -r requirements.txt
72
+
73
+ # Install playwright browsers and dependencies
74
+ # playwright documentation suggests PLAYWRIGHT_BROWSERS_PATH is still relevant
75
+ # or that playwright installs to a similar default location that Playwright would.
76
+ # Let's assume playwright respects PLAYWRIGHT_BROWSERS_PATH or its default install location is findable.
77
+ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-browsers
78
+ RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
79
+
80
+ # Install recommended: Google Chrome (instead of just Chromium for better undetectability)
81
+ # The 'playwright install chrome' command might download and place it.
82
+ # The '--with-deps' equivalent for playwright install is to run 'playwright install-deps chrome' after.
83
+ # RUN playwright install chrome --with-deps
84
+
85
+ # Alternative: Install Chromium if Google Chrome is problematic in certain environments
86
+ RUN playwright install chromium --with-deps
87
+
88
+
89
+ # Copy the application code
90
+ COPY . .
91
+
92
+ # Set up supervisor configuration
93
+ RUN mkdir -p /var/log/supervisor
94
+ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
95
+
96
+ EXPOSE 7788 6080 5901 9222
97
+
98
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
99
+ #CMD ["/bin/bash"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Browser Use Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,151 @@
1
- ---
2
- title: WebCrawler
3
- emoji: 📚
4
- colorFrom: red
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.30.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <img src="./assets/web-ui.png" alt="Browser Use Web UI" width="full"/>
2
+
3
+ <br/>
4
+
5
+ [![GitHub stars](https://img.shields.io/github/stars/browser-use/web-ui?style=social)](https://github.com/browser-use/web-ui/stargazers)
6
+ [![Discord](https://img.shields.io/discord/1303749220842340412?color=7289DA&label=Discord&logo=discord&logoColor=white)](https://link.browser-use.com/discord)
7
+ [![Documentation](https://img.shields.io/badge/Documentation-📕-blue)](https://docs.browser-use.com)
8
+ [![WarmShao](https://img.shields.io/twitter/follow/warmshao?style=social)](https://x.com/warmshao)
9
+
10
+ This project builds upon the foundation of the [browser-use](https://github.com/browser-use/browser-use), which is designed to make websites accessible for AI agents.
11
+
12
+ We would like to officially thank [WarmShao](https://github.com/warmshao) for his contribution to this project.
13
+
14
+ **WebUI:** is built on Gradio and supports most of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
15
+
16
+ **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Google, OpenAI, Azure OpenAI, Anthropic, DeepSeek, Ollama etc. And we plan to add support for even more models in the future.
17
+
18
+ **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
19
+
20
+ **Persistent Browser Sessions:** You can choose to keep the browser window open between AI tasks, allowing you to see the complete history and state of AI interactions.
21
+
22
+ <video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls">Your browser does not support playing this video!</video>
23
+
24
+ ## Installation Guide
25
+
26
+ ### Option 1: Local Installation
27
+
28
+ Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
29
+
30
+ #### Step 1: Clone the Repository
31
+ ```bash
32
+ git clone https://github.com/browser-use/web-ui.git
33
+ cd web-ui
34
+ ```
35
+
36
+ #### Step 2: Set Up Python Environment
37
+ We recommend using [uv](https://docs.astral.sh/uv/) for managing the Python environment.
38
+
39
+ Using uv (recommended):
40
+ ```bash
41
+ uv venv --python 3.11
42
+ ```
43
+
44
+ Activate the virtual environment:
45
+ - Windows (Command Prompt):
46
+ ```cmd
47
+ .venv\Scripts\activate
48
+ ```
49
+ - Windows (PowerShell):
50
+ ```powershell
51
+ .\.venv\Scripts\Activate.ps1
52
+ ```
53
+ - macOS/Linux:
54
+ ```bash
55
+ source .venv/bin/activate
56
+ ```
57
+
58
+ #### Step 3: Install Dependencies
59
+ Install Python packages:
60
+ ```bash
61
+ uv pip install -r requirements.txt
62
+ ```
63
+
64
+ Install Browsers in playwright.
65
+ ```bash
66
+ playwright install --with-deps
67
+ ```
68
+ Or you can install specific browsers by running:
69
+ ```bash
70
+ playwright install chromium --with-deps
71
+ ```
72
+
73
+ #### Step 4: Configure Environment
74
+ 1. Create a copy of the example environment file:
75
+ - Windows (Command Prompt):
76
+ ```bash
77
+ copy .env.example .env
78
+ ```
79
+ - macOS/Linux/Windows (PowerShell):
80
+ ```bash
81
+ cp .env.example .env
82
+ ```
83
+ 2. Open `.env` in your preferred text editor and add your API keys and other settings
84
+
85
+ #### Step 5: Enjoy the web-ui
86
+ 1. **Run the WebUI:**
87
+ ```bash
88
+ python webui.py --ip 127.0.0.1 --port 7788
89
+ ```
90
+ 2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
91
+ 3. **Using Your Own Browser(Optional):**
92
+ - Set `BROWSER_PATH` to the executable path of your browser and `BROWSER_USER_DATA` to the user data directory of your browser. Leave `BROWSER_USER_DATA` empty if you want to use local user data.
93
+ - Windows
94
+ ```env
95
+ BROWSER_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
96
+ BROWSER_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
97
+ ```
98
+ > Note: Replace `YourUsername` with your actual Windows username for Windows systems.
99
+ - Mac
100
+ ```env
101
+ BROWSER_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
102
+ BROWSER_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
103
+ ```
104
+ - Close all Chrome windows
105
+ - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
106
+ - Check the "Use Own Browser" option within the Browser Settings.
107
+
108
+ ### Option 2: Docker Installation
109
+
110
+ #### Prerequisites
111
+ - Docker and Docker Compose installed
112
+ - [Docker Desktop](https://www.docker.com/products/docker-desktop/) (For Windows/macOS)
113
+ - [Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) (For Linux)
114
+
115
+ #### Step 1: Clone the Repository
116
+ ```bash
117
+ git clone https://github.com/browser-use/web-ui.git
118
+ cd web-ui
119
+ ```
120
+
121
+ #### Step 2: Configure Environment
122
+ 1. Create a copy of the example environment file:
123
+ - Windows (Command Prompt):
124
+ ```bash
125
+ copy .env.example .env
126
+ ```
127
+ - macOS/Linux/Windows (PowerShell):
128
+ ```bash
129
+ cp .env.example .env
130
+ ```
131
+ 2. Open `.env` in your preferred text editor and add your API keys and other settings
132
+
133
+ #### Step 3: Docker Build and Run
134
+ ```bash
135
+ docker compose up --build
136
+ ```
137
+ For ARM64 systems (e.g., Apple Silicon Macs), please run the following command:
138
+ ```bash
139
+ TARGETPLATFORM=linux/arm64 docker compose up --build
140
+ ```
141
+
142
+ #### Step 4: Enjoy the web-ui and vnc
143
+ - Web-UI: Open `http://localhost:7788` in your browser
144
+ - VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
145
+ - Default VNC password: "youvncpassword"
146
+ - Can be changed by setting `VNC_PASSWORD` in your `.env` file
147
+
148
+ ## Changelog
149
+ - [x] **2025/01/26:** Thanks to @vvincent1234. Now browser-use-webui can combine with DeepSeek-r1 to engage in deep thinking!
150
+ - [x] **2025/01/10:** Thanks to @casistack. Now we have Docker Setup option and also Support keep browser open between tasks.[Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
151
+ - [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
SECURITY.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Reporting Security Issues
2
+
3
+ If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure.
4
+
5
+ **Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.**
6
+
7
+ Instead, please open a new [Github security advisory](https://github.com/browser-use/web-ui/security/advisories/new).
8
+
9
+ Please include as much of the information listed below as you can to help me better understand and resolve the issue:
10
+
11
+ * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
12
+ * Full paths of source file(s) related to the manifestation of the issue
13
+ * The location of the affected source code (tag/branch/commit or direct URL)
14
+ * Any special configuration required to reproduce the issue
15
+ * Step-by-step instructions to reproduce the issue
16
+ * Proof-of-concept or exploit code (if possible)
17
+ * Impact of the issue, including how an attacker might exploit the issue
18
+
19
+ This information will help me triage your report more quickly.
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+ import os
4
+ from src.webui.interface import theme_map, create_ui
5
+
6
+ def get_default_theme():
7
+ # Use "Ocean" or the first available theme
8
+ return "Ocean" if "Ocean" in theme_map else list(theme_map.keys())[0]
9
+
10
+ def run_gradio():
11
+ # Hugging Face Spaces: use default host/port, no CLI args
12
+ theme = os.environ.get("GRADIO_THEME", get_default_theme())
13
+ demo = create_ui(theme_name=theme)
14
+ demo.queue().launch() # Do NOT set server_name/server_port for Spaces
15
+
16
+ if __name__ == '__main__':
17
+ # If running locally, allow CLI args as before
18
+ import argparse
19
+ parser = argparse.ArgumentParser(description="Gradio WebUI for Browser Agent")
20
+ parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
21
+ parser.add_argument("--port", type=int, default=8888, help="Port to listen on")
22
+ parser.add_argument("--theme", type=str, default=get_default_theme(), choices=theme_map.keys(), help="Theme to use for the UI")
23
+ args = parser.parse_args()
24
+
25
+ demo = create_ui(theme_name=args.theme)
26
+ print(f"Starting server on {args.ip}:{args.port}")
27
+ demo.queue().launch(server_name=args.ip, server_port=args.port)
28
+ else:
29
+ # If run by Hugging Face Spaces (no __main__), just launch with defaults
30
+ run_gradio()
assets/web-ui.png ADDED

Git LFS Details

  • SHA256: ea3c23160272116985f1d24a8140f0746e92a820bbd6e4988b6aa4ec0dfbb491
  • Pointer size: 130 Bytes
  • Size of remote file: 24.5 kB
debug_imports.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+
4
+ print("Python executable path:", sys.executable)
5
+ print("Python version:", sys.version)
6
+ print("Sys.path:", sys.path)
7
+
8
+ try:
9
+ import gradio
10
+ print("Gradio version:", gradio.__version__)
11
+ except ImportError as e:
12
+ print("Error importing gradio:", e)
13
+
14
+ try:
15
+ from src.webui.interface import theme_map, create_ui
16
+ print("Successfully imported src.webui.interface")
17
+ except ImportError as e:
18
+ print("Error importing src.webui.interface:", e)
19
+
20
+ try:
21
+ import src
22
+ print("src module exists")
23
+ try:
24
+ import src.webui
25
+ print("src.webui module exists")
26
+ print(dir(src.webui))
27
+ except ImportError as e:
28
+ print("Error importing src.webui:", e)
29
+ except ImportError as e:
30
+ print("Error importing src:", e)
31
+
32
+ print("Current working directory:", os.getcwd())
docker-compose.yml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ # debug: docker compose run --rm -it browser-use-webui bash
3
+ browser-use-webui:
4
+ # image: ghcr.io/browser-use/web-ui # Using precompiled image
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ args:
9
+ TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
10
+ ports:
11
+ - "7788:7788"
12
+ - "6080:6080"
13
+ - "5901:5901"
14
+ - "9222:9222"
15
+ environment:
16
+ # LLM API Keys & Endpoints
17
+ - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
18
+ - OPENAI_API_KEY=${OPENAI_API_KEY:-}
19
+ - ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
20
+ - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
21
+ - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
22
+ - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
23
+ - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
24
+ - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION:-2025-01-01-preview}
25
+ - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
26
+ - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
27
+ - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
28
+ - MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
29
+ - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
30
+ - ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
31
+ - ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
32
+ - MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
33
+ - MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
34
+ - UNBOUND_ENDPOINT=${UNBOUND_ENDPOINT:-https://api.getunbound.ai}
35
+ - UNBOUND_API_KEY=${UNBOUND_API_KEY:-}
36
+ - SiliconFLOW_ENDPOINT=${SiliconFLOW_ENDPOINT:-https://api.siliconflow.cn/v1/}
37
+ - SiliconFLOW_API_KEY=${SiliconFLOW_API_KEY:-}
38
+ - IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
39
+ - IBM_API_KEY=${IBM_API_KEY:-}
40
+ - IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
41
+
42
+ # Application Settings
43
+ - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
44
+ - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
45
+
46
+ # Browser Settings
47
+ - BROWSER_PATH=
48
+ - BROWSER_USER_DATA=
49
+ - BROWSER_DEBUGGING_PORT=${BROWSER_DEBUGGING_PORT:-9222}
50
+ - BROWSER_DEBUGGING_HOST=localhost
51
+ - USE_OWN_BROWSER=false
52
+ - KEEP_BROWSER_OPEN=true
53
+ - BROWSER_CDP=${BROWSER_CDP:-} # e.g., http://localhost:9222
54
+
55
+ # Display Settings
56
+ - DISPLAY=:99
57
+ # This ENV is used by the Dockerfile during build time if playwright respects it.
58
+ # It's not strictly needed at runtime by docker-compose unless your app or scripts also read it.
59
+ - PLAYWRIGHT_BROWSERS_PATH=/ms-browsers # Matches Dockerfile ENV
60
+ - RESOLUTION=${RESOLUTION:-1920x1080x24}
61
+ - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
62
+ - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
63
+
64
+ # VNC Settings
65
+ - VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
66
+
67
+ volumes:
68
+ - /tmp/.X11-unix:/tmp/.X11-unix
69
+ # - ./my_chrome_data:/app/data/chrome_data # Optional: persist browser data
70
+ restart: unless-stopped
71
+ shm_size: '2gb'
72
+ cap_add:
73
+ - SYS_ADMIN
74
+ tmpfs:
75
+ - /tmp
76
+ healthcheck:
77
+ test: ["CMD", "nc", "-z", "localhost", "5901"] # VNC port
78
+ interval: 10s
79
+ timeout: 5s
80
+ retries: 3
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package-lock.json ADDED
@@ -0,0 +1,532 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "WebCrawler",
3
+ "lockfileVersion": 3,
4
+ "requires": true,
5
+ "packages": {
6
+ "": {
7
+ "dependencies": {
8
+ "git": "^0.1.5",
9
+ "gradio": "^0.1.0"
10
+ },
11
+ "devDependencies": {
12
+ "playwright": "^1.52.0",
13
+ "pnpm": "^10.11.0"
14
+ }
15
+ },
16
+ "node_modules/asn1": {
17
+ "version": "0.1.11",
18
+ "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.1.11.tgz",
19
+ "integrity": "sha512-Fh9zh3G2mZ8qM/kwsiKwL2U2FmXxVsboP4x1mXjnhKHv3SmzaBZoYvxEQJz/YS2gnCgd8xlAVWcZnQyC9qZBsA==",
20
+ "optional": true,
21
+ "engines": {
22
+ "node": ">=0.4.9"
23
+ }
24
+ },
25
+ "node_modules/assert-plus": {
26
+ "version": "0.1.5",
27
+ "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-0.1.5.tgz",
28
+ "integrity": "sha512-brU24g7ryhRwGCI2y+1dGQmQXiZF7TtIj583S96y0jjdajIe6wn8BuXyELYhvD22dtIxDQVFk04YTJwwdwOYJw==",
29
+ "optional": true,
30
+ "engines": {
31
+ "node": ">=0.8"
32
+ }
33
+ },
34
+ "node_modules/async": {
35
+ "version": "0.9.2",
36
+ "resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz",
37
+ "integrity": "sha512-l6ToIJIotphWahxxHyzK9bnLR6kM4jJIIgLShZeqLY7iboHoGkdgFl7W2/Ivi4SkMJYGKqW8vSuk0uKUj6qsSw==",
38
+ "license": "MIT",
39
+ "optional": true
40
+ },
41
+ "node_modules/aws-sign2": {
42
+ "version": "0.5.0",
43
+ "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.5.0.tgz",
44
+ "integrity": "sha512-oqUX0DM5j7aPWPCnpWebiyNIj2wiNI87ZxnOMoGv0aE4TGlBy2N+5iWc6dQ/NOKZaBD2W6PVz8jtOGkWzSC5EA==",
45
+ "optional": true,
46
+ "engines": {
47
+ "node": "*"
48
+ }
49
+ },
50
+ "node_modules/bluebird": {
51
+ "version": "1.0.8",
52
+ "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-1.0.8.tgz",
53
+ "integrity": "sha512-e8rlJcByuxPdMiiwU3lGtENflMtUncAblzSlN7rBAZ9ygb75D/rng3Xt5FbZpYqVCzK+sFHVIMht4G6fbfUfbA==",
54
+ "license": "MIT"
55
+ },
56
+ "node_modules/boom": {
57
+ "version": "0.4.2",
58
+ "resolved": "https://registry.npmjs.org/boom/-/boom-0.4.2.tgz",
59
+ "integrity": "sha512-OvfN8y1oAxxphzkl2SnCS+ztV/uVKTATtgLjWYg/7KwcNyf3rzpHxNQJZCKtsZd4+MteKczhWbSjtEX4bGgU9g==",
60
+ "deprecated": "This version has been deprecated in accordance with the hapi support policy (hapi.im/support). Please upgrade to the latest version to get the best features, bug fixes, and security patches. If you are unable to upgrade at this time, paid support is available for older versions (hapi.im/commercial).",
61
+ "optional": true,
62
+ "dependencies": {
63
+ "hoek": "0.9.x"
64
+ },
65
+ "engines": {
66
+ "node": ">=0.8.0"
67
+ }
68
+ },
69
+ "node_modules/clarinet": {
70
+ "version": "0.8.1",
71
+ "resolved": "https://registry.npmjs.org/clarinet/-/clarinet-0.8.1.tgz",
72
+ "integrity": "sha512-0jIPAg6gXcr3GzcLbRT2CuNGaADlOhso35o//c5ye463twKEaWrJGTrvqCTbePxolEBEIZ6rDsldUVQL1/1wrw==",
73
+ "engines": {
74
+ "chrome": ">=16.0.912",
75
+ "firefox": ">=0.8.0",
76
+ "node": ">=0.3.6"
77
+ }
78
+ },
79
+ "node_modules/coffee-script": {
80
+ "version": "1.7.1",
81
+ "resolved": "https://registry.npmjs.org/coffee-script/-/coffee-script-1.7.1.tgz",
82
+ "integrity": "sha512-W3s+SROY73OmrSGtPTTW/2wp2rmW5vuh0/tUuCK1NvTuyzLOVPccIP9whmhZ4cYWcr2NJPNENZIFaAMkTD5G3w==",
83
+ "deprecated": "CoffeeScript on NPM has moved to \"coffeescript\" (no hyphen)",
84
+ "license": "MIT",
85
+ "dependencies": {
86
+ "mkdirp": "~0.3.5"
87
+ },
88
+ "bin": {
89
+ "cake": "bin/cake",
90
+ "coffee": "bin/coffee"
91
+ },
92
+ "engines": {
93
+ "node": ">=0.8.0"
94
+ }
95
+ },
96
+ "node_modules/combined-stream": {
97
+ "version": "0.0.7",
98
+ "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-0.0.7.tgz",
99
+ "integrity": "sha512-qfexlmLp9MyrkajQVyjEDb0Vj+KhRgR/rxLiVhaihlT+ZkX0lReqtH6Ack40CvMDERR4b5eFp3CreskpBs1Pig==",
100
+ "optional": true,
101
+ "dependencies": {
102
+ "delayed-stream": "0.0.5"
103
+ },
104
+ "engines": {
105
+ "node": ">= 0.8"
106
+ }
107
+ },
108
+ "node_modules/cryptiles": {
109
+ "version": "0.2.2",
110
+ "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-0.2.2.tgz",
111
+ "integrity": "sha512-gvWSbgqP+569DdslUiCelxIv3IYK5Lgmq1UrRnk+s1WxQOQ16j3GPDcjdtgL5Au65DU/xQi6q3xPtf5Kta+3IQ==",
112
+ "deprecated": "This version has been deprecated in accordance with the hapi support policy (hapi.im/support). Please upgrade to the latest version to get the best features, bug fixes, and security patches. If you are unable to upgrade at this time, paid support is available for older versions (hapi.im/commercial).",
113
+ "optional": true,
114
+ "dependencies": {
115
+ "boom": "0.4.x"
116
+ },
117
+ "engines": {
118
+ "node": ">=0.8.0"
119
+ }
120
+ },
121
+ "node_modules/ctype": {
122
+ "version": "0.5.3",
123
+ "resolved": "https://registry.npmjs.org/ctype/-/ctype-0.5.3.tgz",
124
+ "integrity": "sha512-T6CEkoSV4q50zW3TlTHMbzy1E5+zlnNcY+yb7tWVYlTwPhx9LpnfAkd4wecpWknDyptp4k97LUZeInlf6jdzBg==",
125
+ "optional": true,
126
+ "engines": {
127
+ "node": ">= 0.4"
128
+ }
129
+ },
130
+ "node_modules/delayed-stream": {
131
+ "version": "0.0.5",
132
+ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-0.0.5.tgz",
133
+ "integrity": "sha512-v+7uBd1pqe5YtgPacIIbZ8HuHeLFVNe4mUEyFDXL6KiqzEykjbw+5mXZXpGFgNVasdL4jWKgaKIXrEHiynN1LA==",
134
+ "optional": true,
135
+ "engines": {
136
+ "node": ">=0.4.0"
137
+ }
138
+ },
139
+ "node_modules/faye-websocket": {
140
+ "version": "0.11.4",
141
+ "resolved": "https://registry.npmjs.org/faye-websocket/-/faye-websocket-0.11.4.tgz",
142
+ "integrity": "sha512-CzbClwlXAuiRQAlUyfqPgvPoNKTckTPGfwZV4ZdAhVcP2lh9KUxJg2b5GkE7XbjKQ3YJnQ9z6D9ntLAlB+tP8g==",
143
+ "license": "Apache-2.0",
144
+ "dependencies": {
145
+ "websocket-driver": ">=0.5.1"
146
+ },
147
+ "engines": {
148
+ "node": ">=0.8.0"
149
+ }
150
+ },
151
+ "node_modules/forever-agent": {
152
+ "version": "0.5.2",
153
+ "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.5.2.tgz",
154
+ "integrity": "sha512-PDG5Ef0Dob/JsZUxUltJOhm/Y9mlteAE+46y3M9RBz/Rd3QVENJ75aGRhN56yekTUboaBIkd8KVWX2NjF6+91A==",
155
+ "engines": {
156
+ "node": "*"
157
+ }
158
+ },
159
+ "node_modules/form-data": {
160
+ "version": "0.1.4",
161
+ "resolved": "https://registry.npmjs.org/form-data/-/form-data-0.1.4.tgz",
162
+ "integrity": "sha512-x8eE+nzFtAMA0YYlSxf/Qhq6vP1f8wSoZ7Aw1GuctBcmudCNuTUmmx45TfEplyb6cjsZO/jvh6+1VpZn24ez+w==",
163
+ "optional": true,
164
+ "dependencies": {
165
+ "async": "~0.9.0",
166
+ "combined-stream": "~0.0.4",
167
+ "mime": "~1.2.11"
168
+ },
169
+ "engines": {
170
+ "node": ">= 0.8"
171
+ }
172
+ },
173
+ "node_modules/fsevents": {
174
+ "version": "2.3.2",
175
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
176
+ "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
177
+ "dev": true,
178
+ "hasInstallScript": true,
179
+ "license": "MIT",
180
+ "optional": true,
181
+ "os": [
182
+ "darwin"
183
+ ],
184
+ "engines": {
185
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
186
+ }
187
+ },
188
+ "node_modules/git": {
189
+ "version": "0.1.5",
190
+ "resolved": "https://registry.npmjs.org/git/-/git-0.1.5.tgz",
191
+ "integrity": "sha512-N+bfOrXyKMU/fQtCj6D/U9MQOEN0DAA8TLHSLdUQRSWBOkeRvsjJHdrdkvcq05xO7GSDKWc3nDEGoTZ4DfCCSg==",
192
+ "dependencies": {
193
+ "mime": "1.2.9"
194
+ },
195
+ "engines": {
196
+ "node": ">=0.4.0"
197
+ }
198
+ },
199
+ "node_modules/git/node_modules/mime": {
200
+ "version": "1.2.9",
201
+ "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.9.tgz",
202
+ "integrity": "sha512-WiLgbHTIq5AYUvU/Luli4mZ1bUcHpGNHyCsbl+KPMg4zt+XUDpQehWjuBjdLaEvDTinvKj/FgfQt3fPoT7j08g=="
203
+ },
204
+ "node_modules/gradio": {
205
+ "version": "0.1.0",
206
+ "resolved": "https://registry.npmjs.org/gradio/-/gradio-0.1.0.tgz",
207
+ "integrity": "sha512-EpUQLXA2Jnzwm8gm55UIZ7SZ10rYe5fpZ6gWPfeH/bi4dtr2VGF4JWyhss18DiCfi8YPop+dvDs/qPkqWe4OMw==",
208
+ "license": "MIT",
209
+ "dependencies": {
210
+ "bluebird": "1.0.x",
211
+ "coffee-script": "1.7.x",
212
+ "jandal": "0.0.x",
213
+ "log_": "0.0.x",
214
+ "oboe": "1.12.x",
215
+ "request": "2.34.x",
216
+ "sockjs": "0.3.x",
217
+ "uuid": "1.4.x"
218
+ }
219
+ },
220
+ "node_modules/hawk": {
221
+ "version": "1.0.0",
222
+ "resolved": "https://registry.npmjs.org/hawk/-/hawk-1.0.0.tgz",
223
+ "integrity": "sha512-Sg+VzrI7TjUomO0rjD6UXawsj50ykn5sB/xKNW/IenxzRVyw/wt9A2FLzYpGL/r0QG5hyXY8nLx/2m8UutoDcg==",
224
+ "deprecated": "This module moved to @hapi/hawk. Please make sure to switch over as this distribution is no longer supported and may contain bugs and critical security issues.",
225
+ "optional": true,
226
+ "dependencies": {
227
+ "boom": "0.4.x",
228
+ "cryptiles": "0.2.x",
229
+ "hoek": "0.9.x",
230
+ "sntp": "0.2.x"
231
+ },
232
+ "engines": {
233
+ "node": ">=0.8.0"
234
+ }
235
+ },
236
+ "node_modules/hoek": {
237
+ "version": "0.9.1",
238
+ "resolved": "https://registry.npmjs.org/hoek/-/hoek-0.9.1.tgz",
239
+ "integrity": "sha512-ZZ6eGyzGjyMTmpSPYVECXy9uNfqBR7x5CavhUaLOeD6W0vWK1mp/b7O3f86XE0Mtfo9rZ6Bh3fnuw9Xr8MF9zA==",
240
+ "deprecated": "This version has been deprecated in accordance with the hapi support policy (hapi.im/support). Please upgrade to the latest version to get the best features, bug fixes, and security patches. If you are unable to upgrade at this time, paid support is available for older versions (hapi.im/commercial).",
241
+ "optional": true,
242
+ "engines": {
243
+ "node": ">=0.8.0"
244
+ }
245
+ },
246
+ "node_modules/http-parser-js": {
247
+ "version": "0.5.10",
248
+ "resolved": "https://registry.npmjs.org/http-parser-js/-/http-parser-js-0.5.10.tgz",
249
+ "integrity": "sha512-Pysuw9XpUq5dVc/2SMHpuTY01RFl8fttgcyunjL7eEMhGM3cI4eOmiCycJDVCo/7O7ClfQD3SaI6ftDzqOXYMA==",
250
+ "license": "MIT"
251
+ },
252
+ "node_modules/http-signature": {
253
+ "version": "0.10.1",
254
+ "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-0.10.1.tgz",
255
+ "integrity": "sha512-coK8uR5rq2IMj+Hen+sKPA5ldgbCc1/spPdKCL1Fw6h+D0s/2LzMcRK0Cqufs1h0ryx/niwBHGFu8HC3hwU+lA==",
256
+ "license": "MIT",
257
+ "optional": true,
258
+ "dependencies": {
259
+ "asn1": "0.1.11",
260
+ "assert-plus": "^0.1.5",
261
+ "ctype": "0.5.3"
262
+ },
263
+ "engines": {
264
+ "node": ">=0.8"
265
+ }
266
+ },
267
+ "node_modules/jandal": {
268
+ "version": "0.0.15",
269
+ "resolved": "https://registry.npmjs.org/jandal/-/jandal-0.0.15.tgz",
270
+ "integrity": "sha512-HTz/RUaWpYxZsekaG6Gvw2IsTgXtdWZTxYqwHTKVtPSR6mDlnCW6liY/bcnrNs7LYX2YfLZmVA6L7pVJVoMM2A==",
271
+ "license": "MIT"
272
+ },
273
+ "node_modules/json-stringify-safe": {
274
+ "version": "5.0.1",
275
+ "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
276
+ "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
277
+ "license": "ISC"
278
+ },
279
+ "node_modules/log_": {
280
+ "version": "0.0.4",
281
+ "resolved": "https://registry.npmjs.org/log_/-/log_-0.0.4.tgz",
282
+ "integrity": "sha512-3sxGIYgwJ1f714ETc2i0eHW3j2W3ewCOMbrXvI+eNodRfkZCFiOTFAjbPhLNPS7+qs2wMQPtzCdRcg7B3MvvnA==",
283
+ "license": "MIT"
284
+ },
285
+ "node_modules/mime": {
286
+ "version": "1.2.11",
287
+ "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz",
288
+ "integrity": "sha512-Ysa2F/nqTNGHhhm9MV8ure4+Hc+Y8AWiqUdHxsO7xu8zc92ND9f3kpALHjaP026Ft17UfxrMt95c50PLUeynBw=="
289
+ },
290
+ "node_modules/mkdirp": {
291
+ "version": "0.3.5",
292
+ "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.3.5.tgz",
293
+ "integrity": "sha512-8OCq0De/h9ZxseqzCH8Kw/Filf5pF/vMI6+BH7Lu0jXz2pqYCjTAQRolSxRIi+Ax+oCCjlxoJMP0YQ4XlrQNHg==",
294
+ "deprecated": "Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)",
295
+ "license": "MIT"
296
+ },
297
+ "node_modules/node-uuid": {
298
+ "version": "1.4.8",
299
+ "resolved": "https://registry.npmjs.org/node-uuid/-/node-uuid-1.4.8.tgz",
300
+ "integrity": "sha512-TkCET/3rr9mUuRp+CpO7qfgT++aAxfDRaalQhwPFzI9BY/2rCDn6OfpZOVggi1AXfTPpfkTrg5f5WQx5G1uLxA==",
301
+ "deprecated": "Use uuid module instead",
302
+ "bin": {
303
+ "uuid": "bin/uuid"
304
+ }
305
+ },
306
+ "node_modules/oauth-sign": {
307
+ "version": "0.3.0",
308
+ "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.3.0.tgz",
309
+ "integrity": "sha512-Tr31Sh5FnK9YKm7xTUPyDMsNOvMqkVDND0zvK/Wgj7/H9q8mpye0qG2nVzrnsvLhcsX5DtqXD0la0ks6rkPCGQ==",
310
+ "optional": true,
311
+ "engines": {
312
+ "node": "*"
313
+ }
314
+ },
315
+ "node_modules/oboe": {
316
+ "version": "1.12.4",
317
+ "resolved": "https://registry.npmjs.org/oboe/-/oboe-1.12.4.tgz",
318
+ "integrity": "sha512-B19SoK5K/NVRvMKmLhvDTSRuSxi6rfnjPMMnLOtH/mub7nTTbglKD4a82IezgLjcJS05XXK0wCQaKdEsIvlhKQ==",
319
+ "license": "BSD",
320
+ "dependencies": {
321
+ "clarinet": "~0.8.0"
322
+ }
323
+ },
324
+ "node_modules/playwright": {
325
+ "version": "1.52.0",
326
+ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.52.0.tgz",
327
+ "integrity": "sha512-JAwMNMBlxJ2oD1kce4KPtMkDeKGHQstdpFPcPH3maElAXon/QZeTvtsfXmTMRyO9TslfoYOXkSsvao2nE1ilTw==",
328
+ "dev": true,
329
+ "license": "Apache-2.0",
330
+ "dependencies": {
331
+ "playwright-core": "1.52.0"
332
+ },
333
+ "bin": {
334
+ "playwright": "cli.js"
335
+ },
336
+ "engines": {
337
+ "node": ">=18"
338
+ },
339
+ "optionalDependencies": {
340
+ "fsevents": "2.3.2"
341
+ }
342
+ },
343
+ "node_modules/playwright-core": {
344
+ "version": "1.52.0",
345
+ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.52.0.tgz",
346
+ "integrity": "sha512-l2osTgLXSMeuLZOML9qYODUQoPPnUsKsb5/P6LJ2e6uPKXUdPK5WYhN4z03G+YNbWmGDY4YENauNu4ZKczreHg==",
347
+ "dev": true,
348
+ "license": "Apache-2.0",
349
+ "bin": {
350
+ "playwright-core": "cli.js"
351
+ },
352
+ "engines": {
353
+ "node": ">=18"
354
+ }
355
+ },
356
+ "node_modules/pnpm": {
357
+ "version": "10.11.0",
358
+ "resolved": "https://registry.npmjs.org/pnpm/-/pnpm-10.11.0.tgz",
359
+ "integrity": "sha512-ZUBYP0HMX2KOs9l3Ps7oAvT575kjzEW2mJD7R5kdSwkpZGlOw6T3OKQgyRijMwYsi5JdMS9C5PDCY+tgNVH5dw==",
360
+ "dev": true,
361
+ "license": "MIT",
362
+ "bin": {
363
+ "pnpm": "bin/pnpm.cjs",
364
+ "pnpx": "bin/pnpx.cjs"
365
+ },
366
+ "engines": {
367
+ "node": ">=18.12"
368
+ },
369
+ "funding": {
370
+ "url": "https://opencollective.com/pnpm"
371
+ }
372
+ },
373
+ "node_modules/qs": {
374
+ "version": "0.6.6",
375
+ "resolved": "https://registry.npmjs.org/qs/-/qs-0.6.6.tgz",
376
+ "integrity": "sha512-kN+yNdAf29Jgp+AYHUmC7X4QdJPR8czuMWLNLc0aRxkQ7tB3vJQEONKKT9ou/rW7EbqVec11srC9q9BiVbcnHA==",
377
+ "engines": {
378
+ "node": "*"
379
+ }
380
+ },
381
+ "node_modules/request": {
382
+ "version": "2.34.0",
383
+ "resolved": "https://registry.npmjs.org/request/-/request-2.34.0.tgz",
384
+ "integrity": "sha512-mD5mNhfkeaKMg5ZY/hZFbW4lyC/NTn34/ILGQr/XLSuxYOE6vJfL0MTPPXZcZrdt+Nh1Kce+f4B4KbGThIETxQ==",
385
+ "deprecated": "request has been deprecated, see https://github.com/request/request/issues/3142",
386
+ "engines": [
387
+ "node >= 0.8.0"
388
+ ],
389
+ "license": "Apache, Version 2.0",
390
+ "dependencies": {
391
+ "forever-agent": "~0.5.0",
392
+ "json-stringify-safe": "~5.0.0",
393
+ "mime": "~1.2.9",
394
+ "node-uuid": "~1.4.0",
395
+ "qs": "~0.6.0"
396
+ },
397
+ "optionalDependencies": {
398
+ "aws-sign2": "~0.5.0",
399
+ "form-data": "~0.1.0",
400
+ "hawk": "~1.0.0",
401
+ "http-signature": "~0.10.0",
402
+ "oauth-sign": "~0.3.0",
403
+ "tough-cookie": ">=0.12.0",
404
+ "tunnel-agent": "~0.3.0"
405
+ }
406
+ },
407
+ "node_modules/safe-buffer": {
408
+ "version": "5.2.1",
409
+ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
410
+ "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
411
+ "funding": [
412
+ {
413
+ "type": "github",
414
+ "url": "https://github.com/sponsors/feross"
415
+ },
416
+ {
417
+ "type": "patreon",
418
+ "url": "https://www.patreon.com/feross"
419
+ },
420
+ {
421
+ "type": "consulting",
422
+ "url": "https://feross.org/support"
423
+ }
424
+ ],
425
+ "license": "MIT"
426
+ },
427
+ "node_modules/sntp": {
428
+ "version": "0.2.4",
429
+ "resolved": "https://registry.npmjs.org/sntp/-/sntp-0.2.4.tgz",
430
+ "integrity": "sha512-bDLrKa/ywz65gCl+LmOiIhteP1bhEsAAzhfMedPoiHP3dyYnAevlaJshdqb9Yu0sRifyP/fRqSt8t+5qGIWlGQ==",
431
+ "deprecated": "This module moved to @hapi/sntp. Please make sure to switch over as this distribution is no longer supported and may contain bugs and critical security issues.",
432
+ "optional": true,
433
+ "dependencies": {
434
+ "hoek": "0.9.x"
435
+ },
436
+ "engines": {
437
+ "node": ">=0.8.0"
438
+ }
439
+ },
440
+ "node_modules/sockjs": {
441
+ "version": "0.3.24",
442
+ "resolved": "https://registry.npmjs.org/sockjs/-/sockjs-0.3.24.tgz",
443
+ "integrity": "sha512-GJgLTZ7vYb/JtPSSZ10hsOYIvEYsjbNU+zPdIHcUaWVNUEPivzxku31865sSSud0Da0W4lEeOPlmw93zLQchuQ==",
444
+ "license": "MIT",
445
+ "dependencies": {
446
+ "faye-websocket": "^0.11.3",
447
+ "uuid": "^8.3.2",
448
+ "websocket-driver": "^0.7.4"
449
+ }
450
+ },
451
+ "node_modules/sockjs/node_modules/uuid": {
452
+ "version": "8.3.2",
453
+ "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
454
+ "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==",
455
+ "license": "MIT",
456
+ "bin": {
457
+ "uuid": "dist/bin/uuid"
458
+ }
459
+ },
460
+ "node_modules/tldts": {
461
+ "version": "6.1.86",
462
+ "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.86.tgz",
463
+ "integrity": "sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ==",
464
+ "license": "MIT",
465
+ "optional": true,
466
+ "dependencies": {
467
+ "tldts-core": "^6.1.86"
468
+ },
469
+ "bin": {
470
+ "tldts": "bin/cli.js"
471
+ }
472
+ },
473
+ "node_modules/tldts-core": {
474
+ "version": "6.1.86",
475
+ "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.86.tgz",
476
+ "integrity": "sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==",
477
+ "license": "MIT",
478
+ "optional": true
479
+ },
480
+ "node_modules/tough-cookie": {
481
+ "version": "5.1.2",
482
+ "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.1.2.tgz",
483
+ "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==",
484
+ "license": "BSD-3-Clause",
485
+ "optional": true,
486
+ "dependencies": {
487
+ "tldts": "^6.1.32"
488
+ },
489
+ "engines": {
490
+ "node": ">=16"
491
+ }
492
+ },
493
+ "node_modules/tunnel-agent": {
494
+ "version": "0.3.0",
495
+ "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.3.0.tgz",
496
+ "integrity": "sha512-jlGqHGoKzyyjhwv/c9omAgohntThMcGtw8RV/RDLlkbbc08kni/akVxO62N8HaXMVbVsK1NCnpSK3N2xCt22ww==",
497
+ "optional": true,
498
+ "engines": {
499
+ "node": "*"
500
+ }
501
+ },
502
+ "node_modules/uuid": {
503
+ "version": "1.4.2",
504
+ "resolved": "https://registry.npmjs.org/uuid/-/uuid-1.4.2.tgz",
505
+ "integrity": "sha512-woV5Ei+GBJyrqMXt0mJ9p8/I+47LYKp/4urH76FNTMjl22EhLPz1tNrQufTsrFf/PYV/7ctSZYAK7fKPWQKg+Q==",
506
+ "deprecated": "Please upgrade to version 7 or higher. Older versions may use Math.random() in certain circumstances, which is known to be problematic. See https://v8.dev/blog/math-random for details."
507
+ },
508
+ "node_modules/websocket-driver": {
509
+ "version": "0.7.4",
510
+ "resolved": "https://registry.npmjs.org/websocket-driver/-/websocket-driver-0.7.4.tgz",
511
+ "integrity": "sha512-b17KeDIQVjvb0ssuSDF2cYXSg2iztliJ4B9WdsuB6J952qCPKmnVq4DyW5motImXHDC1cBT/1UezrJVsKw5zjg==",
512
+ "license": "Apache-2.0",
513
+ "dependencies": {
514
+ "http-parser-js": ">=0.5.1",
515
+ "safe-buffer": ">=5.1.0",
516
+ "websocket-extensions": ">=0.1.1"
517
+ },
518
+ "engines": {
519
+ "node": ">=0.8.0"
520
+ }
521
+ },
522
+ "node_modules/websocket-extensions": {
523
+ "version": "0.1.4",
524
+ "resolved": "https://registry.npmjs.org/websocket-extensions/-/websocket-extensions-0.1.4.tgz",
525
+ "integrity": "sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg==",
526
+ "license": "Apache-2.0",
527
+ "engines": {
528
+ "node": ">=0.8.0"
529
+ }
530
+ }
531
+ }
532
+ }
package.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "devDependencies": {
3
+ "playwright": "^1.52.0",
4
+ "pnpm": "^10.11.0"
5
+ },
6
+ "scripts": {
7
+ "pw:install": "playwright install"
8
+ },
9
+ "dependencies": {
10
+ "git": "^0.1.5",
11
+ "gradio": "^0.1.0"
12
+ }
13
+ }
pnpm-lock.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lockfileVersion: '9.0'
2
+
3
+ settings:
4
+ autoInstallPeers: true
5
+ excludeLinksFromLockfile: false
6
+
7
+ importers:
8
+
9
+ .:
10
+ devDependencies:
11
+ playwright:
12
+ specifier: ^1.52.0
13
+ version: 1.52.0
14
+ pnpm:
15
+ specifier: ^10.11.0
16
+ version: 10.11.0
17
+
18
+ packages:
19
+
20
+ fsevents@2.3.2:
21
+ resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==}
22
+ engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
23
+ os: [darwin]
24
+
25
+ playwright-core@1.52.0:
26
+ resolution: {integrity: sha512-l2osTgLXSMeuLZOML9qYODUQoPPnUsKsb5/P6LJ2e6uPKXUdPK5WYhN4z03G+YNbWmGDY4YENauNu4ZKczreHg==}
27
+ engines: {node: '>=18'}
28
+ hasBin: true
29
+
30
+ playwright@1.52.0:
31
+ resolution: {integrity: sha512-JAwMNMBlxJ2oD1kce4KPtMkDeKGHQstdpFPcPH3maElAXon/QZeTvtsfXmTMRyO9TslfoYOXkSsvao2nE1ilTw==}
32
+ engines: {node: '>=18'}
33
+ hasBin: true
34
+
35
+ pnpm@10.11.0:
36
+ resolution: {integrity: sha512-ZUBYP0HMX2KOs9l3Ps7oAvT575kjzEW2mJD7R5kdSwkpZGlOw6T3OKQgyRijMwYsi5JdMS9C5PDCY+tgNVH5dw==}
37
+ engines: {node: '>=18.12'}
38
+ hasBin: true
39
+
40
+ snapshots:
41
+
42
+ fsevents@2.3.2:
43
+ optional: true
44
+
45
+ playwright-core@1.52.0: {}
46
+
47
+ playwright@1.52.0:
48
+ dependencies:
49
+ playwright-core: 1.52.0
50
+ optionalDependencies:
51
+ fsevents: 2.3.2
52
+
53
+ pnpm@10.11.0: {}
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ browser-use==0.1.48
2
+ pyperclip==1.9.0
3
+ gradio==5.27.0
4
+ json-repair
5
+ langchain-mistralai==0.2.4
6
+ MainContentExtractor==0.0.4
7
+ langchain-ibm==0.3.10
8
+ langchain_mcp_adapters==0.0.9
9
+ langgraph==0.3.34
10
+ langchain-community
setup.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
from setuptools import setup, find_packages

# Runtime requirements for the web UI; gradio is pinned to the version the
# interface was developed against, python-dotenv is left floating.
_INSTALL_REQUIRES = [
    "gradio==5.27.0",
    "python-dotenv",
]

setup(
    name="web-ui",
    version="0.1.0",
    packages=find_packages(),
    install_requires=_INSTALL_REQUIRES,
)
src.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c787a12df0d1d8b2876730d9dae68bb761b98b37f59040b70b73b3768222afe3
3
+ size 89632525
src/__init__.py ADDED
File without changes
src/agent/__init__.py ADDED
File without changes
src/agent/browser_use/browser_use_agent.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+
7
+ # from lmnr.sdk.decorators import observe
8
+ from browser_use.agent.gif import create_history_gif
9
+ from browser_use.agent.service import Agent, AgentHookFunc
10
+ from browser_use.agent.views import (
11
+ ActionResult,
12
+ AgentHistory,
13
+ AgentHistoryList,
14
+ AgentStepInfo,
15
+ ToolCallingMethod,
16
+ )
17
+ from browser_use.browser.views import BrowserStateHistory
18
+ from browser_use.utils import time_execution_async
19
+ from dotenv import load_dotenv
20
+ from browser_use.agent.message_manager.utils import is_model_without_tool_support
21
+
22
+ load_dotenv()
23
+ logger = logging.getLogger(__name__)
24
+
25
# Truthy when the env var starts with 't', 'y', or '1' (e.g. "true", "yes",
# "1"); anything else — including unset or empty — disables the skip.
# Uses [:1] rather than [0] so an empty-string value yields False instead of
# raising IndexError at import time.
SKIP_LLM_API_KEY_VERIFICATION = (
    os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[:1]
    in ("t", "y", "1")
)
28
+
29
+
30
class BrowserUseAgent(Agent):
    """``browser_use.Agent`` subclass tailored for the web UI.

    Differences from the base class:
      * ``_set_tool_calling_method`` resolves the ``'auto'`` setting based on
        the chat-model library in use.
      * ``run`` adds per-step async hooks, Ctrl+C pause/resume handling, and
        optional artifact saving (Playwright script, history GIF) on exit.
    """

    def _set_tool_calling_method(self) -> ToolCallingMethod | None:
        """Pick the tool-calling strategy for the configured LLM.

        An explicit setting is passed through unchanged; ``'auto'`` maps to
        ``'raw'`` for models without native tool support,
        ``'function_calling'`` for (Azure)OpenAI, and ``None`` (library
        default behavior) for everything else, including Google GenAI.
        """
        tool_calling_method = self.settings.tool_calling_method
        if tool_calling_method == 'auto':
            if is_model_without_tool_support(self.model_name):
                return 'raw'
            elif self.chat_model_library == 'ChatGoogleGenerativeAI':
                return None
            elif self.chat_model_library == 'ChatOpenAI':
                return 'function_calling'
            elif self.chat_model_library == 'AzureChatOpenAI':
                return 'function_calling'
            else:
                return None
        else:
            return tool_calling_method

    @time_execution_async("--run (agent)")
    async def run(
        self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
        on_step_end: AgentHookFunc | None = None
    ) -> AgentHistoryList:
        """Execute the task with maximum number of steps.

        Args:
            max_steps: Upper bound on agent steps; exceeding it appends an
                error entry to the history instead of raising.
            on_step_start: Optional async hook awaited before every step.
            on_step_end: Optional async hook awaited after every step.

        Returns:
            The accumulated ``AgentHistoryList`` — returned whether the run
            completed, hit the step limit, or was interrupted.
        """

        loop = asyncio.get_event_loop()

        # Set up the Ctrl+C signal handler with callbacks specific to this agent
        from browser_use.utils import SignalHandler

        signal_handler = SignalHandler(
            loop=loop,
            pause_callback=self.pause,
            resume_callback=self.resume,
            custom_exit_callback=None,  # No special cleanup needed on forced exit
            exit_on_second_int=True,
        )
        signal_handler.register()

        try:
            self._log_agent_run()

            # Execute initial actions if provided
            if self.initial_actions:
                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
                self.state.last_result = result

            for step in range(max_steps):
                # Check if waiting for user input after Ctrl+C
                if self.state.paused:
                    signal_handler.wait_for_resume()
                    signal_handler.reset()

                # Check if we should stop due to too many failures
                if self.state.consecutive_failures >= self.settings.max_failures:
                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
                    break

                # Check control flags before each step
                if self.state.stopped:
                    logger.info('Agent stopped')
                    break

                while self.state.paused:
                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
                    if self.state.stopped:  # Allow stopping while paused
                        break
                # NOTE(review): a stop requested while paused only breaks the
                # pause loop above — the current iteration still runs one more
                # step before the stopped check at the top of the next
                # iteration catches it. Confirm this is intended.

                if on_step_start is not None:
                    await on_step_start(self)

                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
                await self.step(step_info)

                if on_step_end is not None:
                    await on_step_end(self)

                if self.state.history.is_done():
                    if self.settings.validate_output and step < max_steps - 1:
                        # Validation failed: loop again instead of finishing.
                        if not await self._validate_output():
                            continue

                    await self.log_completion()
                    break
            else:
                # for/else: the loop exhausted max_steps without is_done();
                # record the failure as a final history entry.
                error_message = 'Failed to complete task in maximum steps'

                self.state.history.history.append(
                    AgentHistory(
                        model_output=None,
                        result=[ActionResult(error=error_message, include_in_memory=True)],
                        state=BrowserStateHistory(
                            url='',
                            title='',
                            tabs=[],
                            interacted_element=[],
                            screenshot=None,
                        ),
                        metadata=None,
                    )
                )

                logger.info(f'❌ {error_message}')

            return self.state.history

        except KeyboardInterrupt:
            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
            logger.info('Got KeyboardInterrupt during execution, returning current history')
            return self.state.history

        finally:
            # Unregister signal handlers before cleanup
            signal_handler.unregister()

            if self.settings.save_playwright_script_path:
                logger.info(
                    f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
                )
                try:
                    # Extract sensitive data keys if sensitive_data is provided
                    keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
                    # Pass browser and context config to the saving method
                    self.state.history.save_as_playwright_script(
                        self.settings.save_playwright_script_path,
                        sensitive_data_keys=keys,
                        browser_config=self.browser.config,
                        context_config=self.browser_context.config,
                    )
                except Exception as script_gen_err:
                    # Log any error during script generation/saving
                    logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)

            await self.close()

            if self.settings.generate_gif:
                # generate_gif may be True (use default path) or a str path.
                output_path: str = 'agent_history.gif'
                if isinstance(self.settings.generate_gif, str):
                    output_path = self.settings.generate_gif

                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
src/agent/deep_research/deep_research_agent.py ADDED
@@ -0,0 +1,1256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import threading
6
+ import uuid
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, TypedDict
9
+
10
+ from browser_use.browser.browser import BrowserConfig
11
+ from langchain_community.tools.file_management import (
12
+ ListDirectoryTool,
13
+ ReadFileTool,
14
+ WriteFileTool,
15
+ )
16
+
17
+ # Langchain imports
18
+ from langchain_core.messages import (
19
+ AIMessage,
20
+ BaseMessage,
21
+ HumanMessage,
22
+ SystemMessage,
23
+ ToolMessage,
24
+ )
25
+ from langchain_core.prompts import ChatPromptTemplate
26
+ from langchain_core.tools import StructuredTool, Tool
27
+
28
+ # Langgraph imports
29
+ from langgraph.graph import StateGraph
30
+ from pydantic import BaseModel, Field
31
+
32
+ from browser_use.browser.context import BrowserContextConfig
33
+
34
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
35
+ from src.browser.custom_browser import CustomBrowser
36
+ from src.controller.custom_controller import CustomController
37
+ from src.utils.mcp_client import setup_mcp_client_and_tools
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+ # Constants
42
+ REPORT_FILENAME = "report.md"
43
+ PLAN_FILENAME = "research_plan.md"
44
+ SEARCH_INFO_FILENAME = "search_info.json"
45
+
46
+ _AGENT_STOP_FLAGS = {}
47
+ _BROWSER_AGENT_INSTANCES = {}
48
+
49
+
50
async def run_single_browser_task(
    task_query: str,
    task_id: str,
    llm: Any,  # Pass the main LLM
    browser_config: Dict[str, Any],
    stop_event: threading.Event,
    use_vision: bool = False,
) -> Dict[str, Any]:
    """
    Runs a single BrowserUseAgent task.
    Manages browser creation and closing for this specific task.

    Args:
        task_query: The research query the browser agent should investigate.
        task_id: Identifier of the parent research run (used to key the agent registry).
        llm: Language model driving the browser agent.
        browser_config: Launch options (headless, window size, own-browser settings, ...).
        stop_event: Cooperative cancellation flag checked before and after the run.
        use_vision: Whether the browser agent should use vision capabilities.

    Returns:
        A dict with "query" plus either "result"/"status" (status is one of
        "completed", "stopped", "cancelled") or "error"/"status" ("failed").
    """
    if not BrowserUseAgent:
        return {
            "query": task_query,
            "error": "BrowserUseAgent components not available.",
        }

    # --- Browser Setup ---
    # These should ideally come from the main agent's config
    headless = browser_config.get("headless", False)
    window_w = browser_config.get("window_width", 1280)
    window_h = browser_config.get("window_height", 1100)
    browser_user_data_dir = browser_config.get("user_data_dir", None)
    use_own_browser = browser_config.get("use_own_browser", False)
    browser_binary_path = browser_config.get("browser_binary_path", None)
    wss_url = browser_config.get("wss_url", None)
    cdp_url = browser_config.get("cdp_url", None)
    disable_security = browser_config.get("disable_security", False)  # NOTE(review): read but currently unused

    bu_browser = None
    bu_browser_context = None
    # BUGFIX: task_key must exist before the try block. Previously it was only
    # assigned inside the try, so any exception raised before that assignment
    # caused a NameError in the finally clause, masking the original error.
    task_key: Optional[str] = None
    try:
        logger.info(f"Starting browser task for query: {task_query}")
        extra_args = []
        if use_own_browser:
            # Environment variables take precedence over the passed-in config.
            browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None

        bu_browser = CustomBrowser(
            config=BrowserConfig(
                headless=headless,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_args,
                wss_url=wss_url,
                cdp_url=cdp_url,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )

        context_config = BrowserContextConfig(
            save_downloads_path="./tmp/downloads",
            window_height=window_h,
            window_width=window_w,
            force_new_context=True,
        )
        bu_browser_context = await bu_browser.new_context(config=context_config)

        # Simple controller example, replace with your actual implementation if needed
        bu_controller = CustomController()

        # Construct the task prompt for BrowserUseAgent
        # Instruct it to find specific info and return title/URL
        bu_task_prompt = f"""
        Research Task: {task_query}
        Objective: Find relevant information answering the query.
        Output Requirements: For each relevant piece of information found, please provide:
        1. A concise summary of the information.
        2. The title of the source page or document.
        3. The URL of the source.
        Focus on accuracy and relevance. Avoid irrelevant details.
        PDF cannot directly extract _content, please try to download first, then using read_file, if you can't save or read, please try other methods.
        """

        bu_agent_instance = BrowserUseAgent(
            task=bu_task_prompt,
            llm=llm,  # Use the passed LLM
            browser=bu_browser,
            browser_context=bu_browser_context,
            controller=bu_controller,
            use_vision=use_vision,
            source="webui",
        )

        # Store instance for potential stop() call
        task_key = f"{task_id}_{uuid.uuid4()}"
        _BROWSER_AGENT_INSTANCES[task_key] = bu_agent_instance

        # --- Run with Stop Check ---
        # BrowserUseAgent needs to internally check a stop signal or have a stop method.
        # We simulate checking before starting and assume `run` might be interruptible
        # or have its own stop mechanism we can trigger via bu_agent_instance.stop().
        if stop_event.is_set():
            logger.info(f"Browser task for '{task_query}' cancelled before start.")
            return {"query": task_query, "result": None, "status": "cancelled"}

        # The run needs to be awaitable and ideally accept a stop signal or have a .stop() method
        logger.info(f"Running BrowserUseAgent for: {task_query}")
        result = await bu_agent_instance.run()  # Assuming run is the main method
        logger.info(f"BrowserUseAgent finished for: {task_query}")

        final_data = result.final_result()

        if stop_event.is_set():
            logger.info(f"Browser task for '{task_query}' stopped during execution.")
            return {"query": task_query, "result": final_data, "status": "stopped"}
        else:
            logger.info(f"Browser result for '{task_query}': {final_data}")
            return {"query": task_query, "result": final_data, "status": "completed"}

    except Exception as e:
        logger.error(
            f"Error during browser task for query '{task_query}': {e}", exc_info=True
        )
        return {"query": task_query, "error": str(e), "status": "failed"}
    finally:
        # Always tear down the browser resources created for this task, even on error.
        if bu_browser_context:
            try:
                await bu_browser_context.close()
                bu_browser_context = None
                logger.info("Closed browser context.")
            except Exception as e:
                logger.error(f"Error closing browser context: {e}")
        if bu_browser:
            try:
                await bu_browser.close()
                bu_browser = None
                logger.info("Closed browser.")
            except Exception as e:
                logger.error(f"Error closing browser: {e}")

        # Deregister the agent instance, if it was ever registered.
        if task_key is not None:
            _BROWSER_AGENT_INSTANCES.pop(task_key, None)
194
+
195
+
196
class BrowserSearchInput(BaseModel):
    """Argument schema for the 'parallel_browser_search' tool exposed to the LLM."""

    queries: List[str] = Field(
        description="List of distinct search queries to find information relevant to the research task."
    )
200
+
201
+
202
async def _run_browser_search_tool(
    queries: List[str],
    task_id: str,  # Injected dependency
    llm: Any,  # Injected dependency
    browser_config: Dict[str, Any],
    stop_event: threading.Event,
    max_parallel_browsers: int = 1,
) -> List[Dict[str, Any]]:
    """
    Internal function to execute parallel browser searches based on LLM-provided queries.
    Handles concurrency (bounded by max_parallel_browsers via a semaphore) and stop signals.

    Returns:
        One result dict per query (same order as the truncated query list), each
        carrying a "status" of "completed", "failed" or "cancelled".
    """

    # Limit queries just in case LLM ignores the description
    queries = queries[:max_parallel_browsers]
    logger.info(
        f"[Browser Tool {task_id}] Running search for {len(queries)} queries: {queries}"
    )

    # BUGFIX(minor): removed unused local `results` that was never populated.
    semaphore = asyncio.Semaphore(max_parallel_browsers)

    async def task_wrapper(query):
        # Each query gets its own browser task; the semaphore caps concurrency.
        async with semaphore:
            if stop_event.is_set():
                logger.info(
                    f"[Browser Tool {task_id}] Skipping task due to stop signal: {query}"
                )
                return {"query": query, "result": None, "status": "cancelled"}
            # Pass necessary injected configs and the stop event
            return await run_single_browser_task(
                query,
                task_id,
                llm,  # Pass the main LLM (or a dedicated one if needed)
                browser_config,
                stop_event,
                # use_vision could be added here if needed
            )

    tasks = [task_wrapper(query) for query in queries]
    # return_exceptions=True so one failed query cannot sink the whole batch.
    search_results = await asyncio.gather(*tasks, return_exceptions=True)

    processed_results = []
    for i, res in enumerate(search_results):
        query = queries[i]  # Get corresponding query
        if isinstance(res, Exception):
            logger.error(
                f"[Browser Tool {task_id}] Gather caught exception for query '{query}': {res}",
                exc_info=True,
            )
            processed_results.append(
                {"query": query, "error": str(res), "status": "failed"}
            )
        elif isinstance(res, dict):
            processed_results.append(res)
        else:
            logger.error(
                f"[Browser Tool {task_id}] Unexpected result type for query '{query}': {type(res)}"
            )
            processed_results.append(
                {"query": query, "error": "Unexpected result type", "status": "failed"}
            )

    logger.info(
        f"[Browser Tool {task_id}] Finished search. Results count: {len(processed_results)}"
    )
    return processed_results
269
+
270
+
271
def create_browser_search_tool(
    llm: Any,
    browser_config: Dict[str, Any],
    task_id: str,
    stop_event: threading.Event,
    max_parallel_browsers: int = 1,
) -> StructuredTool:
    """Factory function to create the browser search tool with necessary dependencies.

    Only `queries` (see BrowserSearchInput) is exposed to the LLM; every other
    argument of _run_browser_search_tool is pre-bound here so tool calls stay simple.
    """
    # Use partial to bind the dependencies that aren't part of the LLM call arguments
    from functools import partial

    bound_tool_func = partial(
        _run_browser_search_tool,
        task_id=task_id,
        llm=llm,
        browser_config=browser_config,
        stop_event=stop_event,
        max_parallel_browsers=max_parallel_browsers,
    )

    # The tool is async-only, hence coroutine= rather than func=.
    return StructuredTool.from_function(
        coroutine=bound_tool_func,
        name="parallel_browser_search",
        description=f"""Use this tool to actively search the web for information related to a specific research task or question.
It runs up to {max_parallel_browsers} searches in parallel using a browser agent for better results than simple scraping.
Provide a list of distinct search queries(up to {max_parallel_browsers}) that are likely to yield relevant information.""",
        args_schema=BrowserSearchInput,
    )
299
+
300
+
301
+ # --- Langgraph State Definition ---
302
+
303
+
304
class ResearchTaskItem(TypedDict):
    """One actionable research task within a category of the research plan."""

    # step: int # Maybe step within category, or just implicit by order
    task_description: str  # What to investigate for this task
    status: str  # "pending", "completed", "failed"
    queries: Optional[List[str]]  # Search queries used for this task, if recorded
    result_summary: Optional[str]  # Short outcome summary once executed
310
+
311
+
312
class ResearchCategoryItem(TypedDict):
    """A named grouping of related research tasks in the hierarchical plan."""

    category_name: str  # Human-readable category title
    tasks: List[ResearchTaskItem]  # Ordered tasks belonging to this category
    # Optional: category_status: str # Could be "pending", "in_progress", "completed"
316
+
317
+
318
class DeepResearchState(TypedDict):
    """Shared LangGraph state threaded through the deep-research workflow nodes."""

    task_id: str  # Unique identifier of this research run
    topic: str  # Research topic supplied by the caller
    research_plan: List[ResearchCategoryItem]  # CHANGED: hierarchical (categories -> tasks)
    search_results: List[Dict[str, Any]]  # Accumulated tool outputs across tasks
    llm: Any  # Main language model instance
    tools: List[Tool]  # Tools bound to the LLM during execution
    output_dir: Path  # Directory holding plan/report/search artifacts
    browser_config: Dict[str, Any]  # Browser launch configuration
    final_report: Optional[str]  # Populated by the synthesis node
    current_category_index: int  # Index of the category currently being worked
    current_task_index_in_category: int  # Index of the task within that category
    stop_requested: bool  # Cooperative cancellation flag
    error_message: Optional[str]  # Accumulated error text, if any
    messages: List[BaseMessage]  # Conversation history carried between nodes
333
+
334
+
335
+ # --- Langgraph Nodes ---
336
+
337
+
338
def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
    """
    Restore resumable state from a previous run's output directory.

    Parses the markdown checklist in PLAN_FILENAME back into the hierarchical
    plan structure, locating the first still-pending task so execution can
    resume there, and reloads SEARCH_INFO_FILENAME if present.

    Returns:
        A partial state-update dict; may contain "research_plan",
        "current_category_index", "current_task_index_in_category",
        "search_results" and/or "error_message". Empty when nothing was loaded.
    """
    state_updates = {}
    plan_file = os.path.join(output_dir, PLAN_FILENAME)
    search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)

    loaded_plan: List[ResearchCategoryItem] = []
    next_cat_idx, next_task_idx = 0, 0
    found_pending = False  # Becomes True at the first "- [ ]" task encountered

    if os.path.exists(plan_file):
        try:
            with open(plan_file, "r", encoding="utf-8") as f:
                current_category: Optional[ResearchCategoryItem] = None
                lines = f.readlines()
                # Counters track the position of the first pending task; they
                # only advance while everything seen so far is completed/failed.
                cat_counter = 0
                task_counter_in_cat = 0

                for line_num, line_content in enumerate(lines):
                    line = line_content.strip()
                    if line.startswith("## "):  # Category
                        if current_category:  # Save previous category
                            loaded_plan.append(current_category)
                            if not found_pending:  # If previous category was all done, advance cat counter
                                cat_counter += 1
                                task_counter_in_cat = 0
                        category_name = line[line.find(" "):].strip()  # Get text after "## X. "
                        # NOTE(review): this slice keeps the "X." numbering prefix in
                        # category_name ("1. Foo" rather than "Foo") — confirm intended.
                        current_category = ResearchCategoryItem(category_name=category_name, tasks=[])
                    elif (line.startswith("- [ ]") or line.startswith("- [x]") or line.startswith(
                            "- [-]")) and current_category:  # Task
                        # Checkbox marker encodes the task status.
                        status = "pending"
                        if line.startswith("- [x]"):
                            status = "completed"
                        elif line.startswith("- [-]"):
                            status = "failed"

                        task_desc = line[5:].strip()
                        current_category["tasks"].append(
                            ResearchTaskItem(task_description=task_desc, status=status, queries=None,
                                             result_summary=None)
                        )
                        if status == "pending" and not found_pending:
                            next_cat_idx = cat_counter
                            next_task_idx = task_counter_in_cat
                            found_pending = True
                        if not found_pending:  # only increment if previous tasks were completed/failed
                            task_counter_in_cat += 1

                if current_category:  # Append last category
                    loaded_plan.append(current_category)

            if loaded_plan:
                state_updates["research_plan"] = loaded_plan
                if not found_pending and loaded_plan:  # All tasks were completed or failed
                    next_cat_idx = len(loaded_plan)  # Points beyond the last category
                    next_task_idx = 0
                state_updates["current_category_index"] = next_cat_idx
                state_updates["current_task_index_in_category"] = next_task_idx
                logger.info(
                    f"Loaded hierarchical research plan from {plan_file}. "
                    f"Next task: Category {next_cat_idx}, Task {next_task_idx} in category."
                )
            else:
                logger.warning(f"Plan file {plan_file} was empty or malformed.")

        except Exception as e:
            logger.error(f"Failed to load or parse research plan {plan_file}: {e}", exc_info=True)
            state_updates["error_message"] = f"Failed to load research plan: {e}"
    else:
        logger.info(f"Plan file {plan_file} not found. Will start fresh.")

    if os.path.exists(search_file):
        try:
            with open(search_file, "r", encoding="utf-8") as f:
                state_updates["search_results"] = json.load(f)
            logger.info(f"Loaded search results from {search_file}")
        except Exception as e:
            logger.error(f"Failed to load search results {search_file}: {e}")
            state_updates["error_message"] = (
                    state_updates.get("error_message", "") + f" Failed to load search results: {e}").strip()

    return state_updates
419
+
420
+
421
def _save_plan_to_md(plan: List[ResearchCategoryItem], output_dir: str):
    """Write the hierarchical research plan to PLAN_FILENAME as a markdown checklist."""
    plan_file = os.path.join(output_dir, PLAN_FILENAME)
    # Status -> markdown checkbox marker; anything unknown (i.e. failed) gets "- [-]".
    status_markers = {"completed": "- [x]", "pending": "- [ ]"}
    try:
        with open(plan_file, "w", encoding="utf-8") as f:
            f.write("# Research Plan\n\n")
            for cat_idx, category in enumerate(plan):
                f.write(f"## {cat_idx + 1}. {category['category_name']}\n\n")
                for task in category["tasks"]:
                    marker = status_markers.get(task["status"], "- [-]")
                    f.write(f"  {marker} {task['task_description']}\n")
                f.write("\n")
        logger.info(f"Hierarchical research plan saved to {plan_file}")
    except Exception as e:
        logger.error(f"Failed to save research plan to {plan_file}: {e}")
436
+
437
+
438
def _save_search_results_to_json(results: List[Dict[str, Any]], output_dir: str):
    """Persist the accumulated search results to SEARCH_INFO_FILENAME as JSON."""
    search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
    try:
        # Full overwrite each time; append semantics are not needed for now.
        with open(search_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Failed to save search results to {search_file}: {e}")
    else:
        logger.info(f"Search results saved to {search_file}")
448
+
449
+
450
def _save_report_to_md(report: str, output_dir: Path):
    """Write the final synthesized report to REPORT_FILENAME in output_dir."""
    report_file = os.path.join(output_dir, REPORT_FILENAME)
    try:
        with open(report_file, "w", encoding="utf-8") as f:
            f.write(report)
    except Exception as e:
        logger.error(f"Failed to save final report to {report_file}: {e}")
    else:
        logger.info(f"Final report saved to {report_file}")
459
+
460
+
461
async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
    """
    Create (or resume) the hierarchical research plan for the topic.

    If a partially-processed plan was restored from disk, it is reused as-is.
    Otherwise the LLM is prompted to emit a JSON plan (categories -> tasks),
    which is parsed into ResearchCategoryItem/ResearchTaskItem structures,
    saved to disk, and returned as a state update with the task indices reset.

    Returns:
        A state-update dict: either the (possibly new) "research_plan" with
        reset indices and empty "search_results", or an "error_message".
    """
    logger.info("--- Entering Planning Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping planning.")
        return {"stop_requested": True}

    llm = state["llm"]
    topic = state["topic"]
    existing_plan = state.get("research_plan")
    output_dir = state["output_dir"]

    # Resume path: a plan exists and at least one task has already been processed.
    if existing_plan and (
            state.get("current_category_index", 0) > 0 or state.get("current_task_index_in_category", 0) > 0):
        logger.info("Resuming with existing plan.")
        _save_plan_to_md(existing_plan, output_dir)  # Ensure it's saved initially
        # current_category_index and current_task_index_in_category should be set by _load_previous_state
        return {"research_plan": existing_plan}

    logger.info(f"Generating new research plan for topic: {topic}")

    prompt_text = f"""You are a meticulous research assistant. Your goal is to create a hierarchical research plan to thoroughly investigate the topic: "{topic}".
The plan should be structured into several main research categories. Each category should contain a list of specific, actionable research tasks or questions.
Format the output as a JSON list of objects. Each object represents a research category and should have:
1. "category_name": A string for the name of the research category.
2. "tasks": A list of strings, where each string is a specific research task for that category.

Example JSON Output:
[
{{
"category_name": "Understanding Core Concepts and Definitions",
"tasks": [
"Define the primary terminology associated with '{topic}'.",
"Identify the fundamental principles and theories underpinning '{topic}'."
]
}},
{{
"category_name": "Historical Development and Key Milestones",
"tasks": [
"Trace the historical evolution of '{topic}'.",
"Identify key figures, events, or breakthroughs in the development of '{topic}'."
]
}},
{{
"category_name": "Current State-of-the-Art and Applications",
"tasks": [
"Analyze the current advancements and prominent applications of '{topic}'.",
"Investigate ongoing research and active areas of development related to '{topic}'."
]
}},
{{
"category_name": "Challenges, Limitations, and Future Outlook",
"tasks": [
"Identify the major challenges and limitations currently facing '{topic}'.",
"Explore potential future trends, ethical considerations, and societal impacts of '{topic}'."
]
}}
]

Generate a plan with 3-10 categories, and 2-6 tasks per category for the topic: "{topic}" according to the complexity of the topic.
Ensure the output is a valid JSON array.
"""
    messages = [
        SystemMessage(content="You are a research planning assistant outputting JSON."),
        HumanMessage(content=prompt_text)
    ]

    try:
        response = await llm.ainvoke(messages)
        raw_content = response.content
        # The LLM might wrap the JSON in backticks
        if raw_content.strip().startswith("```json"):
            raw_content = raw_content.strip()[7:-3].strip()
        elif raw_content.strip().startswith("```"):
            raw_content = raw_content.strip()[3:-3].strip()

        logger.debug(f"LLM response for plan: {raw_content}")
        parsed_plan_from_llm = json.loads(raw_content)

        # Validate/normalize the LLM output into the typed plan structure,
        # skipping malformed categories/tasks rather than failing outright.
        new_plan: List[ResearchCategoryItem] = []
        for cat_idx, category_data in enumerate(parsed_plan_from_llm):
            if not isinstance(category_data,
                              dict) or "category_name" not in category_data or "tasks" not in category_data:
                logger.warning(f"Skipping invalid category data: {category_data}")
                continue

            tasks: List[ResearchTaskItem] = []
            for task_idx, task_desc in enumerate(category_data["tasks"]):
                if isinstance(task_desc, str):
                    tasks.append(
                        ResearchTaskItem(
                            task_description=task_desc,
                            status="pending",
                            queries=None,
                            result_summary=None,
                        )
                    )
                else:  # Sometimes LLM puts tasks as {"task": "description"}
                    if isinstance(task_desc, dict) and "task_description" in task_desc:
                        tasks.append(
                            ResearchTaskItem(
                                task_description=task_desc["task_description"],
                                status="pending",
                                queries=None,
                                result_summary=None,
                            )
                        )
                    elif isinstance(task_desc, dict) and "task" in task_desc:  # common LLM mistake
                        tasks.append(
                            ResearchTaskItem(
                                task_description=task_desc["task"],
                                status="pending",
                                queries=None,
                                result_summary=None,
                            )
                        )
                    else:
                        logger.warning(
                            f"Skipping invalid task data: {task_desc} in category {category_data['category_name']}")

            new_plan.append(
                ResearchCategoryItem(
                    category_name=category_data["category_name"],
                    tasks=tasks,
                )
            )

        if not new_plan:
            logger.error("LLM failed to generate a valid plan structure from JSON.")
            return {"error_message": "Failed to generate research plan structure."}

        logger.info(f"Generated research plan with {len(new_plan)} categories.")
        _save_plan_to_md(new_plan, output_dir)  # Save the hierarchical plan

        return {
            "research_plan": new_plan,
            "current_category_index": 0,
            "current_task_index_in_category": 0,
            "search_results": [],
        }

    except json.JSONDecodeError as e:
        # raw_content is always bound here: JSONDecodeError can only come from json.loads above.
        logger.error(f"Failed to parse JSON from LLM for plan: {e}. Response was: {raw_content}", exc_info=True)
        return {"error_message": f"LLM generated invalid JSON for research plan: {e}"}
    except Exception as e:
        logger.error(f"Error during planning: {e}", exc_info=True)
        return {"error_message": f"LLM Error during planning: {e}"}
607
+
608
+
609
async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
    """
    Execute the currently-pointed-to task of the research plan.

    Resolves the task at (current_category_index, current_task_index_in_category),
    invokes the LLM with the research tools bound, executes any tool calls it
    makes (primarily 'parallel_browser_search'), records the outcome on the
    task, persists the plan and accumulated search results to disk, and returns
    a state update advancing the indices to the next task.

    Returns:
        A partial DeepResearchState update. An empty dict signals that the plan
        is exhausted (routing to synthesis is handled by `should_continue`).
    """
    logger.info("--- Entering Research Execution Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping research execution.")
        return {
            "stop_requested": True,
            "current_category_index": state["current_category_index"],
            "current_task_index_in_category": state["current_task_index_in_category"],
        }

    plan = state["research_plan"]
    cat_idx = state["current_category_index"]
    task_idx = state["current_task_index_in_category"]
    llm = state["llm"]
    tools = state["tools"]
    output_dir = str(state["output_dir"])
    task_id = state["task_id"]  # For _AGENT_STOP_FLAGS

    # This check should ideally be handled by `should_continue`
    if not plan or cat_idx >= len(plan):
        logger.info("Research plan complete or categories exhausted.")
        return {}  # should route to synthesis

    current_category = plan[cat_idx]
    if task_idx >= len(current_category["tasks"]):
        logger.info(f"All tasks in category '{current_category['category_name']}' completed. Moving to next category.")
        # The next iteration will be caught by should_continue or this node with updated indices
        return {
            "current_category_index": cat_idx + 1,
            "current_task_index_in_category": 0,
            "messages": state["messages"]  # Pass messages along
        }

    current_task = current_category["tasks"][task_idx]

    if current_task["status"] == "completed":
        logger.info(
            f"Task '{current_task['task_description']}' in category '{current_category['category_name']}' already completed. Skipping.")
        # Advance past the already-completed task.
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0
        return {
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "messages": state["messages"]  # Pass messages along
        }

    logger.info(
        f"Executing research task: '{current_task['task_description']}' (Category: '{current_category['category_name']}')"
    )

    llm_with_tools = llm.bind_tools(tools)

    # Construct messages for LLM invocation
    task_prompt_content = (
        f"Current Research Category: {current_category['category_name']}\n"
        f"Specific Task: {current_task['task_description']}\n\n"
        "Please use the available tools, especially 'parallel_browser_search', to gather information for this specific task. "
        "Provide focused search queries relevant ONLY to this task. "
        "If you believe you have sufficient information from previous steps for this specific task, you can indicate that you are ready to summarize or that no further search is needed."
    )
    current_task_message_history = [
        HumanMessage(content=task_prompt_content)
    ]
    if not state["messages"]:  # First actual execution message
        invocation_messages = [
            SystemMessage(
                content="You are a research assistant executing one task of a research plan. Focus on the current task only."),
        ] + current_task_message_history
    else:
        invocation_messages = state["messages"] + current_task_message_history

    try:
        logger.info(f"Invoking LLM with tools for task: {current_task['task_description']}")
        ai_response: BaseMessage = await llm_with_tools.ainvoke(invocation_messages)
        logger.info("LLM invocation complete.")

        tool_results = []
        executed_tool_names = []
        current_search_results = state.get("search_results", [])  # Get existing search results

        if not isinstance(ai_response, AIMessage) or not ai_response.tool_calls:
            logger.warning(
                f"LLM did not call any tool for task '{current_task['task_description']}'. Response: {ai_response.content[:100]}..."
            )
            # BUGFIX: this branch previously returned `current_task` (the task
            # dict itself) as the graph state update, which injected non-state
            # keys into the LangGraph state and never advanced the indices
            # (the "save the plan and advance" code after it was unreachable).
            # Record the outcome, persist the plan, and advance instead.
            current_task["status"] = "pending"  # Or "completed_no_tool" if LLM explains it's done
            current_task["result_summary"] = f"LLM did not use a tool. Response: {ai_response.content}"
            _save_plan_to_md(plan, output_dir)
            next_task_idx = task_idx + 1
            next_cat_idx = cat_idx
            if next_task_idx >= len(current_category["tasks"]):
                next_cat_idx += 1
                next_task_idx = 0
            return {
                "research_plan": plan,
                "current_category_index": next_cat_idx,
                "current_task_index_in_category": next_task_idx,
                "messages": state["messages"] + current_task_message_history + [ai_response],
            }
        else:
            # Process tool calls
            for tool_call in ai_response.tool_calls:
                tool_name = tool_call.get("name")
                tool_args = tool_call.get("args", {})
                tool_call_id = tool_call.get("id")

                logger.info(f"LLM requested tool call: {tool_name} with args: {tool_args}")
                executed_tool_names.append(tool_name)
                selected_tool = next((t for t in tools if t.name == tool_name), None)

                if not selected_tool:
                    logger.error(f"LLM called tool '{tool_name}' which is not available.")
                    tool_results.append(
                        ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=tool_call_id))
                    continue

                try:
                    # Honor an external stop request before launching a (slow) tool.
                    stop_event = _AGENT_STOP_FLAGS.get(task_id)
                    if stop_event and stop_event.is_set():
                        logger.info(f"Stop requested before executing tool: {tool_name}")
                        current_task["status"] = "pending"  # Or a new "stopped" status
                        _save_plan_to_md(plan, output_dir)
                        return {"stop_requested": True, "research_plan": plan, "current_category_index": cat_idx,
                                "current_task_index_in_category": task_idx}

                    logger.info(f"Executing tool: {tool_name}")
                    tool_output = await selected_tool.ainvoke(tool_args)
                    logger.info(f"Tool '{tool_name}' executed successfully.")

                    if tool_name == "parallel_browser_search":
                        current_search_results.extend(tool_output)  # tool_output is List[Dict]
                    else:  # For other tools, we might need specific handling or just log
                        logger.info(f"Result from tool '{tool_name}': {str(tool_output)[:200]}...")
                        # Storing non-browser results might need a different structure or key in search_results
                        current_search_results.append(
                            {"tool_name": tool_name, "args": tool_args, "output": str(tool_output),
                             "status": "completed"})

                    tool_results.append(ToolMessage(content=json.dumps(tool_output), tool_call_id=tool_call_id))

                except Exception as e:
                    logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
                    tool_results.append(
                        ToolMessage(content=f"Error executing tool {tool_name}: {e}", tool_call_id=tool_call_id))
                    current_search_results.append(
                        {"tool_name": tool_name, "args": tool_args, "status": "failed", "error": str(e)})

            # After processing all tool calls for this task.
            # BUGFIX: previously matched "Error:" which missed the
            # "Error executing tool {name}: {e}" messages, so tool failures were
            # marked completed. Match the shared "Error" prefix of both messages.
            step_failed_tool_execution = any(str(tr.content).startswith("Error") for tr in tool_results)
            # Consider a task successful if a browser search was attempted and didn't immediately error out during call
            # The browser search itself returns status for each query.
            browser_tool_attempted_successfully = "parallel_browser_search" in executed_tool_names and not step_failed_tool_execution

            if step_failed_tool_execution:
                current_task["status"] = "failed"
                current_task[
                    "result_summary"] = f"Tool execution failed. Errors: {[tr.content for tr in tool_results if 'Error' in str(tr.content)]}"
            elif executed_tool_names:  # If any tool was called
                current_task["status"] = "completed"
                current_task["result_summary"] = f"Executed tool(s): {', '.join(executed_tool_names)}."
                # TODO: Could ask LLM to summarize the tool_results for this task if needed, rather than just listing tools.
            else:  # No tool calls but AI response had .tool_calls structure (empty)
                current_task["status"] = "failed"  # Or a more specific status
                current_task["result_summary"] = "LLM prepared for tool call but provided no tools."

            # Save progress
            _save_plan_to_md(plan, output_dir)
            _save_search_results_to_json(current_search_results, output_dir)

            # Determine next indices
            next_task_idx = task_idx + 1
            next_cat_idx = cat_idx
            if next_task_idx >= len(current_category["tasks"]):
                next_cat_idx += 1
                next_task_idx = 0

            updated_messages = state["messages"] + current_task_message_history + [ai_response] + tool_results

            return {
                "research_plan": plan,
                "search_results": current_search_results,
                "current_category_index": next_cat_idx,
                "current_task_index_in_category": next_task_idx,
                "messages": updated_messages,
            }

    except Exception as e:
        logger.error(f"Unhandled error during research execution for task '{current_task['task_description']}': {e}",
                     exc_info=True)
        current_task["status"] = "failed"
        _save_plan_to_md(plan, output_dir)
        # Determine next indices even on error to attempt to move on
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0
        return {
            "research_plan": plan,
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "error_message": f"Core Execution Error on task '{current_task['task_description']}': {e}",
            "messages": state["messages"] + current_task_message_history  # Preserve messages up to error
        }
809
+
810
+
811
async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
    """Synthesize the final Markdown research report from collected findings.

    Reads ``search_results`` and ``research_plan`` from the graph state,
    formats them into a prompt, and asks the LLM to write the report, which
    is persisted to ``output_dir`` via ``_save_report_to_md``.

    Returns:
        A state-update dict with ``final_report`` on success,
        ``stop_requested`` if a stop was signalled, or ``error_message``
        if the LLM call failed.
    """
    logger.info("--- Entering Synthesis Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping synthesis.")
        return {"stop_requested": True}

    llm = state["llm"]
    topic = state["topic"]
    search_results = state.get("search_results", [])
    output_dir = state["output_dir"]
    plan = state["research_plan"]  # Included for report context.

    if not search_results:
        logger.warning("No search results found to synthesize report.")
        report = f"# Research Report: {topic}\n\nNo information was gathered during the research process."
        _save_report_to_md(report, output_dir)
        return {"final_report": report}

    logger.info(
        f"Synthesizing report from {len(search_results)} collected search result entries."
    )

    # Format each collected result entry (browser searches and other tools)
    # into a Markdown section for the LLM prompt.
    formatted_results = ""
    for result_entry in search_results:
        query = result_entry.get("query", "Unknown Query")  # From parallel_browser_search
        tool_name = result_entry.get("tool_name")  # From other tools
        status = result_entry.get("status", "unknown")
        result_data = result_entry.get("result")  # From BrowserUseAgent's final_result
        tool_output_str = result_entry.get("output")  # From other tools

        if tool_name == "parallel_browser_search" and status == "completed" and result_data:
            # result_data is already a summary string returned by the
            # BrowserUseAgent for this query.
            formatted_results += f'### Finding from Web Search Query: "{query}"\n'
            formatted_results += f"- **Summary:**\n{result_data}\n"
            formatted_results += "---\n"
        elif tool_name != "parallel_browser_search" and status == "completed" and tool_output_str:
            formatted_results += f'### Finding from Tool: "{tool_name}" (Args: {result_entry.get("args")})\n'
            formatted_results += f"- **Output:**\n{tool_output_str}\n"
            formatted_results += "---\n"
        elif status == "failed":
            error = result_entry.get("error")
            q_or_t = f"Query: \"{query}\"" if query != "Unknown Query" else f"Tool: \"{tool_name}\""
            formatted_results += f'### Failed {q_or_t}\n'
            formatted_results += f"- **Error:** {error}\n"
            formatted_results += "---\n"

    # Summarize the research plan (with completion markers) for context.
    plan_summary = "\nResearch Plan Followed:\n"
    for cat_idx, category in enumerate(plan):
        plan_summary += f"\n#### Category {cat_idx + 1}: {category['category_name']}\n"
        for task in category['tasks']:
            marker = "[x]" if task["status"] == "completed" else "[ ]" if task["status"] == "pending" else "[-]"
            plan_summary += f" - {marker} {task['task_description']}\n"

    # FIX: the human message uses real template placeholders ({topic}, ...)
    # instead of an f-string. Previously the f-string pre-interpolated the
    # values and format_prompt() then re-parsed the result, so any literal
    # braces inside the gathered findings were treated as template variables
    # and broke prompt rendering.
    synthesis_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are a professional researcher tasked with writing a comprehensive and well-structured report based on collected findings.
The report should address the research topic thoroughly, synthesizing the information gathered from various sources.
Structure the report logically:
1. Briefly introduce the topic and the report's scope (mentioning the research plan followed, including categories and tasks, is good).
2. Discuss the key findings, organizing them thematically, possibly aligning with the research categories. Analyze, compare, and contrast information.
3. Summarize the main points and offer concluding thoughts.

Ensure the tone is objective and professional.
If findings are contradictory or incomplete, acknowledge this.
""",  # Citation handling intentionally omitted: the browser agent returns plain summaries.
            ),
            (
                "human",
                """
**Research Topic:** {topic}

{plan_summary}

**Collected Findings:**
```
{formatted_results}
```

Please generate the final research report in Markdown format based **only** on the information above.
""",
            ),
        ]
    )

    try:
        response = await llm.ainvoke(
            synthesis_prompt.format_prompt(
                topic=topic,
                plan_summary=plan_summary,
                formatted_results=formatted_results,
            ).to_messages()
        )
        final_report_md = response.content

        # NOTE: a dead "references" accumulator (never populated, so its
        # report section could never be emitted) was removed; reinstate a
        # real citation pipeline if the browser agent starts returning URLs.

        logger.info("Successfully synthesized the final report.")
        _save_report_to_md(final_report_md, output_dir)
        return {"final_report": final_report_md}

    except Exception as e:
        logger.error(f"Error during report synthesis: {e}", exc_info=True)
        return {"error_message": f"LLM Error during synthesis: {e}"}
933
+
934
+
935
+ # --- Langgraph Edges and Conditional Logic ---
936
+
937
+
938
def should_continue(state: DeepResearchState) -> str:
    """Routing predicate for the research graph.

    Decides, after each step, whether to run another research task
    ("execute_research"), proceed to report synthesis
    ("synthesize_report"), or terminate ("end_run").
    """
    logger.info("--- Evaluating Condition: Should Continue? ---")

    # Hard stops first: explicit stop request, then a critical node error.
    if state.get("stop_requested"):
        logger.info("Stop requested, routing to END.")
        return "end_run"
    if state.get("error_message") and "Core Execution Error" in state["error_message"]:
        logger.warning(f"Critical error detected: {state['error_message']}. Routing to END.")
        return "end_run"

    plan = state.get("research_plan")
    if not plan:
        logger.warning("No research plan found. Routing to END.")
        return "end_run"

    category_index = state.get("current_category_index", 0)
    task_index = state.get("current_task_index_in_category", 0)  # Next task to check.

    if category_index < len(plan):
        tasks_in_category = plan[category_index]["tasks"]
        if task_index < len(tasks_in_category):
            # The execution node itself skips tasks that are already done;
            # here we only confirm the indices point at a valid task.
            logger.info(
                f"Plan has potential pending tasks (next up: Category {category_index}, Task {task_index}). Routing to Research Execution."
            )
            return "execute_research"
        if category_index + 1 < len(plan):
            # Current category is exhausted but more categories remain; the
            # previous execution step already advanced the state indices.
            logger.info(
                f"Finished tasks in category {category_index}. Moving to category {category_index + 1}. Routing to Research Execution."
            )
            return "execute_research"

    # Every category/task has been processed (or indices ran past the plan).
    logger.info("All plan categories and tasks processed or current indices are out of bounds. Routing to Synthesis.")
    return "synthesize_report"
977
+
978
+
979
+ # --- DeepSearchAgent Class ---
980
+
981
+
982
class DeepResearchAgent:
    """Orchestrates the deep-research Langgraph workflow.

    Wires together the planning, execution, and synthesis nodes, sets up the
    tool belt (file I/O, parallel browser search, optional MCP tools), and
    exposes run/stop lifecycle control.
    """

    def __init__(
        self,
        llm: Any,
        browser_config: Dict[str, Any],
        mcp_server_config: Optional[Dict[str, Any]] = None,
    ):
        """
        Initializes the DeepSearchAgent.

        Args:
            llm: The Langchain compatible language model instance.
            browser_config: Configuration dictionary for the BrowserUseAgent tool.
                            Example: {"headless": True, "window_width": 1280, ...}
            mcp_server_config: Optional configuration for the MCP client.
        """
        self.llm = llm
        self.browser_config = browser_config
        self.mcp_server_config = mcp_server_config
        self.mcp_client = None
        self.stopped = False
        self.graph = self._compile_graph()
        self.current_task_id: Optional[str] = None
        self.stop_event: Optional[threading.Event] = None
        self.runner: Optional[asyncio.Task] = None  # Holds the asyncio task for run()

    async def _setup_tools(
        self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
    ) -> List[Tool]:
        """Sets up the basic tools (File I/O), the browser search tool, and optional MCP tools."""
        tools = [
            WriteFileTool(),
            ReadFileTool(),
            ListDirectoryTool(),
        ]  # Basic file operations
        browser_use_tool = create_browser_search_tool(
            llm=self.llm,
            browser_config=self.browser_config,
            task_id=task_id,
            stop_event=stop_event,
            max_parallel_browsers=max_parallel_browsers,
        )
        tools += [browser_use_tool]
        # Add MCP tools if config is provided.
        # FIX: a trailing `elif self.mcp_server_config:` warning branch was
        # unreachable (it duplicated the `if` condition) and was removed.
        if self.mcp_server_config:
            try:
                logger.info("Setting up MCP client and tools...")
                if not self.mcp_client:
                    self.mcp_client = await setup_mcp_client_and_tools(
                        self.mcp_server_config
                    )
                mcp_tools = self.mcp_client.get_tools()
                logger.info(f"Loaded {len(mcp_tools)} MCP tools.")
                tools.extend(mcp_tools)
            except Exception as e:
                logger.error(f"Failed to set up MCP tools: {e}", exc_info=True)
        # De-duplicate by tool name (later registrations win) and return a
        # real list, matching the declared List[Tool] return type
        # (previously this returned a dict_values view).
        tools_map = {tool.name: tool for tool in tools}
        return list(tools_map.values())

    async def close_mcp_client(self):
        """Closes the MCP client, if one was created, and clears the handle."""
        if self.mcp_client:
            await self.mcp_client.__aexit__(None, None, None)
            self.mcp_client = None

    def _compile_graph(self) -> StateGraph:
        """Compiles the Langgraph state machine."""
        workflow = StateGraph(DeepResearchState)

        # Add nodes
        workflow.add_node("plan_research", planning_node)
        workflow.add_node("execute_research", research_execution_node)
        workflow.add_node("synthesize_report", synthesis_node)
        workflow.add_node(
            "end_run", lambda state: logger.info("--- Reached End Run Node ---") or {}
        )  # Simple end node

        # Define edges
        workflow.set_entry_point("plan_research")

        workflow.add_edge(
            "plan_research", "execute_research"
        )  # Always execute after planning

        # Conditional edge after execution
        workflow.add_conditional_edges(
            "execute_research",
            should_continue,
            {
                "execute_research": "execute_research",  # Loop back if more steps
                "synthesize_report": "synthesize_report",  # Move to synthesis if done
                "end_run": "end_run",  # End if stop requested or error
            },
        )

        workflow.add_edge("synthesize_report", "end_run")  # End after synthesis

        return workflow.compile()

    async def run(
        self,
        topic: str,
        task_id: Optional[str] = None,
        save_dir: str = "./tmp/deep_research",
        max_parallel_browsers: int = 1,
    ) -> Dict[str, Any]:
        """
        Runs the deep research process to completion.

        Args:
            topic: The research topic.
            task_id: Optional existing task ID to resume. If None, a new ID is generated.
            save_dir: Base directory where per-task artifacts are written.
            max_parallel_browsers: Max number of concurrent browser sessions.

        Returns:
            A dict with keys ``status``, ``message``, ``task_id``, and
            ``final_state`` (the last graph state, or {} if unavailable).
        """
        if self.runner and not self.runner.done():
            logger.warning(
                "Agent is already running. Please stop the current task first."
            )
            # Return an error status instead of yielding
            return {
                "status": "error",
                "message": "Agent already running.",
                "task_id": self.current_task_id,
            }

        self.current_task_id = task_id if task_id else str(uuid.uuid4())
        output_dir = os.path.join(save_dir, self.current_task_id)
        os.makedirs(output_dir, exist_ok=True)

        logger.info(
            f"[AsyncGen] Starting research task ID: {self.current_task_id} for topic: '{topic}'"
        )
        logger.info(f"[AsyncGen] Output directory: {output_dir}")

        self.stop_event = threading.Event()
        _AGENT_STOP_FLAGS[self.current_task_id] = self.stop_event
        agent_tools = await self._setup_tools(
            self.current_task_id, self.stop_event, max_parallel_browsers
        )
        initial_state: DeepResearchState = {
            "task_id": self.current_task_id,
            "topic": topic,
            "research_plan": [],
            "search_results": [],
            "messages": [],
            "llm": self.llm,
            "tools": agent_tools,
            "output_dir": Path(output_dir),
            "browser_config": self.browser_config,
            "final_report": None,
            "current_category_index": 0,
            "current_task_index_in_category": 0,
            "stop_requested": False,
            "error_message": None,
        }

        if task_id:
            logger.info(f"Attempting to resume task {task_id}...")
            loaded_state = _load_previous_state(task_id, output_dir)
            initial_state.update(loaded_state)
            if loaded_state.get("research_plan"):
                logger.info(
                    f"Resuming with {len(loaded_state['research_plan'])} plan categories "
                    f"and {len(loaded_state.get('search_results', []))} existing results. "
                    f"Next task: Cat {initial_state['current_category_index']}, Task {initial_state['current_task_index_in_category']}"
                )
                # Deliberately use the freshly supplied topic rather than any
                # stored one, so a resume can refine the research question.
                initial_state["topic"] = topic
            else:
                logger.warning(
                    f"Resume requested for {task_id}, but no previous plan found. Starting fresh."
                )

        # --- Execute Graph using ainvoke ---
        final_state = None
        status = "unknown"
        message = None
        try:
            logger.info(f"Invoking graph execution for task {self.current_task_id}...")
            self.runner = asyncio.create_task(self.graph.ainvoke(initial_state))
            final_state = await self.runner
            logger.info(f"Graph execution finished for task {self.current_task_id}.")

            # Determine status based on final state
            if self.stop_event and self.stop_event.is_set():
                status = "stopped"
                message = "Research process was stopped by request."
                logger.info(message)
            elif final_state and final_state.get("error_message"):
                status = "error"
                message = final_state["error_message"]
                logger.error(f"Graph execution completed with error: {message}")
            elif final_state and final_state.get("final_report"):
                status = "completed"
                message = "Research process completed successfully."
                logger.info(message)
            else:
                # If it ends without error/report (e.g., empty plan, stopped before synthesis)
                status = "finished_incomplete"
                message = "Research process finished, but may be incomplete (no final report generated)."
                logger.warning(message)

        except asyncio.CancelledError:
            status = "cancelled"
            message = f"Agent run task cancelled for {self.current_task_id}."
            logger.info(message)
            # final_state stays as it was before cancellation (None here,
            # since no checkpointing is in use).
        except Exception as e:
            status = "error"
            message = f"Unhandled error during graph execution for {self.current_task_id}: {e}"
            logger.error(message, exc_info=True)
        finally:
            logger.info(f"Cleaning up resources for task {self.current_task_id}")
            task_id_to_clean = self.current_task_id

            # FIX: remove the per-task stop flag so the module-level registry
            # does not grow unboundedly across runs.
            if task_id_to_clean:
                _AGENT_STOP_FLAGS.pop(task_id_to_clean, None)

            self.stop_event = None
            self.current_task_id = None
            self.runner = None  # Mark runner as finished
            # Reuse the dedicated cleanup helper (also resets self.mcp_client,
            # which the previous inline __aexit__ call forgot to do).
            await self.close_mcp_client()

        # Return a result dictionary including the status and the final state if available
        return {
            "status": status,
            "message": message,
            "task_id": task_id_to_clean,  # Use the stored task_id
            "final_state": final_state if final_state else {},
        }

    async def _stop_lingering_browsers(self, task_id):
        """Attempts to stop any BrowserUseAgent instances associated with the task_id."""
        keys_to_stop = [
            key for key in _BROWSER_AGENT_INSTANCES if key.startswith(f"{task_id}_")
        ]
        if not keys_to_stop:
            return

        logger.warning(
            f"Found {len(keys_to_stop)} potentially lingering browser agents for task {task_id}. Attempting stop..."
        )
        for key in keys_to_stop:
            agent_instance = _BROWSER_AGENT_INSTANCES.get(key)
            try:
                if agent_instance:
                    # Assuming BU agent has an async stop method
                    await agent_instance.stop()
                    logger.info(f"Called stop() on browser agent instance {key}")
            except Exception as e:
                logger.error(
                    f"Error calling stop() on browser agent instance {key}: {e}"
                )

    async def stop(self):
        """Signals the currently running agent task to stop."""
        if not self.current_task_id or not self.stop_event:
            logger.info("No agent task is currently running.")
            return

        logger.info(f"Stop requested for task ID: {self.current_task_id}")
        self.stop_event.set()  # Signal the stop event
        self.stopped = True
        await self._stop_lingering_browsers(self.current_task_id)

    def close(self):
        # NOTE(review): this resets the stopped flag rather than setting it;
        # presumably it re-arms the agent for a fresh run -- confirm intent.
        self.stopped = False
src/browser/__init__.py ADDED
File without changes
src/browser/custom_browser.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import pdb
3
+
4
+ from playwright.async_api import Browser as PlaywrightBrowser
5
+ from playwright.async_api import (
6
+ BrowserContext as PlaywrightBrowserContext,
7
+ )
8
+ from playwright.async_api import (
9
+ Playwright,
10
+ async_playwright,
11
+ )
12
+ from browser_use.browser.browser import Browser, IN_DOCKER
13
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
14
+ from playwright.async_api import BrowserContext as PlaywrightBrowserContext
15
+ import logging
16
+
17
+ from browser_use.browser.chrome import (
18
+ CHROME_ARGS,
19
+ CHROME_DETERMINISTIC_RENDERING_ARGS,
20
+ CHROME_DISABLE_SECURITY_ARGS,
21
+ CHROME_DOCKER_ARGS,
22
+ CHROME_HEADLESS_ARGS,
23
+ )
24
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
25
+ from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
26
+ from browser_use.utils import time_execution_async
27
+ import socket
28
+
29
+ from .custom_context import CustomBrowserContext
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class CustomBrowser(Browser):
    # Browser subclass that (a) returns CustomBrowserContext instances from
    # new_context() and (b) customizes how the builtin Playwright browser
    # is launched (window geometry, Chrome flags, debug-port handling).

    async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
        """Create a browser context"""
        # Merge browser-level config with the optional per-context config;
        # per-context values win because they are unpacked last.
        browser_config = self.config.model_dump() if self.config else {}
        context_config = config.model_dump() if config else {}
        merged_config = {**browser_config, **context_config}
        return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)

    async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """Sets up and returns a Playwright Browser instance with anti-detection measures."""
        assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'

        # Use the configured window size from new_context_config if available
        if (
            not self.config.headless
            and hasattr(self.config, 'new_context_config')
            and hasattr(self.config.new_context_config, 'window_width')
            and hasattr(self.config.new_context_config, 'window_height')
        ):
            screen_size = {
                'width': self.config.new_context_config.window_width,
                'height': self.config.new_context_config.window_height,
            }
            offset_x, offset_y = get_window_adjustments()
        elif self.config.headless:
            # Headless runs get a fixed full-HD viewport positioned at origin.
            screen_size = {'width': 1920, 'height': 1080}
            offset_x, offset_y = 0, 0
        else:
            # Headed with no explicit size: use the physical screen resolution.
            screen_size = get_screen_resolution()
            offset_x, offset_y = get_window_adjustments()

        # Build the Chrome CLI argument set; a set collapses duplicate flags
        # contributed by the various presets and extra_browser_args.
        chrome_args = {
            f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
            *CHROME_ARGS,
            *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
            *(CHROME_HEADLESS_ARGS if self.config.headless else []),
            *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
            *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
            f'--window-position={offset_x},{offset_y}',
            f'--window-size={screen_size["width"]},{screen_size["height"]}',
            *self.config.extra_browser_args,
        }

        # check if chrome remote debugging port is already taken,
        # if so remove the remote-debugging-port arg to prevent conflicts
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
                chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')

        browser_class = getattr(playwright, self.config.browser_class)
        # Per-engine launch argument lists; only the chromium set is elaborate.
        args = {
            'chromium': list(chrome_args),
            'firefox': [
                *{
                    '-no-remote',
                    *self.config.extra_browser_args,
                }
            ],
            'webkit': [
                *{
                    '--no-startup-window',
                    *self.config.extra_browser_args,
                }
            ],
        }

        # NOTE(review): channel='chromium' is passed unconditionally even when
        # browser_class is firefox/webkit -- confirm Playwright ignores the
        # channel for non-chromium engines.
        browser = await browser_class.launch(
            channel='chromium',  # https://github.com/microsoft/playwright/issues/33566
            headless=self.config.headless,
            args=args[self.config.browser_class],
            proxy=self.config.proxy.model_dump() if self.config.proxy else None,
            handle_sigterm=False,
            handle_sigint=False,
        )
        return browser
src/browser/custom_context.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+
5
+ from browser_use.browser.browser import Browser, IN_DOCKER
6
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
7
+ from playwright.async_api import Browser as PlaywrightBrowser
8
+ from playwright.async_api import BrowserContext as PlaywrightBrowserContext
9
+ from typing import Optional
10
+ from browser_use.browser.context import BrowserContextState
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class CustomBrowserContext(BrowserContext):
    """Thin BrowserContext subclass.

    Adds no behavior of its own; exists as the project's extension point
    for per-context customization (and is the type returned by
    CustomBrowser.new_context).
    """

    def __init__(
        self,
        browser: 'Browser',
        config: BrowserContextConfig | None = None,
        state: Optional[BrowserContextState] = None,
    ):
        # Delegate everything straight to the base class.
        super().__init__(browser=browser, config=config, state=state)
src/controller/__init__.py ADDED
File without changes
src/controller/custom_controller.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+
3
+ import pyperclip
4
+ from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar
5
+ from pydantic import BaseModel
6
+ from browser_use.agent.views import ActionResult
7
+ from browser_use.browser.context import BrowserContext
8
+ from browser_use.controller.service import Controller, DoneAction
9
+ from browser_use.controller.registry.service import Registry, RegisteredAction
10
+ from main_content_extractor import MainContentExtractor
11
+ from browser_use.controller.views import (
12
+ ClickElementAction,
13
+ DoneAction,
14
+ ExtractPageContentAction,
15
+ GoToUrlAction,
16
+ InputTextAction,
17
+ OpenTabAction,
18
+ ScrollAction,
19
+ SearchGoogleAction,
20
+ SendKeysAction,
21
+ SwitchTabAction,
22
+ )
23
+ import logging
24
+ import inspect
25
+ import asyncio
26
+ import os
27
+ from langchain_core.language_models.chat_models import BaseChatModel
28
+ from browser_use.agent.views import ActionModel, ActionResult
29
+
30
+ from src.utils.mcp_client import create_tool_param_model, setup_mcp_client_and_tools
31
+
32
+ from browser_use.utils import time_execution_sync
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ Context = TypeVar('Context')
37
+
38
+
39
class CustomController(Controller):
    """Controller with extra custom actions (human assistance, file upload)
    and optional MCP tool registration."""

    def __init__(self, exclude_actions: Optional[list[str]] = None,
                 output_model: Optional[Type[BaseModel]] = None,
                 ask_assistant_callback: Optional[Union[Callable[[str, BrowserContext], Dict[str, Any]], Callable[
                     [str, BrowserContext], Awaitable[Dict[str, Any]]]]] = None,
                 ):
        """
        Args:
            exclude_actions: Action names to omit from the registry
                (defaults to no exclusions).
            output_model: Optional structured-output model for the agent.
            ask_assistant_callback: Sync or async callback invoked when the
                agent requests human assistance.
        """
        # FIX: use None as the default instead of a mutable [] default
        # argument (shared across instances).
        super().__init__(exclude_actions=exclude_actions if exclude_actions is not None else [],
                         output_model=output_model)
        self._register_custom_actions()
        self.ask_assistant_callback = ask_assistant_callback
        self.mcp_client = None
        self.mcp_server_config = None

    def _register_custom_actions(self):
        """Register all custom browser actions"""

        @self.registry.action(
            "When executing tasks, prioritize autonomous completion. However, if you encounter a definitive blocker "
            "that prevents you from proceeding independently – such as needing credentials you don't possess, "
            "requiring subjective human judgment, needing a physical action performed, encountering complex CAPTCHAs, "
            "or facing limitations in your capabilities – you must request human assistance."
        )
        async def ask_for_assistant(query: str, browser: BrowserContext):
            # Route the question through the configured callback; support
            # both sync and async callables.
            if self.ask_assistant_callback:
                if inspect.iscoroutinefunction(self.ask_assistant_callback):
                    user_response = await self.ask_assistant_callback(query, browser)
                else:
                    user_response = self.ask_assistant_callback(query, browser)
                msg = f"AI ask: {query}. User response: {user_response['response']}"
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            else:
                return ActionResult(extracted_content="Human cannot help you. Please try another way.",
                                    include_in_memory=True)

        @self.registry.action(
            'Upload file to interactive element with file path ',
        )
        async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
            # Only allow paths explicitly whitelisted for the agent.
            if path not in available_file_paths:
                return ActionResult(error=f'File path {path} is not available')

            if not os.path.exists(path):
                return ActionResult(error=f'File {path} does not exist')

            dom_el = await browser.get_dom_element_by_index(index)

            file_upload_dom_el = dom_el.get_file_upload_element()

            if file_upload_dom_el is None:
                msg = f'No file upload element found at index {index}'
                logger.info(msg)
                return ActionResult(error=msg)

            file_upload_el = await browser.get_locate_element(file_upload_dom_el)

            if file_upload_el is None:
                msg = f'No file upload element found at index {index}'
                logger.info(msg)
                return ActionResult(error=msg)

            try:
                await file_upload_el.set_input_files(path)
                msg = f'Successfully uploaded file to index {index}'
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            except Exception as e:
                msg = f'Failed to upload file to index {index}: {str(e)}'
                logger.info(msg)
                return ActionResult(error=msg)

    @time_execution_sync('--act')
    async def act(
        self,
        action: ActionModel,
        browser_context: Optional[BrowserContext] = None,
        page_extraction_llm: Optional[BaseChatModel] = None,
        sensitive_data: Optional[Dict[str, str]] = None,
        available_file_paths: Optional[list[str]] = None,
        context: Context | None = None,
    ) -> ActionResult:
        """Execute an action.

        Dispatches MCP-prefixed actions to their registered langchain tool,
        everything else through the registry, and normalizes the result to
        an ActionResult.
        """
        # FIX: removed a try/except wrapper that only did `raise e` -- it
        # added nothing but noise to tracebacks.
        for action_name, params in action.model_dump(exclude_unset=True).items():
            if params is not None:
                if action_name.startswith("mcp"):
                    # This is an MCP tool; invoke the registered tool object.
                    logger.debug(f"Invoke MCP tool: {action_name}")
                    registered = self.registry.registry.actions.get(action_name)
                    # FIX: guard against a missing registration instead of an
                    # opaque AttributeError on None.
                    if registered is None:
                        raise ValueError(f'MCP action {action_name} is not registered')
                    result = await registered.function.ainvoke(params)
                else:
                    result = await self.registry.execute_action(
                        action_name,
                        params,
                        browser=browser_context,
                        page_extraction_llm=page_extraction_llm,
                        sensitive_data=sensitive_data,
                        available_file_paths=available_file_paths,
                        context=context,
                    )

                if isinstance(result, str):
                    return ActionResult(extracted_content=result)
                elif isinstance(result, ActionResult):
                    return result
                elif result is None:
                    return ActionResult()
                else:
                    raise ValueError(f'Invalid action result type: {type(result)} of {result}')
        return ActionResult()

    async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
        """Creates the MCP client from the given config and registers its tools."""
        self.mcp_server_config = mcp_server_config
        if self.mcp_server_config:
            self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
            self.register_mcp_tools()

    def register_mcp_tools(self):
        """
        Register the MCP tools used by this controller.
        """
        if self.mcp_client:
            for server_name in self.mcp_client.server_name_to_tools:
                for tool in self.mcp_client.server_name_to_tools[server_name]:
                    # Prefix with "mcp.<server>." so act() can route by name.
                    tool_name = f"mcp.{server_name}.{tool.name}"
                    self.registry.registry.actions[tool_name] = RegisteredAction(
                        name=tool_name,
                        description=tool.description,
                        function=tool,
                        param_model=create_tool_param_model(tool),
                    )
                    logger.info(f"Add mcp tool: {tool_name}")
                logger.debug(
                    f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} mcp tools for {server_name}")
        else:
            logger.warning("MCP client not started.")

    async def close_mcp_client(self):
        """Closes the MCP client, if one was created."""
        if self.mcp_client:
            await self.mcp_client.__aexit__(None, None, None)
src/utils/__init__.py ADDED
File without changes
src/utils/config.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Mapping from internal provider keys to the human-readable names shown in the UI.
PROVIDER_DISPLAY_NAMES = {
    "openai": "OpenAI"
}

# Predefined model names for OpenAI provider.
# These populate the model dropdown; the UI also allows typing a custom name.
model_names = {
    "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"]
}
src/utils/llm_provider.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_core.globals import get_llm_cache
4
+ from langchain_core.language_models.base import (
5
+ BaseLanguageModel,
6
+ LangSmithParams,
7
+ LanguageModelInput,
8
+ )
9
+ import os
10
+ from langchain_core.load import dumpd, dumps
11
+ from langchain_core.messages import (
12
+ AIMessage,
13
+ SystemMessage,
14
+ AnyMessage,
15
+ BaseMessage,
16
+ BaseMessageChunk,
17
+ HumanMessage,
18
+ convert_to_messages,
19
+ message_chunk_to_message,
20
+ )
21
+ from langchain_core.outputs import (
22
+ ChatGeneration,
23
+ ChatGenerationChunk,
24
+ ChatResult,
25
+ LLMResult,
26
+ RunInfo,
27
+ )
28
+ from langchain_core.output_parsers.base import OutputParserLike
29
+ from langchain_core.runnables import Runnable, RunnableConfig
30
+ from langchain_core.tools import BaseTool
31
+
32
+ from typing import (
33
+ TYPE_CHECKING,
34
+ Any,
35
+ Callable,
36
+ Literal,
37
+ Optional,
38
+ Union,
39
+ cast, List,
40
+ )
41
+ from pydantic import SecretStr
42
+
43
+ from src.utils import config
44
+
45
+
46
def get_llm_model(provider: str, **kwargs):
    """
    Get LLM model.

    Only OpenAI is supported; ``provider`` is accepted for interface
    compatibility but is not branched on.

    :param provider: LLM provider (only 'openai' is supported)
    :param kwargs: Optional overrides: ``api_key``, ``base_url``,
        ``model_name`` (default "gpt-4o"), ``temperature`` (default 0.0).
    :raises ValueError: If no API key is supplied and ``OPENAI_API_KEY``
        is not set in the environment.
    :return: A configured ``ChatOpenAI`` chat model.
    """
    # Always use OpenAI
    env_var = "OPENAI_API_KEY"
    api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
    if not api_key:
        error_msg = f"💥 OpenAI API key not found! 🔑 Please set the `{env_var}` environment variable or provide it in the UI."
        raise ValueError(error_msg)
    # Wrap the key so it is not accidentally printed or logged in plain text.
    if isinstance(api_key, str):
        api_key = SecretStr(api_key)

    # Configure OpenAI endpoint (allows pointing at an OpenAI-compatible proxy).
    base_url = kwargs.get("base_url", "") or os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")

    return ChatOpenAI(
        model=kwargs.get("model_name", "gpt-4o"),
        temperature=kwargs.get("temperature", 0.0),
        base_url=base_url,
        api_key=api_key,
    )
src/utils/mcp_client.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import inspect
2
+ import logging
3
+ import uuid
4
+ from datetime import date, datetime, time
5
+ from enum import Enum
6
+ from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints
7
+
8
+ from browser_use.controller.registry.views import ActionModel
9
+ from langchain.tools import BaseTool
10
+ from langchain_mcp_adapters.client import MultiServerMCPClient
11
+ from pydantic import BaseModel, Field, create_model
12
+ from pydantic.v1 import BaseModel, Field
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
async def setup_mcp_client_and_tools(mcp_server_config: Dict[str, Any]) -> Optional[MultiServerMCPClient]:
    """
    Initializes the MultiServerMCPClient and connects it to the configured servers.

    Accepts either the raw server mapping or a config wrapped in a top-level
    "mcpServers" key (the format produced by common MCP config files).

    Returns:
        MultiServerMCPClient | None: The initialized and started client
        instance, or None when no config is given or the connection fails.
    """

    logger.info("Initializing MultiServerMCPClient...")

    if not mcp_server_config:
        logger.error("No MCP server configuration provided.")
        return None

    try:
        # Unwrap the conventional {"mcpServers": {...}} envelope if present.
        if "mcpServers" in mcp_server_config:
            mcp_server_config = mcp_server_config["mcpServers"]
        client = MultiServerMCPClient(mcp_server_config)
        # NOTE(review): the client is entered here but never exited in this
        # function — callers own the session and are expected to close it
        # (see CustomController.close_mcp_client).
        await client.__aenter__()
        return client

    except Exception as e:
        logger.error(f"Failed to setup MCP client or fetch tools: {e}", exc_info=True)
        return None
44
+
45
+
46
def create_tool_param_model(tool: BaseTool) -> Type[BaseModel]:
    """Creates a Pydantic model from a LangChain tool's schema.

    The generated model subclasses ``ActionModel`` so MCP tools can be
    dispatched through the browser-use action registry.

    :param tool: The LangChain tool to derive a parameter model for.
    :return: A dynamically created pydantic model type.
    """
    # This module imports `Field` from both `pydantic` and `pydantic.v1`; the
    # v1 import shadows the v2 one, but `create_model` below is pydantic v2
    # and rejects v1 FieldInfo objects. Re-import the v2 Field locally so the
    # field definitions and the model creation come from the same version.
    from pydantic import Field

    # Get tool schema information
    json_schema = tool.args_schema  # assumed to be a JSON-schema dict — TODO confirm for all tool sources
    tool_name = tool.name

    # If the tool already has a schema defined, convert it to a new param_model
    if json_schema is not None:

        # Create new parameter model
        params = {}

        # Process properties if they exist
        if 'properties' in json_schema:
            # Find required fields
            required_fields: Set[str] = set(json_schema.get('required', []))

            for prop_name, prop_details in json_schema['properties'].items():
                field_type = resolve_type(prop_details, f"{tool_name}_{prop_name}")

                # Check if parameter is required
                is_required = prop_name in required_fields

                # Get default value and description
                default_value = prop_details.get('default', ... if is_required else None)
                description = prop_details.get('description', '')

                # Add field constraints
                field_kwargs = {'default': default_value}
                if description:
                    field_kwargs['description'] = description

                # Map JSON-schema constraints onto pydantic field constraints.
                if 'minimum' in prop_details:
                    field_kwargs['ge'] = prop_details['minimum']
                if 'maximum' in prop_details:
                    field_kwargs['le'] = prop_details['maximum']
                if 'minLength' in prop_details:
                    field_kwargs['min_length'] = prop_details['minLength']
                if 'maxLength' in prop_details:
                    field_kwargs['max_length'] = prop_details['maxLength']
                if 'pattern' in prop_details:
                    field_kwargs['pattern'] = prop_details['pattern']

                # Add to parameters dictionary
                params[prop_name] = (field_type, Field(**field_kwargs))

        return create_model(
            f'{tool_name}_parameters',
            __base__=ActionModel,
            **params,  # type: ignore
        )

    # If no schema is defined, extract parameters from the _run method
    run_method = tool._run
    sig = inspect.signature(run_method)

    # Get type hints for better type information
    try:
        type_hints = get_type_hints(run_method)
    except Exception:
        type_hints = {}

    params = {}
    for name, param in sig.parameters.items():
        # Skip 'self' parameter and any other parameters you want to exclude
        if name == 'self':
            continue

        # Get annotation from type hints if available, otherwise from signature
        annotation = type_hints.get(name, param.annotation)
        if annotation == inspect.Parameter.empty:
            annotation = Any

        # Use default value if available, otherwise make it required
        if param.default != param.empty:
            params[name] = (annotation, param.default)
        else:
            params[name] = (annotation, ...)

    return create_model(
        f'{tool_name}_parameters',
        __base__=ActionModel,
        **params,  # type: ignore
    )
132
+
133
+
134
def resolve_type(prop_details: Dict[str, Any], prefix: str = "") -> Any:
    """Recursively resolves a JSON-schema property to a Python/Pydantic type.

    :param prop_details: The JSON-schema fragment describing one property.
    :param prefix: Name prefix used for dynamically created enums and models.
    :return: A Python type (possibly a dynamically created Enum or model).
    """

    # Handle reference types
    if '$ref' in prop_details:
        # Reference resolution is not implemented; fall back to Any.
        return Any

    # Basic type mapping
    type_mapping = {
        'string': str,
        'integer': int,
        'number': float,
        'boolean': bool,
        'array': List,
        'object': Dict,
        'null': type(None),
    }

    # Handle formatted strings
    if prop_details.get('type') == 'string' and 'format' in prop_details:
        format_mapping = {
            'date-time': datetime,
            'date': date,
            'time': time,
            'email': str,
            'uri': str,
            'url': str,
            'uuid': uuid.UUID,
            'binary': bytes,
        }
        return format_mapping.get(prop_details['format'], str)

    # Handle enum types
    if 'enum' in prop_details:
        enum_values = prop_details['enum']
        # Create dynamic enum class with safe names
        enum_dict = {}
        for i, v in enumerate(enum_values):
            # Ensure enum names are valid Python identifiers
            if isinstance(v, str):
                key = v.upper().replace(' ', '_').replace('-', '_')
                if not key.isidentifier():
                    key = f"VALUE_{i}"
            else:
                key = f"VALUE_{i}"
            enum_dict[key] = v

        # Only create enum if we have values
        if enum_dict:
            return Enum(f"{prefix}_Enum", enum_dict)
        return str  # Fallback

    # Handle array types
    if prop_details.get('type') == 'array' and 'items' in prop_details:
        item_type = resolve_type(prop_details['items'], f"{prefix}_item")
        return List[item_type]  # type: ignore

    # Handle object types with properties
    if prop_details.get('type') == 'object' and 'properties' in prop_details:
        # Use the pydantic-v2 `Field` to match the v2 `create_model` call
        # below; the module-level `Field` is shadowed by a pydantic.v1 import
        # and mixing v1 FieldInfo into v2 create_model breaks.
        from pydantic import Field
        nested_params = {}
        for nested_name, nested_details in prop_details['properties'].items():
            nested_type = resolve_type(nested_details, f"{prefix}_{nested_name}")
            # Get required field info
            required_fields = prop_details.get('required', [])
            is_required = nested_name in required_fields
            default_value = nested_details.get('default', ... if is_required else None)
            description = nested_details.get('description', '')

            field_kwargs = {'default': default_value}
            if description:
                field_kwargs['description'] = description

            nested_params[nested_name] = (nested_type, Field(**field_kwargs))

        # Create nested model
        nested_model = create_model(f"{prefix}_Model", **nested_params)
        return nested_model

    # Handle union types (oneOf, anyOf)
    if 'oneOf' in prop_details or 'anyOf' in prop_details:
        union_schema = prop_details.get('oneOf') or prop_details.get('anyOf')
        union_types = []
        for i, t in enumerate(union_schema):
            union_types.append(resolve_type(t, f"{prefix}_{i}"))

        if union_types:
            return Union.__getitem__(tuple(union_types))  # type: ignore
        return Any

    # Handle allOf (intersection types)
    if 'allOf' in prop_details:
        nested_params = {}
        for i, schema_part in enumerate(prop_details['allOf']):
            if 'properties' in schema_part:
                for nested_name, nested_details in schema_part['properties'].items():
                    nested_type = resolve_type(nested_details, f"{prefix}_allOf_{i}_{nested_name}")
                    # Check if required
                    required_fields = schema_part.get('required', [])
                    is_required = nested_name in required_fields
                    nested_params[nested_name] = (nested_type, ... if is_required else None)

        # Create composite model
        if nested_params:
            composite_model = create_model(f"{prefix}_CompositeModel", **nested_params)
            return composite_model
        return Dict

    # Default to basic types
    schema_type = prop_details.get('type', 'string')
    if isinstance(schema_type, list):
        # Handle multiple types (e.g., ["string", "null"])
        non_null_types = [t for t in schema_type if t != 'null']
        if non_null_types:
            primary_type = type_mapping.get(non_null_types[0], Any)
            if 'null' in schema_type:
                return Optional[primary_type]  # type: ignore
            return primary_type
        return Any

    return type_mapping.get(schema_type, Any)
src/utils/utils.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import time
4
+ from pathlib import Path
5
+ from typing import Dict, Optional
6
+ import requests
7
+ import json
8
+ import gradio as gr
9
+ import uuid
10
+
11
+
12
def encode_image(img_path):
    """Return the base64-encoded contents of *img_path*, or None if no path given."""
    if not img_path:
        return None
    raw = Path(img_path).read_bytes()
    return base64.b64encode(raw).decode("utf-8")
18
+
19
+
20
def get_latest_files(directory: str, file_types: Optional[list] = None) -> Dict[str, Optional[str]]:
    """Get the latest recording and trace files.

    :param directory: Directory searched recursively; created if missing.
    :param file_types: Extensions to look for (default ['.webm', '.zip']).
        A fresh list is built each call — the original signature used a
        mutable default argument, which is shared across calls.
    :return: Mapping of extension -> newest matching path (or None).
    """
    if file_types is None:
        file_types = ['.webm', '.zip']
    latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types}

    if not os.path.exists(directory):
        os.makedirs(directory, exist_ok=True)
        return latest_files

    for file_type in file_types:
        try:
            matches = list(Path(directory).rglob(f"*{file_type}"))
            if matches:
                latest = max(matches, key=lambda p: p.stat().st_mtime)
                # Only return files that are complete (not being written)
                if time.time() - latest.stat().st_mtime > 1.0:
                    latest_files[file_type] = str(latest)
        except Exception as e:
            print(f"Error getting latest {file_type} file: {e}")

    return latest_files
src/webui/__init__.py ADDED
File without changes
src/webui/components/__init__.py ADDED
File without changes
src/webui/components/agent_settings_tab.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ import gradio as gr
5
+ from gradio.components import Component
6
+ from typing import Any, Dict, Optional
7
+ from src.webui.webui_manager import WebuiManager
8
+ from src.utils import config
9
+ import logging
10
+ from functools import partial
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
    """
    Update the MCP server.

    Tears down any running controller first (its MCP session was built from
    the old config), then returns the pretty-printed config plus a visibility
    update for the config textbox.
    """
    controller = getattr(webui_manager, "bu_controller", None)
    if controller:
        logger.warning("⚠️ Close controller because mcp file has changed!")
        await controller.close_mcp_client()
        webui_manager.bu_controller = None

    valid = bool(mcp_file) and os.path.exists(mcp_file) and mcp_file.endswith('.json')
    if not valid:
        logger.warning(f"{mcp_file} is not a valid MCP file.")
        return None, gr.update(visible=False)

    with open(mcp_file, 'r') as f:
        server_config = json.load(f)

    return json.dumps(server_config, indent=2), gr.update(visible=True)
32
+
33
+
34
def create_agent_settings_tab(webui_manager: WebuiManager):
    """
    Creates an agent settings tab.

    Builds the Gradio widgets for system prompts, MCP config upload, LLM
    selection (OpenAI only) and run limits, registers them on the manager
    under the "agent_settings" namespace, and wires the MCP-file change event.
    """
    # NOTE(review): `input_components` is collected but not used in this
    # function — presumably kept for parity with other tab builders.
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Column():
            override_system_prompt = gr.Textbox(label="Override system prompt", lines=4, interactive=True)
            extend_system_prompt = gr.Textbox(label="Extend system prompt", lines=4, interactive=True)

    with gr.Group():
        # Uploading a JSON file populates (and reveals) the config textbox below.
        mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
        mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)

    with gr.Group():
        with gr.Row():
            # Fixed provider as OpenAI
            llm_provider = gr.Dropdown(
                choices=["openai"],
                label="LLM Provider",
                value="openai",
                info="OpenAI is the only supported LLM provider",
                interactive=False
            )
            llm_model_name = gr.Dropdown(
                label="LLM Model Name",
                choices=config.model_names['openai'],
                value="gpt-4o",
                interactive=True,
                allow_custom_value=True,
                info="Select a model in the dropdown options or directly type a custom model name"
            )
        with gr.Row():
            llm_temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="LLM Temperature",
                info="Controls randomness in model outputs",
                interactive=True
            )

            use_vision = gr.Checkbox(
                label="Use Vision",
                value=True,
                info="Enable Vision(Input highlighted screenshot into LLM)",
                interactive=True
            )

        with gr.Row():
            llm_base_url = gr.Textbox(
                label="Base URL",
                value="",
                info="API endpoint URL (if required)"
            )
            llm_api_key = gr.Textbox(
                label="API Key",
                type="password",
                value="",
                info="Your API key (leave blank to use .env)"
            )

    with gr.Row():
        max_steps = gr.Slider(
            minimum=1,
            maximum=1000,
            value=100,
            step=1,
            label="Max Run Steps",
            info="Maximum number of steps the agent will take",
            interactive=True
        )
        max_actions = gr.Slider(
            minimum=1,
            maximum=100,
            value=10,
            step=1,
            label="Max Number of Actions",
            info="Maximum number of actions the agent will take per step",
            interactive=True
        )

    with gr.Row():
        max_input_tokens = gr.Number(
            label="Max Input Tokens",
            value=128000,
            precision=0,
            interactive=True
        )
        tool_calling_method = gr.Dropdown(
            label="Tool Calling Method",
            value="auto",
            interactive=True,
            allow_custom_value=True,
            choices=['function_calling', 'json_mode', 'raw', 'auto', 'tools', "None"],
            visible=True
        )

    tab_components.update(dict(
        override_system_prompt=override_system_prompt,
        extend_system_prompt=extend_system_prompt,
        llm_provider=llm_provider,
        llm_model_name=llm_model_name,
        llm_temperature=llm_temperature,
        use_vision=use_vision,
        llm_base_url=llm_base_url,
        llm_api_key=llm_api_key,
        max_steps=max_steps,
        max_actions=max_actions,
        max_input_tokens=max_input_tokens,
        tool_calling_method=tool_calling_method,
        mcp_json_file=mcp_json_file,
        mcp_server_config=mcp_server_config,
    ))
    webui_manager.add_components("agent_settings", tab_components)

    async def update_wrapper(mcp_file):
        """Forward MCP file changes to update_mcp_server and yield its updates."""
        update_dict = await update_mcp_server(mcp_file, webui_manager)
        yield update_dict

    # NOTE(review): the same component appears twice in `outputs` — the first
    # slot receives the config text, the second its visibility update; verify
    # this duplicate-output wiring behaves as intended on the Gradio version
    # in use.
    mcp_json_file.change(
        update_wrapper,
        inputs=[mcp_json_file],
        outputs=[mcp_server_config, mcp_server_config]
    )
src/webui/components/browser_settings_tab.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ import logging
5
+ from gradio.components import Component
6
+
7
+ from src.webui.webui_manager import WebuiManager
8
+ from src.utils import config
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
async def close_browser(webui_manager: WebuiManager):
    """
    Close browser.

    Cancels a still-running agent task, then closes the browser context and
    the browser itself so changed settings take effect on the next run.
    """
    # A running agent task may still be driving the browser — stop it first.
    task = webui_manager.bu_current_task
    if task and not task.done():
        task.cancel()
        webui_manager.bu_current_task = None

    if webui_manager.bu_browser_context:
        logger.info("⚠️ Closing browser context when changing browser config.")
        await webui_manager.bu_browser_context.close()
        webui_manager.bu_browser_context = None

    if webui_manager.bu_browser:
        logger.info("⚠️ Closing browser when changing browser config.")
        await webui_manager.bu_browser.close()
        webui_manager.bu_browser = None
+ webui_manager.bu_browser = None
29
+
30
def create_browser_settings_tab(webui_manager: WebuiManager):
    """
    Creates a browser settings tab.

    Builds the Gradio widgets for browser binary/profile selection, window
    size, remote-debugging endpoints and save paths, registers them under the
    "browser_settings" namespace, and closes the live browser whenever a
    setting that requires a restart changes.
    """
    # NOTE(review): `input_components` is collected but not used in this
    # function — presumably kept for parity with other tab builders.
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Row():
            browser_binary_path = gr.Textbox(
                label="Browser Binary Path",
                lines=1,
                interactive=True,
                placeholder="e.g. '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome'"
            )
            browser_user_data_dir = gr.Textbox(
                label="Browser User Data Dir",
                lines=1,
                interactive=True,
                placeholder="Leave it empty if you use your default user data",
            )
    with gr.Group():
        with gr.Row():
            use_own_browser = gr.Checkbox(
                label="Use Own Browser",
                value=False,
                info="Use your existing browser instance",
                interactive=True
            )
            keep_browser_open = gr.Checkbox(
                label="Keep Browser Open",
                # os.getenv returns a *string* when the variable is set, and any
                # non-empty string (including "false") is truthy — parse it
                # explicitly. Default stays True when the variable is unset.
                value=str(os.getenv("KEEP_BROWSER_OPEN", "true")).strip().lower() in ("true", "1", "yes"),
                info="Keep Browser Open between Tasks",
                interactive=True
            )
            headless = gr.Checkbox(
                label="Headless Mode",
                # Same string-parsing fix as above for KEEP_BROWSER_HEADLESS.
                value=str(os.getenv("KEEP_BROWSER_HEADLESS", "true")).strip().lower() in ("true", "1", "yes"),
                info="Run browser without GUI",
                interactive=True
            )
            disable_security = gr.Checkbox(
                label="Disable Security",
                value=False,
                info="Disable browser security",
                interactive=True
            )

    with gr.Group():
        with gr.Row():
            window_w = gr.Number(
                label="Window Width",
                value=1280,
                info="Browser window width",
                interactive=True
            )
            window_h = gr.Number(
                label="Window Height",
                value=1100,
                info="Browser window height",
                interactive=True
            )
    with gr.Group():
        with gr.Row():
            cdp_url = gr.Textbox(
                label="CDP URL",
                value=os.getenv("BROWSER_CDP", None),
                info="CDP URL for browser remote debugging",
                interactive=True,
            )
            wss_url = gr.Textbox(
                label="WSS URL",
                info="WSS URL for browser remote debugging",
                interactive=True,
            )
    with gr.Group():
        with gr.Row():
            save_recording_path = gr.Textbox(
                label="Recording Path",
                placeholder="e.g. ./tmp/record_videos",
                info="Path to save browser recordings",
                interactive=True,
            )

            save_trace_path = gr.Textbox(
                label="Trace Path",
                placeholder="e.g. ./tmp/traces",
                info="Path to save Agent traces",
                interactive=True,
            )

        with gr.Row():
            save_agent_history_path = gr.Textbox(
                label="Agent History Save Path",
                value="./tmp/agent_history",
                info="Specify the directory where agent history should be saved.",
                interactive=True,
            )
            save_download_path = gr.Textbox(
                label="Save Directory for browser downloads",
                value="./tmp/downloads",
                info="Specify the directory where downloaded files should be saved.",
                interactive=True,
            )
    tab_components.update(
        dict(
            browser_binary_path=browser_binary_path,
            browser_user_data_dir=browser_user_data_dir,
            use_own_browser=use_own_browser,
            keep_browser_open=keep_browser_open,
            headless=headless,
            disable_security=disable_security,
            save_recording_path=save_recording_path,
            save_trace_path=save_trace_path,
            save_agent_history_path=save_agent_history_path,
            save_download_path=save_download_path,
            cdp_url=cdp_url,
            wss_url=wss_url,
            window_h=window_h,
            window_w=window_w,
        )
    )
    webui_manager.add_components("browser_settings", tab_components)

    async def close_wrapper():
        """Close the live browser when a restart-requiring setting changes."""
        await close_browser(webui_manager)

    # These settings only take effect on a fresh browser instance.
    headless.change(close_wrapper)
    keep_browser_open.change(close_wrapper)
    disable_security.change(close_wrapper)
    use_own_browser.change(close_wrapper)
src/webui/components/browser_use_agent_tab.py ADDED
@@ -0,0 +1,1153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import uuid
6
+ from typing import Any, AsyncGenerator, Dict, Optional
7
+
8
+ import gradio as gr
9
+
10
+ # from browser_use.agent.service import Agent
11
+ from browser_use.agent.views import (
12
+ AgentHistoryList,
13
+ AgentOutput,
14
+ )
15
+ from browser_use.browser.browser import BrowserConfig
16
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
17
+ from browser_use.browser.views import BrowserState
18
+ from gradio.components import Component
19
+ from langchain_core.language_models.chat_models import BaseChatModel
20
+
21
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
22
+ from src.browser.custom_browser import CustomBrowser
23
+ from src.controller.custom_controller import CustomController
24
+ from src.utils import llm_provider
25
+ from src.webui.webui_manager import WebuiManager
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ # Custom function to format task metrics as markdown
31
def format_task_metrics(metrics):
    """Render a task-metrics dict as a Markdown summary for the chat panel.

    Expects keys: 'duration', 'tokens', 'result', 'status', 'errors'.
    Returns a placeholder message when *metrics* is empty or None.
    """
    if not metrics:
        return "No task metrics available yet. Run a task to see metrics here."

    parts = [
        "#### Task Execution Summary\n\n",
        f"⏱️ **Duration:** {metrics['duration']} seconds\n\n",
        f"🔢 **Tokens Used:** {metrics['tokens']}\n\n",
    ]

    if metrics['result']:
        parts.append(f"📋 **Final Result:**\n\n```\n{metrics['result']}\n```\n\n")

    parts.append(f"✅ **Status:** {metrics['status']}\n\n")

    # Only show the error section when at least one truthy error is present.
    if metrics['errors'] and any(metrics['errors']):
        parts.append(f"❌ **Errors:**\n\n```\n{metrics['errors']}\n```\n\n")

    return "".join(parts)
+ return md
48
+
49
+
50
+ # --- Helper Functions --- (Defined at module level)
51
+
52
+
53
def get_llm(provider: str, model_name: str, temperature: float = 0.0, base_url: str = "", api_key: str = "", num_ctx: Optional[int] = None):
    """
    Get LLM based on provider name, model name, and other parameters.
    Only OpenAI is supported now.

    :param provider: Ignored — OpenAI is always used; kept for signature
        compatibility with callers.
    :param model_name: OpenAI model identifier; empty/None yields None.
    :param temperature: Sampling temperature (None is coerced to 0.0).
    :param base_url: Optional OpenAI-compatible endpoint override.
    :param api_key: Optional API key (falls back to environment via the SDK).
    :param num_ctx: Unused; kept for signature compatibility.
    :return: A configured ChatOpenAI instance, or None on failure.
    """
    if not model_name:
        logger.info("Model Name not specified, LLM will be None.")
        return None

    try:
        # Always use OpenAI
        from langchain_openai import ChatOpenAI
        from pydantic import SecretStr

        # Normalize an explicit None temperature to the default.
        if temperature is None:
            temperature = 0.0

        # Convert the API key to SecretStr if provided
        api_key_value = SecretStr(api_key) if api_key else None

        # Create OpenAI model
        llm = ChatOpenAI(
            model=model_name,
            temperature=temperature,
            base_url=base_url if base_url else None,
            api_key=api_key_value,
        )
        logger.info(f"Created OpenAI LLM: {model_name}")
        return llm
    except Exception as e:
        logger.error(f"Failed to create OpenAI LLM: {model_name} - {e}")
        return None
91
+
92
+
93
def _get_config_value(
    webui_manager: WebuiManager,
    comp_dict: Dict[gr.components.Component, Any],
    comp_id_suffix: str,
    default: Any = None,
) -> Any:
    """Safely get value from component dictionary using its ID suffix relative to the tab."""
    # Component IDs follow the "tab_name.component_name" convention. Look in
    # the agent tab first, then fall back to the settings tabs.
    for tab_name in ("browser_use_agent", "agent_settings", "browser_settings"):
        try:
            comp = webui_manager.get_component_by_id(f"{tab_name}.{comp_id_suffix}")
        except KeyError:
            continue
        return comp_dict.get(comp, default)

    logger.warning(
        f"Component with suffix '{comp_id_suffix}' not found in manager for value lookup."
    )
    return default
120
+
121
+
122
def _format_agent_output(model_output: AgentOutput) -> str:
    """Formats AgentOutput for display in the chatbot using JSON.

    Returns an empty string when there is no output, otherwise a
    <pre><code> block with the pretty-printed state/action dump (or an
    error block if the output cannot be serialized).
    """
    if not model_output:
        return ""

    try:
        # Both actions and current_state are expected to be Pydantic models.
        action_dump = [
            action.model_dump(exclude_none=True) for action in model_output.action
        ]
        state_dump = model_output.current_state.model_dump(exclude_none=True)
        json_string = json.dumps(
            {"current_state": state_dump, "action": action_dump},
            indent=4,
            ensure_ascii=False,
        )
        # Wrap in <pre><code> for proper display in HTML.
        content = f"<pre><code class='language-json'>{json_string}</code></pre>"
    except AttributeError as ae:
        logger.error(
            f"AttributeError during model dump: {ae}. Check if 'action' or 'current_state' or their items support 'model_dump'."
        )
        content = f"<pre><code>Error: Could not format agent output (AttributeError: {ae}).\nRaw output: {str(model_output)}</code></pre>"
    except Exception as e:
        logger.error(f"Error formatting agent output: {e}", exc_info=True)
        # Fallback to simple string representation on error.
        content = f"<pre><code>Error formatting agent output.\nRaw output:\n{str(model_output)}</code></pre>"

    return content.strip()
153
+
154
+
155
+ # --- Updated Callback Implementation ---
156
+
157
+
158
async def _handle_new_step(
    webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
):
    """Per-step agent callback: append the step's screenshot and JSON output to chat.

    Mutates ``webui_manager.bu_chat_history`` by appending one assistant
    message containing a step header, an inline base64 screenshot (when
    available) and the formatted agent output.

    Args:
        webui_manager: Shared UI state holder; its ``bu_chat_history`` is appended to.
        state: Browser state for this step; only its ``screenshot`` attribute
            (a base64 string, presumably JPEG — TODO confirm encoding) is read.
        output: Structured agent output, rendered via ``_format_agent_output``.
        step_num: Step counter supplied by the agent; decremented by one below.
    """
    # Defensive: make sure the chat history list exists before appending.
    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error(
            "Attribute 'bu_chat_history' not found in webui_manager! Cannot add chat message."
        )
        # Initialize in place so the rest of this callback can proceed.
        webui_manager.bu_chat_history = []  # NOTE(review): consider failing fast instead
    # NOTE(review): the callback appears to receive a 1-ahead step counter,
    # hence the decrement — confirm against BrowserUseAgent's callback contract.
    step_num -= 1
    logger.info(f"Step {step_num} completed.")

    # --- Screenshot Handling ---
    screenshot_html = ""
    # getattr: state may lack a screenshot attribute entirely.
    screenshot_data = getattr(state, "screenshot", None)
    if screenshot_data:
        try:
            # Cheap sanity check that this looks like base64 image data.
            if (
                isinstance(screenshot_data, str) and len(screenshot_data) > 100
            ):  # arbitrary length threshold
                img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
                screenshot_html = (
                    img_tag + "<br/>"
                )  # line break after the inline-block image
            else:
                logger.warning(
                    f"Screenshot for step {step_num} seems invalid (type: {type(screenshot_data)}, len: {len(screenshot_data) if isinstance(screenshot_data, str) else 'N/A'})."
                )
                screenshot_html = "**[Invalid screenshot data]**<br/>"

        except Exception as e:
            logger.error(
                f"Error processing or formatting screenshot for step {step_num}: {e}",
                exc_info=True,
            )
            screenshot_html = "**[Error displaying screenshot]**<br/>"
    else:
        logger.debug(f"No screenshot available for step {step_num}.")

    # --- Format Agent Output ---
    formatted_output = _format_agent_output(output)

    # --- Combine and Append to Chat ---
    step_header = f"--- **Step {step_num}** ---"
    # Header first, then image (with its own line break), then the JSON block.
    final_content = step_header + "<br/>" + screenshot_html + formatted_output

    chat_message = {
        "role": "assistant",
        "content": final_content.strip(),  # trim leading/trailing whitespace
    }

    # Append to the chat history list that run_agent_task polls for updates.
    webui_manager.bu_chat_history.append(chat_message)

    # Yield control so the UI polling loop can pick up the new message promptly.
    await asyncio.sleep(0.05)
222
+
223
+
224
+ def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
225
+ """Callback when the agent finishes the task (success or failure)."""
226
+ logger.info(
227
+ f"Agent task finished. Duration: {history.total_duration_seconds():.2f}s, Tokens: {history.total_input_tokens()}"
228
+ )
229
+ final_summary = "**Task Completed**\n"
230
+ final_summary += f"- Duration: {history.total_duration_seconds():.2f} seconds\n"
231
+ final_summary += f"- Total Input Tokens: {history.total_input_tokens()}\n" # Or total tokens if available
232
+
233
+ final_result = history.final_result()
234
+ if final_result:
235
+ final_summary += f"- Final Result: {final_result}\n"
236
+
237
+ errors = history.errors()
238
+ if errors and any(errors):
239
+ final_summary += f"- **Errors:**\n```\n{errors}\n```\n"
240
+ else:
241
+ final_summary += "- Status: Success\n"
242
+
243
+ # Store task metrics separately for the metrics display
244
+ webui_manager.bu_task_metrics = {
245
+ "duration": f"{history.total_duration_seconds():.2f}",
246
+ "tokens": f"{history.total_input_tokens()}",
247
+ "result": final_result if final_result else "",
248
+ "status": "Error" if (errors and any(errors)) else "Success",
249
+ "errors": errors if (errors and any(errors)) else None
250
+ }
251
+
252
+ webui_manager.bu_chat_history.append(
253
+ {"role": "assistant", "content": final_summary}
254
+ )
255
+
256
+
257
+ async def _ask_assistant_callback(
258
+ webui_manager: WebuiManager, query: str, browser_context: BrowserContext
259
+ ) -> Dict[str, Any]:
260
+ """Callback triggered by the agent's ask_for_assistant action."""
261
+ logger.info("Agent requires assistance. Waiting for user input.")
262
+
263
+ if not hasattr(webui_manager, "_chat_history"):
264
+ logger.error("Chat history not found in webui_manager during ask_assistant!")
265
+ return {"response": "Internal Error: Cannot display help request."}
266
+
267
+ webui_manager.bu_chat_history.append(
268
+ {
269
+ "role": "assistant",
270
+ "content": f"**Need Help:** {query}\nPlease provide information or perform the required action in the browser, then type your response/confirmation below and click 'Submit Response'.",
271
+ }
272
+ )
273
+
274
+ # Use state stored in webui_manager
275
+ webui_manager.bu_response_event = asyncio.Event()
276
+ webui_manager.bu_user_help_response = None # Reset previous response
277
+
278
+ try:
279
+ logger.info("Waiting for user response event...")
280
+ await asyncio.wait_for(
281
+ webui_manager.bu_response_event.wait(), timeout=3600.0
282
+ ) # Long timeout
283
+ logger.info("User response event received.")
284
+ except asyncio.TimeoutError:
285
+ logger.warning("Timeout waiting for user assistance.")
286
+ webui_manager.bu_chat_history.append(
287
+ {
288
+ "role": "assistant",
289
+ "content": "**Timeout:** No response received. Trying to proceed.",
290
+ }
291
+ )
292
+ webui_manager.bu_response_event = None # Clear the event
293
+ return {"response": "Timeout: User did not respond."} # Inform the agent
294
+
295
+ response = webui_manager.bu_user_help_response
296
+ webui_manager.bu_chat_history.append(
297
+ {"role": "user", "content": response}
298
+ ) # Show user response in chat
299
+ webui_manager.bu_response_event = (
300
+ None # Clear the event for the next potential request
301
+ )
302
+ return {"response": response}
303
+
304
+
305
+ # --- Core Agent Execution Logic --- (Needs access to webui_manager)
306
+
307
+
308
async def run_agent_task(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
    """Run one browser-agent task and stream Gradio UI updates.

    Flow: read the task and all agent/browser settings from the UI, build the
    LLM(s), (re)create the browser and context, create or retarget the agent,
    then poll the running agent task — yielding chatbot, button and live
    browser-view updates — until it finishes, is stopped, or errors.

    Args:
        webui_manager: Shared state holder (browser, context, agent, chat history).
        components: Mapping of Gradio component -> current value for this event.

    Yields:
        Dicts of {component: gr.update(...)} consumed by Gradio to refresh the UI.
    """
    # Get user task from input field
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    task = components.get(user_input_comp, "").strip()
    if not task:
        logger.warning("Empty task submitted")
        task = "Help me browse the web"  # Default task if empty

    # Get all required UI components
    run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")
    stop_button_comp = webui_manager.get_component_by_id("browser_use_agent.stop_button")
    pause_resume_button_comp = webui_manager.get_component_by_id("browser_use_agent.pause_resume_button")
    clear_button_comp = webui_manager.get_component_by_id("browser_use_agent.clear_button")
    chatbot_comp = webui_manager.get_component_by_id("browser_use_agent.chatbot")
    history_file_comp = webui_manager.get_component_by_id("browser_use_agent.agent_history_file")
    gif_comp = webui_manager.get_component_by_id("browser_use_agent.recording_gif")
    browser_view_comp = webui_manager.get_component_by_id("browser_use_agent.browser_view")

    # Get settings from UI components
    # NOTE(review): get_llm_model is imported but the calls below use `get_llm`
    # — confirm `get_llm` is defined/imported at module level, otherwise this
    # raises NameError at runtime.
    from src.utils.llm_provider import get_llm_model

    # Helper function to check if OpenAI model supports vision
    # NOTE(review): currently unused in this function.
    def is_openai_vision_model(model_name: str) -> bool:
        """Check if an OpenAI model supports vision capabilities"""
        vision_models = ["gpt-4-vision", "gpt-4o", "gpt-4-turbo"]
        return any(vision_model in model_name.lower() for vision_model in vision_models)

    def get_setting(name, default=None):
        # Look up an agent_settings.* (or fallback-prefixed) component value.
        return _get_config_value(webui_manager, components, name, default)

    override_system_prompt = get_setting("override_system_prompt", "")
    extend_system_prompt = get_setting("extend_system_prompt", "")
    llm_provider_name = get_setting("llm_provider", "openai")
    llm_model_name = get_setting("llm_model_name", "gpt-4o")
    llm_temperature = get_setting("llm_temperature", 0.6)
    use_vision = get_setting("use_vision", True)
    ollama_num_ctx = get_setting("ollama_num_ctx", 16000)  # NOTE(review): unused (provider forced to OpenAI below)
    llm_base_url = get_setting("llm_base_url", "")
    llm_api_key = get_setting("llm_api_key", "")
    max_steps = get_setting("max_steps", 100)
    max_actions = get_setting("max_actions", 10)
    max_input_tokens = get_setting("max_input_tokens", 128000)
    tool_calling_method = get_setting("tool_calling_method", "auto")
    # The dropdown may return the literal string "None"; normalize to None.
    tool_calling_method = tool_calling_method if tool_calling_method != "None" else None
    mcp_server_config_comp = webui_manager.get_component_by_id(
        "agent_settings.mcp_server_config"
    )
    mcp_server_config_str = (
        components.get(mcp_server_config_comp) if mcp_server_config_comp else None
    )
    mcp_server_config = (
        json.loads(mcp_server_config_str) if mcp_server_config_str else None
    )

    # Planner LLM Settings (Optional) — only built when a planner provider is chosen.
    planner_llm_provider_name = get_setting("planner_llm_provider") or None
    planner_llm = None
    planner_use_vision = False
    if planner_llm_provider_name:
        planner_llm_model_name = get_setting("planner_llm_model_name")
        planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
        planner_ollama_num_ctx = get_setting("planner_ollama_num_ctx", 16000)  # NOTE(review): unused
        planner_llm_base_url = get_setting("planner_llm_base_url") or None
        planner_llm_api_key = get_setting("planner_llm_api_key") or None
        planner_use_vision = get_setting("planner_use_vision", False)

        # Always use OpenAI for planner too (overrides the UI selection).
        planner_llm_provider_name = "openai"
        planner_llm = get_llm(
            provider=planner_llm_provider_name,
            model_name=str(planner_llm_model_name) if planner_llm_model_name else "gpt-4o",
            temperature=float(planner_llm_temperature),
            base_url=str(planner_llm_base_url) if planner_llm_base_url else "",
            api_key=str(planner_llm_api_key) if planner_llm_api_key else "",
        )

    # --- Browser Settings ---
    def get_browser_setting(key, default=None):
        # Look up a browser_settings.* component value.
        comp = webui_manager.get_component_by_id(f"browser_settings.{key}")
        return components.get(comp, default) if comp else default

    browser_binary_path = get_browser_setting("browser_binary_path") or None
    browser_user_data_dir = get_browser_setting("browser_user_data_dir") or None
    use_own_browser = get_browser_setting(
        "use_own_browser", False
    )  # Logic handled by CDP/WSS presence
    keep_browser_open = get_browser_setting("keep_browser_open", False)
    headless = get_browser_setting("headless", True)
    disable_security = get_browser_setting("disable_security", False)
    window_w = int(get_browser_setting("window_w", 1920))
    window_h = int(get_browser_setting("window_h", 1080))
    cdp_url = get_browser_setting("cdp_url") or None
    wss_url = get_browser_setting("wss_url") or None
    save_recording_path = get_browser_setting("save_recording_path") or None
    save_trace_path = get_browser_setting("save_trace_path") or None
    save_agent_history_path = get_browser_setting(
        "save_agent_history_path", "./tmp/agent_history"
    )
    save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")

    # Live-view sizing: 80vw wide, height scaled to the browser aspect ratio.
    stream_vw = 80
    stream_vh = int(80 * window_h // window_w)

    # Make sure every output directory exists before the run starts.
    os.makedirs(save_agent_history_path, exist_ok=True)
    if save_recording_path:
        os.makedirs(save_recording_path, exist_ok=True)
    if save_trace_path:
        os.makedirs(save_trace_path, exist_ok=True)
    if save_download_path:
        os.makedirs(save_download_path, exist_ok=True)

    # --- 2. Initialize LLM ---
    # Always use OpenAI (overrides whatever provider the UI selected).
    llm_provider_name = "openai"

    main_llm = get_llm(
        provider=llm_provider_name,
        model_name=str(llm_model_name) if llm_model_name else "gpt-4o",
        temperature=float(llm_temperature),
        base_url=str(llm_base_url) if llm_base_url else "",
        api_key=str(llm_api_key) if llm_api_key else "",
    )

    # Pass the webui_manager instance to the callback when wrapping it
    async def ask_callback_wrapper(
        query: str, browser_context: BrowserContext
    ) -> Dict[str, Any]:
        return await _ask_assistant_callback(webui_manager, query, browser_context)

    # Controller is created lazily and reused across runs until cleared.
    if not webui_manager.bu_controller:
        webui_manager.bu_controller = CustomController(
            ask_assistant_callback=ask_callback_wrapper
        )
        await webui_manager.bu_controller.setup_mcp_client(mcp_server_config)

    # --- 4. Initialize Browser and Context ---
    should_close_browser_on_finish = not keep_browser_open

    try:
        # Close existing resources if not keeping open
        if not keep_browser_open:
            if webui_manager.bu_browser_context:
                logger.info("Closing previous browser context.")
                await webui_manager.bu_browser_context.close()
                webui_manager.bu_browser_context = None
            if webui_manager.bu_browser:
                logger.info("Closing previous browser.")
                await webui_manager.bu_browser.close()
                webui_manager.bu_browser = None

        # Create Browser if needed
        if not webui_manager.bu_browser:
            logger.info("Launching new browser instance.")
            extra_args = []
            if use_own_browser:
                # Env vars take precedence over UI values for the local browser.
                browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
                if browser_binary_path == "":
                    browser_binary_path = None
                browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
                if browser_user_data:
                    extra_args += [f"--user-data-dir={browser_user_data}"]
            else:
                browser_binary_path = None

            webui_manager.bu_browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
                    browser_binary_path=browser_binary_path,
                    extra_browser_args=extra_args,
                    wss_url=wss_url,
                    cdp_url=cdp_url,
                    new_context_config=BrowserContextConfig(
                        window_width=window_w,
                        window_height=window_h,
                    )
                )
            )

        # Create Context if needed
        if not webui_manager.bu_browser_context:
            logger.info("Creating new browser context.")
            context_config = BrowserContextConfig(
                trace_path=save_trace_path if save_trace_path else None,
                save_recording_path=save_recording_path
                if save_recording_path
                else None,
                save_downloads_path=save_download_path if save_download_path else None,
                window_height=window_h,
                window_width=window_w,
            )
            if not webui_manager.bu_browser:
                raise ValueError("Browser not initialized, cannot create context.")
            webui_manager.bu_browser_context = (
                await webui_manager.bu_browser.new_context(config=context_config)
            )

        # --- 5. Initialize or Update Agent ---
        webui_manager.bu_agent_task_id = str(uuid.uuid4())  # New ID for this task run
        # Per-task subdirectory holds the history JSON and the recording GIF.
        os.makedirs(
            os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id),
            exist_ok=True,
        )
        history_file = os.path.join(
            save_agent_history_path,
            webui_manager.bu_agent_task_id,
            f"{webui_manager.bu_agent_task_id}.json",
        )
        gif_path = os.path.join(
            save_agent_history_path,
            webui_manager.bu_agent_task_id,
            f"{webui_manager.bu_agent_task_id}.gif",
        )

        # Pass the webui_manager to callbacks when wrapping them
        async def step_callback_wrapper(
            state: BrowserState, output: AgentOutput, step_num: int
        ):
            await _handle_new_step(webui_manager, state, output, step_num)

        def done_callback_wrapper(history: AgentHistoryList):
            _handle_done(webui_manager, history)

        if not webui_manager.bu_agent:
            logger.info(f"Initializing new agent for task: {task}")
            if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
                raise ValueError(
                    "Browser or Context not initialized, cannot create agent."
                )
            webui_manager.bu_agent = BrowserUseAgent(
                task=task,
                llm=main_llm,
                browser=webui_manager.bu_browser,
                browser_context=webui_manager.bu_browser_context,
                controller=webui_manager.bu_controller,
                register_new_step_callback=step_callback_wrapper,
                register_done_callback=done_callback_wrapper,
                use_vision=use_vision,
                override_system_message=override_system_prompt,
                extend_system_message=extend_system_prompt,
                max_input_tokens=max_input_tokens,
                max_actions_per_step=max_actions,
                tool_calling_method=tool_calling_method,
                planner_llm=planner_llm,
                use_vision_for_planner=planner_use_vision if planner_llm else False,
                source="webui",
            )
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.settings.generate_gif = gif_path
        else:
            # Reuse the existing agent: retarget it at the new task and the
            # (possibly re-created) browser resources.
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.add_new_task(task)
            webui_manager.bu_agent.settings.generate_gif = gif_path
            webui_manager.bu_agent.browser = webui_manager.bu_browser
            webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context
            webui_manager.bu_agent.controller = webui_manager.bu_controller

        # --- 6. Run Agent Task and Stream Updates ---
        agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.bu_current_task = agent_task  # Store the task

        # Poll the agent task; yield UI updates for pause/stop/help/chat/view.
        last_chat_len = len(webui_manager.bu_chat_history)
        while not agent_task.done():
            is_paused = webui_manager.bu_agent.state.paused
            is_stopped = webui_manager.bu_agent.state.stopped

            # Check for pause state
            if is_paused:
                yield {
                    pause_resume_button_comp: gr.update(
                        value="▶️ Resume", interactive=True
                    ),
                    stop_button_comp: gr.update(interactive=True),
                }
                # Wait until pause is released or task is stopped/done
                while is_paused and not agent_task.done():
                    # Re-check agent state in loop
                    is_paused = webui_manager.bu_agent.state.paused
                    is_stopped = webui_manager.bu_agent.state.stopped
                    if is_stopped:  # Stop signal received while paused
                        break
                    await asyncio.sleep(0.2)

                if (
                    agent_task.done() or is_stopped
                ):  # If stopped or task finished while paused
                    break

                # If resumed, yield UI update
                yield {
                    pause_resume_button_comp: gr.update(
                        value="⏸️ Pause", interactive=True
                    ),
                    run_button_comp: gr.update(
                        value="⏳ Running...", interactive=False
                    ),
                }

            # Check if agent stopped itself or stop button was pressed (which sets agent.state.stopped)
            if is_stopped:
                logger.info("Agent has stopped (internally or via stop button).")
                if not agent_task.done():
                    # Ensure the task coroutine finishes if agent just set flag
                    try:
                        await asyncio.wait_for(
                            agent_task, timeout=1.0
                        )  # Give it a moment to exit run()
                    except asyncio.TimeoutError:
                        logger.warning(
                            "Agent task did not finish quickly after stop signal, cancelling."
                        )
                        agent_task.cancel()
                    except Exception:  # Catch task exceptions if it errors on stop
                        pass
                break  # Exit the streaming loop

            # Check if agent is asking for help (via response_event)
            update_dict = {}
            if webui_manager.bu_response_event is not None:
                # Switch the input/run button into "submit response" mode.
                update_dict = {
                    user_input_comp: gr.update(
                        placeholder="Agent needs help. Enter response and submit.",
                        interactive=True,
                    ),
                    run_button_comp: gr.update(
                        value="✔️ Submit Response", interactive=True
                    ),
                    pause_resume_button_comp: gr.update(interactive=False),
                    stop_button_comp: gr.update(interactive=False),
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
                }
                last_chat_len = len(webui_manager.bu_chat_history)
                yield update_dict
                # Wait until response is submitted or task finishes
                while (
                    webui_manager.bu_response_event is not None
                    and not agent_task.done()
                ):
                    await asyncio.sleep(0.2)
                # Restore UI after response submitted or if task ended unexpectedly
                if not agent_task.done():
                    yield {
                        user_input_comp: gr.update(
                            placeholder="Agent is running...", interactive=False
                        ),
                        run_button_comp: gr.update(
                            value="⏳ Running...", interactive=False
                        ),
                        pause_resume_button_comp: gr.update(interactive=True),
                        stop_button_comp: gr.update(interactive=True),
                    }
                else:
                    break  # Task finished while waiting for response

            # Update Chatbot if new messages arrived via callbacks
            if len(webui_manager.bu_chat_history) > last_chat_len:
                update_dict[chatbot_comp] = gr.update(
                    value=webui_manager.bu_chat_history
                )
                last_chat_len = len(webui_manager.bu_chat_history)

            # Update Browser View (streamed screenshots, headless mode only)
            if headless and webui_manager.bu_browser_context:
                try:
                    screenshot_b64 = (
                        await webui_manager.bu_browser_context.take_screenshot()
                    )
                    if screenshot_b64:
                        html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
                        update_dict[browser_view_comp] = gr.update(
                            value=html_content, visible=True
                        )
                    else:
                        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
                        update_dict[browser_view_comp] = gr.update(
                            value=html_content, visible=True
                        )
                except Exception as e:
                    logger.debug(f"Failed to capture screenshot: {e}")
                    update_dict[browser_view_comp] = gr.update(
                        value="<div style='...'>Error loading view...</div>",
                        visible=True,
                    )
            else:
                update_dict[browser_view_comp] = gr.update(visible=False)

            # Yield accumulated updates
            if update_dict:
                yield update_dict

            await asyncio.sleep(0.1)  # Polling interval

        # --- 7. Task Finalization ---
        webui_manager.bu_agent.state.paused = False
        webui_manager.bu_agent.state.stopped = False
        final_update = {}
        try:
            logger.info("Agent task completing...")
            # Await the task ensure completion and catch exceptions if not already caught
            if not agent_task.done():
                await agent_task  # Retrieve result/exception
            elif agent_task.exception():  # Check if task finished with exception
                agent_task.result()  # Raise the exception to be caught below
            logger.info("Agent task completed processing.")

            logger.info(f"Explicitly saving agent history to: {history_file}")
            webui_manager.bu_agent.save_history(history_file)

            if os.path.exists(history_file):
                final_update[history_file_comp] = gr.File(value=history_file)

            if gif_path and os.path.exists(gif_path):
                logger.info(f"GIF found at: {gif_path}")
                final_update[gif_comp] = gr.Image(value=gif_path)

            # Update task metrics display if metrics are available
            task_metrics_display_comp = webui_manager.get_component_by_id("browser_use_agent.task_metrics_display")
            if hasattr(webui_manager, 'bu_task_metrics') and webui_manager.bu_task_metrics:
                # NOTE(review): format_task_metrics is assumed to be a
                # module-level helper rendering the metrics dict as Markdown
                # — confirm it is defined elsewhere in this file.
                metrics_md = format_task_metrics(webui_manager.bu_task_metrics)
                final_update[task_metrics_display_comp] = gr.update(value=metrics_md)

        except asyncio.CancelledError:
            logger.info("Agent task was cancelled.")
            # Only append the cancellation notice once.
            if not any(
                msg.get("role") == "assistant" and
                msg.get("content", "") and
                "Cancelled" in msg.get("content", "")
                for msg in webui_manager.bu_chat_history
            ):
                webui_manager.bu_chat_history.append(
                    {"role": "assistant", "content": "**Task Cancelled**."}
                )
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
        except Exception as e:
            logger.error(f"Error during agent execution: {e}", exc_info=True)
            error_message = (
                f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
            )
            # Only append this exact error once.
            if not any(
                msg.get("role") == "assistant" and
                msg.get("content", "") and
                error_message in msg.get("content", "")
                for msg in webui_manager.bu_chat_history
            ):
                webui_manager.bu_chat_history.append(
                    {"role": "assistant", "content": error_message}
                )
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
            gr.Error(f"Agent execution failed: {e}")

        finally:
            webui_manager.bu_current_task = None  # Clear the task reference

            # Close browser/context if requested
            if should_close_browser_on_finish:
                if webui_manager.bu_browser_context:
                    logger.info("Closing browser context after task.")
                    await webui_manager.bu_browser_context.close()
                    webui_manager.bu_browser_context = None
                if webui_manager.bu_browser:
                    logger.info("Closing browser after task.")
                    await webui_manager.bu_browser.close()
                    webui_manager.bu_browser = None

            # --- 8. Final UI Update ---
            final_update.update(
                {
                    user_input_comp: gr.update(
                        value="",
                        interactive=True,
                        placeholder="Enter your next task...",
                    ),
                    run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
                    stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
                    pause_resume_button_comp: gr.update(
                        value="⏸️ Pause", interactive=False
                    ),
                    clear_button_comp: gr.update(interactive=True),
                    # Ensure final chat history is shown
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
                }
            )
            yield final_update

    except Exception as e:
        # Catch errors during setup (before agent run starts)
        logger.error(f"Error setting up agent task: {e}", exc_info=True)
        webui_manager.bu_current_task = None  # Ensure state is reset
        yield {
            user_input_comp: gr.update(
                interactive=True, placeholder="Error during setup. Enter task..."
            ),
            run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
            stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
            pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
            clear_button_comp: gr.update(interactive=True),
            chatbot_comp: gr.update(
                value=webui_manager.bu_chat_history
                + [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
            ),
        }
817
+
818
+
819
+ # --- Button Click Handlers --- (Need access to webui_manager)
820
+
821
+
822
async def handle_submit(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
):
    """Route a click on the main 'Submit' button.

    Three cases, in priority order: the agent is waiting for assistance
    (deliver the reply), a task is already running (refuse), or start a new
    task by streaming run_agent_task's updates.
    """
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")

    text = components.get(user_input_comp, "").strip()

    # Case 1: the agent is blocked in _ask_assistant_callback — hand over the reply.
    help_event = webui_manager.bu_response_event
    if help_event and not help_event.is_set():
        logger.info(f"User submitted assistance: {text}")
        webui_manager.bu_user_help_response = (
            text if text else "User provided no text response."
        )
        help_event.set()
        # The main run loop reacts to the event; just lock the input here.
        yield {
            user_input_comp: gr.update(
                value="",
                interactive=False,
                placeholder="Waiting for agent to continue...",
            ),
            run_button_comp: gr.update(value="⏳ Running...", interactive=False),
        }
        return

    # Case 2: a task is already in flight and not asking for help.
    active_task = webui_manager.bu_current_task
    if active_task and not active_task.done():
        logger.warning(
            "Submit button clicked while agent is already running and not asking for help."
        )
        gr.Info("Agent is currently running. Please wait or use Stop/Pause.")
        yield {}  # No change
        return

    # Case 3: kick off a fresh task and forward its streamed UI updates.
    logger.info("Submit button clicked for new task.")
    async for update in run_agent_task(webui_manager, components):
        yield update
860
+
861
+
862
async def handle_stop(webui_manager: WebuiManager):
    """Handle a click on the 'Stop' button.

    If a task is running, flag the agent to halt via its state flags and
    disable the control buttons; otherwise just reset the button row.
    """
    logger.info("Stop button clicked.")

    def comp(suffix):
        # Shorthand for this tab's component lookup.
        return webui_manager.get_component_by_id(f"browser_use_agent.{suffix}")

    running_agent = webui_manager.bu_agent
    current_task = webui_manager.bu_current_task

    if running_agent and current_task and not current_task.done():
        # Signal the agent to stop; the run loop observes these flags.
        running_agent.state.stopped = True
        running_agent.state.paused = False  # Ensure not paused if stopped
        return {
            comp("stop_button"): gr.update(interactive=False, value="⏹️ Stopping..."),
            comp("pause_resume_button"): gr.update(interactive=False),
            comp("run_button"): gr.update(interactive=False),
        }

    logger.warning("Stop clicked but agent is not running or task is already done.")
    # Nothing to stop — reset the buttons in case the UI is stuck.
    return {
        comp("run_button"): gr.update(interactive=True),
        comp("stop_button"): gr.update(interactive=False),
        comp("pause_resume_button"): gr.update(interactive=False),
        comp("clear_button"): gr.update(interactive=True),
    }
900
+
901
+
902
async def handle_pause_resume(webui_manager: WebuiManager):
    """Handle a click on the 'Pause/Resume' toggle button.

    Pauses or resumes the running agent and optimistically relabels the
    button; the main run loop reconciles the real state afterwards.
    """
    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task

    # Guard: nothing to toggle unless a task is actively running.
    if not (agent and task and not task.done()):
        logger.warning(
            "Pause/Resume clicked but agent is not running or doesn't support state."
        )
        return {}  # No change

    toggle_button = webui_manager.get_component_by_id(
        "browser_use_agent.pause_resume_button"
    )

    if agent.state.paused:
        logger.info("Resume button clicked.")
        agent.resume()
        # Optimistic update; the streaming loop will refresh the UI.
        return {toggle_button: gr.update(value="⏸️ Pause", interactive=True)}

    logger.info("Pause button clicked.")
    agent.pause()
    return {toggle_button: gr.update(value="▶️ Resume", interactive=True)}
930
+
931
+
932
async def handle_clear(webui_manager: WebuiManager):
    """Handles clicks on the 'Clear' button.

    Stops any in-flight agent task, tears down the controller's MCP client,
    resets all agent-related state stored on the manager, and returns a dict
    of Gradio updates restoring the tab to its initial appearance.
    """
    logger.info("Clear button clicked.")

    # Stop any running task first
    task = webui_manager.bu_current_task
    if task and not task.done():
        logger.info("Clearing requires stopping the current task.")
        # Safely try to stop the agent if it exists
        if webui_manager.bu_agent:
            try:
                if hasattr(webui_manager.bu_agent, 'stop'):
                    webui_manager.bu_agent.stop()
                else:
                    # Alternative method if stop isn't available: flip the
                    # state flags the agent's run loop is expected to check.
                    webui_manager.bu_agent.state.stopped = True
                    webui_manager.bu_agent.state.paused = False
            except Exception as e:
                logger.warning(f"Error stopping agent: {e}")

        # Cancel the asyncio task and give it a short grace period to unwind.
        task.cancel()
        try:
            await asyncio.wait_for(task, timeout=2.0)  # Wait briefly
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass
        except Exception as e:
            logger.warning(f"Error stopping task on clear: {e}")
        webui_manager.bu_current_task = None

    # Close the controller's MCP client before dropping references so the
    # connection is not leaked.
    if webui_manager.bu_controller:
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None
    webui_manager.bu_agent = None

    # Reset state stored in manager
    webui_manager.bu_chat_history = []
    webui_manager.bu_response_event = None
    webui_manager.bu_user_help_response = None
    webui_manager.bu_agent_task_id = None
    webui_manager.bu_task_metrics = None  # Clear task metrics

    logger.info("Agent state and browser resources cleared.")

    # Reset UI components to their pristine state.
    return {
        webui_manager.get_component_by_id("browser_use_agent.chatbot"): gr.update(
            value=[]
        ),
        webui_manager.get_component_by_id("browser_use_agent.user_input"): gr.update(
            value="", placeholder="Enter your task here..."
        ),
        webui_manager.get_component_by_id(
            "browser_use_agent.agent_history_file"
        ): gr.update(value=None),
        webui_manager.get_component_by_id("browser_use_agent.recording_gif"): gr.update(
            value=None
        ),
        webui_manager.get_component_by_id("browser_use_agent.browser_view"): gr.update(
            value="<div style='...'>Browser Cleared</div>"
        ),
        webui_manager.get_component_by_id("browser_use_agent.run_button"): gr.update(
            value="▶️ Submit Task", interactive=True
        ),
        webui_manager.get_component_by_id("browser_use_agent.stop_button"): gr.update(
            interactive=False
        ),
        webui_manager.get_component_by_id(
            "browser_use_agent.pause_resume_button"
        ): gr.update(value="⏸️ Pause", interactive=False),
        webui_manager.get_component_by_id("browser_use_agent.clear_button"): gr.update(
            interactive=True
        ),
        webui_manager.get_component_by_id("browser_use_agent.task_metrics_display"): gr.update(
            value="No task metrics available yet. Run a task to see metrics here."
        ),
    }
1008
+
1009
+
1010
+ # --- Tab Creation Function ---
1011
+
1012
+
1013
def create_browser_use_agent_tab(webui_manager: WebuiManager):
    """
    Create the run agent tab, defining UI, state, and handlers.

    Builds the chat interface, control buttons, live browser view, task
    metrics panel, and output widgets; registers them with the manager under
    the "browser_use_agent" prefix; and wires the Gradio event handlers.
    """
    webui_manager.init_browser_use_agent()

    # Initialize task metrics if not already present
    if not hasattr(webui_manager, 'bu_task_metrics'):
        webui_manager.bu_task_metrics = None

    # --- Define UI Components ---
    tab_components = {}
    with gr.Column():
        chatbot = gr.Chatbot(
            lambda: webui_manager.bu_chat_history,  # Load history dynamically
            elem_id="browser_use_chatbot",
            label="Agent Interaction",
            type="messages",
            height=600,
            show_copy_button=True,
        )
        user_input = gr.Textbox(
            label="Your Task or Response",
            placeholder="Enter your task here or provide assistance when asked.",
            lines=3,
            interactive=True,
            elem_id="user_input",
        )
        with gr.Row():
            stop_button = gr.Button(
                "⏹️ Stop", interactive=False, variant="stop", scale=2
            )
            pause_resume_button = gr.Button(
                "⏸️ Pause", interactive=False, variant="secondary", scale=2, visible=True
            )
            clear_button = gr.Button(
                "🗑️ Clear", interactive=True, variant="secondary", scale=2
            )
            run_button = gr.Button("▶️ Submit Task", variant="primary", scale=3)

    # Live view placeholder; only populated when running headless.
    browser_view = gr.HTML(
        value="<div style='width:100%; height:50vh; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser View (Requires Headless=True)</p></div>",
        label="Browser Live View",
        elem_id="browser_view",
        visible=False,
    )

    # Task Metrics Section
    with gr.Column(visible=True) as task_metrics_container:
        gr.Markdown("### Task Metrics", elem_id="task_metrics_heading")

        task_metrics_display = gr.Markdown(
            # Lambda so the metrics are re-read from the manager on refresh.
            value=lambda: format_task_metrics(webui_manager.bu_task_metrics),
            elem_id="task_metrics_display",
        )

    with gr.Column():
        gr.Markdown("### Task Outputs")
        agent_history_file = gr.File(label="Agent History JSON", interactive=False)
        recording_gif = gr.Image(
            label="Task Recording GIF",
            format="gif",
            interactive=False,
            type="filepath",
        )

    # --- Store Components in Manager ---
    tab_components.update(
        dict(
            chatbot=chatbot,
            user_input=user_input,
            clear_button=clear_button,
            run_button=run_button,
            stop_button=stop_button,
            pause_resume_button=pause_resume_button,
            agent_history_file=agent_history_file,
            recording_gif=recording_gif,
            browser_view=browser_view,
            task_metrics_display=task_metrics_display,
        )
    )
    webui_manager.add_components(
        "browser_use_agent", tab_components
    )  # Use "browser_use_agent" as tab_name prefix

    all_managed_components = set(
        webui_manager.get_components()
    )  # Get all components known to manager
    run_tab_outputs = list(tab_components.values())

    async def submit_wrapper(
        *args,
    ) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_submit that yields its results.
        This receives all component values as separate arguments.
        """
        # Convert the individual args into a components dictionary.
        # NOTE(review): this zips a *set* with positional args — it relies on
        # the set iterating in the same order here as in the `inputs=` list
        # below (stable within one process, but worth confirming).
        components_dict = {}
        if args and len(args) == len(all_managed_components):
            # Combine the args and components into a dictionary
            components_dict = dict(zip(all_managed_components, args))

        async for update in handle_submit(webui_manager, components_dict):
            yield update

    async def stop_wrapper(*args) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_stop."""
        update_dict = await handle_stop(webui_manager)
        if update_dict:  # Only yield if there's an update to return
            yield update_dict
        else:
            yield {}  # Yield empty dict to avoid errors

    async def pause_resume_wrapper(*args) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_pause_resume."""
        update_dict = await handle_pause_resume(webui_manager)
        if update_dict:  # Only yield if there's an update to return
            yield update_dict
        else:
            yield {}  # Yield empty dict to avoid errors

    async def clear_wrapper(*args) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_clear."""
        update_dict = await handle_clear(webui_manager)
        if update_dict:  # Only yield if there's an update to return
            yield update_dict
        else:
            yield {}  # Yield empty dict to avoid errors

    # --- Connect Event Handlers using the Wrappers --
    run_button.click(
        fn=submit_wrapper, inputs=list(all_managed_components), outputs=run_tab_outputs
    )
    user_input.submit(
        fn=submit_wrapper, inputs=list(all_managed_components), outputs=run_tab_outputs
    )
    stop_button.click(fn=stop_wrapper, inputs=None, outputs=run_tab_outputs)
    pause_resume_button.click(
        fn=pause_resume_wrapper, inputs=None, outputs=run_tab_outputs
    )
    clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs)
src/webui/components/browser_use_agent_tab.py.bak ADDED
@@ -0,0 +1,1305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import uuid
6
+ from typing import Any, AsyncGenerator, Dict, Optional
7
+
8
+ import gradio as gr
9
+
10
+ # from browser_use.agent.service import Agent
11
+ from browser_use.agent.views import (
12
+ AgentHistoryList,
13
+ AgentOutput,
14
+ )
15
+ from browser_use.browser.browser import BrowserConfig
16
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
17
+ from browser_use.browser.views import BrowserState
18
+ from gradio.components import Component
19
+ from langchain_core.language_models.chat_models import BaseChatModel
20
+
21
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
22
+ from src.browser.custom_browser import CustomBrowser
23
+ from src.controller.custom_controller import CustomController
24
+ from src.utils import llm_provider
25
+ from src.webui.webui_manager import WebuiManager
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ # Custom function to format task metrics as markdown
31
def format_task_metrics(metrics):
    """Render a task-metrics dict as a markdown summary string.

    Expects keys 'duration', 'tokens', 'result', 'status', 'errors' and an
    optional 'screenshot' (base64 JPEG). Returns a placeholder message when
    no metrics are available yet.
    """
    if not metrics:
        return "No task metrics available yet. Run a task to see metrics here."

    parts = ["#### Task Execution Summary\n\n"]
    parts.append(f"⏱️ **Duration:** {metrics['duration']} seconds\n\n")
    parts.append(f"🔢 **Tokens Used:** {metrics['tokens']}\n\n")

    if metrics['result']:
        parts.append(f"📋 **Final Result:**\n\n```\n{metrics['result']}\n```\n\n")

    parts.append(f"✅ **Status:** {metrics['status']}\n\n")

    if metrics['errors'] and any(metrics['errors']):
        parts.append(f"❌ **Errors:**\n\n```\n{metrics['errors']}\n```\n\n")

    # Embed the final screenshot inline when one was captured.
    if metrics.get('screenshot'):
        parts.append("📸 **Final Screenshot:**\n\n")
        parts.append(
            f"<img src=\"data:image/jpeg;base64,{metrics['screenshot']}\" "
            "alt=\"Final Screenshot\" style=\"max-width:100%; "
            "border:1px solid #ccc;\" />\n\n"
        )

    return "".join(parts)
53
+
54
+
55
+ # Add this new function after the format_task_metrics function
56
+
57
def create_business_task(business_name, business_type, business_website=None, additional_info=None):
    """Creates a standardized task description for analyzing a business.

    Builds a single prompt sentence-by-sentence: intro, starting point
    (website visit or web search), the required data points, optional extra
    context, and a closing instruction to produce a report.
    """
    segments = [
        f"Analyze the business '{business_name}' which is in the {business_type} industry."
    ]

    if business_website:
        segments.append(f" Start by visiting their website at {business_website}.")
    else:
        segments.append(" Search for information about this business online.")

    segments.append(
        " Gather the following information: main products/services, contact"
        " information, location, hours of operation, and customer reviews."
    )

    if additional_info:
        segments.append(f" Additional context: {additional_info}")

    segments.append(" Provide a comprehensive report with all findings.")

    return "".join(segments)
74
+
75
+
76
+ # --- Helper Functions --- (Defined at module level)
77
+
78
+
79
async def _initialize_llm(
    provider: Optional[str],
    model_name: Optional[str],
    temperature: float,
    base_url: Optional[str],
    api_key: Optional[str],
    num_ctx: Optional[int] = None,
) -> Optional[BaseChatModel]:
    """Initializes the LLM based on settings. Returns None if provider/model is missing."""
    if not provider or not model_name:
        logger.info("LLM Provider or Model Name not specified, LLM will be None.")
        return None

    logger.info(
        f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}"
    )
    try:
        # Delegate construction to the shared provider factory; num_ctx is
        # only meaningful for ollama-backed models, so drop it otherwise.
        return llm_provider.get_llm_model(
            provider=provider,
            model_name=model_name,
            temperature=temperature,
            base_url=base_url or None,
            api_key=api_key or None,
            num_ctx=num_ctx if provider == "ollama" else None,
        )
    except Exception as e:
        # Surface the failure in the UI and fall back to no LLM.
        logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
        gr.Warning(
            f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}"
        )
        return None
113
+
114
+
115
def _get_config_value(
    webui_manager: WebuiManager,
    comp_dict: Dict[gr.components.Component, Any],
    comp_id_suffix: str,
    default: Any = None,
) -> Any:
    """Safely get value from component dictionary using its ID suffix relative to the tab.

    Component IDs follow the "tab_name.comp_name" convention; the agent tab
    is tried first, then the settings tabs. Returns *default* when no tab
    owns a component with that suffix.
    """
    for tab_prefix in ("browser_use_agent", "agent_settings", "browser_settings"):
        try:
            comp = webui_manager.get_component_by_id(f"{tab_prefix}.{comp_id_suffix}")
        except KeyError:
            continue
        return comp_dict.get(comp, default)

    logger.warning(
        f"Component with suffix '{comp_id_suffix}' not found in manager for value lookup."
    )
    return default
142
+
143
+
144
def _format_agent_output(model_output: AgentOutput) -> str:
    """Formats AgentOutput for display in the chatbot using JSON.

    Returns an empty string for a falsy output; otherwise a <pre><code>
    block containing the pretty-printed current_state and actions, with a
    readable error block as fallback if serialization fails.
    """
    if not model_output:
        return ""

    try:
        # Serialize the Pydantic models to plain dicts, dropping None fields.
        # Actions first, then state, matching the original evaluation order.
        action_dump = [
            act.model_dump(exclude_none=True) for act in model_output.action
        ]
        state_dump = model_output.current_state.model_dump(exclude_none=True)
        json_string = json.dumps(
            {"current_state": state_dump, "action": action_dump},
            indent=4,
            ensure_ascii=False,
        )
        # Wrap in <pre><code> for proper display in HTML.
        content = f"<pre><code class='language-json'>{json_string}</code></pre>"
    except AttributeError as ae:
        logger.error(
            f"AttributeError during model dump: {ae}. Check if 'action' or 'current_state' or their items support 'model_dump'."
        )
        content = f"<pre><code>Error: Could not format agent output (AttributeError: {ae}).\nRaw output: {str(model_output)}</code></pre>"
    except Exception as e:
        logger.error(f"Error formatting agent output: {e}", exc_info=True)
        # Fallback to simple string representation on error.
        content = f"<pre><code>Error formatting agent output.\nRaw output:\n{str(model_output)}</code></pre>"

    return content.strip()
175
+
176
+
177
+ # --- Updated Callback Implementation ---
178
+
179
+
180
async def _handle_new_step(
    webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
):
    """Callback for each step taken by the agent, including screenshot display.

    Appends one assistant chat message per step containing a header, the
    step's screenshot (when present and plausibly base64), and the agent
    output formatted as a JSON code block.
    """
    # Use the correct chat history attribute name from the user's code
    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error(
            "Attribute 'bu_chat_history' not found in webui_manager! Cannot add chat message."
        )
        # Initialize it maybe? Or raise an error? For now, log and potentially skip chat update.
        webui_manager.bu_chat_history = []  # Initialize if missing (consider if this is the right place)
        # return # Or stop if this is critical
    # NOTE(review): the callback apparently receives a 1-ahead step number —
    # confirm against the agent's callback contract.
    step_num -= 1
    logger.info(f"Step {step_num} completed.")

    # --- Screenshot Handling ---
    screenshot_html = ""
    # Ensure state.screenshot exists and is not empty before proceeding
    # Use getattr for safer access
    screenshot_data = getattr(state, "screenshot", None)
    if screenshot_data:
        try:
            # Basic validation: check if it looks like base64
            if (
                isinstance(screenshot_data, str) and len(screenshot_data) > 100
            ):  # Arbitrary length check
                # *** UPDATED STYLE: Removed centering, adjusted width ***
                img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
                screenshot_html = (
                    img_tag + "<br/>"
                )  # Use <br/> for line break after inline-block image
            else:
                logger.warning(
                    f"Screenshot for step {step_num} seems invalid (type: {type(screenshot_data)}, len: {len(screenshot_data) if isinstance(screenshot_data, str) else 'N/A'})."
                )
                screenshot_html = "**[Invalid screenshot data]**<br/>"

        except Exception as e:
            logger.error(
                f"Error processing or formatting screenshot for step {step_num}: {e}",
                exc_info=True,
            )
            screenshot_html = "**[Error displaying screenshot]**<br/>"
    else:
        logger.debug(f"No screenshot available for step {step_num}.")

    # --- Format Agent Output ---
    formatted_output = _format_agent_output(output)  # Use the updated function

    # --- Combine and Append to Chat ---
    step_header = f"--- **Step {step_num}** ---"
    # Combine header, image (with line break), and JSON block
    final_content = step_header + "<br/>" + screenshot_html + formatted_output

    chat_message = {
        "role": "assistant",
        "content": final_content.strip(),  # Remove leading/trailing whitespace
    }

    # Append to the correct chat history list
    webui_manager.bu_chat_history.append(chat_message)

    # Brief yield so the UI loop can pick up the new message.
    await asyncio.sleep(0.05)
244
+
245
+
246
def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
    """Callback when the agent finishes the task (success or failure).

    Logs the run stats, stores a metrics dict on the manager for the metrics
    panel (duration, tokens, result, status, errors, final screenshot), and
    appends a markdown summary message to the chat history.
    """
    duration = history.total_duration_seconds()
    tokens = history.total_input_tokens()
    logger.info(
        f"Agent task finished. Duration: {duration:.2f}s, Tokens: {tokens}"
    )

    summary_lines = [
        "**Task Completed**\n",
        f"- Duration: {duration:.2f} seconds\n",
        f"- Total Input Tokens: {tokens}\n",  # Or total tokens if available
    ]

    final_result = history.final_result()
    if final_result:
        summary_lines.append(f"- Final Result: {final_result}\n")

    errors = history.errors()
    has_errors = bool(errors and any(errors))
    if has_errors:
        summary_lines.append(f"- **Errors:**\n```\n{errors}\n```\n")
    else:
        summary_lines.append("- Status: Success\n")

    # Keep the last screenshot (if any) for the metrics panel.
    screenshots = history.screenshots()
    final_screenshot = screenshots[-1] if screenshots else None

    # Store task metrics separately for the metrics display.
    webui_manager.bu_task_metrics = {
        "duration": f"{duration:.2f}",
        "tokens": f"{tokens}",
        "result": final_result if final_result else "",
        "status": "Error" if has_errors else "Success",
        "errors": errors if has_errors else None,
        "screenshot": final_screenshot,
    }

    webui_manager.bu_chat_history.append(
        {"role": "assistant", "content": "".join(summary_lines)}
    )
282
+
283
+
284
async def _ask_assistant_callback(
    webui_manager: WebuiManager, query: str, browser_context: BrowserContext
) -> Dict[str, Any]:
    """Callback triggered by the agent's ask_for_assistant action.

    Posts the agent's question to the chat history, then blocks until the
    user submits a response (signalled via ``bu_response_event``) or a
    one-hour timeout elapses. Returns ``{"response": ...}`` for the agent.
    """
    logger.info("Agent requires assistance. Waiting for user input.")

    # BUGFIX: the guard previously checked hasattr(webui_manager, "_chat_history"),
    # an attribute that is never set, while the function itself reads/writes
    # bu_chat_history — so the guard tested the wrong attribute. Check the
    # attribute that is actually used below.
    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error("Chat history not found in webui_manager during ask_assistant!")
        return {"response": "Internal Error: Cannot display help request."}

    webui_manager.bu_chat_history.append(
        {
            "role": "assistant",
            "content": f"**Need Help:** {query}\nPlease provide information or perform the required action in the browser, then type your response/confirmation below and click 'Submit Response'.",
        }
    )

    # Use state stored in webui_manager
    webui_manager.bu_response_event = asyncio.Event()
    webui_manager.bu_user_help_response = None  # Reset previous response

    try:
        logger.info("Waiting for user response event...")
        await asyncio.wait_for(
            webui_manager.bu_response_event.wait(), timeout=3600.0
        )  # Long timeout
        logger.info("User response event received.")
    except asyncio.TimeoutError:
        logger.warning("Timeout waiting for user assistance.")
        webui_manager.bu_chat_history.append(
            {
                "role": "assistant",
                "content": "**Timeout:** No response received. Trying to proceed.",
            }
        )
        webui_manager.bu_response_event = None  # Clear the event
        return {"response": "Timeout: User did not respond."}  # Inform the agent

    response = webui_manager.bu_user_help_response
    webui_manager.bu_chat_history.append(
        {"role": "user", "content": response}
    )  # Show user response in chat
    webui_manager.bu_response_event = (
        None  # Clear the event for the next potential request
    )
    return {"response": response}
330
+
331
+
332
+ # --- Core Agent Execution Logic --- (Needs access to webui_manager)
333
+
334
+
335
+ async def run_agent_task(
336
+ webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
337
+ ) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
338
+ """Handles the entire lifecycle of initializing and running the agent."""
339
+
340
+ # --- Get Components ---
341
+ # Need handles to specific UI components to update them
342
+ business_name_comp = webui_manager.get_component_by_id("browser_use_agent.business_name")
343
+ business_website_comp = webui_manager.get_component_by_id("browser_use_agent.business_website")
344
+ business_type_comp = webui_manager.get_component_by_id("browser_use_agent.business_type")
345
+ additional_info_comp = webui_manager.get_component_by_id("browser_use_agent.additional_info")
346
+ run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")
347
+ stop_button_comp = webui_manager.get_component_by_id(
348
+ "browser_use_agent.stop_button"
349
+ )
350
+ pause_resume_button_comp = webui_manager.get_component_by_id(
351
+ "browser_use_agent.pause_resume_button"
352
+ )
353
+ clear_button_comp = webui_manager.get_component_by_id(
354
+ "browser_use_agent.clear_button"
355
+ )
356
+ chatbot_comp = webui_manager.get_component_by_id("browser_use_agent.chatbot")
357
+ history_file_comp = webui_manager.get_component_by_id(
358
+ "browser_use_agent.agent_history_file"
359
+ )
360
+ gif_comp = webui_manager.get_component_by_id("browser_use_agent.recording_gif")
361
+ browser_view_comp = webui_manager.get_component_by_id(
362
+ "browser_use_agent.browser_view"
363
+ )
364
+
365
+ # --- 1. Get Task and Initial UI Update ---
366
+ task = components.get(business_name_comp, "").strip()
367
+ if not task:
368
+ gr.Warning("Please enter a business name or task.")
369
+ yield {run_button_comp: gr.update(interactive=True)}
370
+ return
371
+
372
+ # Set running state indirectly via _current_task
373
+ if "Analyze the business" not in task:
374
+ # If task isn't already formatted, create one from the business info
375
+ business_name = task # The business name was stored in the "task" variable
376
+ business_website = components.get(business_website_comp, "").strip()
377
+ business_type = components.get(business_type_comp, "Retail")
378
+ additional_info = components.get(additional_info_comp, "").strip()
379
+
380
+ task = create_business_task(
381
+ business_name,
382
+ business_type,
383
+ business_website,
384
+ additional_info
385
+ )
386
+
387
+ # We should already have added the task to chat history in handle_submit
388
+ if not any(msg.get("content") == task for msg in webui_manager.bu_chat_history if msg.get("role") == "user"):
389
+ webui_manager.bu_chat_history.append({"role": "user", "content": task})
390
+
391
+ yield {
392
+ business_name_comp: gr.Textbox(
393
+ value=components.get(business_name_comp, ""), interactive=False
394
+ ),
395
+ business_website_comp: gr.Textbox(
396
+ value=components.get(business_website_comp, ""), interactive=False
397
+ ),
398
+ business_type_comp: gr.update(interactive=False),
399
+ additional_info_comp: gr.Textbox(
400
+ value=components.get(additional_info_comp, ""), interactive=False
401
+ ),
402
+ run_button_comp: gr.Button(value="⏳ Running...", interactive=False),
403
+ stop_button_comp: gr.Button(interactive=True),
404
+ pause_resume_button_comp: gr.Button(value="⏸️ Pause", interactive=True),
405
+ clear_button_comp: gr.Button(interactive=False),
406
+ chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
407
+ history_file_comp: gr.update(value=None),
408
+ gif_comp: gr.update(value=None),
409
+ }
410
+
411
+ # --- Agent Settings ---
412
+ # Access settings values via components dict, getting IDs from webui_manager
413
+ def get_setting(key, default=None):
414
+ comp = webui_manager.id_to_component.get(f"agent_settings.{key}")
415
+ return components.get(comp, default) if comp else default
416
+
417
+ override_system_prompt = get_setting("override_system_prompt") or None
418
+ extend_system_prompt = get_setting("extend_system_prompt") or None
419
+ llm_provider_name = get_setting(
420
+ "llm_provider", None
421
+ ) # Default to None if not found
422
+ llm_model_name = get_setting("llm_model_name", None)
423
+ llm_temperature = get_setting("llm_temperature", 0.6)
424
+ use_vision = get_setting("use_vision", True)
425
+ ollama_num_ctx = get_setting("ollama_num_ctx", 16000)
426
+ llm_base_url = get_setting("llm_base_url") or None
427
+ llm_api_key = get_setting("llm_api_key") or None
428
+ max_steps = get_setting("max_steps", 100)
429
+ max_actions = get_setting("max_actions", 10)
430
+ max_input_tokens = get_setting("max_input_tokens", 128000)
431
+ tool_calling_str = get_setting("tool_calling_method", "auto")
432
+ tool_calling_method = tool_calling_str if tool_calling_str != "None" else None
433
+ mcp_server_config_comp = webui_manager.id_to_component.get(
434
+ "agent_settings.mcp_server_config"
435
+ )
436
+ mcp_server_config_str = (
437
+ components.get(mcp_server_config_comp) if mcp_server_config_comp else None
438
+ )
439
+ mcp_server_config = (
440
+ json.loads(mcp_server_config_str) if mcp_server_config_str else None
441
+ )
442
+
443
+ # Planner LLM Settings (Optional)
444
+ planner_llm_provider_name = get_setting("planner_llm_provider") or None
445
+ planner_llm = None
446
+ planner_use_vision = False
447
+ if planner_llm_provider_name:
448
+ planner_llm_model_name = get_setting("planner_llm_model_name")
449
+ planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
450
+ planner_ollama_num_ctx = get_setting("planner_ollama_num_ctx", 16000)
451
+ planner_llm_base_url = get_setting("planner_llm_base_url") or None
452
+ planner_llm_api_key = get_setting("planner_llm_api_key") or None
453
+ planner_use_vision = get_setting("planner_use_vision", False)
454
+
455
+ planner_llm = await _initialize_llm(
456
+ planner_llm_provider_name,
457
+ planner_llm_model_name,
458
+ planner_llm_temperature,
459
+ planner_llm_base_url,
460
+ planner_llm_api_key,
461
+ planner_ollama_num_ctx if planner_llm_provider_name == "ollama" else None,
462
+ )
463
+
464
+ # --- Browser Settings ---
465
+ def get_browser_setting(key, default=None):
466
+ comp = webui_manager.id_to_component.get(f"browser_settings.{key}")
467
+ return components.get(comp, default) if comp else default
468
+
469
+ browser_binary_path = get_browser_setting("browser_binary_path") or None
470
+ browser_user_data_dir = get_browser_setting("browser_user_data_dir") or None
471
+ use_own_browser = get_browser_setting(
472
+ "use_own_browser", False
473
+ ) # Logic handled by CDP/WSS presence
474
+ keep_browser_open = get_browser_setting("keep_browser_open", False)
475
+ headless = get_browser_setting("headless", False)
476
+ disable_security = get_browser_setting("disable_security", False)
477
+ window_w = int(get_browser_setting("window_w", 1280))
478
+ window_h = int(get_browser_setting("window_h", 1100))
479
+ cdp_url = get_browser_setting("cdp_url") or None
480
+ wss_url = get_browser_setting("wss_url") or None
481
+ save_recording_path = get_browser_setting("save_recording_path") or None
482
+ save_trace_path = get_browser_setting("save_trace_path") or None
483
+ save_agent_history_path = get_browser_setting(
484
+ "save_agent_history_path", "./tmp/agent_history"
485
+ )
486
+ save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")
487
+
488
+ stream_vw = 70
489
+ stream_vh = int(70 * window_h // window_w)
490
+
491
+ os.makedirs(save_agent_history_path, exist_ok=True)
492
+ if save_recording_path:
493
+ os.makedirs(save_recording_path, exist_ok=True)
494
+ if save_trace_path:
495
+ os.makedirs(save_trace_path, exist_ok=True)
496
+ if save_download_path:
497
+ os.makedirs(save_download_path, exist_ok=True)
498
+
499
+ # --- 2. Initialize LLM ---
500
+ main_llm = await _initialize_llm(
501
+ llm_provider_name,
502
+ llm_model_name,
503
+ llm_temperature,
504
+ llm_base_url,
505
+ llm_api_key,
506
+ ollama_num_ctx if llm_provider_name == "ollama" else None,
507
+ )
508
+
509
+ # Pass the webui_manager instance to the callback when wrapping it
510
+ async def ask_callback_wrapper(
511
+ query: str, browser_context: BrowserContext
512
+ ) -> Dict[str, Any]:
513
+ return await _ask_assistant_callback(webui_manager, query, browser_context)
514
+
515
+ if not webui_manager.bu_controller:
516
+ webui_manager.bu_controller = CustomController(
517
+ ask_assistant_callback=ask_callback_wrapper
518
+ )
519
+ await webui_manager.bu_controller.setup_mcp_client(mcp_server_config)
520
+
521
+ # --- 4. Initialize Browser and Context ---
522
+ should_close_browser_on_finish = not keep_browser_open
523
+
524
+ try:
525
+ # Close existing resources if not keeping open
526
+ if not keep_browser_open:
527
+ if webui_manager.bu_browser_context:
528
+ logger.info("Closing previous browser context.")
529
+ await webui_manager.bu_browser_context.close()
530
+ webui_manager.bu_browser_context = None
531
+ if webui_manager.bu_browser:
532
+ logger.info("Closing previous browser.")
533
+ await webui_manager.bu_browser.close()
534
+ webui_manager.bu_browser = None
535
+
536
+ # Create Browser if needed
537
+ if not webui_manager.bu_browser:
538
+ logger.info("Launching new browser instance.")
539
+ extra_args = []
540
+ if use_own_browser:
541
+ browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
542
+ if browser_binary_path == "":
543
+ browser_binary_path = None
544
+ browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
545
+ if browser_user_data:
546
+ extra_args += [f"--user-data-dir={browser_user_data}"]
547
+ else:
548
+ browser_binary_path = None
549
+
550
+ webui_manager.bu_browser = CustomBrowser(
551
+ config=BrowserConfig(
552
+ headless=headless,
553
+ disable_security=disable_security,
554
+ browser_binary_path=browser_binary_path,
555
+ extra_browser_args=extra_args,
556
+ wss_url=wss_url,
557
+ cdp_url=cdp_url,
558
+ new_context_config=BrowserContextConfig(
559
+ window_width=window_w,
560
+ window_height=window_h,
561
+ )
562
+ )
563
+ )
564
+
565
+ # Create Context if needed
566
+ if not webui_manager.bu_browser_context:
567
+ logger.info("Creating new browser context.")
568
+ context_config = BrowserContextConfig(
569
+ trace_path=save_trace_path if save_trace_path else None,
570
+ save_recording_path=save_recording_path
571
+ if save_recording_path
572
+ else None,
573
+ save_downloads_path=save_download_path if save_download_path else None,
574
+ window_height=window_h,
575
+ window_width=window_w,
576
+ )
577
+ if not webui_manager.bu_browser:
578
+ raise ValueError("Browser not initialized, cannot create context.")
579
+ webui_manager.bu_browser_context = (
580
+ await webui_manager.bu_browser.new_context(config=context_config)
581
+ )
582
+
583
+ # --- 5. Initialize or Update Agent ---
584
+ webui_manager.bu_agent_task_id = str(uuid.uuid4()) # New ID for this task run
585
+ os.makedirs(
586
+ os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id),
587
+ exist_ok=True,
588
+ )
589
+ history_file = os.path.join(
590
+ save_agent_history_path,
591
+ webui_manager.bu_agent_task_id,
592
+ f"{webui_manager.bu_agent_task_id}.json",
593
+ )
594
+ gif_path = os.path.join(
595
+ save_agent_history_path,
596
+ webui_manager.bu_agent_task_id,
597
+ f"{webui_manager.bu_agent_task_id}.gif",
598
+ )
599
+
600
+ # Pass the webui_manager to callbacks when wrapping them
601
+ async def step_callback_wrapper(
602
+ state: BrowserState, output: AgentOutput, step_num: int
603
+ ):
604
+ await _handle_new_step(webui_manager, state, output, step_num)
605
+
606
+ def done_callback_wrapper(history: AgentHistoryList):
607
+ _handle_done(webui_manager, history)
608
+
609
+ if not webui_manager.bu_agent:
610
+ logger.info(f"Initializing new agent for task: {task}")
611
+ if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
612
+ raise ValueError(
613
+ "Browser or Context not initialized, cannot create agent."
614
+ )
615
+ webui_manager.bu_agent = BrowserUseAgent(
616
+ task=task,
617
+ llm=main_llm,
618
+ browser=webui_manager.bu_browser,
619
+ browser_context=webui_manager.bu_browser_context,
620
+ controller=webui_manager.bu_controller,
621
+ register_new_step_callback=step_callback_wrapper,
622
+ register_done_callback=done_callback_wrapper,
623
+ use_vision=use_vision,
624
+ override_system_message=override_system_prompt,
625
+ extend_system_message=extend_system_prompt,
626
+ max_input_tokens=max_input_tokens,
627
+ max_actions_per_step=max_actions,
628
+ tool_calling_method=tool_calling_method,
629
+ planner_llm=planner_llm,
630
+ use_vision_for_planner=planner_use_vision if planner_llm else False,
631
+ source="webui",
632
+ )
633
+ webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
634
+ webui_manager.bu_agent.settings.generate_gif = gif_path
635
+ else:
636
+ webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
637
+ webui_manager.bu_agent.add_new_task(task)
638
+ webui_manager.bu_agent.settings.generate_gif = gif_path
639
+ webui_manager.bu_agent.browser = webui_manager.bu_browser
640
+ webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context
641
+ webui_manager.bu_agent.controller = webui_manager.bu_controller
642
+
643
+ # --- 6. Run Agent Task and Stream Updates ---
644
+ agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
645
+ agent_task = asyncio.create_task(agent_run_coro)
646
+ webui_manager.bu_current_task = agent_task # Store the task
647
+
648
+ last_chat_len = len(webui_manager.bu_chat_history)
649
+ while not agent_task.done():
650
+ is_paused = webui_manager.bu_agent.state.paused
651
+ is_stopped = webui_manager.bu_agent.state.stopped
652
+
653
+ # Check for pause state
654
+ if is_paused:
655
+ yield {
656
+ pause_resume_button_comp: gr.update(
657
+ value="▶️ Resume", interactive=True
658
+ ),
659
+ stop_button_comp: gr.update(interactive=True),
660
+ }
661
+ # Wait until pause is released or task is stopped/done
662
+ while is_paused and not agent_task.done():
663
+ # Re-check agent state in loop
664
+ is_paused = webui_manager.bu_agent.state.paused
665
+ is_stopped = webui_manager.bu_agent.state.stopped
666
+ if is_stopped: # Stop signal received while paused
667
+ break
668
+ await asyncio.sleep(0.2)
669
+
670
+ if (
671
+ agent_task.done() or is_stopped
672
+ ): # If stopped or task finished while paused
673
+ break
674
+
675
+ # If resumed, yield UI update
676
+ yield {
677
+ pause_resume_button_comp: gr.update(
678
+ value="⏸️ Pause", interactive=True
679
+ ),
680
+ run_button_comp: gr.update(
681
+ value="⏳ Running...", interactive=False
682
+ ),
683
+ }
684
+
685
+ # Check if agent stopped itself or stop button was pressed (which sets agent.state.stopped)
686
+ if is_stopped:
687
+ logger.info("Agent has stopped (internally or via stop button).")
688
+ if not agent_task.done():
689
+ # Ensure the task coroutine finishes if agent just set flag
690
+ try:
691
+ await asyncio.wait_for(
692
+ agent_task, timeout=1.0
693
+ ) # Give it a moment to exit run()
694
+ except asyncio.TimeoutError:
695
+ logger.warning(
696
+ "Agent task did not finish quickly after stop signal, cancelling."
697
+ )
698
+ agent_task.cancel()
699
+ except Exception: # Catch task exceptions if it errors on stop
700
+ pass
701
+ break # Exit the streaming loop
702
+
703
+ # Check if agent is asking for help (via response_event)
704
+ update_dict = {}
705
+ if webui_manager.bu_response_event is not None:
706
+ update_dict = {
707
+ business_name_comp: gr.update(
708
+ placeholder="Agent needs help. Enter response and submit.",
709
+ interactive=True,
710
+ ),
711
+ business_website_comp: gr.update(
712
+ placeholder="Agent needs help. Enter response and submit.",
713
+ interactive=True,
714
+ ),
715
+ business_type_comp: gr.update(
716
+ placeholder="Agent needs help. Enter response and submit.",
717
+ interactive=True,
718
+ ),
719
+ additional_info_comp: gr.update(
720
+ placeholder="Agent needs help. Enter response and submit.",
721
+ interactive=True,
722
+ ),
723
+ run_button_comp: gr.update(
724
+ value="✔️ Submit Response", interactive=True
725
+ ),
726
+ pause_resume_button_comp: gr.update(interactive=False),
727
+ stop_button_comp: gr.update(interactive=False),
728
+ chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
729
+ }
730
+ last_chat_len = len(webui_manager.bu_chat_history)
731
+ yield update_dict
732
+ # Wait until response is submitted or task finishes
733
+ while (
734
+ webui_manager.bu_response_event is not None
735
+ and not agent_task.done()
736
+ ):
737
+ await asyncio.sleep(0.2)
738
+ # Restore UI after response submitted or if task ended unexpectedly
739
+ if not agent_task.done():
740
+ yield {
741
+ business_name_comp: gr.update(
742
+ placeholder="Enter business name", interactive=False
743
+ ),
744
+ business_website_comp: gr.update(
745
+ placeholder="Enter business website", interactive=False
746
+ ),
747
+ business_type_comp: gr.update(
748
+ placeholder="Enter business type", interactive=False
749
+ ),
750
+ additional_info_comp: gr.update(
751
+ placeholder="Enter additional information", interactive=False
752
+ ),
753
+ run_button_comp: gr.update(
754
+ value="⏳ Running...", interactive=False
755
+ ),
756
+ pause_resume_button_comp: gr.update(interactive=True),
757
+ stop_button_comp: gr.update(interactive=True),
758
+ }
759
+ else:
760
+ break # Task finished while waiting for response
761
+
762
+ # Update Chatbot if new messages arrived via callbacks
763
+ if len(webui_manager.bu_chat_history) > last_chat_len:
764
+ update_dict[chatbot_comp] = gr.update(
765
+ value=webui_manager.bu_chat_history
766
+ )
767
+ last_chat_len = len(webui_manager.bu_chat_history)
768
+
769
+ # Update Browser View
770
+ if headless and webui_manager.bu_browser_context:
771
+ try:
772
+ screenshot_b64 = (
773
+ await webui_manager.bu_browser_context.take_screenshot()
774
+ )
775
+ if screenshot_b64:
776
+ html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
777
+ update_dict[browser_view_comp] = gr.update(
778
+ value=html_content, visible=True
779
+ )
780
+ else:
781
+ html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
782
+ update_dict[browser_view_comp] = gr.update(
783
+ value=html_content, visible=True
784
+ )
785
+ except Exception as e:
786
+ logger.debug(f"Failed to capture screenshot: {e}")
787
+ update_dict[browser_view_comp] = gr.update(
788
+ value="<div style='...'>Error loading view...</div>",
789
+ visible=True,
790
+ )
791
+ else:
792
+ update_dict[browser_view_comp] = gr.update(visible=False)
793
+
794
+ # Yield accumulated updates
795
+ if update_dict:
796
+ yield update_dict
797
+
798
+ await asyncio.sleep(0.1) # Polling interval
799
+
800
+ # --- 7. Task Finalization ---
801
+ webui_manager.bu_agent.state.paused = False
802
+ webui_manager.bu_agent.state.stopped = False
803
+ final_update = {}
804
+ try:
805
+ logger.info("Agent task completing...")
806
+ # Await the task ensure completion and catch exceptions if not already caught
807
+ if not agent_task.done():
808
+ await agent_task # Retrieve result/exception
809
+ elif agent_task.exception(): # Check if task finished with exception
810
+ agent_task.result() # Raise the exception to be caught below
811
+ logger.info("Agent task completed processing.")
812
+
813
+ logger.info(f"Explicitly saving agent history to: {history_file}")
814
+ webui_manager.bu_agent.save_history(history_file)
815
+
816
+ if os.path.exists(history_file):
817
+ final_update[history_file_comp] = gr.File(value=history_file)
818
+
819
+ if gif_path and os.path.exists(gif_path):
820
+ logger.info(f"GIF found at: {gif_path}")
821
+ final_update[gif_comp] = gr.Image(value=gif_path)
822
+
823
+ # Update task metrics display if metrics are available
824
+ task_metrics_display_comp = webui_manager.get_component_by_id("browser_use_agent.task_metrics_display")
825
+ if hasattr(webui_manager, 'bu_task_metrics') and webui_manager.bu_task_metrics:
826
+ # If we have metrics but no screenshot, try to get the latest screenshot
827
+ if not webui_manager.bu_task_metrics.get('screenshot') and webui_manager.bu_browser_context:
828
+ try:
829
+ final_screenshot = await webui_manager.bu_browser_context.take_screenshot()
830
+ if final_screenshot:
831
+ webui_manager.bu_task_metrics['screenshot'] = final_screenshot
832
+ except Exception as e:
833
+ logger.warning(f"Failed to capture final screenshot for metrics: {e}")
834
+
835
+ # Format the metrics for display
836
+ metrics_md = format_task_metrics(webui_manager.bu_task_metrics)
837
+ final_update[task_metrics_display_comp] = gr.update(value=metrics_md)
838
+
839
+ except asyncio.CancelledError:
840
+ logger.info("Agent task was cancelled.")
841
+ if not any(
842
+ "Cancelled" in msg.get("content", "")
843
+ for msg in webui_manager.bu_chat_history
844
+ if msg.get("role") == "assistant"
845
+ ):
846
+ webui_manager.bu_chat_history.append(
847
+ {"role": "assistant", "content": "**Task Cancelled**."}
848
+ )
849
+ final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
850
+ except Exception as e:
851
+ logger.error(f"Error during agent execution: {e}", exc_info=True)
852
+ error_message = (
853
+ f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
854
+ )
855
+ if not any(
856
+ error_message in msg.get("content", "")
857
+ for msg in webui_manager.bu_chat_history
858
+ if msg.get("role") == "assistant"
859
+ ):
860
+ webui_manager.bu_chat_history.append(
861
+ {"role": "assistant", "content": error_message}
862
+ )
863
+ final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
864
+ gr.Error(f"Agent execution failed: {e}")
865
+
866
+ finally:
867
+ webui_manager.bu_current_task = None # Clear the task reference
868
+
869
+ # Close browser/context if requested
870
+ if should_close_browser_on_finish:
871
+ if webui_manager.bu_browser_context:
872
+ logger.info("Closing browser context after task.")
873
+ await webui_manager.bu_browser_context.close()
874
+ webui_manager.bu_browser_context = None
875
+ if webui_manager.bu_browser:
876
+ logger.info("Closing browser after task.")
877
+ await webui_manager.bu_browser.close()
878
+ webui_manager.bu_browser = None
879
+
880
+ # --- 8. Final UI Update ---
881
+ final_update.update(
882
+ {
883
+ business_name_comp: gr.update(
884
+ value="",
885
+ interactive=True,
886
+ placeholder="Enter business name",
887
+ ),
888
+ business_website_comp: gr.update(
889
+ value="",
890
+ interactive=True,
891
+ placeholder="Enter business website",
892
+ ),
893
+ business_type_comp: gr.update(interactive=True),
894
+ additional_info_comp: gr.update(
895
+ value="",
896
+ interactive=True,
897
+ placeholder="Enter additional information",
898
+ ),
899
+ run_button_comp: gr.update(value="▶️ Start Analysis", interactive=True),
900
+ stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
901
+ pause_resume_button_comp: gr.update(
902
+ value="⏸️ Pause", interactive=False
903
+ ),
904
+ clear_button_comp: gr.update(interactive=True),
905
+ # Ensure final chat history is shown
906
+ chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
907
+ }
908
+ )
909
+ yield final_update
910
+
911
+ except Exception as e:
912
+ # Catch errors during setup (before agent run starts)
913
+ logger.error(f"Error setting up agent task: {e}", exc_info=True)
914
+ webui_manager.bu_current_task = None # Ensure state is reset
915
+ yield {
916
+ business_name_comp: gr.update(
917
+ interactive=True, placeholder="Enter business name"
918
+ ),
919
+ business_website_comp: gr.update(
920
+ interactive=True, placeholder="Enter business website"
921
+ ),
922
+ business_type_comp: gr.update(interactive=True),
923
+ additional_info_comp: gr.update(
924
+ interactive=True, placeholder="Enter additional information"
925
+ ),
926
+ run_button_comp: gr.update(value="▶️ Start Analysis", interactive=True),
927
+ stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
928
+ pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
929
+ clear_button_comp: gr.update(interactive=True),
930
+ chatbot_comp: gr.update(
931
+ value=webui_manager.bu_chat_history
932
+ + [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
933
+ ),
934
+ }
935
+
936
+
937
+ # --- Button Click Handlers --- (Need access to webui_manager)
938
+
939
+
940
async def handle_submit(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
):
    """Handle clicks on the main 'Start Analysis' button.

    Reads the business-information form, builds the standardized task text,
    and then does exactly one of:
      (a) answers a pending agent assistance request,
      (b) refuses because a task is already running, or
      (c) starts a new agent run and streams its UI updates.

    Args:
        webui_manager: Shared manager holding components and agent state.
        components: Mapping of Gradio component -> current form value.

    Yields:
        Dicts of Gradio component updates.
    """
    # Resolve the form components registered under the "browser_use_agent" tab.
    business_name_comp = webui_manager.get_component_by_id("browser_use_agent.business_name")
    business_website_comp = webui_manager.get_component_by_id("browser_use_agent.business_website")
    business_type_comp = webui_manager.get_component_by_id("browser_use_agent.business_type")
    additional_info_comp = webui_manager.get_component_by_id("browser_use_agent.additional_info")

    # Gradio can deliver None for an untouched textbox; the dict-get default
    # only covers a *missing* key, so coalesce before .strip() to avoid
    # AttributeError on empty fields.
    business_name = (components.get(business_name_comp) or "").strip()
    business_website = (components.get(business_website_comp) or "").strip()
    business_type = components.get(business_type_comp) or "Retail"
    additional_info = (components.get(additional_info_comp) or "").strip()

    if not business_name:
        gr.Warning("Please enter a business name.")
        yield {business_name_comp: gr.update(value=business_name)}
        return

    # Generate the standardized task using our template
    task = create_business_task(
        business_name,
        business_type,
        business_website,
        additional_info,
    )

    # Check if waiting for user assistance
    if webui_manager.bu_response_event and not webui_manager.bu_response_event.is_set():
        logger.info("User submitted assistance")
        # NOTE(review): the form contents are ignored here and a fixed
        # "continue" response is handed to the waiting agent — confirm this
        # is the intended assistance behavior.
        webui_manager.bu_user_help_response = "Continue with the current task."
        webui_manager.bu_response_event.set()
        # UI updates handled by the main loop reacting to the event being set
        yield {
            business_name_comp: gr.update(
                interactive=False,
            ),
            business_website_comp: gr.update(
                interactive=False,
            ),
            business_type_comp: gr.update(
                interactive=False,
            ),
            additional_info_comp: gr.update(
                interactive=False,
            ),
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(value="⏳ Running...", interactive=False),
        }
    # Check if a task is currently running (using _current_task)
    elif webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
        logger.warning(
            "Start button clicked while agent is already running and not asking for help."
        )
        gr.Info("Agent is currently running. Please wait or use Stop/Pause.")
        yield {}  # No change
    else:
        # Store the generated task text in the business-name slot so that
        # run_agent_task picks it up as the task to execute.
        components[business_name_comp] = task

        # Handle submission for a new task
        logger.info(f"Starting analysis for business: {business_name}")

        # Update chat history with the business information
        webui_manager.bu_chat_history.append({"role": "user", "content": task})

        # Run the task using our agent, forwarding its streamed UI updates.
        async for update in run_agent_task(webui_manager, components):
            yield update
1011
+
1012
+
1013
async def handle_stop(webui_manager: WebuiManager):
    """Handle clicks on the 'Stop' button.

    Signals a running agent to stop via its internal state flags and returns
    the matching button updates. If nothing is running, the buttons are
    simply reset to their idle configuration.
    """
    logger.info("Stop button clicked.")

    def comp(suffix):
        # Shorthand for looking up a component on this tab.
        return webui_manager.get_component_by_id(f"browser_use_agent.{suffix}")

    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task
    running = bool(agent) and bool(task) and not task.done()

    if not running:
        logger.warning("Stop clicked but agent is not running or task is already done.")
        # Reset UI just in case it's stuck
        return {
            comp("run_button"): gr.update(interactive=True),
            comp("stop_button"): gr.update(interactive=False),
            comp("pause_resume_button"): gr.update(interactive=False),
            comp("clear_button"): gr.update(interactive=True),
        }

    # Flip the agent's internal flags; the polling loop in run_agent_task
    # observes them and winds the task down.
    agent.state.stopped = True
    agent.state.paused = False  # ensure a paused agent can actually exit
    return {
        comp("stop_button"): gr.update(interactive=False, value="⏹️ Stopping..."),
        comp("pause_resume_button"): gr.update(interactive=False),
        comp("run_button"): gr.update(interactive=False),
    }
1051
+
1052
+
1053
async def handle_pause_resume(webui_manager: WebuiManager):
    """Handle clicks on the 'Pause/Resume' toggle button.

    Pauses a running agent, or resumes a paused one, and returns an
    optimistic label update for the button. No-op when nothing is running.
    """
    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task
    button = webui_manager.get_component_by_id(
        "browser_use_agent.pause_resume_button"
    )

    if not (agent and task and not task.done()):
        logger.warning(
            "Pause/Resume clicked but agent is not running or doesn't support state."
        )
        return {}  # No change

    if agent.state.paused:
        logger.info("Resume button clicked.")
        agent.resume()
        # Optimistic update; the main loop refreshes the true state shortly.
        return {button: gr.update(value="⏸️ Pause", interactive=True)}

    logger.info("Pause button clicked.")
    agent.pause()
    # Optimistic update — label flips to Resume immediately.
    return {button: gr.update(value="▶️ Resume", interactive=True)}
1081
+
1082
+
1083
async def handle_clear(webui_manager: WebuiManager):
    """Handle clicks on the 'Clear' button.

    Cancels any in-flight agent task, tears down the controller/agent, wipes
    the per-run state on the manager, and returns updates that reset every
    tab component to its idle defaults.
    """
    logger.info("Clear button clicked.")

    # Stop any running task first
    current = webui_manager.bu_current_task
    if current and not current.done():
        logger.info("Clearing requires stopping the current task.")
        webui_manager.bu_agent.stop()
        current.cancel()
        try:
            await asyncio.wait_for(current, timeout=2.0)  # Wait briefly
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass
        except Exception as e:
            logger.warning(f"Error stopping task on clear: {e}")
    webui_manager.bu_current_task = None

    # Shut down the MCP client before dropping the controller reference.
    if webui_manager.bu_controller:
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None
    webui_manager.bu_agent = None

    # Reset state stored in manager
    webui_manager.bu_chat_history = []
    webui_manager.bu_response_event = None
    webui_manager.bu_user_help_response = None
    webui_manager.bu_agent_task_id = None
    webui_manager.bu_task_metrics = None  # Clear task metrics

    logger.info("Agent state and browser resources cleared.")

    def comp(suffix):
        # Shorthand for looking up a component on this tab.
        return webui_manager.get_component_by_id(f"browser_use_agent.{suffix}")

    # Reset UI components to their idle defaults.
    return {
        comp("chatbot"): gr.update(value=[]),
        comp("business_name"): gr.update(value="", interactive=True),
        comp("business_website"): gr.update(value="", interactive=True),
        comp("business_type"): gr.update(value="Retail", interactive=True),
        comp("additional_info"): gr.update(value="", interactive=True),
        comp("agent_history_file"): gr.update(value=None),
        comp("recording_gif"): gr.update(value=None),
        comp("browser_view"): gr.update(
            value="<div style='...'>Browser Cleared</div>"
        ),
        comp("run_button"): gr.update(value="▶️ Start Analysis", interactive=True),
        comp("stop_button"): gr.update(interactive=False),
        comp("pause_resume_button"): gr.update(value="⏸️ Pause", interactive=False),
        comp("clear_button"): gr.update(interactive=True),
        comp("task_metrics_display"): gr.update(
            value="No task metrics available yet. Run a task to see metrics here."
        ),
    }
1157
+
1158
+
1159
+ # --- Tab Creation Function ---
1160
+
1161
+
1162
def create_browser_use_agent_tab(webui_manager: WebuiManager):
    """
    Create the run agent tab with business-focused UI.

    Builds the chatbot, business-information form, control buttons, live
    browser view, task-metrics panel, and output widgets; registers them on
    the manager under the "browser_use_agent" prefix; and wires the button
    click events to the async handler functions above.

    Args:
        webui_manager: Shared manager that stores components and agent state.
    """
    webui_manager.init_browser_use_agent()

    # Initialize task metrics if not already present
    if not hasattr(webui_manager, 'bu_task_metrics'):
        webui_manager.bu_task_metrics = None

    # --- Define UI Components ---
    tab_components = {}
    with gr.Column():
        # Conversation pane showing user tasks and agent step messages.
        chatbot = gr.Chatbot(
            lambda: webui_manager.bu_chat_history,  # Load history dynamically
            elem_id="browser_use_chatbot",
            label="Agent Interaction",
            type="messages",
            height=600,
            show_copy_button=True,
        )

        # Business information form
        with gr.Column(elem_id="business_form"):
            gr.Markdown("### Business Information")
            business_name = gr.Textbox(
                label="Business Name",
                placeholder="Enter business name",
                elem_id="business_name",
            )
            business_website = gr.Textbox(
                label="Business Website (optional)",
                placeholder="https://www.example.com",
                elem_id="business_website",
            )
            business_type = gr.Dropdown(
                label="Business Type",
                choices=["Retail", "Restaurant", "Service", "Healthcare", "Technology", "Other"],
                value="Retail",
                elem_id="business_type",
            )
            additional_info = gr.Textbox(
                label="Additional Information (optional)",
                placeholder="Any specific details about the business that might help the agent",
                lines=2,
                elem_id="additional_info",
            )

        # Run-control buttons; Stop/Pause start disabled until a task runs.
        with gr.Row():
            stop_button = gr.Button(
                "⏹️ Stop", interactive=False, variant="stop", scale=2
            )
            pause_resume_button = gr.Button(
                "⏸️ Pause", interactive=False, variant="secondary", scale=2, visible=True
            )
            clear_button = gr.Button(
                "🗑️ Clear", interactive=True, variant="secondary", scale=2
            )
            run_button = gr.Button("▶️ Start Analysis", variant="primary", scale=3)

        # Live screenshot stream; only shown when running headless.
        browser_view = gr.HTML(
            value="<div style='width:100%; height:50vh; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser View (Requires Headless=True)</p></div>",
            label="Browser Live View",
            elem_id="browser_view",
            visible=False,
        )

        # Task Metrics Section
        with gr.Column(visible=True) as task_metrics_container:
            gr.Markdown("### Task Metrics", elem_id="task_metrics_heading")

            # Rendered lazily so the latest metrics appear on page refresh.
            task_metrics_display = gr.Markdown(
                value=lambda: format_task_metrics(webui_manager.bu_task_metrics),
                elem_id="task_metrics_display",
            )

        with gr.Column():
            gr.Markdown("### Task Outputs")
            agent_history_file = gr.File(label="Agent History JSON", interactive=False)
            recording_gif = gr.Image(
                label="Task Recording GIF",
                format="gif",
                interactive=False,
                type="filepath",
            )

    # --- Store Components in Manager ---
    tab_components.update(
        dict(
            chatbot=chatbot,
            business_name=business_name,
            business_website=business_website,
            business_type=business_type,
            additional_info=additional_info,
            clear_button=clear_button,
            run_button=run_button,
            stop_button=stop_button,
            pause_resume_button=pause_resume_button,
            agent_history_file=agent_history_file,
            recording_gif=recording_gif,
            browser_view=browser_view,
            task_metrics_display=task_metrics_display,
        )
    )
    webui_manager.add_components(
        "browser_use_agent", tab_components
    )  # Use "browser_use_agent" as tab_name prefix

    # Submit needs every managed component (it also reads other tabs'
    # settings); outputs are restricted to this tab's components.
    all_managed_components = set(
        webui_manager.get_components()
    )  # Get all components known to manager
    run_tab_outputs = list(tab_components.values())

    async def submit_wrapper(
        components_dict: Dict[Component, Any],
    ) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_submit that yields its results."""
        async for update in handle_submit(webui_manager, components_dict):
            yield update

    async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_stop."""
        update_dict = await handle_stop(webui_manager)
        yield update_dict

    async def pause_resume_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_pause_resume."""
        update_dict = await handle_pause_resume(webui_manager)
        yield update_dict

    async def clear_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_clear."""
        update_dict = await handle_clear(webui_manager)
        yield update_dict

    # --- Connect Event Handlers using the Wrappers ---
    run_button.click(
        fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
    )
    stop_button.click(fn=stop_wrapper, inputs=None, outputs=run_tab_outputs)
    pause_resume_button.click(
        fn=pause_resume_wrapper, inputs=None, outputs=run_tab_outputs
    )
    clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs)
src/webui/components/deep_research_agent_tab.py ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio.components import Component
3
+ from functools import partial
4
+
5
+ from src.webui.webui_manager import WebuiManager
6
+ from src.utils import config
7
+ import logging
8
+ import os
9
+ from typing import Any, Dict, AsyncGenerator, Optional, Tuple, Union
10
+ import asyncio
11
+ import json
12
+ from src.agent.deep_research.deep_research_agent import DeepResearchAgent
13
+ from src.utils import llm_provider
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
async def _initialize_llm(provider: Optional[str], model_name: Optional[str], temperature: float,
                          base_url: Optional[str], api_key: Optional[str], num_ctx: Optional[int] = None):
    """Construct an LLM instance from the given provider settings.

    Returns None when either the provider or the model name is missing, and
    also when construction fails (the failure is logged and surfaced to the
    user as a Gradio warning).
    """
    if not provider or not model_name:
        logger.info("LLM Provider or Model Name not specified, LLM will be None.")
        return None

    try:
        logger.info(f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}")
        # Delegate the actual construction to the shared provider factory.
        return llm_provider.get_llm_model(
            provider=provider,
            model_name=model_name,
            temperature=temperature,
            # Empty strings are normalized to None so the factory applies its defaults.
            base_url=base_url or None,
            api_key=api_key or None,
            # num_ctx is only meaningful for Ollama models.
            num_ctx=num_ctx if provider == "ollama" else None
        )
    except Exception as e:
        logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
        gr.Warning(
            f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}")
        return None
41
+
42
+
43
+ def _read_file_safe(file_path: str) -> Optional[str]:
44
+ """Safely read a file, returning None if it doesn't exist or on error."""
45
+ if not os.path.exists(file_path):
46
+ return None
47
+ try:
48
+ with open(file_path, 'r', encoding='utf-8') as f:
49
+ return f.read()
50
+ except Exception as e:
51
+ logger.error(f"Error reading file {file_path}: {e}")
52
+ return None
53
+
54
+
55
+ # --- Deep Research Agent Specific Logic ---
56
+
57
async def run_deep_research(webui_manager: WebuiManager, components: Dict[Component, Any]) -> AsyncGenerator[
    Dict[Component, Any], None]:
    """Handles initializing and running the DeepResearchAgent.

    This is a long-running Gradio event generator: it repeatedly yields dicts
    mapping components to gr.update(...) payloads.  Progress is surfaced by
    polling the agent's research_plan.md on disk; when the agent task ends,
    the final report.md (or the report embedded in the agent's result dict)
    is shown.  The finally block always re-enables the input controls.
    """

    # --- Get Components ---
    # Component handles are resolved by id so the handler works with the
    # flat `components` dict Gradio passes to event callbacks.
    research_task_comp = webui_manager.get_component_by_id("deep_research_agent.research_task")
    resume_task_id_comp = webui_manager.get_component_by_id("deep_research_agent.resume_task_id")
    parallel_num_comp = webui_manager.get_component_by_id("deep_research_agent.parallel_num")
    save_dir_comp = webui_manager.get_component_by_id(
        "deep_research_agent.max_query")  # Note: component ID seems misnamed in original code
    start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
    stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
    markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
    markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")
    mcp_server_config_comp = webui_manager.get_component_by_id("deep_research_agent.mcp_server_config")

    # --- 1. Get Task and Settings ---
    task_topic = components.get(research_task_comp, "").strip()
    task_id_to_resume = components.get(resume_task_id_comp, "").strip() or None
    max_parallel_agents = int(components.get(parallel_num_comp, 1))
    base_save_dir = components.get(save_dir_comp, "./tmp/deep_research")
    mcp_server_config_str = components.get(mcp_server_config_comp)
    # NOTE(review): json.loads raises on malformed textbox content; the error
    # is only caught if it happens inside the try below — confirm upstream
    # validation of mcp_server_config.
    mcp_config = json.loads(mcp_server_config_str) if mcp_server_config_str else None

    if not task_topic:
        gr.Warning("Please enter a research task.")
        yield {start_button_comp: gr.update(interactive=True)}  # Re-enable start button
        return

    # Store base save dir for stop handler
    webui_manager.dr_save_dir = base_save_dir
    os.makedirs(base_save_dir, exist_ok=True)

    # --- 2. Initial UI Update ---
    # Lock every input while the run is in flight; the finally block undoes this.
    yield {
        start_button_comp: gr.update(value="⏳ Running...", interactive=False),
        stop_button_comp: gr.update(interactive=True),
        research_task_comp: gr.update(interactive=False),
        resume_task_id_comp: gr.update(interactive=False),
        parallel_num_comp: gr.update(interactive=False),
        save_dir_comp: gr.update(interactive=False),
        markdown_display_comp: gr.update(value="Starting research..."),
        markdown_download_comp: gr.update(value=None, interactive=False)
    }

    # State shared between the monitoring loop, finalization and `finally`.
    agent_task = None
    running_task_id = None
    plan_file_path = None
    report_file_path = None
    last_plan_content = None
    last_plan_mtime = 0

    try:
        # --- 3. Get LLM and Browser Config from other tabs ---
        # Access settings values via components dict, getting IDs from webui_manager
        def get_setting(tab: str, key: str, default: Any = None):
            # Missing components (tab not built yet) fall back to `default`.
            comp = webui_manager.id_to_component.get(f"{tab}.{key}")
            return components.get(comp, default) if comp else default

        # LLM Config (from agent_settings tab)
        llm_provider_name = get_setting("agent_settings", "llm_provider")
        llm_model_name = get_setting("agent_settings", "llm_model_name")
        # Floor the temperature at 0.5 — presumably to keep planning output
        # creative enough; TODO confirm this is intentional.
        llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
        llm_base_url = get_setting("agent_settings", "llm_base_url")
        llm_api_key = get_setting("agent_settings", "llm_api_key")
        ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")

        llm = await _initialize_llm(
            llm_provider_name, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
            ollama_num_ctx if llm_provider_name == "ollama" else None
        )
        if not llm:
            raise ValueError("LLM Initialization failed. Please check Agent Settings.")

        # Browser Config (from browser_settings tab)
        # Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
        browser_config_dict = {
            "headless": get_setting("browser_settings", "headless", False),
            "disable_security": get_setting("browser_settings", "disable_security", False),
            "browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
            "user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
            "window_width": int(get_setting("browser_settings", "window_w", 1280)),
            "window_height": int(get_setting("browser_settings", "window_h", 1100)),
            # Add other relevant fields if DeepResearchAgent accepts them
        }

        # --- 4. Initialize or Get Agent ---
        # The agent instance is cached on the manager and reused across runs.
        if not webui_manager.dr_agent:
            webui_manager.dr_agent = DeepResearchAgent(
                llm=llm,
                browser_config=browser_config_dict,
                mcp_server_config=mcp_config
            )
            logger.info("DeepResearchAgent initialized.")

        # --- 5. Start Agent Run ---
        agent_run_coro = webui_manager.dr_agent.run(
            topic=task_topic,
            task_id=task_id_to_resume,
            save_dir=base_save_dir,
            max_parallel_browsers=max_parallel_agents
        )
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.dr_current_task = agent_task

        # Wait briefly for the agent to start and potentially create the task ID/folder
        await asyncio.sleep(1.0)

        # Determine the actual task ID being used (agent sets this)
        running_task_id = webui_manager.dr_agent.current_task_id
        if not running_task_id:
            # Agent might not have set it yet, try to get from result later? Risky.
            # Or derive from resume_task_id if provided?
            running_task_id = task_id_to_resume
            if not running_task_id:
                logger.warning("Could not determine running task ID immediately.")
                # We can still monitor, but might miss initial plan if ID needed for path
            else:
                logger.info(f"Assuming task ID based on resume ID: {running_task_id}")
        else:
            logger.info(f"Agent started with Task ID: {running_task_id}")

        webui_manager.dr_task_id = running_task_id  # Store for stop handler

        # --- 6. Monitor Progress via research_plan.md ---
        # The plan file under <save_dir>/<task_id>/ is polled by mtime and
        # mirrored into the markdown display while the agent runs.
        if running_task_id:
            task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
            plan_file_path = os.path.join(task_specific_dir, "research_plan.md")
            report_file_path = os.path.join(task_specific_dir, "report.md")
            logger.info(f"Monitoring plan file: {plan_file_path}")
        else:
            logger.warning("Cannot monitor plan file: Task ID unknown.")
            plan_file_path = None
        last_plan_content = None
        while not agent_task.done():
            update_dict = {}
            update_dict[resume_task_id_comp] = gr.update(value=running_task_id)
            agent_stopped = getattr(webui_manager.dr_agent, 'stopped', False)
            if agent_stopped:
                logger.info("Stop signal detected from agent state.")
                break  # Exit monitoring loop

            # Check and update research plan display
            if plan_file_path:
                try:
                    # mtime of 0 means "file not present yet".
                    current_mtime = os.path.getmtime(plan_file_path) if os.path.exists(plan_file_path) else 0
                    if current_mtime > last_plan_mtime:
                        logger.info(f"Detected change in {plan_file_path}")
                        plan_content = _read_file_safe(plan_file_path)
                        if last_plan_content is None or (
                                plan_content is not None and plan_content != last_plan_content):
                            update_dict[markdown_display_comp] = gr.update(value=plan_content)
                            last_plan_content = plan_content
                            last_plan_mtime = current_mtime
                        elif plan_content is None:
                            # File might have been deleted or became unreadable
                            last_plan_mtime = 0  # Reset to force re-read attempt later
                except Exception as e:
                    logger.warning(f"Error checking/reading plan file {plan_file_path}: {e}")
                    # Avoid continuous logging for the same error
                    await asyncio.sleep(2.0)

            # Yield updates if any
            if update_dict:
                yield update_dict

            await asyncio.sleep(1.0)  # Check file changes every second

        # --- 7. Task Finalization ---
        logger.info("Agent task processing finished. Awaiting final result...")
        final_result_dict = await agent_task  # Get result or raise exception
        logger.info(f"Agent run completed. Result keys: {final_result_dict.keys() if final_result_dict else 'None'}")

        # Try to get task ID from result if not known before
        if not running_task_id and final_result_dict and 'task_id' in final_result_dict:
            running_task_id = final_result_dict['task_id']
            webui_manager.dr_task_id = running_task_id
            task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
            report_file_path = os.path.join(task_specific_dir, "report.md")
            logger.info(f"Task ID confirmed from result: {running_task_id}")

        # Prefer the on-disk report, then an inline 'report' in the result
        # dict, then a "not found" placeholder.
        final_ui_update = {}
        if report_file_path and os.path.exists(report_file_path):
            logger.info(f"Loading final report from: {report_file_path}")
            report_content = _read_file_safe(report_file_path)
            if report_content:
                final_ui_update[markdown_display_comp] = gr.update(value=report_content)
                final_ui_update[markdown_download_comp] = gr.File(value=report_file_path,
                                                                 label=f"Report ({running_task_id}.md)",
                                                                 interactive=True)
            else:
                final_ui_update[markdown_display_comp] = gr.update(
                    value="# Research Complete\n\n*Error reading final report file.*")
        elif final_result_dict and 'report' in final_result_dict:
            logger.info("Using report content directly from agent result.")
            # If agent directly returns report content
            final_ui_update[markdown_display_comp] = gr.update(value=final_result_dict['report'])
            # Cannot offer download if only content is available
            final_ui_update[markdown_download_comp] = gr.update(value=None, label="Download Research Report",
                                                               interactive=False)
        else:
            logger.warning("Final report file not found and not in result dict.")
            final_ui_update[markdown_display_comp] = gr.update(value="# Research Complete\n\n*Final report not found.*")

        yield final_ui_update


    except Exception as e:
        logger.error(f"Error during Deep Research Agent execution: {e}", exc_info=True)
        gr.Error(f"Research failed: {e}")
        yield {markdown_display_comp: gr.update(value=f"# Research Failed\n\n**Error:**\n```\n{e}\n```")}

    finally:
        # --- 8. Final UI Reset ---
        # Runs on success, failure and stop alike: drop the task references
        # and re-enable all inputs.
        webui_manager.dr_current_task = None  # Clear task reference
        webui_manager.dr_task_id = None  # Clear running task ID

        yield {
            start_button_comp: gr.update(value="▶️ Run", interactive=True),
            stop_button_comp: gr.update(interactive=False),
            research_task_comp: gr.update(interactive=True),
            resume_task_id_comp: gr.update(value="", interactive=True),
            parallel_num_comp: gr.update(interactive=True),
            save_dir_comp: gr.update(interactive=True),
            # Keep download button enabled if file exists
            markdown_download_comp: gr.update() if report_file_path and os.path.exists(report_file_path) else gr.update(
                interactive=False)
        }
285
+
286
+
287
async def stop_deep_research(webui_manager: WebuiManager) -> Dict[Component, Any]:
    """Handle the Stop button click for the Deep Research tab.

    If a research task is active, signal the agent to stop, wait briefly for
    it to flush files, and surface any partial report that already exists on
    disk; `run_deep_research`'s finally block performs the final UI reset.
    If no task is active, reset the input controls directly.

    Args:
        webui_manager: Shared UI state (agent, running task, task id, save dir).

    Returns:
        Mapping of Gradio components to their gr.update(...) payloads.
    """
    logger.info("Stop button clicked for Deep Research.")
    agent = webui_manager.dr_agent
    task = webui_manager.dr_current_task
    task_id = webui_manager.dr_task_id
    base_save_dir = webui_manager.dr_save_dir

    stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
    start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
    markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
    markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")

    final_update = {
        stop_button_comp: gr.update(interactive=False, value="⏹️ Stopping...")
    }

    if agent and task and not task.done():
        logger.info("Signalling DeepResearchAgent to stop.")
        try:
            # Assuming stop is synchronous or sets a flag quickly
            await agent.stop()
        except Exception as e:
            logger.error(f"Error calling agent.stop(): {e}")

        # The run_deep_research loop should detect the stop and exit.
        # We yield an intermediate "Stopping..." state. The final reset is done by run_deep_research.

        # Try to show the final report if available after stopping
        await asyncio.sleep(1.5)  # Give agent a moment to write final files potentially
        report_file_path = None
        if task_id and base_save_dir:
            report_file_path = os.path.join(base_save_dir, str(task_id), "report.md")

        if report_file_path and os.path.exists(report_file_path):
            report_content = _read_file_safe(report_file_path)
            if report_content:
                final_update[markdown_display_comp] = gr.update(
                    value=report_content + "\n\n---\n*Research stopped by user.*")
                final_update[markdown_download_comp] = gr.File(value=report_file_path, label=f"Report ({task_id}.md)",
                                                              interactive=True)
            else:
                final_update[markdown_display_comp] = gr.update(
                    value="# Research Stopped\n\n*Error reading final report file after stop.*")
        else:
            final_update[markdown_display_comp] = gr.update(value="# Research Stopped by User")

        # Keep start button disabled, run_deep_research finally block will re-enable it.
        final_update[start_button_comp] = gr.update(interactive=False)

    else:
        logger.warning("Stop clicked but no active research task found.")
        # Reset UI state just in case.
        # Fix: the original looked up "deep_research_agent.max_iteration",
        # an id that create_deep_research_agent_tab never registers; the
        # parallel-agents control is registered as "parallel_num".
        final_update = {
            start_button_comp: gr.update(interactive=True),
            stop_button_comp: gr.update(interactive=False),
            webui_manager.get_component_by_id("deep_research_agent.research_task"): gr.update(interactive=True),
            webui_manager.get_component_by_id("deep_research_agent.resume_task_id"): gr.update(interactive=True),
            webui_manager.get_component_by_id("deep_research_agent.parallel_num"): gr.update(interactive=True),
            webui_manager.get_component_by_id("deep_research_agent.max_query"): gr.update(interactive=True),
        }

    return final_update
350
+
351
+
352
async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
    """Reload the MCP server configuration from an uploaded JSON file.

    Any existing agent's MCP client is closed first, since the configuration
    it was built against is about to change.  The uploaded file is validated
    and pretty-printed for display in the config textbox.

    Args:
        mcp_file: Path of the uploaded file (None when the upload is cleared).
        webui_manager: Shared UI state holding the current agent, if any.

    Returns:
        Tuple of (textbox value or None, gr.update toggling textbox visibility).
    """
    if hasattr(webui_manager, "dr_agent") and webui_manager.dr_agent:
        logger.warning("⚠️ Close controller because mcp file has changed!")
        await webui_manager.dr_agent.close_mcp_client()

    if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
        logger.warning(f"{mcp_file} is not a valid MCP file.")
        return None, gr.update(visible=False)

    try:
        # Fix: read with an explicit encoding, and don't let a malformed JSON
        # upload raise out of the Gradio event handler — report and hide the
        # config textbox instead.
        with open(mcp_file, 'r', encoding='utf-8') as f:
            mcp_server = json.load(f)
    except (json.JSONDecodeError, OSError) as e:
        logger.error(f"Failed to load MCP config from {mcp_file}: {e}")
        return None, gr.update(visible=False)

    return json.dumps(mcp_server, indent=2), gr.update(visible=True)
368
+
369
+
370
def create_deep_research_agent_tab(webui_manager: WebuiManager):
    """
    Creates a deep research agent tab.

    Builds the MCP-config upload, task inputs, run/stop buttons and report
    display, registers them with the manager under the "deep_research_agent"
    prefix, and wires the start/stop/MCP-upload event handlers.
    """
    # Snapshot of components registered by earlier tabs.
    # NOTE(review): appears unused in this function — possibly kept for
    # parity with the other tab builders; confirm before removing.
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Row():
            mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
            # Hidden until a valid MCP JSON file is uploaded (see update_mcp_server).
            mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)

    with gr.Group():
        research_task = gr.Textbox(label="Research Task", lines=5,
                                   value="Give me a detailed travel plan to Switzerland from June 1st to 10th.",
                                   interactive=True)
        with gr.Row():
            resume_task_id = gr.Textbox(label="Resume Task ID", value="",
                                        interactive=True)
            parallel_num = gr.Number(label="Parallel Agent Num", value=1,
                                     precision=0,
                                     interactive=True)
            # NOTE(review): despite its name, "max_query" holds the research
            # save directory — the id is misnamed (run_deep_research reads it
            # as the save dir).
            max_query = gr.Textbox(label="Research Save Dir", value="./tmp/deep_research",
                                   interactive=True)
    with gr.Row():
        stop_button = gr.Button("⏹️ Stop", variant="stop", scale=2)
        start_button = gr.Button("▶️ Run", variant="primary", scale=3)
    with gr.Group():
        markdown_display = gr.Markdown(label="Research Report")
        markdown_download = gr.File(label="Download Research Report", interactive=False)
    # Register under the "deep_research_agent" prefix so handlers can look
    # components up by id (e.g. "deep_research_agent.research_task").
    tab_components.update(
        dict(
            research_task=research_task,
            parallel_num=parallel_num,
            max_query=max_query,
            start_button=start_button,
            stop_button=stop_button,
            markdown_display=markdown_display,
            markdown_download=markdown_download,
            resume_task_id=resume_task_id,
            mcp_json_file=mcp_json_file,
            mcp_server_config=mcp_server_config,
        )
    )
    webui_manager.add_components("deep_research_agent", tab_components)
    webui_manager.init_deep_research_agent()

    async def update_wrapper(mcp_file):
        """Wrapper for update_mcp_server that yields its result."""
        update_dict = await update_mcp_server(mcp_file, webui_manager)
        yield update_dict

    # NOTE(review): both outputs target mcp_server_config — the first return
    # value sets its text, the second toggles its visibility; verify this
    # duplicate-output wiring behaves as intended in the Gradio version used.
    mcp_json_file.change(
        update_wrapper,
        inputs=[mcp_json_file],
        outputs=[mcp_server_config, mcp_server_config]
    )

    dr_tab_outputs = list(tab_components.values())
    # Inputs include every registered component so run_deep_research can read
    # agent/browser settings from the other tabs.
    all_managed_inputs = set(webui_manager.get_components())

    # --- Define Event Handler Wrappers ---
    async def start_wrapper(comps: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
        async for update in run_deep_research(webui_manager, comps):
            yield update

    async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        update_dict = await stop_deep_research(webui_manager)
        yield update_dict

    # --- Connect Handlers ---
    start_button.click(
        fn=start_wrapper,
        inputs=all_managed_inputs,
        outputs=dr_tab_outputs
    )

    stop_button.click(
        fn=stop_wrapper,
        inputs=None,
        outputs=dr_tab_outputs
    )
src/webui/components/documentation_tab.py ADDED
@@ -0,0 +1,726 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio.components import Component
3
+ from src.webui.webui_manager import WebuiManager
4
+
5
+
6
+ def create_documentation_tab(webui_manager: WebuiManager):
7
+ """
8
+ Creates a documentation tab with detailed project analysis.
9
+ """
10
+ tab_components = {}
11
+
12
+ with gr.Group():
13
+ gr.Markdown(
14
+ """
15
+ # Browser Use WebUI Documentation
16
+
17
+ This documentation provides a comprehensive overview of the Browser Use WebUI project.
18
+ """,
19
+ elem_classes=["tab-header-text"],
20
+ )
21
+
22
+ with gr.Tabs() as doc_tabs:
23
+ with gr.TabItem("Project Overview"):
24
+ gr.Markdown(
25
+ """
26
+ ## Project Overview
27
+
28
+ Browser Use WebUI is a Gradio-based interface for controlling and interacting with web browsers using AI assistance.
29
+ It provides a user-friendly way to automate browser tasks and research using large language models.
30
+
31
+ ### Key Features
32
+
33
+ - **AI-Controlled Browser**: Control Chrome or other browsers with AI assistance
34
+ - **OpenAI LLM Support**: Compatible with OpenAI models including GPT-4 and GPT-3.5
35
+ - **Custom Browser Support**: Use your own browser with persistent sessions
36
+ - **Deep Research Agent**: Specialized agent for conducting in-depth web research
37
+
38
+ ### Recent Updates
39
+
40
+ As of the latest version, the system has been streamlined to support only OpenAI as the LLM provider. This change:
41
+
42
+ - Simplifies the codebase and reduces dependencies
43
+ - Focuses development efforts on optimizing the OpenAI integration
44
+ - Ensures consistent behavior across all agent interactions
45
+ - Improves reliability and reduces potential configuration issues
46
+
47
+ If you were using other LLM providers with previous versions, please update your configurations to use OpenAI.
48
+ """
49
+ )
50
+
51
+ with gr.TabItem("Submit Task Flow"):
52
+ gr.Markdown(
53
+ """
54
+ ## BrowserUse Agent: Submit Task Flow Documentation
55
+
56
+ This documentation provides a detailed overview of what happens when you click the "Submit Task" button in the BrowserUse agent tab.
57
+
58
+ ### Files Involved
59
+
60
+ - **browser_use_agent_tab.py**: Creates the UI for the BrowserUse agent tab and handles the submit task workflow.
61
+ - **webui_manager.py**: Maintains the state of the web UI and stores components and agent instances.
62
+ - **browser_use_agent.py**: Implements the core BrowserUse agent functionality for running tasks.
63
+ - **custom_controller.py**: Handles the execution of browser actions requested by the agent.
64
+ - **custom_browser.py**: Custom browser implementation for the BrowserUse agent.
65
+ - **custom_context.py**: Manages browser contexts for the BrowserUse agent.
66
+
67
+ ### Step-by-Step Process
68
+
69
+ #### Step 1: User Submits a Task
70
+
71
+ The process begins when a user enters a task in the text input field and clicks the "Submit Task" button, triggering the `handle_submit` function.
72
+
73
+ #### Step 2: Task Initialization
74
+
75
+ The `run_agent_task` function retrieves the user's task from UI components, updates the chat history, and initializes UI components for the task execution.
76
+
77
+ #### Step 3: Browser and Context Setup
78
+
79
+ The system initializes or reuses an existing browser instance and browser context, which provide the environment for the agent to interact with web pages.
80
+
81
+ #### Step 4: Agent Initialization
82
+
83
+ The system creates a new BrowserUseAgent instance or updates an existing one with the new task. It also registers callbacks for step updates and task completion.
84
+
85
+ #### Step 5: Task Execution
86
+
87
+ The system executes the agent's `run` method in a new task and waits for its completion, updating the UI with progress.
88
+
89
+ #### Step 6: BrowserUseAgent Run Method
90
+
91
+ The agent's `run` method is the core execution logic that performs the task through a series of steps, each interacting with the browser to accomplish the given task.
92
+
93
+ #### Step 7: Step Processing Callback
94
+
95
+ The `_handle_new_step` callback is called after each agent step, updating the UI with the latest screenshot and agent output.
96
+
97
+ #### Step 8: Task Completion Callback
98
+
99
+ The `_handle_done` callback is triggered when the agent completes the task (success or failure), updating the UI with the final results and metrics.
100
+
101
+ ### System Flow Diagram
102
+
103
+ ```
104
+ User submits task → Task initialization → Browser setup → Agent initialization
105
+
106
+ Task completion ← Agent run method ← Step processing callback ← Task execution
107
+ ```
108
+ """
109
+ )
110
+
111
+ with gr.TabItem("Architecture"):
112
+ gr.Markdown(
113
+ """
114
+ ## System Architecture
115
+
116
+ The project follows a modular architecture with clear separation of concerns:
117
+
118
+ ### Core Components
119
+
120
+ 1. **WebUI Module (`src/webui/`)**:
121
+ - Interface management using Gradio
122
+ - Tab components for different functionalities
123
+ - User input/output handling
124
+
125
+ 2. **Browser Module (`src/browser/`)**:
126
+ - Custom browser implementation extending browser-use library
127
+ - Browser context management
128
+ - Screenshot and session handling
129
+
130
+ 3. **Agent Module (`src/agent/`)**:
131
+ - Browser Use Agent: General-purpose browser automation
132
+ - Deep Research Agent: Specialized for research tasks
133
+ - Agent state and history management
134
+
135
+ 4. **Controller Module (`src/controller/`)**:
136
+ - Action registry for browser control
137
+ - MCP client integration
138
+ - Custom action implementations
139
+
140
+ 5. **Utils Module (`src/utils/`)**:
141
+ - OpenAI LLM integration
142
+ - Configuration helpers
143
+ - MCP client setup
144
+
145
+ ### Data Flow
146
+
147
+ 1. User inputs task via WebUI
148
+ 2. WebUI Manager initializes components
149
+ 3. Agent receives task and configures OpenAI LLM
150
+ 4. Browser is launched or connected
151
+ 5. Agent iteratively performs actions via controller
152
+ 6. Results display in WebUI with screenshots
153
+ """
154
+ )
155
+
156
+ with gr.TabItem("Browser Control"):
157
+ gr.Markdown(
158
+ """
159
+ ## Browser Control System
160
+
161
+ The browser control functionality is built on the browser-use library, with custom extensions:
162
+
163
+ ### Browser Features
164
+
165
+ - **Custom Browser Integration**: Connect to existing browser instances
166
+ - **Browser Context Management**: Create and manage browser contexts
167
+ - **Session Persistence**: Keep browser open between tasks
168
+ - **Screenshot Capture**: Take and display screenshots of browser state
169
+ - **DOM Interaction**: Interact with web page elements
170
+ - **Action Registry**: Comprehensive set of browser actions
171
+
172
+ ### Actions Supported
173
+
174
+ - Navigate to URLs
175
+ - Click elements
176
+ - Input text
177
+ - Extract content
178
+ - Scroll pages
179
+ - Search Google
180
+ - Wait for page load
181
+ - Handle alerts and dialogs
182
+ - Upload files
183
+ - And more through the registry system
184
+ """
185
+ )
186
+
187
+ with gr.TabItem("Agent System"):
188
+ gr.Markdown(
189
+ """
190
+ ## Agent System
191
+
192
+ The application provides two main agent types:
193
+
194
+ ### Browser Use Agent
195
+
196
+ Extends the base Agent class from browser-use library to provide:
197
+
198
+ - Task execution with dynamic tool selection
199
+ - LLM integration with multiple providers
200
+ - Browser control through registered actions
201
+ - Error handling and recovery
202
+ - Execution history tracking
203
+
204
+ ### Deep Research Agent
205
+
206
+ Specialized agent using LangGraph for:
207
+
208
+ - Research planning through LLM
209
+ - Web search and content extraction
210
+ - Information synthesis
211
+ - Structured research report generation
212
+ - Multi-browser parallel processing
213
+
214
+ ### Agent Components
215
+
216
+ - **State Management**: Track agent state during execution
217
+ - **History Recording**: Record steps and results
218
+ - **Output Formatting**: Format results for display
219
+ - **Tool Calling**: Different methods based on LLM capabilities
220
+ """
221
+ )
222
+
223
+ with gr.TabItem("LLM Integration"):
224
+ gr.Markdown(
225
+ """
226
+ ## LLM Integration
227
+
228
+ The system supports OpenAI LLM:
229
+
230
+ ### Supported Provider
231
+
232
+ - **OpenAI**: GPT-4o, GPT-4, GPT-3.5
233
+
234
+ ### Integration Features
235
+
236
+ - **Vision Support**: Vision capabilities with compatible models
237
+ - **Temperature Control**: Adjust randomness in model outputs
238
+ - **Context Length Management**: Handle different model context limits
239
+ - **API Key Management**: Secure handling of API credentials
240
+ - **Tool Calling Methods**: Different methods based on model capabilities (function_calling, json_mode, raw)
241
+ """
242
+ )
243
+
244
+ with gr.TabItem("Web UI Components"):
245
+ gr.Markdown(
246
+ """
247
+ ## Web UI Components
248
+
249
+ The interface is built with Gradio and organized into tabs:
250
+
251
+ ### Main Tabs
252
+
253
+ 1. **Agent Settings**: Configure OpenAI models and parameters
254
+ 2. **Browser Settings**: Set up browser preferences and options
255
+ 3. **Run Agent**: Execute browser tasks and view results
256
+ 4. **Agent Marketplace**: Access specialized agents like Deep Research
257
+ 5. **Documentation**: Comprehensive project documentation (you are here)
258
+ 6. **Load & Save Config**: Save and load UI configurations
259
+
260
+ ### Interface Features
261
+
262
+ - **Chatbot Interface**: View agent interactions and results
263
+ - **Task Input**: Submit tasks to the agent
264
+ - **Control Buttons**: Start, stop, pause, and clear agent execution
265
+ - **Configuration Forms**: Set up OpenAI and browser parameters
266
+ - **Results Display**: View agent output including screenshots
267
+ """
268
+ )
269
+
270
+ with gr.TabItem("API & Libraries"):
271
+ gr.Markdown(
272
+ """
273
+ ## Core Libraries & Dependencies
274
+
275
+ The project relies on several key libraries:
276
+
277
+ ### Primary Dependencies
278
+
279
+ - **browser-use**: Core browser automation library
280
+ - **gradio**: Web UI framework
281
+ - **langchain**: LLM integration framework
282
+ - **langgraph**: Graph-based workflows for agents
283
+ - **playwright**: Browser automation and control
284
+ - **pyperclip**: Clipboard interaction
285
+ - **dotenv**: Environment variable management
286
+
287
+ ### API Integration
288
+
289
+ - **LLM APIs**: OpenAI, Google, Azure, Anthropic, etc.
290
+ - **MCP (Model Context Protocol)**: Tool integration protocol
291
+ - **MainContentExtractor**: Web content extraction
292
+
293
+ ### Browser APIs
294
+
295
+ - **CDP (Chrome DevTools Protocol)**: Browser communication
296
+ - **WSS**: WebSocket connections for browser control
297
+ """
298
+ )
299
+
300
+ with gr.TabItem("File Structure"):
301
+ gr.Markdown(
302
+ """
303
+ ## Project File Structure
304
+
305
+ ```
306
+ web-ui/
307
+ ├── src/
308
+ │ ├── agent/
309
+ │ │ ├── browser_use/
310
+ │ │ │ └── browser_use_agent.py
311
+ │ │ └── deep_research/
312
+ │ │ └── deep_research_agent.py
313
+ │ ├── browser/
314
+ │ │ ├── custom_browser.py
315
+ │ │ └── custom_context.py
316
+ │ ├── controller/
317
+ │ │ └── custom_controller.py
318
+ │ ├── utils/
319
+ │ │ ├── config.py
320
+ │ │ ├── llm_provider.py
321
+ │ │ └── mcp_client.py
322
+ │ ├── webui/
323
+ │ │ ├── components/
324
+ │ │ │ ├── agent_settings_tab.py
325
+ │ │ │ ├── browser_settings_tab.py
326
+ │ │ │ ├── browser_use_agent_tab.py
327
+ │ │ │ ├── deep_research_agent_tab.py
328
+ │ │ │ ├── documentation_tab.py
329
+ │ │ │ └── load_save_config_tab.py
330
+ │ │ ├── interface.py
331
+ │ │ └── webui_manager.py
332
+ │ └── __init__.py
333
+ ├── assets/
334
+ ├── tmp/
335
+ ├── tests/
336
+ ├── .venv/
337
+ ├── webui.py
338
+ ├── Dockerfile
339
+ ├── docker-compose.yml
340
+ ├── requirements.txt
341
+ ├── setup.py
342
+ └── README.md
343
+ ```
344
+ """
345
+ )
346
+
347
+ with gr.TabItem("Setup & Usage"):
348
+ gr.Markdown(
349
+ """
350
+ ## Setup & Usage Guide
351
+
352
+ ### Installation
353
+
354
+ #### Local Installation
355
+
356
+ 1. Clone the repository
357
+ ```bash
358
+ git clone https://github.com/browser-use/web-ui.git
359
+ cd web-ui
360
+ ```
361
+
362
+ 2. Set up Python environment
363
+ ```bash
364
+ uv venv --python 3.11
365
+ source .venv/bin/activate # Linux/Mac
366
+ .venv\\Scripts\\activate # Windows
367
+ ```
368
+
369
+ 3. Install dependencies
370
+ ```bash
371
+ uv pip install -r requirements.txt
372
+ playwright install --with-deps
373
+ ```
374
+
375
+ 4. Configure environment
376
+ ```bash
377
+ cp .env.example .env
378
+ # Edit .env to add your API keys
379
+ ```
380
+
381
+ 5. Run the application
382
+ ```bash
383
+ python webui.py --ip 127.0.0.1 --port 7788
384
+ ```
385
+
386
+ #### Docker Installation
387
+
388
+ ```bash
389
+ docker compose up --build
390
+ ```
391
+
392
+ ### Usage Examples
393
+
394
+ 1. **Simple Web Search**
395
+ - Configure LLM in Agent Settings
396
+ - Configure browser in Browser Settings
397
+ - In Run Agent tab, enter: "Search for the latest news about AI"
398
+ - Click Submit Task
399
+
400
+ 2. **Deep Research**
401
+ - Configure LLM in Agent Settings
402
+ - Go to Agent Marketplace > Deep Research
403
+ - Enter research topic: "Advances in renewable energy in 2023"
404
+ - Click Run
405
+
406
+ 3. **Using Custom Browser**
407
+ - In Browser Settings, check "Use Own Browser"
408
+ - Configure paths to browser and user data
409
+ - Submit tasks as normal
410
+ """
411
+ )
412
+
413
+ with gr.TabItem("Source Code Analysis"):
414
+ gr.Markdown(
415
+ """
416
+ ## Detailed Source Code Analysis
417
+
418
+ This section provides a deep dive into the code structure and implementation details of key components.
419
+
420
+ ### WebUI Manager Class
421
+
422
+ The `WebuiManager` class in `src/webui/webui_manager.py` serves as the central component managing UI elements and application state:
423
+
424
+ ```python
425
+ class WebuiManager:
426
+ def __init__(self, settings_save_dir: str = "./tmp/webui_settings"):
427
+ self.id_to_component: dict[str, Component] = {}
428
+ self.component_to_id: dict[Component, str] = {}
429
+ self.settings_save_dir = settings_save_dir
430
+ os.makedirs(self.settings_save_dir, exist_ok=True)
431
+ ```
432
+
433
+ Key functions:
434
+ - `add_components()`: Registers UI components with unique IDs
435
+ - `get_component_by_id()`: Retrieves components using their ID
436
+ - `save_config()`: Serializes UI settings to JSON
437
+ - `load_config()`: Loads settings from JSON
438
+ - `init_browser_use_agent()`: Creates browser agent instances
439
+
440
+ ### Custom Browser Implementation
441
+
442
+ The `CustomBrowser` class in `src/browser/custom_browser.py` extends the base `Browser` class from the browser-use library:
443
+
444
+ ```python
445
+ class CustomBrowser(Browser):
446
+ async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
447
+ browser_config = self.config.model_dump() if self.config else {}
448
+ context_config = config.model_dump() if config else {}
449
+ merged_config = {**browser_config, **context_config}
450
+ return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
451
+ ```
452
+
453
+ Key features:
454
+ - Extends the browser-use Browser class
455
+ - Creates custom browser contexts
456
+ - Configures Chrome arguments for different environments
457
+ - Handles screen resolution and window dimensions
458
+
459
+ ### Browser Use Agent
460
+
461
+ The `BrowserUseAgent` class in `src/agent/browser_use/browser_use_agent.py` extends the Agent class:
462
+
463
+ ```python
464
+ class BrowserUseAgent(Agent):
465
+ def _set_tool_calling_method(self) -> ToolCallingMethod | None:
466
+ tool_calling_method = self.settings.tool_calling_method
467
+ if tool_calling_method == 'auto':
468
+ if is_model_without_tool_support(self.model_name):
469
+ return 'raw'
470
+ elif self.chat_model_library == 'ChatGoogleGenerativeAI':
471
+ return None
472
+ elif self.chat_model_library == 'ChatOpenAI':
473
+ return 'function_calling'
474
+ # Additional models...
475
+ ```
476
+
477
+ Key capabilities:
478
+ - Automatically selects tool calling method based on LLM
479
+ - Handles agent execution with configurable steps
480
+ - Provides pause/resume functionality
481
+ - Manages execution history and state
482
+ - Implements error handling and recovery
483
+
484
+ ### Deep Research Agent
485
+
486
+ The `DeepResearchAgent` class in `src/agent/deep_research/deep_research_agent.py` implements a specialized research agent:
487
+
488
+ ```python
489
+ class DeepResearchAgent:
490
+ def __init__(
491
+ self,
492
+ llm: Any,
493
+ browser_config: Dict[str, Any],
494
+ mcp_server_config: Optional[Dict[str, Any]] = None,
495
+ ):
496
+ # Initialize agent with LLM and browser config
497
+ ```
498
+
499
+ Key components:
500
+ - Uses LangGraph for structured research workflows
501
+ - Implements planning, research, and synthesis nodes
502
+ - Manages parallel browser instances for efficiency
503
+ - Generates structured research reports
504
+ - Handles task state persistence
505
+
506
+ ### Custom Controller
507
+
508
+ The `CustomController` class in `src/controller/custom_controller.py` extends the Controller class:
509
+
510
+ ```python
511
+ class CustomController(Controller):
512
+ def __init__(self, exclude_actions: list[str] = [],
513
+ output_model: Optional[Type[BaseModel]] = None,
514
+ ask_assistant_callback: Optional[...] = None):
515
+ super().__init__(exclude_actions=exclude_actions, output_model=output_model)
516
+ self._register_custom_actions()
517
+ self.ask_assistant_callback = ask_assistant_callback
518
+ self.mcp_client = None
519
+ self.mcp_server_config = None
520
+ ```
521
+
522
+ Key features:
523
+ - Registers custom browser actions
524
+ - Integrates with MCP (Model Context Protocol)
525
+ - Provides file upload capabilities
526
+ - Implements human assistance features
527
+ - Handles action execution with error management
528
+
529
+ ### UI Components
530
+
531
+ The UI is built using Gradio components:
532
+
533
+ ```python
534
+ def create_ui(theme_name="Ocean"):
535
+ with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js_func) as demo:
536
+ with gr.Tabs() as tabs:
537
+ with gr.TabItem("⚙️ Agent Settings"):
538
+ create_agent_settings_tab(ui_manager)
539
+ # Additional tabs...
540
+ ```
541
+
542
+ Key UI features:
543
+ - Modular tab-based interface
544
+ - Customizable themes
545
+ - Responsive layout
546
+ - Dark mode support
547
+ - Configuration persistence
548
+ """
549
+ )
550
+
551
+ with gr.TabItem("Technical Challenges"):
552
+ gr.Markdown(
553
+ """
554
+ ## Technical Challenges & Solutions
555
+
556
+ This section covers key technical challenges faced during development and the solutions implemented.
557
+
558
+ ### Browser Integration Challenges
559
+
560
+ **Challenge**: Connecting to existing browser instances with proper user profiles.
561
+
562
+ **Solution**: Custom implementation using CDP (Chrome DevTools Protocol) and WebSocket connections:
563
+
564
+ ```python
565
+ # Implementation in custom_browser.py
566
+ chrome_args = {
567
+ f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
568
+ *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
569
+ *(CHROME_HEADLESS_ARGS if self.config.headless else []),
570
+ # Additional args...
571
+ }
572
+
573
+ # Check existing port conflicts
574
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
575
+ if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
576
+ chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
577
+ ```
578
+
579
+ ### OpenAI LLM Integration
580
+
581
+ **Challenge**: Configuring and optimizing OpenAI models for browser automation.
582
+
583
+ **Solution**: Provider abstraction and method detection:
584
+
585
+ ```python
586
+ # In browser_use_agent.py
587
+ def _set_tool_calling_method(self) -> ToolCallingMethod | None:
588
+ tool_calling_method = self.settings.tool_calling_method
589
+ if tool_calling_method == 'auto':
590
+ if is_model_without_tool_support(self.model_name):
591
+ return 'raw'
592
+ else:
593
+ return 'function_calling'
594
+ ```
595
+
596
+ ### Execution State Management
597
+
598
+ **Challenge**: Maintaining agent state across steps and allowing pause/resume.
599
+
600
+ **Solution**: Custom execution loop with state management:
601
+
602
+ ```python
603
+ # In browser_use_agent.py
604
+ async def run(self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
605
+ on_step_end: AgentHookFunc | None = None) -> AgentHistoryList:
606
+
607
+ # Execution loop with state management
608
+ for step in range(max_steps):
609
+ # Check pause state
610
+ if self.state.paused:
611
+ signal_handler.wait_for_resume()
612
+ signal_handler.reset()
613
+
614
+ # Check for stop
615
+ if self.state.stopped:
616
+ logger.info('Agent stopped')
617
+ break
618
+
619
+ # Execute step with callbacks
620
+ if on_step_start is not None:
621
+ await on_step_start(self)
622
+
623
+ step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
624
+ await self.step(step_info)
625
+
626
+ if on_step_end is not None:
627
+ await on_step_end(self)
628
+ ```
629
+
630
+ ### Multi-Browser Research Orchestration
631
+
632
+ **Challenge**: Managing multiple parallel browser instances for research tasks.
633
+
634
+ **Solution**: LangGraph-based workflow with parallel task execution:
635
+
636
+ ```python
637
+ # In deep_research_agent.py
638
+ async def _run_browser_search_tool(
639
+ queries: List[str],
640
+ task_id: str,
641
+ llm: Any,
642
+ browser_config: Dict[str, Any],
643
+ stop_event: threading.Event,
644
+ max_parallel_browsers: int = 1,
645
+ ) -> List[Dict[str, Any]]:
646
+
647
+ # Execute tasks in parallel with limit
648
+ tasks = []
649
+ results = []
650
+
651
+ semaphore = asyncio.Semaphore(max_parallel_browsers)
652
+
653
+ async def task_wrapper(query):
654
+ async with semaphore:
655
+ return await run_single_browser_task(
656
+ query, task_id, llm, browser_config, stop_event
657
+ )
658
+
659
+ # Create and gather tasks
660
+ for query in queries:
661
+ tasks.append(asyncio.create_task(task_wrapper(query)))
662
+
663
+ results = await asyncio.gather(*tasks)
664
+ return results
665
+ ```
666
+
667
+ ### UI State Synchronization
668
+
669
+ **Challenge**: Keeping UI state synchronized with backend operations.
670
+
671
+ **Solution**: Component tracking and event-based updates:
672
+
673
+ ```python
674
+ # In webui_manager.py
675
+ def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
676
+ for comp_name, component in components_dict.items():
677
+ comp_id = f"{tab_name}.{comp_name}"
678
+ self.id_to_component[comp_id] = component
679
+ self.component_to_id[component] = comp_id
680
+
681
+ # In browser_use_agent_tab.py
682
+ async def handle_submit(webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]):
683
+ # Get component values and update UI state
684
+ task_input = _get_config_value(webui_manager, components, "user_input", "")
685
+ webui_manager.bu_chat_history.append({"role": "user", "content": task_input})
686
+ # Additional UI updates...
687
+ ```
688
+
689
+ ### Docker Environment Challenges
690
+
691
+ **Challenge**: Running browser automation in Docker containers.
692
+
693
+ **Solution**: Special Docker configuration for browser support:
694
+
695
+ ```python
696
+ # In custom_browser.py
697
+ CHROME_DOCKER_ARGS = [
698
+ "--no-sandbox",
699
+ "--disable-dev-shm-usage",
700
+ # Additional docker-specific args...
701
+ ]
702
+
703
+ # In docker-compose.yml
704
+ services:
705
+ web-ui:
706
+ build:
707
+ context: .
708
+ volumes:
709
+ - ./tmp:/app/tmp
710
+ ports:
711
+ - "7788:7788"
712
+ - "6080:6080" # VNC for browser viewing
713
+ environment:
714
+ - DISPLAY=:1
715
+ # Additional environment variables...
716
+ ```
717
+ """
718
+ )
719
+
720
+ tab_components.update(dict(
721
+ doc_tabs=doc_tabs,
722
+ ))
723
+
724
+ webui_manager.add_components("documentation", tab_components)
725
+
726
+ return tab_components
src/webui/components/load_save_config_tab.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio.components import Component
3
+
4
+ from src.webui.webui_manager import WebuiManager
5
+ from src.utils import config
6
+
7
+
8
def create_load_save_config_tab(webui_manager: WebuiManager):
    """
    Create the "Load & Save Config" tab.

    Builds a file picker for a JSON settings file, Load/Save buttons and a
    status box, registers them with the WebuiManager under the
    "load_save_config" namespace, and wires the buttons to the manager's
    save_config/load_config handlers.

    Args:
        webui_manager: Central manager tracking all UI components and
            providing the save/load callbacks.

    Returns:
        dict: Mapping of component name -> gradio Component for this tab.
    """
    # NOTE: the original code computed set(webui_manager.get_components())
    # here and never used it; it was also stale (captured before this tab's
    # own components were registered), so it has been removed.
    tab_components = {}

    config_file = gr.File(
        label="Load UI Settings from json",
        file_types=[".json"],
        interactive=True
    )
    with gr.Row():
        load_config_button = gr.Button("Load Config", variant="primary")
        save_config_button = gr.Button("Save UI Settings", variant="primary")

    config_status = gr.Textbox(
        label="Status",
        lines=2,
        interactive=False
    )

    tab_components.update(dict(
        load_config_button=load_config_button,
        save_config_button=save_config_button,
        config_status=config_status,
        config_file=config_file,
    ))

    webui_manager.add_components("load_save_config", tab_components)

    # Save reads every registered component (including this tab's, since the
    # input set is built after add_components above).
    save_config_button.click(
        fn=webui_manager.save_config,
        inputs=set(webui_manager.get_components()),
        outputs=[config_status]
    )

    load_config_button.click(
        fn=webui_manager.load_config,
        inputs=[config_file],
        outputs=webui_manager.get_components(),
    )
50
+
src/webui/components/vayner_client_research_tab.py ADDED
@@ -0,0 +1,1252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ from typing import Any, AsyncGenerator, Dict, List, Optional
6
+ from datetime import datetime
7
+
8
+ import gradio as gr
9
+ from gradio.components import Component
10
+
11
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
12
+ from src.browser.custom_browser import CustomBrowser
13
+ from src.controller.custom_controller import CustomController
14
+ from src.utils import llm_provider
15
+ from src.webui.webui_manager import WebuiManager
16
+ from browser_use.browser.browser import BrowserConfig
17
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Import environment variables
22
+ from dotenv import load_dotenv
23
+ load_dotenv() # This ensures environment variables are loaded
24
+
25
# Get Vayner credentials from environment (.env) so secrets are not committed
# to source control. Fall back to the legacy hard-coded values only when the
# environment does not provide them, to stay backward compatible.
VAYNER_USERNAME = os.getenv("VAYNER_USERNAME", "")
VAYNER_PASSWORD = os.getenv("VAYNER_PASSWORD", "")

# Effective login credentials used inside the task prompt below.
_VAYNER_LOGIN_USERNAME = VAYNER_USERNAME or "admin@vaynercommerce.com"
_VAYNER_LOGIN_PASSWORD = VAYNER_PASSWORD or "oKLl4li-HY"

# f-string with doubled braces so "{business_name}" survives verbatim as a
# str.format() placeholder filled in later by the tab's submit handler.
VAYNER_CLIENT_TEMPLATE = f"""
Task: Research Vayner Commerce data for business: "{{business_name}}"

1. Log in to https://local.vaynercommerce.com/myclients
   - Username: {_VAYNER_LOGIN_USERNAME}
   - Password: {_VAYNER_LOGIN_PASSWORD}
   - Use these credentials on the login form

2. After successful login, search for the business named "{{business_name}}" in the search box
3. Click on the business in the search results

**Part 1: Keyword Performance Table**
4. Extract the keyword performance table (columns: Keyword, Performance, Status)
   - Return this as a formatted table

**Part 2: Keyword Ranking History Analysis Table**
5. For the first keyword in the list:
   a. Click on the keyword to open its detail view
   b. Look for the **History** section

   c. Click the **last row (earliest date)** in the History section:
   -extract:
     - Top 3 Rank → This is the **Initial Top 3 Rank (SOV)**
     - Coverage → This is the **Initial Coverage**
   - Then look for the **Your Rankings** section (while this row is selected), extract:
     - ARP → This is the **Initial ARP**

   d. Then again the table under the **History** section, click the **first row (most recent date)** in the History section:
   - From the **History section**, extract:
     - Top 3 Rank → This is the **Current Rank (SOV) in our table**
     - Coverage → This is the **Current Coverage**
   - Then look for the **Your Rankings** section (while this row is selected), extract:
     - ARP → This is the **Current Scan ARP**

6. Go back to the Keywords list and repeat Step 5 for the second keyword in the list.


Please provide:

- The complete keyword performance data as a 1 table.
- Another new table that Return all of information from **Part 2: Keyword Ranking History Analysis Table** as a second table with the following columns:
  - Keyword
  - Initial ARP
  - Initial Top 3 Rank (SOV)
  - Initial Coverage
  - Current Scan ARP
  - Current Rank (SOV)
  - Current Coverage
At the bottom of the table, compute and include a final row labeled "Average" showing the average of all numeric columns (excluding the "Keyword" column).

"""
80
+
81
+
82
# Function to generate PDF-like report from task results
def generate_pdf_report(business_name: str, history):
    """
    Generate HTML for a PDF-like report based on the agent's history data.

    Args:
        business_name: Display name of the client business the report covers.
        history: Agent history object. It is iterated directly and exposes
            ``final_result()`` — presumably browser-use's AgentHistoryList;
            TODO confirm exact type against the caller.

    Returns:
        str: A self-contained HTML fragment (inline-styled, PDF-like layout)
        built from whatever keyword/ranking text and screenshots could be
        heuristically extracted from the history. Always returns at least the
        header/summary/footer, even when nothing was extracted.
    """
    # Extract relevant information from history.
    # NOTE(review): final_result is fetched but never used below — looks like
    # leftover from an earlier version; kept to preserve behavior in case
    # final_result() has side effects.
    final_result = history.final_result() or {}
    screenshots = []        # base64 screenshot strings harvested from steps
    keyword_data = []       # free-text snippets mentioning keywords/scores
    ranking_data = []       # free-text snippets mentioning rankings
    performance_data = []   # free-text snippets mentioning performance scores

    # Process agent history to extract information (best effort: any failure
    # is logged and skipped rather than aborting the report).
    try:
        # The history object is itself iterable
        for item in history:
            try:
                # Extract screenshot if available; the length check filters
                # out placeholder/near-empty values.
                if hasattr(item, "state") and hasattr(item.state, "screenshot"):
                    if item.state.screenshot and isinstance(item.state.screenshot, str) and len(item.state.screenshot) > 100:
                        screenshots.append(item.state.screenshot)

                # Extract data from actions
                if hasattr(item, "output") and item.output:
                    for action in item.output.action:
                        if hasattr(action, "thought"):
                            thought = action.thought.lower() if action.thought else ""

                            # Look for keyword data in thoughts
                            if "keyword" in thought and ("performance" in thought or "score" in thought):
                                keyword_data.append(action.thought)
                            # Check if action contains ranking data
                            elif "ranking" in thought or "rank" in thought:
                                ranking_data.append(action.thought)
                            # Check if action contains performance data
                            elif "performance" in thought and "score" in thought:
                                performance_data.append(action.thought)

                        # Check for extracted data in observe action results;
                        # dedupe and skip trivially short strings.
                        if hasattr(action, "result") and action.result:
                            if isinstance(action.result, str):
                                result = action.result.lower()
                                if "keyword" in result or "performance" in result:
                                    if action.result not in keyword_data and len(action.result.strip()) > 5:
                                        keyword_data.append(action.result)
                                if "ranking" in result or "rank" in result:
                                    if action.result not in ranking_data and len(action.result.strip()) > 5:
                                        ranking_data.append(action.result)
            except Exception as e:
                logger.error(f"Error processing history item: {e}")
                continue
    except Exception as e:
        logger.error(f"Error iterating through history: {e}")

    # Generate HTML for PDF-like report: header + executive summary.
    html = f"""
    <div style="font-family: Arial, sans-serif; max-width: 90%; margin: 0 auto; padding: 20px; border: 1px solid #e0e0e0; box-shadow: 0 0 10px rgba(0,0,0,0.1);">
        <div style="text-align: center; border-bottom: 2px solid #2c3e50; padding-bottom: 10px; margin-bottom: 20px;">
            <h1 style="color: #2c3e50;">Vayner Client Research Report</h1>
            <h2 style="color: #3498db;">{business_name}</h2>
            <p style="color: #7f8c8d;">Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
        </div>

        <div style="margin-bottom: 30px;">
            <h3 style="color: #2c3e50; border-bottom: 1px solid #e0e0e0; padding-bottom: 5px;">Executive Summary</h3>
            <p>This report contains research data for {business_name} extracted from Vayner Commerce platform.
            We analyzed keyword performance data and geographic rankings.</p>
        </div>
    """

    # If no specific data is found, try to extract from all output
    # (second, looser pass over every thought/result string).
    if not keyword_data and not ranking_data and not performance_data:
        try:
            all_text = []
            for item in history:
                if hasattr(item, "output") and item.output:
                    for action in item.output.action:
                        if hasattr(action, "thought") and action.thought:
                            all_text.append(action.thought)
                        if hasattr(action, "result") and action.result:
                            all_text.append(action.result)

            # Look for sections in the text
            for text in all_text:
                if "keyword" in text.lower() or "score" in text.lower() or "performance" in text.lower():
                    keyword_data.append(text)
                if "ranking" in text.lower() or "rank" in text.lower():
                    ranking_data.append(text)
        except Exception as e:
            logger.error(f"Error extracting all text: {e}")

    # Add performance data section
    if performance_data or keyword_data:
        html += """
        <div style="margin-bottom: 30px;">
            <h3 style="color: #2c3e50; border-bottom: 1px solid #e0e0e0; padding-bottom: 5px;">Keyword Performance Data</h3>
        """

        # Try to parse data into a table format: split each snippet into
        # lines and interpret "key: value" or "key - value" pairs.
        table_data = []
        try:
            combined_data = performance_data + keyword_data
            for data in combined_data:
                lines = data.split("\n")
                for line in lines:
                    if ":" in line:
                        parts = line.split(":", 1)
                        if len(parts) == 2:
                            keyword, value = parts
                            table_data.append((keyword.strip(), value.strip()))
                    # "-" separator, but not a markdown bullet line.
                    elif "-" in line and not line.strip().startswith("-"):
                        parts = line.split("-", 1)
                        if len(parts) == 2:
                            keyword, value = parts
                            table_data.append((keyword.strip(), value.strip()))

            if table_data:
                html += """
                <table style="width: 100%; border-collapse: collapse;">
                    <thead>
                        <tr style="background-color: #f2f2f2;">
                            <th style="padding: 10px; border: 1px solid #e0e0e0; text-align: left;">Keyword</th>
                            <th style="padding: 10px; border: 1px solid #e0e0e0; text-align: left;">Performance/Score</th>
                        </tr>
                    </thead>
                    <tbody>
                """

                for keyword, value in table_data:
                    html += f"""
                    <tr>
                        <td style="padding: 10px; border: 1px solid #e0e0e0;">{keyword}</td>
                        <td style="padding: 10px; border: 1px solid #e0e0e0;">{value}</td>
                    </tr>
                    """

                html += """
                    </tbody>
                </table>
                """
            else:
                # Display raw data if table parsing failed
                for data in combined_data:
                    html += f"""
                    <div style="margin-bottom: 15px; padding: 10px; background-color: #f9f9f9; border: 1px solid #e0e0e0;">
                        <pre style="margin: 0; white-space: pre-wrap;">{data}</pre>
                    </div>
                    """
        except Exception as e:
            logger.error(f"Error formatting table data: {e}")
            # Fallback to raw display
            for data in performance_data + keyword_data:
                html += f"""
                <div style="margin-bottom: 15px; padding: 10px; background-color: #f9f9f9; border: 1px solid #e0e0e0;">
                    <pre style="margin: 0; white-space: pre-wrap;">{data}</pre>
                </div>
                """

        html += """
        </div>
        """

    # Add rankings data section (raw snippets only; no table parsing here).
    if ranking_data:
        html += """
        <div style="margin-bottom: 30px;">
            <h3 style="color: #2c3e50; border-bottom: 1px solid #e0e0e0; padding-bottom: 5px;">Geographic Rankings</h3>
        """

        for data in ranking_data:
            html += f"""
            <div style="margin-bottom: 15px; padding: 10px; background-color: #f9f9f9; border: 1px solid #e0e0e0;">
                <pre style="margin: 0; white-space: pre-wrap;">{data}</pre>
            </div>
            """

        html += """
        </div>
        """

    # Add screenshots section (screenshots are assumed to be base64 JPEG
    # data — presumably what browser-use stores; TODO confirm format).
    if screenshots:
        html += """
        <div style="margin-bottom: 30px;">
            <h3 style="color: #2c3e50; border-bottom: 1px solid #e0e0e0; padding-bottom: 5px;">Map Visualizations</h3>
            <div style="display: flex; flex-wrap: wrap; gap: 15px; justify-content: center;">
        """

        for idx, screenshot in enumerate(screenshots):
            # Re-check length defensively (already filtered at collection).
            if isinstance(screenshot, str) and len(screenshot) > 100:
                html += f"""
                <div style="margin-bottom: 15px; text-align: center;">
                    <img src="data:image/jpeg;base64,{screenshot}" alt="Map {idx+1}" style="max-width: 100%; border: 1px solid #e0e0e0; box-shadow: 0 2px 5px rgba(0,0,0,0.1);">
                    <p style="margin-top: 5px; font-style: italic; color: #7f8c8d;">Map Visualization {idx+1}</p>
                </div>
                """

        html += """
            </div>
        </div>
        """

    # If no data was found, show a message
    if not keyword_data and not performance_data and not ranking_data and not screenshots:
        html += """
        <div style="margin-bottom: 30px; text-align: center; padding: 20px; background-color: #f8f9fa; border-radius: 5px;">
            <h3 style="color: #e74c3c;">No data extracted</h3>
            <p>The agent was unable to extract specific data for this report. Please check the chat logs for more details on what was found.</p>
        </div>
        """

    # Add footer and close the outer container div opened in the header.
    html += """
        <div style="border-top: 1px solid #e0e0e0; padding-top: 15px; text-align: center; font-size: 12px; color: #7f8c8d;">
            <p>Generated by Vayner Client Research Agent | Browser-Use WebUI</p>
        </div>
    </div>
    """

    return html
302
+
303
# Function to generate live PDF-like report updated during the task
def generate_live_report(business_name, business_info, keyword_data, ranking_data, screenshots, keyword_table_rows=None, final_result=None):
    """
    Generate HTML for a live-updating PDF-like report based on data collected so far.

    Only the first three "pages" are rendered: a black cover page, a second
    page with logo/service/date, and a third page showing the final result
    plus a keyword summary table.

    Args:
        business_name: Name of the business, shown on the cover and page 2.
        business_info: Collected business-info strings.
            NOTE(review): currently unused by this function — kept for
            signature compatibility with callers; confirm before removing.
        keyword_data: Collected keyword strings (currently unused here).
        ranking_data: Collected ranking strings (currently unused here).
        screenshots: Collected base64 screenshots (currently unused here).
        keyword_table_rows: Optional list of dicts with keys
            'keyword', 'performance', 'sov' rendered as a summary table.
        final_result: Final agent result; may be a str (optionally a
            markdown/pipe table), list, or dict. Rendered on page 3.

    Returns:
        A single HTML string containing all three pages.
    """
    # Avoid a mutable default argument; normalize None to an empty list.
    if keyword_table_rows is None:
        keyword_table_rows = []
    # Cover page (black background, business name, VaynerCommerce logo)
    html = f'''
<div style="width:100%; min-height:400px; background:#000; color:#fff; display:flex; flex-direction:column; align-items:center; justify-content:center; padding:60px 0 40px 0;">
<div style="width:70%; max-width:500px; margin-bottom:30px;">
<div style="text-align:center; margin-bottom:20px;">
<svg width="120" height="80" viewBox="0 0 120 80" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M60 15C70 15 80 25 90 30C100 35 110 30 115 25C110 40 100 50 80 50C60 50 40 40 30 30C40 35 50 15 60 15Z" fill="white"/>
<path d="M70 15C65 20 60 18 55 15" stroke="white" stroke-width="2"/>
<path d="M75 12C70 17 65 15 60 12" stroke="white" stroke-width="2"/>
</svg>
</div>
<div style="font-size:2.7rem; font-weight:600; letter-spacing:2px; text-align:center; line-height:1.1; text-transform:uppercase; font-family: 'Montserrat', Arial, sans-serif;">
{business_name}
</div>
<div style="font-size:1.2rem; text-align:center; letter-spacing:1px; margin-top:5px; text-transform:uppercase; font-family: 'Montserrat', Arial, sans-serif;">

</div>
</div>

<div style="font-size:2.5rem; font-weight:600; margin: 30px 0; text-align:center;">X</div>

<div style="width:70%; max-width:500px;">
<div style="font-size:2.1rem; font-weight:700; letter-spacing:2px; text-align:center; text-transform:uppercase; font-family: 'Montserrat', Arial, sans-serif;">
<div style="display:inline-block; margin-right:10px; vertical-align:middle;">◆</div> VAYNERCOMMERCE
</div>
</div>
</div>
'''
    # Second page (logo, business name, service, date, image)
    html += f'''
<div style="width:100%; min-height:400px; background:#fff; color:#222; display:flex; flex-direction:row; align-items:stretch; padding:0;">
<div style="flex:1; display:flex; flex-direction:column; align-items:center; justify-content:center; padding:40px 20px; border-right:1px solid #eee;">
<div style="width:240px; margin-bottom:20px;">
<svg viewBox="0 0 240 140" width="240" height="140" xmlns="http://www.w3.org/2000/svg">
<path d="M120 30C140 30 160 50 180 60C200 70 220 60 230 50C220 80 200 100 160 100C120 100 80 80 60 60C80 70 100 30 120 30Z" fill="#4A8FBA"/>
<path d="M140 30C130 40 120 36 110 30" stroke="#4A8FBA" stroke-width="2"/>
<path d="M150 24C140 34 130 30 120 24" stroke="#4A8FBA" stroke-width="2"/>
<ellipse cx="140" cy="70" rx="100" ry="15" fill="#E3B151" opacity="0.3"/>
</svg>
</div>
<div style="font-size:2.2rem; font-weight:600; color:#4A8FBA; margin-bottom:10px; font-family: 'Montserrat', Arial, sans-serif; text-transform:uppercase; letter-spacing:1px; text-align:center; line-height:1.1;">
{business_name}<br>
<span style="font-size:1.1rem; color:#666; text-transform:uppercase; letter-spacing:1px;">BEHAVIORAL HEALTH</span>
</div>
<div style="font-size:1.1rem; color:#666; margin:20px 0; text-align:center;">SEO Services</div>
<div style="font-size:1rem; color:#444; text-align:center;">Weeks of <span style="font-weight:600;">04/07/25 - 04/21/25</span></div>
<div style="margin-top:60px; font-size:0.9rem; color:#bbb; font-family: 'Montserrat', Arial, sans-serif;">
<span style="display:inline-block; margin-right:5px; vertical-align:middle;">◆</span> VAYNERCOMMERCE
</div>
</div>
<div style="flex:1; min-height:400px; background-image:url('https://images.unsplash.com/photo-1577563908411-5077b6dc7624?auto=format&fit=crop&w=700&q=80'); background-size:cover; background-position:center;">
</div>
</div>
'''
    # Third page: Final Result and Keyword Table
    html += f'''
<div style="width:100%; min-height:600px; background:#000; color:#fff; display:flex; flex-direction:column; align-items:center; justify-content:flex-start; padding:40px 0 40px 0; border-bottom:2px solid #222;">
<div style="font-size:1.8rem; font-weight:600; color:#fff; margin-bottom:10px; font-family: 'Montserrat', Arial, sans-serif;">Final Research Results</div>
<div style="width:90%; max-width:900px; margin-bottom:40px;">
'''

    # Display Final Result if available
    if final_result:
        # Determine if final_result is likely a table (pipe chars, or a
        # line that mentions both "keyword" and "performance").
        is_table = False
        if isinstance(final_result, str):
            lines = final_result.strip().split('\n')
            if any('|' in line for line in lines) or any('keyword' in line.lower() and 'performance' in line.lower() for line in lines):
                is_table = True

        if is_table:
            # Format as a table
            try:
                html += '<div style="width:100%; overflow-x:auto; margin:20px 0; border-radius:4px; box-shadow:0 2px 10px rgba(0,0,0,0.1);">'

                # Split the table data into non-empty trimmed lines.
                lines = [line.strip() for line in final_result.split('\n') if line.strip()]

                # Find the header row (must mention keyword + performance/sov).
                header_row_index = -1
                for i, line in enumerate(lines):
                    if ('keyword' in line.lower() and 'performance' in line.lower()) or ('keyword' in line.lower() and 'sov' in line.lower()):
                        header_row_index = i
                        break

                if header_row_index != -1:
                    # Create an HTML table
                    html += '<table style="width:100%; border-collapse:collapse; font-family:Arial, sans-serif; background:#000; color:#fff;">'

                    # Format the header row by splitting on pipe characters.
                    header = lines[header_row_index]
                    header_cells = [cell.strip() for cell in header.strip('|').split('|')]
                    html += '<thead><tr style="background-color:#222; color:#fff;">'
                    for cell in header_cells:
                        html += f'<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #444;">{cell}</th>'
                    html += '</tr></thead><tbody>'

                    # Skip the markdown separator row ("---") if it exists.
                    data_start = header_row_index + 2 if header_row_index + 1 < len(lines) and '---' in lines[header_row_index + 1] else header_row_index + 1

                    # Format the data rows, alternating background colors.
                    for i in range(data_start, len(lines)):
                        row = lines[i]
                        if '|' in row:
                            cells = [cell.strip() for cell in row.strip('|').split('|')]
                            bg_color = '#111' if i % 2 == 0 else '#181818'
                            html += f'<tr style="background-color:{bg_color}; color:#fff;">'
                            for cell in cells:
                                html += f'<td style="padding:10px 15px; border-bottom:1px solid #333;">{cell}</td>'
                            html += '</tr>'

                    html += '</tbody></table>'
                else:
                    # If no proper header found, just display the text in a pre tag
                    html += f'<pre style="width:100%; background-color:#111; color:#fff; padding:15px; border-radius:4px; white-space:pre-wrap; overflow-x:auto;">{final_result}</pre>'

                html += '</div>'
            except Exception:
                # If parsing fails, fall back to displaying the raw text.
                html += f'<pre style="width:100%; background-color:#111; color:#fff; padding:15px; border-radius:4px; white-space:pre-wrap; overflow-x:auto;">{final_result}</pre>'
        else:
            # Format as regular text (non-table result)
            html += f'<div style="width:100%; background-color:#111; color:#fff; padding:20px; border-radius:4px; border-left:4px solid #4A8FBA; margin:20px 0;">'

            if isinstance(final_result, str):
                # Split into paragraphs on blank lines; keep single newlines as <br>.
                paragraphs = final_result.split('\n\n')
                for paragraph in paragraphs:
                    if paragraph.strip():
                        paragraph_html = paragraph.replace("\n", "<br>")
                        html += f'<p style="margin-bottom:15px; line-height:1.5;">{paragraph_html}</p>'
            elif isinstance(final_result, list):
                # Handle list of items as an unordered list.
                html += '<ul style="margin-left:20px; line-height:1.5;">'
                for item in final_result:
                    html += f'<li style="margin-bottom:8px;">{item}</li>'
                html += '</ul>'
            elif isinstance(final_result, dict):
                # Handle dictionary as key/value rows.
                html += '<div style="line-height:1.5;">'
                for key, value in final_result.items():
                    html += f'<div style="margin-bottom:10px;"><strong>{key}:</strong> {value}</div>'
                html += '</div>'
            else:
                # Generic string representation for any other type.
                html += f'<p style="line-height:1.5;">{str(final_result)}</p>'

            html += '</div>'
    else:
        # No final result yet: show a placeholder (used for live updates mid-run).
        html += '''
<div style="width:90%; background-color:#111; color:#fff; padding:20px; border-radius:4px; text-align:center; margin:20px 0;">
<p style="color:#bbb; font-style:italic;">Results will appear here when the task is completed.</p>
</div>
'''

    # Additional keyword table display (rows harvested during the run)
    if keyword_table_rows:
        html += '''
<div style="width:90%; max-width:800px; margin-top:30px;">
<div style="font-size:1.4rem; font-weight:600; color:#fff; margin-bottom:15px; font-family: 'Montserrat', Arial, sans-serif;">Keyword Performance Summary</div>
<table style="width:100%; border-collapse:collapse; background:#000; color:#fff;">
<thead>
<tr style="background-color:#222; color:#fff;">
<th style="padding:10px; border:1px solid #333; text-align:left;">Keyword</th>
<th style="padding:10px; border:1px solid #333; text-align:left;">Performance</th>
<th style="padding:10px; border:1px solid #333; text-align:left;">SOV</th>
</tr>
</thead>
<tbody>
'''

        # Each row is expected to carry 'keyword', 'performance', 'sov' keys
        # (produced by the step/done callbacks in run_vayner_research).
        for row in keyword_table_rows:
            html += f'''<tr style="background-color:#111; color:#fff;">
<td style="padding:10px; border:1px solid #333;">{row['keyword']}</td>
<td style="padding:10px; border:1px solid #333;">{row['performance']}</td>
<td style="padding:10px; border:1px solid #333;">{row['sov']}</td>
</tr>'''

        html += '''
</tbody>
</table>
</div>
'''

    # Close the page-3 inner container and outer page div.
    html += '</div></div>'
    return html
497
+
498
async def run_vayner_research(
    webui_manager: WebuiManager,
    components: Dict[gr.components.Component, Any],
    business_name: str
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
    """
    Run a Vayner client research task and yield Gradio UI updates.

    Drives a BrowserUseAgent through the task built from
    VAYNER_CLIENT_TEMPLATE and streams three things back to the UI while the
    agent runs: the formatted step log (chatbot), a live browser screenshot
    (browser view), and a live HTML "PDF" report (pdf_report).

    Args:
        webui_manager: Shared manager holding UI components and agent state.
        components: Mapping of Gradio components to their current values,
            used to read Agent/Browser Settings tabs.
        business_name: Name of the business to research.

    Yields:
        Dicts mapping Gradio components to gr.update(...) payloads.

    Raises:
        ValueError: If the LLM could not be initialized.
    """
    import re  # used by both callbacks below; hoisted out of the per-field loop

    # Get all required UI components
    run_button_comp = webui_manager.get_component_by_id("vayner_client_research.run_button")
    stop_button_comp = webui_manager.get_component_by_id("vayner_client_research.stop_button")
    chatbot_comp = webui_manager.get_component_by_id("vayner_client_research.chatbot")
    browser_view_comp = webui_manager.get_component_by_id("vayner_client_research.browser_view")
    pdf_report_comp = webui_manager.get_component_by_id("vayner_client_research.pdf_report")

    # Create the task using the template with credentials
    task = VAYNER_CLIENT_TEMPLATE.format(
        business_name=business_name,
        vayner_username=VAYNER_USERNAME,
        vayner_password=VAYNER_PASSWORD
    )

    # Initialize chat history if needed
    if not hasattr(webui_manager, "vayner_chat_history"):
        webui_manager.vayner_chat_history = []

    # Show the business being researched
    webui_manager.vayner_chat_history.append(
        {"role": "user", "content": f"Research business: {business_name}"}
    )
    webui_manager.vayner_chat_history.append(
        {"role": "assistant", "content": f"Starting research for {business_name}..."}
    )

    # Initial UI state: busy button, stop enabled, report hidden.
    # Filter out None keys in case a component id is not registered.
    yield {
        k: v for k, v in {
            chatbot_comp: gr.update(value=webui_manager.vayner_chat_history),
            run_button_comp: gr.update(value="⏳ Researching...", interactive=False),
            stop_button_comp: gr.update(interactive=True),
            pdf_report_comp: gr.update(visible=False)
        }.items() if k is not None
    }

    def get_setting(name, default=None):
        """Read a value from the Agent Settings tab, falling back to default."""
        comp = webui_manager.get_component_by_id(f"agent_settings.{name}")
        return components.get(comp, default) if comp else default

    # LLM Settings. The provider is always forced to OpenAI below (vision
    # support is required), so the llm_provider setting is intentionally
    # not consulted here.
    llm_model_name = get_setting("llm_model_name", "gpt-4o")
    llm_temperature = get_setting("llm_temperature", 0.6)
    use_vision = True  # Always need vision for this task
    llm_base_url = get_setting("llm_base_url", "")
    llm_api_key = get_setting("llm_api_key", "")
    if not llm_api_key:
        llm_api_key = os.getenv("OPENAI_API_KEY", "")

    def get_browser_setting(key, default=None):
        """Read a value from the Browser Settings tab, falling back to default."""
        comp = webui_manager.get_component_by_id(f"browser_settings.{key}")
        return components.get(comp, default) if comp else default

    headless = True  # Force headless mode for this agent
    disable_security = get_browser_setting("disable_security", False)
    window_w = int(get_browser_setting("window_w", 1920))
    window_h = int(get_browser_setting("window_h", 1080))
    save_recording_path = get_browser_setting("save_recording_path") or "./tmp/vayner_recordings"
    save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")

    # Make sure paths exist
    os.makedirs(save_recording_path, exist_ok=True)
    if save_download_path:
        os.makedirs(save_download_path, exist_ok=True)

    # Live browser view sizing (CSS vw/vh units, aspect ratio preserved)
    stream_vw = 80
    stream_vh = int(80 * window_h // window_w)

    # Get LLM for agent
    main_llm = llm_provider.get_llm_model(
        provider="openai",  # Force OpenAI for vision capabilities
        model_name=str(llm_model_name) if llm_model_name else "gpt-4o",
        temperature=float(llm_temperature),
        base_url=str(llm_base_url) if llm_base_url else None,
        api_key=str(llm_api_key) if llm_api_key else None,
    )
    if main_llm is None:
        raise ValueError("Failed to initialize LLM. Please check your OpenAI API key and model settings in Agent Settings.")

    async def step_callback(state, output, step_num):
        """Collect report data from one agent step and append a formatted log entry to the chat."""
        step_num -= 1
        logger.info(f"Step {step_num} completed.")

        # Process screenshot if available (stored for the PDF report only, not chat)
        screenshot_data = getattr(state, "screenshot", None)
        if screenshot_data:
            try:
                if isinstance(screenshot_data, str) and len(screenshot_data) > 100:
                    if not hasattr(webui_manager, "vayner_screenshots"):
                        webui_manager.vayner_screenshots = []
                    webui_manager.vayner_screenshots.append(screenshot_data)
            except Exception as e:
                logger.error(f"Error processing screenshot: {e}")

        # Extract information for the real-time PDF report
        try:
            # Ensure all accumulators exist even if this step had no screenshot.
            if not hasattr(webui_manager, "vayner_screenshots"):
                webui_manager.vayner_screenshots = []
            if not hasattr(webui_manager, "vayner_business_info"):
                webui_manager.vayner_business_info = []
            if not hasattr(webui_manager, "vayner_keyword_data"):
                webui_manager.vayner_keyword_data = []
            if not hasattr(webui_manager, "vayner_ranking_data"):
                webui_manager.vayner_ranking_data = []

            # Extract business info, keywords, and rankings from this step
            for action in output.action:
                if hasattr(action, "thought") and action.thought:
                    thought = action.thought.lower()

                    # Extract business info
                    if "business" in thought and any(x in thought for x in ["name", "address", "info", "details", "about"]):
                        if action.thought not in webui_manager.vayner_business_info:
                            webui_manager.vayner_business_info.append(action.thought)

                    # Extract keyword data
                    if "keyword" in thought and any(x in thought for x in ["performance", "score", "data"]):
                        if action.thought not in webui_manager.vayner_keyword_data:
                            webui_manager.vayner_keyword_data.append(action.thought)

                    # Extract ranking data
                    if any(x in thought for x in ["ranking", "rank", "geography", "location"]):
                        if action.thought not in webui_manager.vayner_ranking_data:
                            webui_manager.vayner_ranking_data.append(action.thought)

                # Also check action results for structured data
                if hasattr(action, "result") and action.result and isinstance(action.result, str):
                    result = action.result.lower()

                    if "business" in result and len(action.result) > 10:
                        if action.result not in webui_manager.vayner_business_info:
                            webui_manager.vayner_business_info.append(action.result)

                    if "keyword" in result and len(action.result) > 10:
                        if action.result not in webui_manager.vayner_keyword_data:
                            webui_manager.vayner_keyword_data.append(action.result)

                    if "rank" in result and len(action.result) > 10:
                        if action.result not in webui_manager.vayner_ranking_data:
                            webui_manager.vayner_ranking_data.append(action.result)

            # Record the current URL for page context
            if hasattr(state, "url") and state.url:
                page_url = state.url
                if "business" in page_url.lower() and not any(page_url in info for info in webui_manager.vayner_business_info):
                    webui_manager.vayner_business_info.append(f"Page URL: {page_url}")

            # Capture visible page text that mentions keywords
            if hasattr(state, "text_content") and state.text_content:
                if "keyword" in state.text_content.lower() and len(state.text_content) > 20:
                    if state.text_content not in webui_manager.vayner_keyword_data:
                        webui_manager.vayner_keyword_data.append(state.text_content)

            # Extract keyword table rows from lines shaped like
            # "keyword: X, performance: Y, sov: Z" in thoughts/results.
            if not hasattr(webui_manager, "vayner_keyword_table_rows"):
                webui_manager.vayner_keyword_table_rows = []
            for action in output.action:
                for field in [getattr(action, "thought", None), getattr(action, "result", None)]:
                    if field and isinstance(field, str):
                        match = re.search(r"keyword[:\s]+([\w\- ]+)[,;\s]+performance[:\s]+([\w\-\.]+)[,;\s]+sov[:\s]+([\w\-\.]+)", field, re.IGNORECASE)
                        if match:
                            keyword = match.group(1).strip()
                            performance = match.group(2).strip()
                            sov = match.group(3).strip()
                            # Deduplicate by keyword (case-insensitive)
                            if not any(row["keyword"].lower() == keyword.lower() for row in webui_manager.vayner_keyword_table_rows):
                                webui_manager.vayner_keyword_table_rows.append({
                                    "keyword": keyword,
                                    "performance": performance,
                                    "sov": sov
                                })

            # Update the PDF report with the latest data.
            # FIX: the original code passed history.final_result() here, but
            # `history` is only defined in done_callback — the resulting
            # NameError was swallowed by the except below, so the live report
            # never refreshed. The final result is unknown mid-run: pass None.
            business_name = getattr(webui_manager, "vayner_current_business", "Unknown Business")
            webui_manager.vayner_pdf_report = generate_live_report(
                business_name,
                webui_manager.vayner_business_info,
                webui_manager.vayner_keyword_data,
                webui_manager.vayner_ranking_data,
                webui_manager.vayner_screenshots,
                webui_manager.vayner_keyword_table_rows,
                None
            )

            # Queue a live report refresh for the outer UI loop to emit
            pdf_report_comp = webui_manager.get_component_by_id("vayner_client_research.pdf_report")
            if pdf_report_comp and hasattr(webui_manager, "update_queue"):
                webui_manager.update_queue.append({
                    pdf_report_comp: gr.update(
                        value=webui_manager.vayner_pdf_report,
                        visible=True
                    )
                })

        except Exception as e:
            logger.error(f"Error updating PDF report: {e}")

        # Format a console-style log entry for the chat (NO screenshots in chat)
        try:
            log_html = f'''
<div style="margin: 5px 0; padding: 10px; background-color: #f8f9fa; border-radius: 4px; border-left: 4px solid #3498db; font-family: 'Courier New', monospace;">
<div style="display: flex; align-items: center; margin-bottom: 5px;">
<span style="background-color: #e0f0ff; color: #3498db; font-weight: bold; padding: 2px 8px; border-radius: 12px; font-size: 12px; margin-right: 10px;">agent</span>
<span style="color: #555; font-size: 12px;">{datetime.now().strftime('%H:%M:%S')}</span>
</div>
'''

            has_content = False

            # Full JSON dump of this step's actions
            action_dump = [action.model_dump(exclude_none=True) for action in output.action]

            # Step info
            log_html += f'<div style="font-weight: bold; margin-bottom: 5px; color: #333;">🔶 Step {step_num}</div>'

            # Add current URL if available
            if hasattr(state, "url") and state.url:
                log_html += f'<div style="margin-bottom: 5px;"><span style="color: #888;">URL:</span> {state.url}</div>'

            # Add actions
            for action in action_dump:
                has_content = True

                if 'action_type' in action:
                    action_type = action['action_type'].upper()

                    # Icon based on action type
                    if action_type == "CLICK":
                        icon = "🖱️"
                    elif action_type == "TYPE":
                        icon = "⌨️"
                    elif action_type == "NAVIGATE":
                        icon = "🔗"
                    elif action_type == "EXTRACT":
                        icon = "📋"
                    elif action_type == "WAIT_FOR_ELEMENT":
                        icon = "⏳"
                    else:
                        icon = "⚙️"

                    # Format based on action type
                    if action_type == "CLICK" and 'selector' in action:
                        log_html += f'<div style="margin-bottom: 5px;"><span style="color: #e67e22;">{icon} CLICK:</span> <code>{action["selector"]}</code></div>'
                    elif action_type == "TYPE" and 'text' in action:
                        text = action['text']
                        if len(text) > 50:
                            text = text[:47] + "..."
                        log_html += f'<div style="margin-bottom: 5px;"><span style="color: #2ecc71;">{icon} TYPE:</span> <code>"{text}"</code></div>'
                    elif action_type == "NAVIGATE" and 'url' in action:
                        log_html += f'<div style="margin-bottom: 5px;"><span style="color: #3498db;">{icon} NAVIGATE:</span> <code>{action["url"]}</code></div>'
                    elif action_type == "EXTRACT":
                        log_html += f'<div style="margin-bottom: 5px;"><span style="color: #9b59b6;">{icon} EXTRACT DATA</span></div>'
                    elif action_type == "WAIT_FOR_ELEMENT" and 'selector' in action:
                        log_html += f'<div style="margin-bottom: 5px;"><span style="color: #f39c12;">{icon} WAIT FOR:</span> <code>{action["selector"]}</code></div>'
                    else:
                        details = ", ".join([f"{k}={v}" for k, v in action.items() if k != 'action_type' and k != 'thought'])
                        log_html += f'<div style="margin-bottom: 5px;"><span style="color: #34495e;">{icon} {action_type}:</span> <code>{details}</code></div>'

                # Include thoughts with thinking emoji (truncated for readability)
                if 'thought' in action and action['thought']:
                    thought = action['thought'].strip()
                    if len(thought) > 150:
                        thought = thought[:147] + "..."
                    log_html += f'<div style="margin: 5px 0 10px 15px; color: #7f8c8d; font-style: italic;">💭 {thought}</div>'

            # Close log div
            log_html += '</div>'

            # If no actions found, replace with a warning entry
            if not has_content:
                log_html = f'''
<div style="margin: 5px 0; padding: 10px; background-color: #f8f9fa; border-radius: 4px; border-left: 4px solid #e74c3c; font-family: 'Courier New', monospace;">
<div style="display: flex; align-items: center; margin-bottom: 5px;">
<span style="background-color: #ffe0e0; color: #e74c3c; font-weight: bold; padding: 2px 8px; border-radius: 12px; font-size: 12px; margin-right: 10px;">agent</span>
<span style="color: #555; font-size: 12px;">{datetime.now().strftime('%H:%M:%S')}</span>
</div>
<div style="font-weight: bold; margin-bottom: 5px; color: #333;">⚠️ Step {step_num} - No actions recorded</div>
</div>
'''

        except Exception as e:
            logger.error(f"Error formatting step output: {e}")
            log_html = f'''
<div style="margin: 5px 0; padding: 10px; background-color: #f8f9fa; border-radius: 4px; border-left: 4px solid #e74c3c; font-family: 'Courier New', monospace;">
<div style="display: flex; align-items: center; margin-bottom: 5px;">
<span style="background-color: #ffe0e0; color: #e74c3c; font-weight: bold; padding: 2px 8px; border-radius: 12px; font-size: 12px; margin-right: 10px;">error</span>
</div>
<div style="font-weight: bold; margin-bottom: 5px; color: #333;">⚠️ Error formatting Step {step_num}</div>
<div style="color: #e74c3c;">{str(e)}</div>
</div>
'''

        # Add to chat history (the outer polling loop pushes it to the UI)
        webui_manager.vayner_chat_history.append(
            {"role": "assistant", "content": log_html}
        )

    def done_callback(history):
        """Summarize the finished run, harvest keyword rows from the final result, and build the final report."""
        logger.info(f"Vayner research task finished. Duration: {history.total_duration_seconds():.2f}s")

        final_summary = "**Task Completed**\n"
        final_summary += f"- Duration: {history.total_duration_seconds():.2f} seconds\n"

        final_result = history.final_result()
        if final_result:
            final_summary += f"- Final Result: {final_result}\n"
            # Parse final_result for keyword rows and update the table
            if not hasattr(webui_manager, "vayner_keyword_table_rows"):
                webui_manager.vayner_keyword_table_rows = []
            # Accept both string and dict/list results
            if isinstance(final_result, str):
                # 1. Parse a markdown/pipe table (keyword | performance | sov)
                lines = [line.strip() for line in final_result.splitlines() if line.strip()]
                table_start = -1
                for i, line in enumerate(lines):
                    if re.match(r"\|?\s*keyword\s*\|\s*performance\s*\|\s*sov\s*\|?", line, re.IGNORECASE):
                        table_start = i
                        break
                if table_start != -1 and table_start + 2 < len(lines):
                    # Table header, separator, then data rows
                    for row in lines[table_start+2:]:
                        if not row.startswith("|"):
                            continue
                        cells = [c.strip() for c in row.strip("|").split("|")]
                        if len(cells) >= 3:
                            keyword, performance, sov = cells[:3]
                            if keyword and performance and sov:
                                if not any(row_item["keyword"].lower() == keyword.lower() for row_item in webui_manager.vayner_keyword_table_rows):
                                    webui_manager.vayner_keyword_table_rows.append({
                                        "keyword": keyword,
                                        "performance": performance,
                                        "sov": sov
                                    })
                # 2. Also parse lines like: "keyword: X, performance: Y, sov: Z"
                for line in lines:
                    match = re.search(r"keyword[:\s]+([\w\- ]+)[,;\s]+performance[:\s]+([\w\-\.]+)[,;\s]+sov[:\s]+([\w\-\.]+)", line, re.IGNORECASE)
                    if match:
                        keyword = match.group(1).strip()
                        performance = match.group(2).strip()
                        sov = match.group(3).strip()
                        if not any(row_item["keyword"].lower() == keyword.lower() for row_item in webui_manager.vayner_keyword_table_rows):
                            webui_manager.vayner_keyword_table_rows.append({
                                "keyword": keyword,
                                "performance": performance,
                                "sov": sov
                            })
            elif isinstance(final_result, list):
                # Structured list of row dicts
                for item in final_result:
                    if isinstance(item, dict):
                        keyword = item.get("keyword")
                        performance = item.get("performance")
                        sov = item.get("sov")
                        if keyword and performance and sov:
                            if not any(row_item["keyword"].lower() == keyword.lower() for row_item in webui_manager.vayner_keyword_table_rows):
                                webui_manager.vayner_keyword_table_rows.append({
                                    "keyword": keyword,
                                    "performance": performance,
                                    "sov": sov
                                })

        errors = history.errors()
        if errors and any(errors):
            final_summary += f"- **Errors:**\n```\n{errors}\n```\n"
        else:
            final_summary += "- Status: Success\n"

        webui_manager.vayner_chat_history.append(
            {"role": "assistant", "content": final_summary}
        )

        # Generate the final PDF report using the collected live data
        try:
            business_name = getattr(webui_manager, "vayner_current_business", "Unknown Business")
            webui_manager.vayner_pdf_report = generate_live_report(
                business_name,
                webui_manager.vayner_business_info,
                webui_manager.vayner_keyword_data,
                webui_manager.vayner_ranking_data,
                webui_manager.vayner_screenshots,
                webui_manager.vayner_keyword_table_rows,
                final_result
            )
        except Exception as e:
            logger.error(f"Error generating PDF report: {e}", exc_info=True)
            webui_manager.vayner_pdf_report = f"<div class='error'>Error generating report: {str(e)}</div>"

    # Initialize controller and browser, run the agent, and stream UI updates
    try:
        if not webui_manager.vayner_controller:
            webui_manager.vayner_controller = CustomController()

        if not webui_manager.vayner_browser:
            webui_manager.vayner_browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
                    browser_binary_path=None,
                    new_context_config=BrowserContextConfig(
                        window_width=window_w,
                        window_height=window_h,
                    )
                )
            )

        if not webui_manager.vayner_browser_context:
            context_config = BrowserContextConfig(
                save_recording_path=save_recording_path,
                save_downloads_path=save_download_path,
                window_height=window_h,
                window_width=window_w,
            )
            webui_manager.vayner_browser_context = (
                await webui_manager.vayner_browser.new_context(config=context_config)
            )

        # Initialize agent (reuse an existing one by queuing a new task)
        if not webui_manager.vayner_agent:
            webui_manager.vayner_agent = BrowserUseAgent(
                task=task,
                llm=main_llm,
                browser=webui_manager.vayner_browser,
                browser_context=webui_manager.vayner_browser_context,
                controller=webui_manager.vayner_controller,
                register_new_step_callback=step_callback,
                register_done_callback=done_callback,
                use_vision=use_vision,
                max_input_tokens=128000,
                max_actions_per_step=10,
                source="vayner_research",
            )
        else:
            webui_manager.vayner_agent.add_new_task(task)

        # Run the agent in the background
        agent_run_coro = webui_manager.vayner_agent.run(max_steps=50)
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.vayner_current_task = agent_task

        # Monitor the task and push UI updates until it finishes
        last_chat_len = len(webui_manager.vayner_chat_history)
        while not agent_task.done():
            # Update Chatbot if new messages arrived
            if len(webui_manager.vayner_chat_history) > last_chat_len:
                yield {
                    chatbot_comp: gr.update(value=webui_manager.vayner_chat_history)
                }
                last_chat_len = len(webui_manager.vayner_chat_history)

            # Update Browser View with a fresh screenshot
            if webui_manager.vayner_browser_context:
                try:
                    screenshot_b64 = await webui_manager.vayner_browser_context.take_screenshot()
                    if screenshot_b64:
                        html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh; border:1px solid #ccc;">'
                        yield {
                            browser_view_comp: gr.update(value=html_content, visible=True)
                        }
                except Exception as e:
                    logger.debug(f"Failed to capture screenshot: {e}")

            await asyncio.sleep(0.5)  # Polling interval

        # Re-raise any exception the agent task terminated with
        await agent_task

        # Final UI state, including the PDF report if one was generated
        if hasattr(webui_manager, "vayner_pdf_report") and webui_manager.vayner_pdf_report:
            yield {
                run_button_comp: gr.update(value="▶️ Research Client", interactive=True),
                stop_button_comp: gr.update(interactive=False),
                chatbot_comp: gr.update(value=webui_manager.vayner_chat_history),
                pdf_report_comp: gr.update(value=webui_manager.vayner_pdf_report, visible=True)
            }
        else:
            # Update UI when complete without a PDF report
            yield {
                run_button_comp: gr.update(value="▶️ Research Client", interactive=True),
                stop_button_comp: gr.update(interactive=False),
                chatbot_comp: gr.update(value=webui_manager.vayner_chat_history)
            }

    except Exception as e:
        logger.error(f"Error during Vayner research: {e}", exc_info=True)
        error_message = f"**Error during research:**\n```\n{str(e)}\n```"
        webui_manager.vayner_chat_history.append(
            {"role": "assistant", "content": error_message}
        )

        yield {
            chatbot_comp: gr.update(value=webui_manager.vayner_chat_history),
            run_button_comp: gr.update(value="▶️ Research Client", interactive=True),
            stop_button_comp: gr.update(interactive=False),
            pdf_report_comp: gr.update(visible=False)
        }

        gr.Error(f"Research task failed: {e}")
1014
+
1015
async def handle_submit(webui_manager: WebuiManager, business_name: str):
    """Handles click on the Research Client button.

    Validates the business name, resets all per-run report state, shows the
    initial cover/second-page report immediately, then streams updates from
    run_vayner_research (merging in any queued live-report refreshes).
    """
    name = business_name.strip()

    # Guard clause: nothing to do without a business name.
    if not name:
        gr.Warning("Please enter a business name")
        yield {}
        return

    # Remember which business this run is for.
    webui_manager.vayner_current_business = name

    # Reset every per-run data collection, including keyword table rows.
    for attr in (
        "vayner_screenshots",
        "vayner_business_info",
        "vayner_keyword_data",
        "vayner_ranking_data",
        "vayner_keyword_table_rows",
    ):
        setattr(webui_manager, attr, [])

    # Build the initial report (cover and second page only).
    webui_manager.vayner_pdf_report = generate_live_report(name, [], [], [], [])

    # Surface the cover/second page right away.
    report_comp = webui_manager.get_component_by_id("vayner_client_research.pdf_report")
    yield {
        report_comp: gr.update(value=webui_manager.vayner_pdf_report, visible=True)
    }

    # Fresh queue for live PDF-report refreshes pushed by the step callback.
    webui_manager.update_queue = []

    # Stream agent updates; fold in any queued report refreshes first.
    async for update in run_vayner_research(webui_manager, {}, name):
        while webui_manager.update_queue:
            update.update(webui_manager.update_queue.pop(0))
        yield update
1061
async def handle_stop(webui_manager: WebuiManager):
    """Handle a click on the 'Stop' button: stop the agent and cancel its task."""
    logger.info("Stop button clicked.")

    agent = webui_manager.vayner_agent
    task = webui_manager.vayner_current_task

    # Nothing to stop — emit an empty update and bail out.
    if not (agent and task and not task.done()):
        yield {}
        return

    # Ask the agent to stop, falling back to flipping its state flags directly.
    try:
        if hasattr(agent, 'stop'):
            agent.stop()
        else:
            agent.state.stopped = True
            agent.state.paused = False
    except Exception as e:
        logger.warning(f"Error stopping agent: {e}")

    # Cancel the task and give it a short grace period to unwind.
    task.cancel()
    try:
        await asyncio.wait_for(task, timeout=2.0)
    except (asyncio.CancelledError, asyncio.TimeoutError, Exception):
        pass

    run_button_comp = webui_manager.get_component_by_id("vayner_client_research.run_button")
    stop_button_comp = webui_manager.get_component_by_id("vayner_client_research.stop_button")

    yield {
        run_button_comp: gr.update(value="▶️ Research Client", interactive=True),
        stop_button_comp: gr.update(interactive=False)
    }
1096
async def handle_clear(webui_manager: WebuiManager):
    """Handle a click on the 'Clear' button: stop any run and reset all state/UI."""
    logger.info("Clear button clicked.")

    # Stop a still-running research task, if there is one.
    task = webui_manager.vayner_current_task
    if task and not task.done():
        try:
            agent = webui_manager.vayner_agent
            # Prefer the agent's own stop(); otherwise flip its state flags.
            if agent and hasattr(agent, 'stop'):
                agent.stop()
            elif agent:
                agent.state.stopped = True
                agent.state.paused = False

            # Cancel the task and wait briefly for it to unwind.
            task.cancel()
            try:
                await asyncio.wait_for(task, timeout=1.0)
            except (asyncio.CancelledError, asyncio.TimeoutError, Exception):
                pass
        except Exception as e:
            logger.warning(f"Error stopping agent: {e}")

    # Reset chat history and regenerate the placeholder report.
    webui_manager.vayner_chat_history = []
    webui_manager.vayner_pdf_report = generate_live_report(
        "Business Name",
        [], [], [], [], []
    )

    # Reset every data collection that feeds the PDF report.
    webui_manager.vayner_screenshots = []
    webui_manager.vayner_business_info = []
    webui_manager.vayner_keyword_data = []
    webui_manager.vayner_ranking_data = []
    webui_manager.vayner_keyword_table_rows = []
    webui_manager.vayner_current_business = "Business Name"
    webui_manager.update_queue = []

    # Look up all UI components that need resetting.
    lookup = webui_manager.get_component_by_id
    chatbot_comp = lookup("vayner_client_research.chatbot")
    run_button_comp = lookup("vayner_client_research.run_button")
    stop_button_comp = lookup("vayner_client_research.stop_button")
    browser_view_comp = lookup("vayner_client_research.browser_view")
    business_name_comp = lookup("vayner_client_research.business_name")
    pdf_report_comp = lookup("vayner_client_research.pdf_report")

    yield {
        chatbot_comp: gr.update(value=[]),
        run_button_comp: gr.update(value="▶️ Research Client", interactive=True),
        stop_button_comp: gr.update(interactive=False),
        browser_view_comp: gr.update(value="<div style='text-align:center;'>Browser View</div>"),
        business_name_comp: gr.update(value=""),
        pdf_report_comp: gr.update(value=webui_manager.vayner_pdf_report, visible=True)
    }
1154
def create_vayner_client_research_tab(webui_manager: WebuiManager):
    """
    Create the Vayner Client Research tab with specialized agent functionality.

    Lays out a chat panel on the left and a tabbed browser-view / PDF-report
    panel on the right, registers all components under the
    "vayner_client_research" namespace, and wires the run/stop/clear handlers.
    """
    # Initialize manager state for Vayner client research (idempotent).
    webui_manager.init_vayner_client_research()

    # Create UI layout with left panel for agent interaction and right panel for browser view
    with gr.Row(elem_id="vayner_client_research_container"):
        # Left Panel - Agent Interaction (30% width)
        with gr.Column(scale=3):
            gr.Markdown("### Vayner Client Research Agent")

            chatbot = gr.Chatbot(
                value=webui_manager.vayner_chat_history,
                label="Agent Interaction",
                height=700,
                show_copy_button=True,
                type="messages"
            )

            with gr.Row():
                business_name = gr.Textbox(
                    label="Business Name",
                    placeholder="Enter the business name to research",
                    lines=1
                )

            with gr.Row():
                run_button = gr.Button("▶️ Research Client", variant="primary", scale=3)
                stop_button = gr.Button("⏹️ Stop", interactive=False, variant="stop", scale=2)
                clear_button = gr.Button("🗑️ Clear", variant="secondary", scale=2)

        # Right Panel - Browser View (70% width)
        with gr.Column(scale=7):
            with gr.Tabs():
                with gr.TabItem("Browser View"):
                    browser_view = gr.HTML(
                        value="<div style='width:100%; height:700px; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser view will appear here during research</p></div>",
                        label="Browser Live View",
                    )

                with gr.TabItem("PDF Report"):
                    # Hidden until a report is generated by a research run.
                    pdf_report = gr.HTML(
                        value="<div style='width:100%; height:700px; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>PDF Report will appear here after task completion</p></div>",
                        label="Research Report",
                        visible=False
                    )

    # Store components in manager under the "vayner_client_research" namespace.
    tab_components = {
        "chatbot": chatbot,
        "business_name": business_name,
        "run_button": run_button,
        "stop_button": stop_button,
        "clear_button": clear_button,
        "browser_view": browser_view,
        "pdf_report": pdf_report
    }
    webui_manager.add_components("vayner_client_research", tab_components)

    # Wrapper functions for button handlers — closures capture webui_manager
    # so the Gradio callbacks only receive component values.
    async def submit_wrapper(business_name_value):
        async for update in handle_submit(webui_manager, business_name_value):
            yield update

    async def stop_wrapper():
        async for update in handle_stop(webui_manager):
            yield update

    async def clear_wrapper():
        async for update in handle_clear(webui_manager):
            yield update

    # Connect event handlers
    run_button.click(
        fn=submit_wrapper,
        inputs=[business_name],
        outputs=list(tab_components.values())
    )

    # Pressing Enter in the textbox behaves like clicking the run button.
    business_name.submit(
        fn=submit_wrapper,
        inputs=[business_name],
        outputs=list(tab_components.values())
    )

    stop_button.click(
        fn=stop_wrapper,
        inputs=None,
        outputs=list(tab_components.values())
    )

    clear_button.click(
        fn=clear_wrapper,
        inputs=None,
        outputs=list(tab_components.values())
    )
+
src/webui/interface.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio import themes
3
+
4
+ from src.webui.webui_manager import WebuiManager
5
+ from src.webui.components.agent_settings_tab import create_agent_settings_tab
6
+ from src.webui.components.browser_settings_tab import create_browser_settings_tab
7
+ from src.webui.components.browser_use_agent_tab import create_browser_use_agent_tab
8
+ from src.webui.components.deep_research_agent_tab import create_deep_research_agent_tab
9
+ from src.webui.components.load_save_config_tab import create_load_save_config_tab
10
+ from src.webui.components.documentation_tab import create_documentation_tab
11
+ from src.webui.components.vayner_client_research_tab import create_vayner_client_research_tab
12
+
13
+ theme_map = {
14
+ "Default": themes.Default(),
15
+ "Soft": themes.Soft(),
16
+ "Monochrome": themes.Monochrome(),
17
+ "Glass": themes.Glass(),
18
+ "Origin": themes.Origin(),
19
+ "Citrus": themes.Citrus(),
20
+ "Ocean": themes.Ocean(),
21
+ "Base": themes.Base()
22
+ }
23
+
24
+
25
def create_ui(theme_name="Ocean"):
    """Build and return the Browser Use WebUI as a gr.Blocks app.

    Args:
        theme_name: Key into ``theme_map`` selecting the Gradio theme.
    """
    # Full-width layout overrides for the default Gradio container.
    css = """
    .gradio-container {
        width: 100vw !important;
        max-width: 100% !important;
        margin-left: auto !important;
        margin-right: auto !important;
        padding-top: 10px !important;
    }
    .header-text {
        text-align: center;
        margin-bottom: 20px;
    }
    .tab-header-text {
        text-align: center;
    }
    .theme-section {
        margin-bottom: 10px;
        padding: 15px;
        border-radius: 10px;
    }
    """

    # dark mode in default: on page load, redirect once with ?__theme=dark.
    js_func = """
    function refresh() {
        const url = new URL(window.location);

        if (url.searchParams.get('__theme') !== 'dark') {
            url.searchParams.set('__theme', 'dark');
            window.location.href = url.href;
        }
    }
    """

    ui_manager = WebuiManager()

    with gr.Blocks(
            title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js_func,
    ) as demo:
        with gr.Row():
            gr.Markdown(
                """
                # 🌐 Browser Use WebUI
                ### Control your browser with AI assistance
                """,
                elem_classes=["header-text"],
            )

        # NOTE(review): only two tabs are exposed here even though the other
        # create_*_tab builders are imported at module level — presumably the
        # rest are intentionally disabled for this deployment; confirm.
        with gr.Tabs() as tabs:
            with gr.TabItem("Vayner Client Research"):
                create_vayner_client_research_tab(ui_manager)

            with gr.TabItem("📚 Documentation"):
                create_documentation_tab(ui_manager)

    return demo
src/webui/webui_manager.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from collections.abc import Generator
3
+ from typing import TYPE_CHECKING
4
+ import os
5
+ import gradio as gr
6
+ from datetime import datetime
7
+ from typing import Optional, Dict, List, Any
8
+ import uuid
9
+ import asyncio
10
+
11
+ from gradio.components import Component
12
+ from browser_use.browser.browser import Browser
13
+ from browser_use.browser.context import BrowserContext
14
+ from browser_use.agent.service import Agent
15
+ from src.browser.custom_browser import CustomBrowser
16
+ from src.browser.custom_context import CustomBrowserContext
17
+ from src.controller.custom_controller import CustomController
18
+ from src.agent.deep_research.deep_research_agent import DeepResearchAgent
19
+
20
+
21
class WebuiManager:
    """Registry of Gradio components plus per-tab runtime state.

    Components are registered under string ids of the form ``"<tab>.<name>"``
    so event handlers can look them up without holding direct references.
    The ``init_*`` methods create the mutable state each tab needs.
    """

    def __init__(self, settings_save_dir: str = "./tmp/webui_settings"):
        # Bidirectional component registry: id -> component and component -> id.
        # (Annotations quoted: Component is only needed for type checking.)
        self.id_to_component: dict[str, "Component"] = {}
        self.component_to_id: dict["Component", str] = {}

        self.settings_save_dir = settings_save_dir
        os.makedirs(self.settings_save_dir, exist_ok=True)

        # HTML of the latest Vayner research report, if any.
        self.vayner_pdf_report: Optional[str] = None

    def init_browser_use_agent(self) -> None:
        """
        init browser use agent: reset all browser-use agent state.
        """
        self.bu_agent: Optional[Agent] = None
        self.bu_browser: Optional[CustomBrowser] = None
        self.bu_browser_context: Optional[CustomBrowserContext] = None
        self.bu_controller: Optional[CustomController] = None
        self.bu_chat_history: List[Dict[str, Optional[str]]] = []
        self.bu_response_event: Optional[asyncio.Event] = None
        self.bu_user_help_response: Optional[str] = None
        self.bu_current_task: Optional[asyncio.Task] = None
        self.bu_agent_task_id: Optional[str] = None
        self.bu_task_metrics: Optional[Dict[str, Any]] = None

    def init_vayner_client_research(self) -> None:
        """
        Initialize Vayner Client Research components and state.

        Idempotent: attributes that already exist are left untouched, so a
        tab rebuild never clobbers an in-flight run's state.
        """
        # Table-driven instead of fifteen copy-pasted hasattr blocks; the
        # mutable defaults are freshly created on each call, so no sharing.
        defaults: Dict[str, Any] = {
            "vayner_chat_history": [],
            "vayner_pdf_report": None,
            "vayner_controller": None,
            "vayner_browser": None,
            "vayner_browser_context": None,
            "vayner_agent": None,
            "vayner_current_task": None,
            # Data collections feeding the PDF report.
            "vayner_screenshots": [],
            "vayner_business_info": [],
            "vayner_keyword_data": [],
            "vayner_ranking_data": [],
            "vayner_current_business": "Unknown Business",
            # Keyword table rows for the report's third page.
            "vayner_keyword_table_rows": [],
            # Queue of UI updates produced while a task is running.
            "update_queue": [],
        }
        for attr, default in defaults.items():
            if not hasattr(self, attr):
                setattr(self, attr, default)

    def init_deep_research_agent(self) -> None:
        """
        init deep research agent: reset all deep-research agent state.
        """
        self.dr_agent: Optional[DeepResearchAgent] = None
        self.dr_current_task = None
        self.dr_agent_task_id: Optional[str] = None
        self.dr_save_dir: Optional[str] = None

    def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
        """
        Add tab components, registering each under the id "<tab_name>.<name>".
        """
        for comp_name, component in components_dict.items():
            comp_id = f"{tab_name}.{comp_name}"
            self.id_to_component[comp_id] = component
            self.component_to_id[component] = comp_id

    def get_components(self) -> list["Component"]:
        """
        Get all registered components (in registration order).
        """
        return list(self.id_to_component.values())

    def get_component_by_id(self, comp_id: str) -> Optional["Component"]:
        """
        Get component by id. Returns None if not found.
        """
        return self.id_to_component.get(comp_id, None)

    def get_id_by_component(self, comp: "Component") -> str:
        """
        Get id by component. Raises KeyError if not found.
        """
        return self.component_to_id[comp]

    def save_config(self, components: Dict["Component", str]) -> str:
        """
        Save the current UI settings to a timestamped JSON file.

        Buttons, file pickers and non-interactive components are skipped.
        Returns the path of the file written.
        """
        cur_settings = {}
        for comp, value in components.items():
            # Skip non-persistable component types.
            if isinstance(comp, (gr.Button, gr.File)):
                continue
            # Skip components explicitly marked non-interactive.
            # (str() handles interactive being either a bool or a string.)
            if str(getattr(comp, "interactive", True)).lower() == "false":
                continue
            cur_settings[self.get_id_by_component(comp)] = value

        # Build the path once and reuse it for writing and returning.
        config_name = datetime.now().strftime("%Y%m%d-%H%M%S")
        config_path = os.path.join(self.settings_save_dir, f"{config_name}.json")
        with open(config_path, "w") as fw:
            json.dump(cur_settings, fw, indent=4)

        return config_path

    def load_config(self, config_path: str):
        """
        Load settings from config_path and yield a dict of component updates.
        """
        with open(config_path, "r") as fr:
            ui_settings = json.load(fr)

        update_components = {}
        for comp_id, comp_val in ui_settings.items():
            # Silently skip ids that no longer exist in the current UI.
            if comp_id in self.id_to_component:
                comp = self.id_to_component[comp_id]
                update_components[comp] = gr.update(value=comp_val)

        config_status = self.id_to_component["load_save_config.config_status"]
        update_components.update(
            {
                config_status: gr.update(value=f"Successfully loaded config: {config_path}")
            }
        )
        yield update_components
supervisord.conf ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
; Supervisor process tree for the containerized WebUI:
;   xvfb (virtual display) -> vnc_setup -> x11vnc -> novnc (browser VNC),
;   plus the Gradio webui itself. Priorities order the startup sequence.
[supervisord]
user=root
nodaemon=true
logfile=/dev/stdout
logfile_maxbytes=0
loglevel=error

; Virtual framebuffer X server on display :99; RESOLUTION from environment.
[program:xvfb]
command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=100
startsecs=3
stopsignal=TERM
stopwaitsecs=10

; One-shot: write the VNC password file from VNC_PASSWORD (autorestart=false).
[program:vnc_setup]
command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
autorestart=false
startsecs=0
priority=150
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0

; VNC server bound to the Xvfb display; sleeps to let xvfb/vnc_setup finish.
[program:x11vnc]
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=200
startretries=10
startsecs=10
stopsignal=TERM
stopwaitsecs=10
depends_on=vnc_setup,xvfb

; Mirrors the x11vnc log file to container stdout.
[program:x11vnc_log]
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=250
stopsignal=TERM
stopwaitsecs=5
depends_on=x11vnc

; noVNC web proxy: exposes the VNC session over HTTP on port 6080.
[program:novnc]
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=300
startretries=5
startsecs=3
depends_on=x11vnc

; The Gradio web UI itself, served on port 7788.
[program:webui]
command=python webui.py --ip 0.0.0.0 --port 7788
directory=/app
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=400
startretries=3
startsecs=3
stopsignal=TERM
stopwaitsecs=10
tests/test_agents.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+ import sys
7
+
8
+ sys.path.append(".")
9
+ import asyncio
10
+ import os
11
+ import sys
12
+ from pprint import pprint
13
+
14
+ from browser_use import Agent
15
+ from browser_use.agent.views import AgentHistoryList
16
+
17
+ from src.utils import utils
18
+
19
+
20
async def test_browser_use_agent():
    """End-to-end smoke run: launch a (possibly user-owned) browser, attach an
    MCP-backed controller, run one BrowserUseAgent task and print the outcome.

    NOTE(review): requires a real browser, network access and API keys — this
    is a manual harness, not an automated test.
    """
    from browser_use.browser.browser import Browser, BrowserConfig
    from browser_use.browser.context import (
        BrowserContextConfig
    )
    from browser_use.agent.service import Agent

    from src.browser.custom_browser import CustomBrowser
    from src.controller.custom_controller import CustomController
    from src.utils import llm_provider
    from src.agent.browser_use.browser_use_agent import BrowserUseAgent

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.8,
    )

    # Alternative LLM providers (google, deepseek, ollama, azure_openai) can
    # be selected via llm_provider.get_llm_model / utils.get_llm_model.

    window_w, window_h = 1280, 1100

    mcp_server_config = {
        "mcpServers": {
            # A dockerized "markitdown" MCP server can also be enabled here.
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)
    use_own_browser = True
    use_vision = True  # Set to False when using DeepSeek

    max_actions_per_step = 10
    browser = None
    browser_context = None

    try:
        extra_browser_args = []
        if use_own_browser:
            # An empty BROWSER_PATH means "use the bundled browser".
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None
        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
            )
        )
        agent = BrowserUseAgent(
            # task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
            task="give me nvidia stock price",
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            use_vision=use_vision,
            max_actions_per_step=max_actions_per_step,
            generate_gif=True
        )
        history: AgentHistoryList = await agent.run(max_steps=100)

        print("Final Result:")
        pprint(history.final_result(), indent=4)

        print("\nErrors:")
        pprint(history.errors(), indent=4)

    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Always release browser/context and the MCP client, even on failure.
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()
165
+
166
async def test_browser_use_parallel():
    """Run several BrowserUseAgent tasks concurrently against one shared
    browser and print each agent's final result and errors.

    NOTE(review): requires a real browser, network access and API keys — this
    is a manual harness, not an automated test.
    """
    from browser_use.browser.browser import Browser, BrowserConfig
    from browser_use.browser.context import (
        BrowserContextConfig,
    )
    from browser_use.agent.service import Agent

    from src.browser.custom_browser import CustomBrowser
    from src.controller.custom_controller import CustomController
    from src.utils import llm_provider
    from src.agent.browser_use.browser_use_agent import BrowserUseAgent

    window_w, window_h = 1280, 1100

    # Alternative LLM providers (openai, google, deepseek, ollama) can be
    # selected via llm_provider.get_llm_model / utils.get_llm_model.
    llm = llm_provider.get_llm_model(
        provider="azure_openai",
        model_name="gpt-4o",
        temperature=0.5,
        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
        api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    )

    mcp_server_config = {
        "mcpServers": {
            # Dockerized "markitdown" / filesystem servers can be added here.
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }
    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)
    use_own_browser = True

    browser = None
    browser_context = None

    try:
        extra_browser_args = []
        if use_own_browser:
            # An empty BROWSER_PATH means "use the bundled browser".
            browser_binary_path = os.getenv("BROWSER_PATH", None)
            if browser_binary_path == "":
                browser_binary_path = None
            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
            if browser_user_data:
                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
        else:
            browser_binary_path = None
        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                browser_binary_path=browser_binary_path,
                extra_browser_args=extra_browser_args,
                new_context_config=BrowserContextConfig(
                    window_width=window_w,
                    window_height=window_h,
                )
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path=None,
                save_recording_path=None,
                save_downloads_path="./tmp/downloads",
                window_height=window_h,
                window_width=window_w,
                force_new_context=True
            )
        )
        tasks = [
            'Search Google for weather in Tokyo',
            'Find current time in Sydney',
            'Check who won last Super Bowl',
        ]
        agents = [
            BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
            for task in tasks
        ]

        # BUG FIX: asyncio.gather returns a *list* of AgentHistoryList — the
        # previous code called .final_result() on the list itself, which
        # raised AttributeError. Report each agent's outcome individually.
        histories = await asyncio.gather(*[agent.run() for agent in agents])
        for task, history in zip(tasks, histories):
            print(f"\n=== {task} ===")
            print("Final Result:")
            pprint(history.final_result(), indent=4)
            print("\nErrors:")
            pprint(history.errors(), indent=4)

    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        # Always release browser/context and the MCP client, even on failure.
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
        if controller:
            await controller.close_mcp_client()
328
+
329
async def test_deep_research_agent():
    """Run the DeepResearchAgent on a sample topic and summarize the outcome.

    NOTE(review): requires network access and API keys — this is a manual
    harness, not an automated test.
    """
    from src.agent.deep_research.deep_research_agent import DeepResearchAgent, PLAN_FILENAME, REPORT_FILENAME
    from src.utils import llm_provider

    llm = llm_provider.get_llm_model(
        provider="openai",
        model_name="gpt-4o",
        temperature=0.5
    )
    # Alternative: llm_provider.get_llm_model(provider="bedrock")

    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }

    browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
    agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
    research_topic = "Give me investment advices of nvidia and tesla."
    task_id_to_resume = ""  # Set this to resume a previous task ID

    print(f"Starting research on: {research_topic}")

    try:
        # Call run and wait for the final result dictionary.
        result = await agent.run(research_topic,
                                 task_id=task_id_to_resume,
                                 save_dir="./tmp/deep_research",
                                 max_parallel_browsers=1,
                                 )

        print("\n--- Research Process Ended ---")
        print(f"Status: {result.get('status')}")
        print(f"Message: {result.get('message')}")
        print(f"Task ID: {result.get('task_id')}")

        # Summarize the final state, if the agent returned one.
        final_state = result.get('final_state', {})
        if not final_state:
            print("Final state information not available.")
            return

        completed_steps = sum(
            1 for item in final_state.get('research_plan', [])
            if item.get('status') == 'completed'
        )
        print("\n--- Final State Summary ---")
        print(f"  Plan Steps Completed: {completed_steps}")
        print(f"  Total Search Results Logged: {len(final_state.get('search_results', []))}")
        if final_state.get("final_report"):
            print("  Final Report: Generated (content omitted). You can find it in the output directory.")
        else:
            print("  Final Report: Not generated.")

    except Exception as e:
        # Fixed: dropped the extraneous f-prefix (no placeholders, lint F541).
        print("\n--- An unhandled error occurred outside the agent run ---")
        print(e)
396
+
397
if __name__ == "__main__":
    # Pick exactly one scenario to run; the others are kept for convenience.
    asyncio.run(test_browser_use_agent())
    # asyncio.run(test_browser_use_parallel())
    # asyncio.run(test_deep_research_agent())
tests/test_controller.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import pdb
3
+ import sys
4
+ import time
5
+
6
+ sys.path.append(".")
7
+
8
+ from dotenv import load_dotenv
9
+
10
+ load_dotenv()
11
+
12
+
13
async def test_mcp_client():
    """Connect to the configured MCP servers and dump every exposed tool.

    For each tool returned by ``setup_mcp_client_and_tools`` this prints the
    tool name, its description, and the JSON schema of the pydantic parameter
    model built by ``create_tool_param_model``. Manual inspection script, not
    an automated test — it requires ``npx`` and the MCP server packages.
    """
    from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model

    test_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": ["run", "--rm", "-i", "markitdown-mcp:latest"]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
            # "filesystem": {
            #     "command": "npx",
            #     "args": [
            #         "-y",
            #         "@modelcontextprotocol/server-filesystem",
            #         "/Users/xxx/ai_workspace",
            #     ]
            # },
        }
    }

    mcp_tools, mcp_client = await setup_mcp_client_and_tools(test_server_config)

    # Fixed: the original dropped into pdb.set_trace() on every iteration,
    # which blocks any unattended run; just print the tool metadata instead.
    for tool in mcp_tools:
        tool_param_model = create_tool_param_model(tool)
        print(tool.name)
        print(tool.description)
        print(tool_param_model.model_json_schema())

    # NOTE(review): mcp_client is never shut down here, unlike the controller
    # test which calls close_mcp_client(). Confirm whether the client exposes
    # an async close/__aexit__ and invoke it to avoid leaking server
    # subprocesses spawned via npx.
53
+
54
+
55
async def test_controller_with_mcp():
    """Drive CustomController through MCP-registered actions end to end.

    Registers the configured MCP servers on a ``CustomController``, invokes
    desktop-commander's ``execute_command`` action to run a script, and — if
    the command is reported as still running — polls ``read_output`` by PID
    until output arrives or a timeout elapses. Finally closes the MCP client.
    Manual integration script: requires ``npx`` and the MCP server package.
    """
    from src.controller.custom_controller import CustomController
    from browser_use.controller.registry.views import ActionModel

    mcp_server_config = {
        "mcpServers": {
            # "markitdown": {
            #     "command": "docker",
            #     "args": ["run", "--rm", "-i", "markitdown-mcp:latest"]
            # },
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
            # "filesystem": {
            #     "command": "npx",
            #     "args": [
            #         "-y",
            #         "@modelcontextprotocol/server-filesystem",
            #         "/Users/xxx/ai_workspace",
            #     ]
            # },
        }
    }

    controller = CustomController()
    await controller.setup_mcp_client(mcp_server_config)

    action_name = "mcp.desktop-commander.execute_command"
    action_info = controller.registry.registry.actions[action_name]
    param_model = action_info.param_model
    print(param_model.model_json_schema())
    # Plain string — the original used an f-string with no placeholders.
    params = {"command": "python ./tmp/test.py"}
    validated_params = param_model(**params)
    ActionModel_ = controller.registry.create_action_model()
    # Wrap the validated parameters in the dynamically created ActionModel.
    action_model = ActionModel_(**{action_name: validated_params})
    result = await controller.act(action_model)
    result = result.extracted_content
    print(result)

    # desktop-commander reports a long-running command with a first line that
    # contains the PID plus a "still running" hint; parse the PID from it.
    if result and "Command is still running. Use read_output to get more output." in result and "PID" in \
            result.split("\n")[0]:
        pid = int(result.split("\n")[0].split("PID")[-1].strip())
        action_name = "mcp.desktop-commander.read_output"
        action_info = controller.registry.registry.actions[action_name]
        param_model = action_info.param_model
        print(param_model.model_json_schema())
        params = {"pid": pid}
        validated_params = param_model(**params)
        action_model = ActionModel_(**{action_name: validated_params})
        output_result = ""
        # Fixed: bound the polling loop so a hung command can't block forever.
        deadline = time.monotonic() + 120.0
        while time.monotonic() < deadline:
            # Fixed: time.sleep(1) in an async def blocks the event loop;
            # use asyncio.sleep so other tasks can make progress.
            await asyncio.sleep(1)
            result = await controller.act(action_model)
            result = result.extracted_content
            if result:
                # (Removed the original per-hit pdb.set_trace() breakpoints,
                # which made unattended execution impossible.)
                output_result = result
                break
        print(output_result)

    await controller.close_mcp_client()
127
+
128
+
129
if __name__ == '__main__':
    # Manual entry point: uncomment exactly one of the scenarios below.
    # asyncio.run(test_mcp_client())
    asyncio.run(test_controller_with_mcp())