Rajesh Betkiker committed on
Commit
7465fd2
·
1 Parent(s): 62bd5e8

Dockerized the solution

Browse files
Files changed (8) hide show
  1. .gitignore +178 -0
  2. .python-version +1 -0
  3. Dockerfile +31 -0
  4. README.md +17 -4
  5. app.py +558 -0
  6. mcp_server.py +184 -0
  7. pyproject.toml +22 -0
  8. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ .DS_Store
177
+
178
+ videos/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.13-slim

# Create the non-root user and the application directory while still root:
# WORKDIR creates /app owned by root, and only root is allowed to chown it.
RUN useradd -m -u 1000 appuser
WORKDIR /app
RUN chown -R appuser:1000 /app

# Drop privileges for every following build step and for the running container
USER appuser

# Environment: user-level pip installs place their executables here
ENV PATH="/home/appuser/.local/bin:$PATH"

# Install pip and uv
RUN pip install --upgrade pip && pip install uv

# Copy project files
COPY --chown=appuser:1000 pyproject.toml .
COPY --chown=appuser:1000 app.py .
# app.py loads ./mcp_server.py as its fallback when the agent run fails
COPY --chown=appuser:1000 mcp_server.py .
COPY --chown=appuser:1000 Dockerfile .
COPY --chown=appuser:1000 README.md .
COPY --chown=appuser:1000 uv.lock .

# Install dependencies exactly as pinned in uv.lock
RUN uv sync --frozen

# Expose port
EXPOSE 7860

# Run the application
CMD ["uv", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,14 +1,27 @@
1
  ---
2
  title: Unit3 HF Tagging Bot
3
  emoji: ⚡
4
- colorFrom: red
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.34.1
8
  app_file: app.py
9
- pinned: false
 
10
  license: apache-2.0
11
- short_description: This is a bot that tags HuggingFace models when they are men
 
 
 
 
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Unit3 HF Tagging Bot
3
  emoji: ⚡
4
+ colorFrom: purple
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.34.1
8
  app_file: app.py
9
+ pinned: true
10
+ base_path: /gradio
11
  license: apache-2.0
12
+ short_description: Bot that tags HuggingFace models when they are mentioned
13
+ tags:
14
+ - mcp-course
15
+ - huggingface
16
+ - pr-agent
17
  ---
18
 
19
+ # HF Tagging Bot
20
+
21
+ This is a bot that tags HuggingFace models when they are mentioned in discussions.
22
+
23
+ ## How it works
24
+
25
+ 1. The bot listens to discussions on the HuggingFace Hub
26
+ 2. When a discussion is created, the bot checks for tag mentions in the comment
27
+ 3. If a tag is mentioned, the bot adds the tag to the model repository via a PR
app.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ from datetime import datetime
5
+ from typing import List, Dict, Any, Optional, Literal
6
+
7
+ from fastapi import FastAPI, Request, BackgroundTasks
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ import gradio as gr
10
+ import uvicorn
11
+ from pydantic import BaseModel
12
+ from huggingface_hub.inference._mcp.agent import Agent
13
+ from dotenv import load_dotenv
14
+
15
+ load_dotenv()
16
+
17
+ # Configuration
18
+ WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET")
19
+ HF_TOKEN = os.getenv("HF_TOKEN")
20
+ HF_MODEL = os.getenv("HF_MODEL", "microsoft/DialoGPT-medium")
21
+ # Use a valid provider literal from the documentation
22
+ DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"
23
+ HF_PROVIDER = os.getenv("HF_PROVIDER", DEFAULT_PROVIDER)
24
+
25
+ # Simple storage for processed tag operations
26
+ tag_operations_store: List[Dict[str, Any]] = []
27
+
28
+ # Agent instance
29
+ agent_instance: Optional[Agent] = None
30
+
31
# Common ML tags that we recognize for auto-tagging.
# Every entry must be a non-blank token: the tag scanner looks for each entry
# inside the discussion text, so a blank entry such as " " would match almost
# any comment and be proposed as a tag.
RECOGNIZED_TAGS = {
    "pytorch",
    "tensorflow",
    "jax",
    "transformers",
    "diffusers",
    "text-generation",
    "text-classification",
    "question-answering",
    "text-to-image",
    "image-classification",
    "object-detection",
    "fill-mask",
    "token-classification",
    "translation",
    "summarization",
    "feature-extraction",
    "sentence-similarity",
    "zero-shot-classification",
    "image-to-text",
    "automatic-speech-recognition",
    "audio-classification",
    "voice-activity-detection",
    "depth-estimation",
    "image-segmentation",
    "video-classification",
    "reinforcement-learning",
    "tabular-classification",
    "tabular-regression",
    "time-series-forecasting",
    "graph-ml",
    "robotics",
    "computer-vision",
    "nlp",
    "cv",
    "multimodal",
}
70
+
71
+
72
class WebhookEvent(BaseModel):
    """Top-level shape of a Hub webhook payload as consumed by this app.

    Each section is kept as a loosely typed mapping. Values are typed ``Any``
    because payload sections are not string-only: the simulated payload built
    in this file already carries booleans such as ``repo["private"]``.
    """

    # Event descriptor; the handler reads its "scope" and "action" keys.
    event: Dict[str, Any]
    # Comment section; the processor reads "content" and "author".
    comment: Dict[str, Any]
    # Discussion section; the code reads "title", "num" and "isPullRequest".
    discussion: Dict[str, Any]
    # Repository section; the code reads "name".
    repo: Dict[str, Any]
77
+
78
+
79
+ app = FastAPI(title="HF Tagging Bot")
80
+ app.add_middleware(CORSMiddleware, allow_origins=["*"])
81
+
82
async def get_agent():
    """Return the shared ``Agent``, creating it and loading its tools on first use.

    The instance is cached in the module-level ``agent_instance`` and is only
    published there after ``load_tools()`` succeeded, so callers never receive
    a half-initialised agent.

    Returns:
        The cached or newly created agent, or ``None`` when ``HF_TOKEN`` is not
        set or when creating the agent / loading its tools raised.
    """
    global agent_instance
    print("🤖 get_agent() called...")

    if agent_instance is not None:
        print("✅ Using existing agent instance")
        return agent_instance

    if not HF_TOKEN:
        print("❌ No HF_TOKEN available, cannot create agent")
        return None

    print("🔧 Creating new Agent instance...")
    print(f"🔑 HF_TOKEN present: {bool(HF_TOKEN)}")
    print(f"🤖 Model: {HF_MODEL}")
    # HF_PROVIDER falls back to DEFAULT_PROVIDER when the env var is unset,
    # so using it here honours the override without changing the default.
    print(f"🔗 Provider: {HF_PROVIDER}")

    try:
        new_agent = Agent(
            model=HF_MODEL,
            provider=HF_PROVIDER,
            api_key=HF_TOKEN,
            servers=[
                {
                    "type": "http",
                    "config": {
                        "url": "https://betki-unit3-mcp-pr-agent.hf.space/mcp-server/mcp",
                        "env": {"HF_TOKEN": HF_TOKEN},
                    },
                }
            ],
        )
        print("✅ Agent instance created successfully")
        print("🔧 Loading tools...")
        await new_agent.load_tools()
        print("✅ Tools loaded successfully")
    except Exception as e:
        # Best effort: callers treat None as "agent unavailable".
        print(f"❌ Error creating/loading agent: {str(e)}")
        agent_instance = None
        return None

    agent_instance = new_agent
    return agent_instance
120
+
121
# "tag: a, b" / "tags: a, b" listings; the capture stays on one line.
_TAG_LIST_RE = re.compile(r"\btags?:[ \t]*([a-z0-9_, \t-]+)")
# "#hashtag" style mentions.
_HASHTAG_RE = re.compile(r"#([a-z0-9_-]+)")
# Characters that may be part of a tag; used to require whole-tag matches.
_TAG_CHARS = "a-z0-9_-"


def extract_tags_from_text(text: str) -> List[str]:
    """Extract potential tags from discussion text.

    Three sources are combined, case-insensitively:

    1. explicit listings such as ``tag: pytorch`` or ``tags: pytorch, nlp``;
       each comma-separated item contributes its first word only, so trailing
       prose ("tags: pytorch please") does not become part of the tag;
    2. ``#hashtag`` mentions;
    3. entries of ``RECOGNIZED_TAGS`` that occur as whole tokens in the text
       (``cv`` does not match inside ``cvs``).

    Args:
        text: Raw comment or title text. Empty or ``None`` yields ``[]``.

    Returns:
        Lower-cased tags without duplicates or blanks: explicit tags in order
        of appearance, followed by recognised tags in alphabetical order.
    """
    if not text:
        return []

    text_lower = text.lower()

    explicit_tags: List[str] = []
    for listing in _TAG_LIST_RE.findall(text_lower):
        for item in listing.split(","):
            words = item.split()
            # Skip empty items ("pytorch,") and punctuation-only tokens ("-").
            if words and any(ch.isalnum() for ch in words[0]):
                explicit_tags.append(words[0])

    explicit_tags.extend(_HASHTAG_RE.findall(text_lower))

    mentioned_tags: List[str] = []
    for tag in sorted(RECOGNIZED_TAGS):
        if not tag.strip():
            # A blank entry would match nearly every text.
            continue
        whole_tag = rf"(?<![{_TAG_CHARS}]){re.escape(tag)}(?![{_TAG_CHARS}])"
        if re.search(whole_tag, text_lower):
            mentioned_tags.append(tag)

    # dict.fromkeys de-duplicates while keeping a stable order.
    return list(dict.fromkeys(explicit_tags + mentioned_tags))
157
+
158
+
159
def _build_tagging_prompt(repo_name: str, tags: List[str]) -> str:
    """Compose the instruction asking the agent to add *tags* to *repo_name*."""
    tag_list = ", ".join(tags)
    return "\n".join(
        [
            "",
            f"I need to add the following tags to the repository '{repo_name}': {tag_list}",
            "",
            "For each tag, please:",
            "1. Check if the tag already exists on the repository using get_current_tags",
            "2. If the tag doesn't exist, add it using add_new_tag",
            "3. Provide a summary of what was done for each tag",
            "",
            f"Please process all {len(tags)} tags: {tag_list}",
            "",
        ]
    )


def _summarize_agent_response(tag: str, full_response: str) -> str:
    """Derive a one-line status for *tag* from the agent's free-text transcript."""
    response_lower = full_response.lower()
    tag_mentioned = tag.lower() in response_lower
    # "pr" must be a whole word: a plain substring test also fires on
    # "processed", "prompt", "provide", ... and would report a PR every time.
    mentions_pr = re.search(r"\bpr\b|pull request", response_lower) is not None

    if "already exists" in response_lower and tag_mentioned:
        return f"Tag '{tag}': Already exists"
    if mentions_pr:
        if tag_mentioned:
            return f"Tag '{tag}': PR created successfully"
        return f"Tag '{tag}': Processed (PR may have been created)"
    if "success" in response_lower and tag_mentioned:
        return f"Tag '{tag}': Successfully processed"
    if "error" in response_lower and tag_mentioned:
        return f"Tag '{tag}': Error during processing"
    return f"Tag '{tag}': Processed by agent"


async def _tag_via_agent(agent, repo_name: str, tags: List[str]) -> List[str]:
    """Run one agent conversation covering all *tags*; return one message per tag."""
    user_prompt = _build_tagging_prompt(repo_name, tags)
    print("💬 Sending comprehensive prompt to agent...")
    print(f"📝 Prompt: {user_prompt}")

    transcript = []
    async for item in agent.run(user_prompt):
        # The agent yields different types of items; only their text is used.
        item_str = str(item)
        transcript.append(item_str)

        item_lower = item_str.lower()
        if "tool_call" in item_lower or "function" in item_lower:
            print(f"🔧 Agent using tools: {item_str[:200]}...")
        elif "content" in item_str and len(item_str) < 500:
            print(f"💭 Agent response: {item_str}")

    full_response = " ".join(transcript)
    print("📋 Agent conversation completed successfully")

    messages = []
    for tag in tags:
        msg = _summarize_agent_response(tag, full_response)
        print(f"✅ Result for tag '{tag}': {msg}")
        messages.append(msg)
    return messages


def _load_mcp_tools():
    """Load ``mcp_server.py`` from this file's directory and return its two tools.

    Returns:
        ``(get_current_tags, add_new_tag)`` as plain callables.
    """
    import importlib.util
    from pathlib import Path

    # Resolve next to this file so the fallback does not depend on the
    # process's current working directory.
    server_path = Path(__file__).resolve().parent / "mcp_server.py"
    spec = importlib.util.spec_from_file_location("mcp_server", server_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load MCP server module from {server_path}")
    mcp_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mcp_module)

    # NOTE(review): depending on the installed FastMCP version, @mcp.tool() may
    # return a tool object exposing the function as `.fn` instead of the plain
    # function — confirm against the pinned version. Plain functions pass
    # through unchanged.
    return (
        getattr(mcp_module.get_current_tags, "fn", mcp_module.get_current_tags),
        getattr(mcp_module.add_new_tag, "fn", mcp_module.add_new_tag),
    )


def _apply_tag_directly(get_current_tags, add_new_tag, repo_name: str, tag: str) -> str:
    """Check and, if needed, add a single *tag* via the MCP tool functions."""
    print(f"🔧 Directly calling get_current_tags for '{tag}'")
    current_tags_result = get_current_tags(repo_name)
    print(f"📄 Current tags result: {current_tags_result}")

    tags_data = json.loads(current_tags_result)
    if tags_data.get("status") != "success":
        msg = f"Tag '{tag}': Error - {tags_data.get('error', 'Unknown error')}"
        print(f"❌ {msg}")
        return msg

    if tag in tags_data.get("current_tags", []):
        msg = f"Tag '{tag}': Already exists"
        print(f"✅ {msg}")
        return msg

    print(f"🔧 Directly calling add_new_tag for '{tag}'")
    add_result = add_new_tag(repo_name, tag)
    print(f"📄 Add tag result: {add_result}")

    add_data = json.loads(add_result)
    status = add_data.get("status")
    if status == "success":
        msg = f"Tag '{tag}': PR created - {add_data.get('pr_url', '')}"
    elif status == "already_exists":
        msg = f"Tag '{tag}': Already exists"
    elif "error" in add_data:
        # Error results carry "error" rather than "message"; surface it
        # instead of reporting the tag as processed.
        msg = f"Tag '{tag}': Error - {add_data['error']}"
        print(f"❌ {msg}")
        return msg
    else:
        msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
    print(f"✅ {msg}")
    return msg


def _tag_via_direct_calls(repo_name: str, tags: List[str]) -> List[str]:
    """Fallback path: call the MCP tool functions directly, one tag at a time."""
    get_current_tags, add_new_tag = _load_mcp_tools()

    messages = []
    for tag in tags:
        try:
            msg = _apply_tag_directly(get_current_tags, add_new_tag, repo_name, tag)
        except Exception as direct_error:
            # Keep going: one failing tag must not abort the remaining ones.
            msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
            print(f"❌ {msg}")
        messages.append(msg)
    return messages


async def process_webhook_comment(webhook_data: Dict[str, Any]):
    """Process webhook to detect and add tags.

    Tags are extracted from the comment content and the discussion title. When
    any are found, the shared agent is asked to add them; if the agent run
    raises, the MCP tool functions are called directly instead. Every run is
    appended to ``tag_operations_store``.

    Args:
        webhook_data: Payload with ``comment`` (``content``, ``author``),
            ``discussion`` (``title``, ``num``) and ``repo`` (``name``).

    Returns:
        The per-tag result messages joined with ``" | "``, or a message
        starting with "❌ Fatal error" when the payload could not be processed.
    """
    print("🏷️ Starting process_webhook_comment...")

    try:
        comment = webhook_data["comment"]
        discussion = webhook_data["discussion"]
        comment_content = comment["content"]
        discussion_title = discussion["title"]
        repo_name = webhook_data["repo"]["name"]
        discussion_num = discussion["num"]
        # Author is an object with "id" field
        comment_author = comment["author"].get("id", "unknown")

        print(f"📝 Comment content: {comment_content}")
        print(f"📰 Discussion title: {discussion_title}")
        print(f"📦 Repository: {repo_name}")

        # Extract potential tags from the comment and discussion title
        comment_tags = extract_tags_from_text(comment_content)
        title_tags = extract_tags_from_text(discussion_title)
        # De-duplicate while keeping a stable, reproducible order.
        all_tags = list(dict.fromkeys(comment_tags + title_tags))

        print(f"🔍 Comment tags found: {comment_tags}")
        print(f"🔍 Title tags found: {title_tags}")
        print(f"🏷️ All unique tags: {all_tags}")

        result_messages = []

        if not all_tags:
            msg = "No recognizable tags found in the discussion."
            print(f"❌ {msg}")
            result_messages.append(msg)
        else:
            print("🤖 Getting agent instance...")
            agent = await get_agent()
            if not agent:
                msg = "Error: Agent not configured (missing HF_TOKEN)"
                print(f"❌ {msg}")
                result_messages.append(msg)
            else:
                print("✅ Agent instance obtained successfully")
                try:
                    result_messages.extend(
                        await _tag_via_agent(agent, repo_name, all_tags)
                    )
                except Exception as agent_error:
                    print(f"⚠️ Agent streaming failed: {str(agent_error)}")
                    print("🔄 Falling back to direct MCP tool calls...")
                    try:
                        result_messages.extend(
                            _tag_via_direct_calls(repo_name, all_tags)
                        )
                    except Exception as fallback_error:
                        error_msg = f"Fallback approach failed: {str(fallback_error)}"
                        print(f"❌ {error_msg}")
                        result_messages.append(error_msg)

        # Store the interaction
        base_url = "https://huggingface.co"
        discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"

        interaction = {
            "timestamp": datetime.now().isoformat(),
            "repo": repo_name,
            "discussion_title": discussion_title,
            "discussion_num": discussion_num,
            "discussion_url": discussion_url,
            "original_comment": comment_content,
            "comment_author": comment_author,
            "detected_tags": all_tags,
            "results": result_messages,
        }

        tag_operations_store.append(interaction)
        final_result = " | ".join(result_messages)
        print(f"💾 Stored interaction and returning result: {final_result}")
        return final_result

    except Exception as e:
        # Top-level boundary of the background task: report instead of raising.
        error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
        print(error_msg)
        return error_msg
380
+
381
+
382
@app.post("/webhook")
async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
    """Handle HF Hub webhooks.

    The request is accepted only when ``WEBHOOK_SECRET`` is configured and the
    ``X-Webhook-Secret`` header matches it. Newly created discussions that are
    not pull requests are queued for ``process_webhook_comment``; every other
    event is ignored.

    Returns:
        ``{"status": "processing"}`` when the event was queued,
        ``{"status": "ignored"}`` for events this bot does not handle, or
        ``{"error": ...}`` for a bad secret, an invalid body or missing fields.
    """
    import hmac  # stdlib; imported locally to keep this block self-contained

    provided_secret = request.headers.get("X-Webhook-Secret")
    # Fail closed: an unset WEBHOOK_SECRET must not match a missing header
    # (None == None), and the comparison is constant-time.
    secret_ok = (
        bool(WEBHOOK_SECRET)
        and bool(provided_secret)
        and hmac.compare_digest(
            provided_secret.encode("utf-8"), WEBHOOK_SECRET.encode("utf-8")
        )
    )
    if not secret_ok:
        print("❌ Invalid webhook secret")
        return {"error": "Invalid webhook secret"}

    try:
        payload = await request.json()
    except ValueError:
        # Covers malformed JSON and undecodable bodies.
        payload = None
    if not isinstance(payload, dict):
        error_msg = "Invalid JSON payload"
        print(f"❌ {error_msg}")
        return {"error": error_msg}

    print(f"📥 Received webhook payload: {json.dumps(payload, indent=2)}")

    event = payload.get("event")
    if not isinstance(event, dict):
        event = {}
    scope = event.get("scope")
    action = event.get("action")

    print(f"🔍 Event details - scope: {scope}, action: {action}")

    # Not every event carries a "discussion" section, so read it defensively
    # instead of indexing.
    discussion = payload.get("discussion")
    if not isinstance(discussion, dict):
        discussion = {}
    not_pr = not discussion.get("isPullRequest", False)

    # Check if this is a discussion comment creation
    scope_check = scope == "discussion" and not_pr
    action_check = action == "create"
    print(f"✅ not_pr: {not_pr}")
    print(f"✅ scope_check: {scope_check}")
    print(f"✅ action_check: {action_check}")

    if scope_check and action_check:
        # Verify we have the required fields
        required_fields = ["comment", "discussion", "repo"]
        missing_fields = [field for field in required_fields if field not in payload]

        if missing_fields:
            error_msg = f"Missing required fields: {missing_fields}"
            print(f"❌ {error_msg}")
            return {"error": error_msg}

        repo = payload["repo"]
        repo_label = repo.get("name") if isinstance(repo, dict) else repo
        print(f"🚀 Processing webhook for repo: {repo_label}")
        background_tasks.add_task(process_webhook_comment, payload)
        return {"status": "processing"}

    print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
    return {"status": "ignored"}
424
+
425
+
426
async def simulate_webhook(
    repo_name: str, discussion_title: str, comment_content: str
) -> str:
    """Simulate webhook for testing.

    Builds a mock "discussion created" payload from the three form fields and
    runs it through ``process_webhook_comment``.

    Returns:
        A prompt to fill in all fields when any of them is empty, otherwise
        the processing result prefixed with "✅ Processed! Results: ".
    """
    for field_value in (repo_name, discussion_title, comment_content):
        if not field_value:
            return "Please fill in all fields."

    mock_comment = {
        "content": comment_content,
        "author": {"id": "test-user-id"},
        "id": "mock-comment-id",
        "hidden": False,
    }
    mock_discussion = {
        "title": discussion_title,
        # Number simulated discussions after the operations recorded so far.
        "num": len(tag_operations_store) + 1,
        "id": "mock-discussion-id",
        "status": "open",
        "isPullRequest": False,
    }
    mock_repo = {"name": repo_name, "type": "model", "private": False}

    outcome = await process_webhook_comment(
        {
            "event": {"action": "create", "scope": "discussion"},
            "comment": mock_comment,
            "discussion": mock_discussion,
            "repo": mock_repo,
        }
    )
    return f"✅ Processed! Results: {outcome}"
457
+
458
+
459
def create_gradio_app():
    """Create Gradio interface.

    The dashboard has an explanatory header, a form that feeds
    ``simulate_webhook`` and a listing of ``RECOGNIZED_TAGS``.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    how_it_works = """
        ## How it works:
        - Monitors HuggingFace Hub discussions
        - Detects tag mentions in comments (e.g., "tag: pytorch",
        "#transformers")
        - Automatically adds recognized tags to the model repository
        - Supports common ML tags like: pytorch, tensorflow,
        text-generation, etc.
        """
    recognized_listing = f"""
        ## Recognized Tags:
        {", ".join(sorted(RECOGNIZED_TAGS))}
        """

    with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🏷️ HF Tagging Bot Dashboard")
        gr.Markdown("*Automatically adds tags to models when mentioned in discussions*")
        gr.Markdown(how_it_works)

        # Input form for the simulated webhook
        with gr.Column():
            repo_box = gr.Textbox(
                label="Repository",
                value="betki/MCP-Course-Model",
                placeholder="username/model-name",
            )
            title_box = gr.Textbox(
                label="Discussion Title",
                value="Add pytorch tag",
                placeholder="Discussion title",
            )
            comment_box = gr.Textbox(
                label="Comment",
                lines=3,
                value="This model should have tags: pytorch, text-generation",
                placeholder="Comment mentioning tags...",
            )
            run_button = gr.Button("🏷️ Test Tag Detection")

        # Output area
        with gr.Column():
            result_box = gr.Textbox(label="Result", lines=8)

        run_button.click(
            fn=simulate_webhook,
            inputs=[repo_box, title_box, comment_box],
            outputs=result_box,
        )

        gr.Markdown(recognized_listing)

    return demo
509
+
510
@app.get("/")
async def root():
    """Root endpoint with basic information.

    Returns:
        A static description of the service and the paths of its endpoints.
    """
    endpoints = {
        "webhook": "/webhook",
        "health": "/health",
        "operations": "/operations",
    }
    service_info = {
        "name": "HF Tagging Bot",
        "status": "running",
        "description": "Webhook listener for automatic model tagging",
        "endpoints": endpoints,
    }
    return service_info
523
+
524
@app.get("/health")
async def health_check():
    """Health check endpoint for monitoring.

    Calls ``get_agent()`` and reports whether an agent is available along
    with whether the webhook secret and HF token are configured.

    Returns:
        A dict with a constant "healthy" status, the current timestamp and
        the per-component states.
    """
    current_agent = await get_agent()

    report = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
    }
    report["components"] = {
        "webhook_secret": "configured" if WEBHOOK_SECRET else "missing",
        "hf_token": "configured" if HF_TOKEN else "missing",
        "mcp_agent": "ready" if current_agent else "not_ready",
    }
    return report
538
+
539
@app.get("/operations")
async def get_operations():
    """Get recent tag operations for monitoring.

    Returns:
        The total number of stored operations and the 50 most recent ones.
    """
    # Slicing an empty list already yields an empty list.
    latest = tag_operations_store[-50:]
    total = len(tag_operations_store)
    return {"total_operations": total, "recent_operations": latest}
548
+
549
# Mount Gradio app
gradio_app = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")


if __name__ == "__main__":
    import argparse

    # The container starts this script with "--host ... --port ..."; parse
    # those arguments instead of silently ignoring them. The defaults keep
    # the previous bind address and port.
    parser = argparse.ArgumentParser(description="Run the HF Tagging Bot server")
    parser.add_argument("--host", default="0.0.0.0", help="Interface to bind to")
    parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Restart the server when source files change (development only)",
    )
    args = parser.parse_args()

    print("🚀 Starting HF Tagging Bot...")
    print(f"📊 Dashboard: http://localhost:{args.port}/gradio")
    print(f"🔗 Webhook: http://localhost:{args.port}/webhook")
    uvicorn.run("app:app", host=args.host, port=args.port, reload=args.reload)
mcp_server.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simplified MCP Server for HuggingFace Hub Tagging Operations using FastMCP
4
+ """
5
+
6
+ import asyncio
7
+ import os
8
+ import json
9
+ from fastmcp import FastMCP
10
+ from huggingface_hub import HfApi, model_info, ModelCard, ModelCardData
11
+ from huggingface_hub.utils import HfHubHTTPError
12
+ from dotenv import load_dotenv
13
+
14
+ load_dotenv()
15
+
16
+ # Configuration
17
+ HF_TOKEN = os.getenv("HF_TOKEN")
18
+
19
+ # Initialize HF API client
20
+ hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
21
+
22
+ # Create the FastMCP server
23
+ mcp = FastMCP("hf-tagging-bot")
24
+
25
@mcp.tool()
def get_current_tags(repo_id: str) -> str:
    """Get current tags from a HuggingFace model repository

    Args:
        repo_id: Repository identifier passed to ``model_info``.

    Returns:
        A JSON string. On success it holds ``status`` "success", ``repo_id``,
        ``current_tags`` and ``count``; on failure it holds ``status`` "error"
        and an ``error`` message.
    """
    print(f"🔧 get_current_tags called with repo_id: {repo_id}")

    if not hf_api:
        # Carry "status" like every other result so callers can branch on it.
        error_result = {"status": "error", "error": "HF token not configured"}
        json_str = json.dumps(error_result)
        print(f"❌ No HF API token - returning: {json_str}")
        return json_str

    try:
        print(f"📡 Fetching model info for: {repo_id}")
        info = model_info(repo_id=repo_id, token=HF_TOKEN)
        current_tags = list(info.tags) if info.tags else []
        print(f"🏷️ Found {len(current_tags)} tags: {current_tags}")

        result = {
            "status": "success",
            "repo_id": repo_id,
            "current_tags": current_tags,
            "count": len(current_tags),
        }
        json_str = json.dumps(result)
        print(f"✅ get_current_tags returning: {json_str}")
        return json_str

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        print(f"❌ Error in get_current_tags: {str(e)}")
        error_result = {"status": "error", "repo_id": repo_id, "error": str(e)}
        json_str = json.dumps(error_result)
        print(f"❌ get_current_tags error returning: {json_str}")
        return json_str
58
+
59
+
60
@mcp.tool()
def add_new_tag(repo_id: str, new_tag: str) -> str:
    """Add a new tag to a HuggingFace model repository via PR

    Args:
        repo_id: Repository identifier passed to ``model_info``.
        new_tag: Tag to add; surrounding whitespace is ignored.

    Returns:
        A JSON string whose ``status`` is "success" (with ``pr_url``,
        ``previous_tags`` and ``new_tags``), "already_exists", or "error"
        (with an ``error`` message).
    """
    print(f"🔧 add_new_tag called with repo_id: {repo_id}, new_tag: {new_tag}")

    if not hf_api:
        # Carry "status" like every other result so callers can branch on it.
        error_result = {"status": "error", "error": "HF token not configured"}
        json_str = json.dumps(error_result)
        print(f"❌ No HF API token - returning: {json_str}")
        return json_str

    new_tag = (new_tag or "").strip()
    if not new_tag:
        # A blank tag would only produce a meaningless PR.
        error_result = {
            "status": "error",
            "repo_id": repo_id,
            "tag": new_tag,
            "error": "Tag must not be empty",
        }
        json_str = json.dumps(error_result)
        print(f"❌ add_new_tag error returning: {json_str}")
        return json_str

    try:
        # Get current model info and tags
        print(f"📡 Fetching current model info for: {repo_id}")
        info = model_info(repo_id=repo_id, token=HF_TOKEN)
        current_tags = list(info.tags) if info.tags else []
        print(f"🏷️ Current tags: {current_tags}")

        # Check if tag already exists
        if new_tag in current_tags:
            print(f"⚠️ Tag '{new_tag}' already exists in {current_tags}")
            result = {
                "status": "already_exists",
                "repo_id": repo_id,
                "tag": new_tag,
                "message": f"Tag '{new_tag}' already exists",
            }
            json_str = json.dumps(result)
            print(f"🏷️ add_new_tag (already exists) returning: {json_str}")
            return json_str

        # Add the new tag to existing tags
        # NOTE(review): current_tags is whatever model_info reports and is
        # written to the card as-is; confirm that is the intended tag list.
        updated_tags = current_tags + [new_tag]
        print(f"🆕 Will update tags from {current_tags} to {updated_tags}")

        # Create model card content with updated tags
        try:
            # Load existing model card
            print("📄 Loading existing model card...")
            card = ModelCard.load(repo_id, token=HF_TOKEN)
            if getattr(card, "data", None) is None:
                card.data = ModelCardData()
        except HfHubHTTPError:
            # Create new model card if none exists
            print("📄 Creating new model card (none exists)")
            card = ModelCard("")
            card.data = ModelCardData()

        # Update tags - create new ModelCardData with updated tags
        card_dict = card.data.to_dict()
        card_dict["tags"] = updated_tags
        card.data = ModelCardData(**card_dict)

        # Create a pull request with the updated model card
        pr_title = f"Add '{new_tag}' tag"
        previous_listing = ", ".join(current_tags) if current_tags else "None"
        pr_description = "\n".join(
            [
                "",
                f"## Add tag: {new_tag}",
                "",
                f"This PR adds the `{new_tag}` tag to the model repository.",
                "",
                "**Changes:**",
                f"- Added `{new_tag}` to model tags",
                f"- Updated from {len(current_tags)} to {len(updated_tags)} tags",
                "",
                f"**Current tags:** {previous_listing}",
                f"**New tags:** {', '.join(updated_tags)}",
                "",
            ]
        )

        print(f"🚀 Creating PR with title: {pr_title}")

        # Create commit with updated model card using CommitOperationAdd
        from huggingface_hub import CommitOperationAdd

        commit_info = hf_api.create_commit(
            repo_id=repo_id,
            operations=[
                CommitOperationAdd(
                    path_in_repo="README.md",
                    path_or_fileobj=str(card).encode("utf-8"),
                )
            ],
            commit_message=pr_title,
            commit_description=pr_description,
            token=HF_TOKEN,
            create_pr=True,
        )

        # Extract PR URL from commit info. getattr performs the guarded read;
        # accessing the attribute first would defeat the fallback.
        pr_url = getattr(commit_info, "pr_url", None) or str(commit_info)

        print(f"✅ PR created successfully! URL: {pr_url}")

        result = {
            "status": "success",
            "repo_id": repo_id,
            "tag": new_tag,
            "pr_url": pr_url,
            "previous_tags": current_tags,
            "new_tags": updated_tags,
            "message": f"Created PR to add tag '{new_tag}'",
        }
        json_str = json.dumps(result)
        print(f"✅ add_new_tag success returning: {json_str}")
        return json_str

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        import traceback

        print(f"❌ Error in add_new_tag: {str(e)}")
        print(f"❌ Error type: {type(e)}")
        print(f"❌ Traceback: {traceback.format_exc()}")

        error_result = {
            "status": "error",
            "repo_id": repo_id,
            "tag": new_tag,
            "error": str(e),
        }
        json_str = json.dumps(error_result)
        print(f"❌ add_new_tag error returning: {json_str}")
        return json_str
181
+
182
+ # Add a simple main block for direct execution
183
+ if __name__ == "__main__":
184
+ mcp.run()
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "unit3-hf-tagging-bot"
3
+ version = "0.1.0"
4
+ description = "FastAPI and Gradio app for Hugging Face Hub discussion webhooks"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "fastapi",
9
+ "fastmcp",
10
+ "gradio",
11
+ "huggingface-hub[mcp]",
12
+ "pydantic",
13
+ "python-multipart",
14
+ "requests"
15
+ ]
16
+
17
+ [build-system]
18
+ requires = ["hatchling"]
19
+ build-backend = "hatchling.build"
20
+
21
+ [tool.hatch.build.targets.wheel]
22
+ packages = ["src"]
uv.lock ADDED
The diff for this file is too large to render. See raw diff