ABDALLALSWAITI committed
Commit a855629 · verified · 1 Parent(s): b6a9892

Update app.py

Files changed (1): app.py +340 -62
app.py CHANGED
@@ -4,9 +4,9 @@ from bs4 import BeautifulSoup
 import json
 from typing import List, Dict, Any, Optional
 import re
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urljoin
 import time
-from functools import lru_cache
+import functools
 import logging
 from datetime import datetime, timedelta
 
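The import swap above drops the unused `urlparse` and `lru_cache`, pulling in the whole `functools` module instead: its `partial` is what the reworked button wiring further down relies on. A minimal sketch of that pattern (helper name hypothetical, standing in for the `set_textbox_value` added later in this diff):

```python
import functools

def set_value(text):
    # Stand-in for the set_textbox_value helper introduced below.
    return text

# partial binds the argument now and returns a zero-argument callable,
# which is exactly what a Gradio .click() handler without inputs= expects.
fill_query = functools.partial(set_value, "pipeline")
print(fill_query())  # -> "pipeline"
```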
@@ -14,7 +14,8 @@ from datetime import datetime, timedelta
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-class HF_API: # Renamed class for brevity
+# Renamed class for brevity to avoid long tool names
+class HF_API:
     def __init__(self):
         self.base_url = "https://huggingface.co"
         self.docs_url = "https://huggingface.co/docs"
@@ -160,28 +161,325 @@ class HF_API: # Renamed class for brevity
         return content
 
     def search_documentation(self, query: str, max_results: int = 3) -> str:
-        # ... (implementation remains the same)
-        return f"Documentation for {query} with {max_results} results."
+        """
+        Searches the official Hugging Face documentation for a specific topic and returns a summary.
+        This tool is useful for finding how-to guides, explanations of concepts like 'pipeline' or 'tokenizer', and usage examples.
+        Args:
+            query (str): The topic or keyword to search for in the documentation (e.g., 'fine-tuning', 'peft', 'datasets').
+            max_results (int): The maximum number of documentation pages to retrieve and summarize. Defaults to 3.
+        """
+        try:
+            max_results = int(max_results) if isinstance(max_results, str) else max_results
+            max_results = min(max_results, 5)
+            query_lower = query.lower().strip()
+            if not query_lower:
+                return "Please provide a search query."
+            doc_sections = {
+                'transformers': {'base_url': 'https://huggingface.co/docs/transformers', 'topics': {'pipeline': '/main_classes/pipelines', 'tokenizer': '/main_classes/tokenizer', 'trainer': '/main_classes/trainer', 'model': '/main_classes/model', 'quicktour': '/quicktour', 'installation': '/installation', 'fine-tuning': '/training', 'training': '/training', 'inference': '/main_classes/pipelines', 'preprocessing': '/preprocessing', 'tutorial': '/tutorials', 'configuration': '/main_classes/configuration', 'peft': '/peft', 'lora': '/peft', 'quantization': '/main_classes/quantization', 'generation': '/main_classes/text_generation', 'optimization': '/perf_train_gpu_one', 'deployment': '/deployment', 'custom': '/custom_models'}},
+                'datasets': {'base_url': 'https://huggingface.co/docs/datasets', 'topics': {'loading': '/load_hub', 'load': '/load_hub', 'processing': '/process', 'streaming': '/stream', 'audio': '/audio_process', 'image': '/image_process', 'text': '/nlp_process', 'arrow': '/about_arrow', 'cache': '/cache', 'upload': '/upload_dataset', 'custom': '/dataset_script'}},
+                'diffusers': {'base_url': 'https://huggingface.co/docs/diffusers', 'topics': {'pipeline': '/using-diffusers/loading', 'stable diffusion': '/using-diffusers/stable_diffusion', 'controlnet': '/using-diffusers/controlnet', 'inpainting': '/using-diffusers/inpaint', 'training': '/training/overview', 'optimization': '/optimization/fp16', 'schedulers': '/using-diffusers/schedulers'}},
+                'hub': {'base_url': 'https://huggingface.co/docs/hub', 'topics': {'repositories': '/repositories', 'git': '/repositories-getting-started', 'spaces': '/spaces', 'models': '/models', 'datasets': '/datasets'}}
+            }
+            relevant_urls = []
+            for section_name, section_data in doc_sections.items():
+                base_url = section_data['base_url']
+                topics = section_data['topics']
+                for topic, path in topics.items():
+                    relevance = 0
+                    if query_lower == topic.lower(): relevance = 1.0
+                    elif query_lower in topic.lower(): relevance = 0.9
+                    elif any(word in topic.lower() for word in query_lower.split()): relevance = 0.7
+                    elif any(word in query_lower for word in topic.lower().split()): relevance = 0.6
+                    if relevance > 0:
+                        full_url = base_url + path
+                        relevant_urls.append({'url': full_url, 'topic': topic, 'section': section_name, 'relevance': relevance})
+            relevant_urls.sort(key=lambda x: x['relevance'], reverse=True)
+            relevant_urls = relevant_urls[:max_results]
+            if not relevant_urls:
+                return f"❌ No documentation found for '{query}'. Try: pipeline, tokenizer, trainer, model, fine-tuning, datasets, diffusers, or peft."
+            result = f"# 📚 Hugging Face Documentation: {query}\n\n"
+            for i, url_info in enumerate(relevant_urls, 1):
+                section_emoji = {'transformers': '🤖', 'datasets': '📊', 'diffusers': '🎨', 'hub': '🌐'}.get(url_info['section'], '📄')
+                result += f"## {i}. {section_emoji} {url_info['topic'].title()} ({url_info['section'].title()})\n\n"
+                content = self._fetch_with_retry(url_info['url'])
+                if content:
+                    soup = BeautifulSoup(content, 'html.parser')
+                    practical_content = self._extract_practical_content(soup, url_info['topic'])
+                    if practical_content['overview']: result += f"**📖 Overview:**\n{practical_content['overview']}\n\n"
+                    if practical_content['installation']: result += f"**⚙️ Installation:**\n{practical_content['installation']}\n\n"
+                    if practical_content['code_examples']:
+                        result += "**💻 Code Examples:**\n\n"
+                        for j, code_block in enumerate(practical_content['code_examples'][:3], 1):
+                            lang = code_block.get('language', 'python')
+                            code_type = code_block.get('type', 'example')
+                            result += f"*{code_type.title()} {j}:*\n```{lang}\n{code_block['code']}\n```\n\n"
+                    if practical_content['usage_instructions']:
+                        result += "**🛠️ Usage Instructions:**\n"
+                        for idx, instruction in enumerate(practical_content['usage_instructions'][:4], 1):
+                            result += f"{idx}. {instruction}\n"
+                        result += "\n"
+                    if practical_content['parameters']:
+                        result += "**⚙️ Parameters:**\n"
+                        for param in practical_content['parameters'][:6]:
+                            param_type = f" (`{param['type']}`)" if param.get('type') else ""
+                            default_val = f" *Default: {param['default']}*" if param.get('default') else ""
+                            result += f"• **{param['name']}**{param_type}: {param['description']}{default_val}\n"
+                        result += "\n"
+                    result += f"**🔗 Full Documentation:** {url_info['url']}\n\n"
+                else:
+                    result += f"⚠️ Could not fetch content. Visit directly: {url_info['url']}\n\n"
+                result += "---\n\n"
+            return result
+        except Exception as e:
+            logger.error(f"Error in search_documentation: {e}")
+            return f"❌ Error searching documentation: {str(e)}\n\nTry a simpler search term or check your internet connection."
 
     def get_model_info(self, model_name: str) -> str:
-        # ... (implementation remains the same)
-        return f"Info for model {model_name}."
+        """
+        Fetches comprehensive information about a specific model from the Hugging Face Hub.
+        Provides statistics like downloads and likes, a description, usage examples, and a quick-start code snippet.
+        Args:
+            model_name (str): The full identifier of the model on the Hub, such as 'bert-base-uncased' or 'meta-llama/Llama-2-7b-hf'.
+        """
+        try:
+            model_name = model_name.strip()
+            if not model_name: return "Please provide a model name."
+            api_url = f"{self.api_url}/models/{model_name}"
+            response = self.session.get(api_url, timeout=15)
+            if response.status_code == 404: return f"❌ Model '{model_name}' not found. Please check the model name."
+            elif response.status_code != 200: return f"❌ Error fetching model info (Status: {response.status_code})"
+            model_data = response.json()
+            result = f"# 🤖 Model: {model_name}\n\n"
+            downloads = model_data.get('downloads', 0)
+            likes = model_data.get('likes', 0)
+            task = model_data.get('pipeline_tag', 'N/A')
+            library = model_data.get('library_name', 'N/A')
+            result += f"**📊 Statistics:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes:,}\n• **Task:** {task}\n• **Library:** {library}\n• **Created:** {model_data.get('createdAt', 'N/A')[:10]}\n• **Updated:** {model_data.get('lastModified', 'N/A')[:10]}\n\n"
+            if 'tags' in model_data and model_data['tags']: result += f"**🏷️ Tags:** {', '.join(model_data['tags'][:10])}\n\n"
+            model_url = f"{self.base_url}/{model_name}"
+            page_content = self._fetch_with_retry(model_url)
+            if page_content:
+                soup = BeautifulSoup(page_content, 'html.parser')
+                readme_content = soup.find('div', class_=re.compile(r'prose|readme|model-card'))
+                if readme_content:
+                    paragraphs = readme_content.find_all('p')[:3]
+                    description_parts = []
+                    for p in paragraphs:
+                        text = p.get_text(strip=True)
+                        if len(text) > 30 and not any(skip in text.lower() for skip in ['table of contents', 'toc']):
+                            description_parts.append(text)
+                    if description_parts:
+                        description = ' '.join(description_parts)
+                        result += f"**📝 Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
+                code_examples = self._extract_code_examples(soup)
+                if code_examples:
+                    result += "**💻 Usage Examples:**\n\n"
+                    for i, code_block in enumerate(code_examples[:3], 1):
+                        lang = code_block.get('language', 'python')
+                        result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
+            if task and task != 'N/A':
+                result += f"**🚀 Quick Start Template:**\n"
+                if library == 'transformers':
+                    result += f"```python\nfrom transformers import pipeline\n\n# Load the model\nmodel = pipeline('{task}', model='{model_name}')\n\n# Use the model\n# result = model(your_input_here)\n# print(result)\n```\n\n"
+                else:
+                    result += f"```python\n# Load and use {model_name}\n# Refer to the documentation for specific usage\n```\n\n"
+            if 'siblings' in model_data:
+                files = [f['rfilename'] for f in model_data['siblings'][:10]]
+                if files:
+                    result += f"**📁 Model Files:** {', '.join(files)}\n\n"
+            result += f"**🔗 Model Page:** {model_url}\n"
+            return result
+        except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
+        except Exception as e:
+            logger.error(f"Error in get_model_info: {e}")
+            return f"❌ Error fetching model info: {str(e)}"
 
     def get_dataset_info(self, dataset_name: str) -> str:
-        # ... (implementation remains the same)
-        return f"Info for dataset {dataset_name}."
+        """
+        Retrieves detailed information about a specific dataset from the Hugging Face Hub.
+        Includes statistics, a description, and a quick-start code snippet showing how to load the dataset.
+        Args:
+            dataset_name (str): The full identifier of the dataset on the Hub, for example 'squad' or 'imdb'.
+        """
+        try:
+            dataset_name = dataset_name.strip()
+            if not dataset_name: return "Please provide a dataset name."
+            api_url = f"{self.api_url}/datasets/{dataset_name}"
+            response = self.session.get(api_url, timeout=15)
+            if response.status_code == 404: return f"❌ Dataset '{dataset_name}' not found. Please check the dataset name."
+            elif response.status_code != 200: return f"❌ Error fetching dataset info (Status: {response.status_code})"
+            dataset_data = response.json()
+            result = f"# 📊 Dataset: {dataset_name}\n\n"
+            downloads = dataset_data.get('downloads', 0)
+            likes = dataset_data.get('likes', 0)
+            result += f"**📈 Statistics:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes:,}\n• **Created:** {dataset_data.get('createdAt', 'N/A')[:10]}\n• **Updated:** {dataset_data.get('lastModified', 'N/A')[:10]}\n\n"
+            if 'tags' in dataset_data and dataset_data['tags']: result += f"**🏷️ Tags:** {', '.join(dataset_data['tags'][:10])}\n\n"
+            dataset_url = f"{self.base_url}/datasets/{dataset_name}"
+            page_content = self._fetch_with_retry(dataset_url)
+            if page_content:
+                soup = BeautifulSoup(page_content, 'html.parser')
+                readme_content = soup.find('div', class_=re.compile(r'prose|readme|dataset-card'))
+                if readme_content:
+                    paragraphs = readme_content.find_all('p')[:3]
+                    description_parts = []
+                    for p in paragraphs:
+                        text = p.get_text(strip=True)
+                        if len(text) > 30: description_parts.append(text)
+                    if description_parts:
+                        description = ' '.join(description_parts)
+                        result += f"**📝 Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
+                code_examples = self._extract_code_examples(soup)
+                if code_examples:
+                    result += "**💻 Usage Examples:**\n\n"
+                    for i, code_block in enumerate(code_examples[:3], 1):
+                        lang = code_block.get('language', 'python')
+                        result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
+            result += f"**🚀 Quick Start Template:**\n"
+            result += f"```python\nfrom datasets import load_dataset\n\n# Load the dataset\ndataset = load_dataset('{dataset_name}')\n\n# Explore the dataset\n# print(dataset)\n# print(f\"Dataset keys: {{list(dataset.keys())}}\")\n\n# Access first example\n# if 'train' in dataset:\n#     print(\"First example:\")\n#     print(dataset['train'][0])\n```\n\n"
+            result += f"**🔗 Dataset Page:** {dataset_url}\n"
+            return result
+        except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
+        except Exception as e:
+            logger.error(f"Error in get_dataset_info: {e}")
+            return f"❌ Error fetching dataset info: {str(e)}"
 
     def search_models(self, task: str, limit: str = "5") -> str:
-        # ... (implementation remains the same)
-        return f"Models for task {task} with limit {limit}."
+        """
+        Searches the Hugging Face Hub for models based on a specified task or keyword and returns a list of top models.
+        Each result includes statistics and a quick usage example.
+        Args:
+            task (str): The task to search for, such as 'text-classification', 'image-generation', or 'question-answering'.
+            limit (str): The maximum number of models to return. Defaults to '5'.
+        """
+        try:
+            task = task.strip()
+            if not task: return "Please provide a search task or keyword."
+            limit = int(limit) if isinstance(limit, str) and limit.isdigit() else 5
+            limit = min(max(limit, 1), 10)
+            params = {'search': task, 'limit': limit * 3, 'sort': 'downloads', 'direction': -1}
+            response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
+            response.raise_for_status()
+            models = response.json()
+            if not models: return f"❌ No models found for task: '{task}'. Try different keywords."
+            filtered_models = []
+            for model in models:
+                if (model.get('downloads', 0) > 0 or model.get('likes', 0) > 0 or 'pipeline_tag' in model):
+                    filtered_models.append(model)
+                    if len(filtered_models) >= limit: break
+            if not filtered_models: filtered_models = models[:limit]
+            result = f"# 🔍 Top {len(filtered_models)} Models for '{task}'\n\n"
+            for i, model in enumerate(filtered_models, 1):
+                model_id = model.get('id', 'Unknown')
+                downloads = model.get('downloads', 0)
+                likes = model.get('likes', 0)
+                task_type = model.get('pipeline_tag', 'N/A')
+                library = model.get('library_name', 'N/A')
+                quality_score = ""
+                if downloads > 10000: quality_score = "⭐ Popular"
+                elif downloads > 1000: quality_score = "🔥 Active"
+                elif likes > 10: quality_score = "👍 Liked"
+                result += f"## {i}. {model_id} {quality_score}\n\n"
+                result += f"**📊 Stats:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes}\n• **Task:** {task_type}\n• **Library:** {library}\n\n"
+                if task_type and task_type != 'N/A':
+                    result += f"**🚀 Quick Usage:**\n"
+                    if library == 'transformers':
+                        result += f"```python\nfrom transformers import pipeline\n\n# Load model\nmodel = pipeline('{task_type}', model='{model_id}')\n\n# Use model\n# result = model(\"Your input here\")\n# print(result)\n```\n\n"
+                    else:
+                        result += f"```python\n# Load and use {model_id}\n# Check model page for specific usage instructions\n```\n\n"
+                result += f"**🔗 Model Page:** {self.base_url}/{model_id}\n\n---\n\n"
+            return result
+        except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
+        except Exception as e:
+            logger.error(f"Error in search_models: {e}")
+            return f"❌ Error searching models: {str(e)}"
 
     def get_transformers_docs(self, topic: str) -> str:
-        # ... (implementation remains the same)
-        return f"Transformer docs for {topic}."
+        """
+        Fetches detailed documentation specifically for the Hugging Face Transformers library on a given topic.
+        This provides in-depth explanations, code examples, and parameter descriptions for core library components.
+        Args:
+            topic (str): The Transformers library topic to look up, such as 'pipeline', 'tokenizer', 'trainer', or 'generation'.
+        """
+        try:
+            topic = topic.strip().lower()
+            if not topic: return "Please provide a topic to search for."
+            docs_url = "https://huggingface.co/docs/transformers"
+            topic_map = {'pipeline': f"{docs_url}/main_classes/pipelines", 'pipelines': f"{docs_url}/main_classes/pipelines", 'tokenizer': f"{docs_url}/main_classes/tokenizer", 'tokenizers': f"{docs_url}/main_classes/tokenizer", 'trainer': f"{docs_url}/main_classes/trainer", 'training': f"{docs_url}/training", 'model': f"{docs_url}/main_classes/model", 'models': f"{docs_url}/main_classes/model", 'configuration': f"{docs_url}/main_classes/configuration", 'config': f"{docs_url}/main_classes/configuration", 'quicktour': f"{docs_url}/quicktour", 'quick': f"{docs_url}/quicktour", 'installation': f"{docs_url}/installation", 'install': f"{docs_url}/installation", 'tutorial': f"{docs_url}/tutorials", 'tutorials': f"{docs_url}/tutorials", 'generation': f"{docs_url}/main_classes/text_generation", 'text_generation': f"{docs_url}/main_classes/text_generation", 'preprocessing': f"{docs_url}/preprocessing", 'preprocess': f"{docs_url}/preprocessing", 'peft': f"{docs_url}/peft", 'lora': f"{docs_url}/peft", 'quantization': f"{docs_url}/main_classes/quantization", 'optimization': f"{docs_url}/perf_train_gpu_one", 'performance': f"{docs_url}/perf_train_gpu_one", 'deployment': f"{docs_url}/deployment", 'custom': f"{docs_url}/custom_models", 'fine-tuning': f"{docs_url}/training", 'finetuning': f"{docs_url}/training"}
+            url = topic_map.get(topic)
+            if not url:
+                for key, value in topic_map.items():
+                    if topic in key or key in topic:
+                        url = value
+                        topic = key
+                        break
+            if not url:
+                url = f"{docs_url}/quicktour"
+                topic = "quicktour"
+            content = self._fetch_with_retry(url)
+            if not content: return f"❌ Could not fetch documentation for '{topic}'. Please try again or visit: {url}"
+            soup = BeautifulSoup(content, 'html.parser')
+            practical_content = self._extract_practical_content(soup, topic)
+            result = f"# 📚 Transformers Documentation: {topic.replace('_', ' ').title()}\n\n"
+            if practical_content['overview']: result += f"**📖 Overview:**\n{practical_content['overview']}\n\n"
+            if practical_content['installation']: result += f"**⚙️ Installation:**\n{practical_content['installation']}\n\n"
+            if practical_content['code_examples']:
+                result += "**💻 Code Examples:**\n\n"
+                for i, code_block in enumerate(practical_content['code_examples'][:4], 1):
+                    lang = code_block.get('language', 'python')
+                    code_type = code_block.get('type', 'example')
+                    result += f"### {code_type.title()} {i}:\n```{lang}\n{code_block['code']}\n```\n\n"
+            if practical_content['usage_instructions']:
+                result += "**🛠️ Step-by-Step Usage:**\n"
+                for i, instruction in enumerate(practical_content['usage_instructions'][:6], 1):
+                    result += f"{i}. {instruction}\n"
+                result += "\n"
+            if practical_content['parameters']:
+                result += "**⚙️ Key Parameters:**\n"
+                for param in practical_content['parameters'][:10]:
+                    param_type = f" (`{param['type']}`)" if param.get('type') else ""
+                    default_val = f" *Default: `{param['default']}`*" if param.get('default') else ""
+                    result += f"• **`{param['name']}`**{param_type}: {param['description']}{default_val}\n"
+                result += "\n"
+            related_topics = [k for k in topic_map.keys() if k != topic][:5]
+            if related_topics: result += f"**🔗 Related Topics:** {', '.join(related_topics)}\n\n"
+            result += f"**📄 Full Documentation:** {url}\n"
+            return result
+        except Exception as e:
+            logger.error(f"Error in get_transformers_docs: {e}")
+            return f"❌ Error fetching Transformers documentation: {str(e)}"
 
     def get_trending_models(self, limit: str = "10") -> str:
-        # ... (implementation remains the same)
-        return f"Trending models with limit {limit}."
+        """
+        Fetches a list of the most downloaded models currently trending on the Hugging Face Hub.
+        This is useful for discovering popular and widely-used models.
+        Args:
+            limit (str): The number of trending models to return. Defaults to '10'.
+        """
+        try:
+            limit = int(limit) if isinstance(limit, str) and limit.isdigit() else 10
+            limit = min(max(limit, 1), 20)
+            params = {'sort': 'downloads', 'direction': -1, 'limit': limit}
+            response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
+            response.raise_for_status()
+            models = response.json()
+            if not models: return "❌ Could not fetch trending models."
+            result = f"# 🔥 Trending Models (Top {len(models)})\n\n"
+            for i, model in enumerate(models, 1):
+                model_id = model.get('id', 'Unknown')
+                downloads = model.get('downloads', 0)
+                likes = model.get('likes', 0)
+                task = model.get('pipeline_tag', 'N/A')
+                if downloads > 1000000: trend = "🚀 Mega Popular"
+                elif downloads > 100000: trend = "🔥 Very Popular"
+                elif downloads > 10000: trend = "⭐ Popular"
+                else: trend = "📈 Trending"
+                result += f"## {i}. {model_id} {trend}\n"
+                result += f"• **Downloads:** {downloads:,} | **Likes:** {likes} | **Task:** {task}\n"
+                result += f"• **Link:** {self.base_url}/{model_id}\n\n"
+            return result
+        except Exception as e:
+            logger.error(f"Error in get_trending_models: {e}")
+            return f"❌ Error fetching trending models: {str(e)}"
 
 # Initialize the API server
 hf_api = HF_API()
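All six new methods are thin wrappers over the public Hub REST API; the code reads the base from `self.api_url`, which is defined outside this hunk (assumed here to resolve to `https://huggingface.co/api`). The endpoints can be exercised directly; a minimal sketch:

```python
import requests

# Model metadata, as fetched by get_model_info.
r = requests.get("https://huggingface.co/api/models/bert-base-uncased", timeout=15)
info = r.json()
print(info.get("downloads"), info.get("likes"), info.get("pipeline_tag"))

# Model search sorted by downloads, as used by search_models and get_trending_models.
params = {"search": "text-classification", "limit": 5, "sort": "downloads", "direction": -1}
r = requests.get("https://huggingface.co/api/models", params=params, timeout=20)
for m in r.json():
    print(m.get("id"), m.get("downloads", 0))
```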
@@ -189,43 +487,32 @@ hf_api = HF_API()
 # --- Named Functions for Gradio UI ---
 
 def clear_output():
-    """Clears the Gradio output component."""
+    """Clears a Gradio output component."""
    return ""
 
+def set_textbox_value(text):
+    """Sets a Gradio Textbox to a specific value."""
+    return text
+
 # --- Doc Search Tab Functions ---
 def run_doc_search(query, max_results):
     return hf_api.search_documentation(query, int(max_results) if str(max_results).isdigit() else 2)
 
-def set_doc_query(text):
-    return text
-
 # --- Model Info Tab Functions ---
 def run_model_info(model_name):
     return hf_api.get_model_info(model_name)
 
-def set_model_name(text):
-    return text
-
 # --- Dataset Info Tab Functions ---
 def run_dataset_info(dataset_name):
     return hf_api.get_dataset_info(dataset_name)
 
-def set_dataset_name(text):
-    return text
-
 # --- Model Search Tab Functions ---
 def run_model_search(task, limit):
     return hf_api.search_models(task, int(limit) if str(limit).isdigit() else 5)
 
-def set_search_task(text):
-    return text
-
 # --- Transformers Docs Tab Functions ---
 def run_transformers_docs(topic):
     return hf_api.get_transformers_docs(topic)
-
-def set_transformer_topic(text):
-    return text
 
 # --- Trending Models Tab Functions ---
 def run_trending_models(limit):
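The `run_*` wrappers coerce the numeric textbox values defensively: `str.isdigit()` rejects empty strings, signs, and decimals, so anything that is not a plain digit string falls back to the default. A stand-alone restatement of the guard (helper name hypothetical):

```python
def coerce_limit(value, default):
    # Same guard as in run_doc_search and run_model_search.
    return int(value) if str(value).isdigit() else default

print(coerce_limit("7", 5))    # 7
print(coerce_limit("", 5))     # 5  (empty string is not a digit)
print(coerce_limit("-3", 5))   # 5  (isdigit() rejects the minus sign)
print(coerce_limit("2.5", 5))  # 5  (isdigit() rejects the dot)
```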
@@ -238,17 +525,8 @@ with gr.Blocks(
     title="🤗 Hugging Face Information Server",
     theme=gr.themes.Soft(),
     css="""
-    .gradio-container {
-        font-family: 'Inter', sans-serif;
-    }
-    .main-header {
-        text-align: center;
-        padding: 20px;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        border-radius: 10px;
-        margin-bottom: 20px;
-    }
+    .gradio-container { font-family: 'Inter', sans-serif; }
+    .main-header { text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px; }
     """) as demo:
     # Header
     with gr.Row():
@@ -272,10 +550,10 @@
             doc_clear = gr.Button("🗑️ Clear", variant="secondary")
             gr.Markdown("**Quick Examples:**")
             with gr.Row():
-                gr.Button("Pipeline", size="sm").click(lambda: set_doc_query("pipeline"), outputs=doc_query)
-                gr.Button("Tokenizer", size="sm").click(lambda: set_doc_query("tokenizer"), outputs=doc_query)
-                gr.Button("Fine-tuning", size="sm").click(lambda: set_doc_query("fine-tuning"), outputs=doc_query)
-                gr.Button("PEFT", size="sm").click(lambda: set_doc_query("peft"), outputs=doc_query)
+                gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=doc_query)
+                gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=doc_query)
+                gr.Button("Fine-tuning", size="sm").click(functools.partial(set_textbox_value, "fine-tuning"), outputs=doc_query)
+                gr.Button("PEFT", size="sm").click(functools.partial(set_textbox_value, "peft"), outputs=doc_query)
 
             doc_btn.click(run_doc_search, inputs=[doc_query, doc_max_results], outputs=doc_output)
             doc_clear.click(clear_output, outputs=doc_output)
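Replacing the per-tab lambdas with `functools.partial` over a single named helper keeps every event handler a named function, which keeps the generated API surface readable; the "long tool names" comment above suggests the handlers are exposed programmatically. If explicit endpoint names are wanted, Gradio's `api_name` argument can pin them; a hypothetical variant of the wiring (the name is not in this commit):

```python
# Hypothetical: pin a stable endpoint name for the documentation search.
doc_btn.click(
    run_doc_search,
    inputs=[doc_query, doc_max_results],
    outputs=doc_output,
    api_name="search_documentation",
)
```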
@@ -289,10 +567,10 @@
             model_clear = gr.Button("🗑️ Clear", variant="secondary")
             gr.Markdown("**Popular Models:**")
             with gr.Row():
-                gr.Button("BERT", size="sm").click(lambda: set_model_name("bert-base-uncased"), outputs=model_name)
-                gr.Button("GPT-2", size="sm").click(lambda: set_model_name("gpt2"), outputs=model_name)
-                gr.Button("T5", size="sm").click(lambda: set_model_name("t5-small"), outputs=model_name)
-                gr.Button("DistilBERT", size="sm").click(lambda: set_model_name("distilbert-base-uncased"), outputs=model_name)
+                gr.Button("BERT", size="sm").click(functools.partial(set_textbox_value, "bert-base-uncased"), outputs=model_name)
+                gr.Button("GPT-2", size="sm").click(functools.partial(set_textbox_value, "gpt2"), outputs=model_name)
+                gr.Button("T5", size="sm").click(functools.partial(set_textbox_value, "t5-small"), outputs=model_name)
+                gr.Button("DistilBERT", size="sm").click(functools.partial(set_textbox_value, "distilbert-base-uncased"), outputs=model_name)
 
             model_btn.click(run_model_info, inputs=model_name, outputs=model_output)
             model_clear.click(clear_output, outputs=model_output)
@@ -306,10 +584,10 @@
             dataset_clear = gr.Button("🗑️ Clear", variant="secondary")
             gr.Markdown("**Popular Datasets:**")
             with gr.Row():
-                gr.Button("SQuAD", size="sm").click(lambda: set_dataset_name("squad"), outputs=dataset_name)
-                gr.Button("IMDB", size="sm").click(lambda: set_dataset_name("imdb"), outputs=dataset_name)
-                gr.Button("GLUE", size="sm").click(lambda: set_dataset_name("glue"), outputs=dataset_name)
-                gr.Button("Common Voice", size="sm").click(lambda: set_dataset_name("common_voice"), outputs=dataset_name)
+                gr.Button("SQuAD", size="sm").click(functools.partial(set_textbox_value, "squad"), outputs=dataset_name)
+                gr.Button("IMDB", size="sm").click(functools.partial(set_textbox_value, "imdb"), outputs=dataset_name)
+                gr.Button("GLUE", size="sm").click(functools.partial(set_textbox_value, "glue"), outputs=dataset_name)
+                gr.Button("Common Voice", size="sm").click(functools.partial(set_textbox_value, "common_voice"), outputs=dataset_name)
 
             dataset_btn.click(run_dataset_info, inputs=dataset_name, outputs=dataset_output)
             dataset_clear.click(clear_output, outputs=dataset_output)
@@ -327,10 +605,10 @@
             search_clear = gr.Button("🗑️ Clear", variant="secondary")
             gr.Markdown("**Popular Tasks:**")
             with gr.Row():
-                gr.Button("Text Classification", size="sm").click(lambda: set_search_task("text-classification"), outputs=search_task)
-                gr.Button("Question Answering", size="sm").click(lambda: set_search_task("question-answering"), outputs=search_task)
-                gr.Button("Text Generation", size="sm").click(lambda: set_search_task("text-generation"), outputs=search_task)
-                gr.Button("Image Classification", size="sm").click(lambda: set_search_task("image-classification"), outputs=search_task)
+                gr.Button("Text Classification", size="sm").click(functools.partial(set_textbox_value, "text-classification"), outputs=search_task)
+                gr.Button("Question Answering", size="sm").click(functools.partial(set_textbox_value, "question-answering"), outputs=search_task)
+                gr.Button("Text Generation", size="sm").click(functools.partial(set_textbox_value, "text-generation"), outputs=search_task)
+                gr.Button("Image Classification", size="sm").click(functools.partial(set_textbox_value, "image-classification"), outputs=search_task)
 
             search_btn.click(run_model_search, inputs=[search_task, search_limit], outputs=search_output)
             search_clear.click(clear_output, outputs=search_output)
@@ -344,10 +622,10 @@
             transformers_clear = gr.Button("🗑️ Clear", variant="secondary")
             gr.Markdown("**Core Topics:**")
             with gr.Row():
-                gr.Button("Pipeline", size="sm").click(lambda: set_transformer_topic("pipeline"), outputs=transformers_topic)
-                gr.Button("Tokenizer", size="sm").click(lambda: set_transformer_topic("tokenizer"), outputs=transformers_topic)
-                gr.Button("Trainer", size="sm").click(lambda: set_transformer_topic("trainer"), outputs=transformers_topic)
-                gr.Button("Generation", size="sm").click(lambda: set_transformer_topic("generation"), outputs=transformers_topic)
+                gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=transformers_topic)
+                gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=transformers_topic)
+                gr.Button("Trainer", size="sm").click(functools.partial(set_textbox_value, "trainer"), outputs=transformers_topic)
+                gr.Button("Generation", size="sm").click(functools.partial(set_textbox_value, "generation"), outputs=transformers_topic)
 
             transformers_btn.click(run_transformers_docs, inputs=transformers_topic, outputs=transformers_output)
             transformers_clear.click(clear_output, outputs=transformers_output)
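The launch call is outside the visible hunks, but the docstring-per-method style and the tool-name comment are consistent with serving the app as an MCP server, which recent Gradio releases support via a launch flag. A hypothetical closing sketch, not part of this commit:

```python
# Hypothetical: recent Gradio versions can expose documented functions as MCP tools.
if __name__ == "__main__":
    demo.launch(mcp_server=True)
```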