import gradio as gr
import requests
from bs4 import BeautifulSoup
from typing import List, Dict, Any, Optional
import re
import time
import functools
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Short class name keeps the generated tool names compact.
class HF_API:
    def __init__(self):
        self.base_url = "https://huggingface.co"
        self.docs_url = "https://huggingface.co/docs"
        self.api_url = "https://huggingface.co/api"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'HF-Info-Server/1.0 (Educational Purpose)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1'
        })
        self.cache = {}
        self.cache_ttl = 3600  # 1 hour cache TTL

    def _is_cache_valid(self, cache_key: str) -> bool:
        if cache_key not in self.cache:
            return False
        cache_time = self.cache[cache_key].get('timestamp', 0)
        return time.time() - cache_time < self.cache_ttl

    def _get_from_cache(self, cache_key: str) -> Optional[str]:
        if self._is_cache_valid(cache_key):
            return self.cache[cache_key]['content']
        return None

    def _store_in_cache(self, cache_key: str, content: str):
        self.cache[cache_key] = {
            'content': content,
            'timestamp': time.time()
        }
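
    # Cache sketch: an entry stored at time T is served until T + cache_ttl
    # (one hour); after that _get_from_cache returns None and the URL is
    # re-fetched. The cache lives only in process memory, so it resets on
    # every restart.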

    def _fetch_with_retry(self, url: str, max_retries: int = 3) -> Optional[str]:
        cache_key = f"url_{hash(url)}"
        cached_content = self._get_from_cache(cache_key)
        if cached_content:
            logger.info(f"Cache hit for {url}")
            return cached_content
        for attempt in range(max_retries):
            try:
                logger.info(f"Fetching {url} (attempt {attempt + 1})")
                response = self.session.get(url, timeout=20)
                response.raise_for_status()
                content = response.text
                self._store_in_cache(cache_key, content)
                return content
            except requests.exceptions.RequestException as e:
                logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # exponential backoff: 1s, then 2s
                else:
                    logger.error(f"All attempts failed for {url}")
                    return None
        return None

    def _extract_code_examples(self, soup: BeautifulSoup) -> List[Dict[str, str]]:
        code_blocks = []
        code_elements = soup.find_all(['code', 'pre'])
        for code_elem in code_elements:
            lang_class = code_elem.get('class', [])
            language = 'python'
            for cls in lang_class:
                if 'language-' in str(cls):
                    language = str(cls).replace('language-', '')
                    break
                elif any(lang in str(cls).lower() for lang in ['python', 'bash', 'javascript', 'json']):
                    language = str(cls).lower()
                    break
            code_text = code_elem.get_text(strip=True)
            if len(code_text) > 20 and any(keyword in code_text.lower() for keyword in ['import', 'from', 'def', 'class', 'pip install', 'transformers']):
                code_blocks.append({'code': code_text, 'language': language, 'type': 'usage' if any(word in code_text.lower() for word in ['import', 'load', 'pipeline']) else 'example'})
        highlight_blocks = soup.find_all('div', class_=re.compile(r'highlight|code-block|language'))
        for block in highlight_blocks:
            code_text = block.get_text(strip=True)
            if len(code_text) > 20:
                code_blocks.append({'code': code_text, 'language': 'python', 'type': 'example'})
        seen = set()
        unique_blocks = []
        for block in code_blocks:
            code_hash = hash(block['code'][:100])
            if code_hash not in seen:
                seen.add(code_hash)
                unique_blocks.append(block)
                if len(unique_blocks) >= 5:
                    break
        return unique_blocks
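
    # Illustrative return value (hypothetical page content):
    #   [{'code': "from transformers import pipeline\npipe = pipeline('fill-mask')",
    #     'language': 'python', 'type': 'usage'}]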

    def _extract_practical_content(self, soup: BeautifulSoup, topic: str) -> Dict[str, Any]:
        content = {'overview': '', 'code_examples': [], 'usage_instructions': [], 'parameters': [], 'methods': [], 'installation': '', 'quickstart': ''}
        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile(r'content|docs|prose'))
        if not main_content:
            return content
        overview_sections = main_content.find_all('p', limit=5)
        overview_texts = []
        for p in overview_sections:
            text = p.get_text(strip=True)
            if len(text) > 30 and not text.startswith('Table of contents'):
                overview_texts.append(text)
        if overview_texts:
            overview = ' '.join(overview_texts)
            content['overview'] = overview[:1000] + "..." if len(overview) > 1000 else overview
        content['code_examples'] = self._extract_code_examples(main_content)
        # BeautifulSoup's string= filter only matches headings whose sole child
        # is text, so filter on the rendered text to also catch headings that
        # contain nested markup.
        install_headings = [h for h in main_content.find_all(['h1', 'h2', 'h3', 'h4'])
                            if re.search(r'install|setup|getting started', h.get_text(strip=True), re.IGNORECASE)]
        for heading in install_headings:
            next_elem = heading.find_next_sibling()
            install_text = []
            while next_elem and next_elem.name not in ['h1', 'h2', 'h3', 'h4'] and len(install_text) < 3:
                if next_elem.name in ['p', 'pre', 'code']:
                    text = next_elem.get_text(strip=True)
                    if text and len(text) > 10:
                        install_text.append(text)
                next_elem = next_elem.find_next_sibling()
            if install_text:
                content['installation'] = ' '.join(install_text)
                break
        usage_headings = main_content.find_all(['h1', 'h2', 'h3', 'h4'])
        for heading in usage_headings:
            heading_text = heading.get_text(strip=True).lower()
            if any(keyword in heading_text for keyword in ['usage', 'example', 'how to', 'quickstart', 'getting started']):
                next_elem = heading.find_next_sibling()
                instruction_parts = []
                while next_elem and next_elem.name not in ['h1', 'h2', 'h3', 'h4']:
                    if next_elem.name in ['p', 'li', 'div', 'ol', 'ul']:
                        text = next_elem.get_text(strip=True)
                        if text and len(text) > 15:
                            instruction_parts.append(text)
                    next_elem = next_elem.find_next_sibling()
                    if len(instruction_parts) >= 5:
                        break
                if instruction_parts:
                    content['usage_instructions'].extend(instruction_parts)
        tables = main_content.find_all('table')
        for table in tables:
            headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]
            if any(keyword in ' '.join(headers) for keyword in ['parameter', 'argument', 'option', 'attribute', 'name', 'type']):
                rows = table.find_all('tr')[1:]
                for row in rows[:8]:
                    cells = [td.get_text(strip=True) for td in row.find_all('td')]
                    if len(cells) >= 2:
                        param_info = {'name': cells[0], 'description': cells[1] if len(cells) > 1 else '', 'type': cells[2] if len(cells) > 2 else '', 'default': cells[3] if len(cells) > 3 else ''}
                        content['parameters'].append(param_info)
        return content

    def search_documentation(self, query: str, max_results: int = 3) -> str:
        """
        Searches the official Hugging Face documentation for a specific topic and returns a summary.
        This tool is useful for finding how-to guides, explanations of concepts like 'pipeline' or 'tokenizer', and usage examples.
        Args:
            query (str): The topic or keyword to search for in the documentation (e.g., 'fine-tuning', 'peft', 'datasets').
            max_results (int): The maximum number of documentation pages to retrieve and summarize. Defaults to 3.
        """
        try:
            try:
                max_results = int(float(max_results))  # accept int, float, or numeric string
            except (TypeError, ValueError):
                max_results = 3
            max_results = min(max(max_results, 1), 5)
            query_lower = query.lower().strip()
            if not query_lower:
                return "Please provide a search query."
            doc_sections = {
                'transformers': {'base_url': 'https://huggingface.co/docs/transformers', 'topics': {'pipeline': '/main_classes/pipelines', 'tokenizer': '/main_classes/tokenizer', 'trainer': '/main_classes/trainer', 'model': '/main_classes/model', 'quicktour': '/quicktour', 'installation': '/installation', 'fine-tuning': '/training', 'training': '/training', 'inference': '/main_classes/pipelines', 'preprocessing': '/preprocessing', 'tutorial': '/tutorials', 'configuration': '/main_classes/configuration', 'peft': '/peft', 'lora': '/peft', 'quantization': '/main_classes/quantization', 'generation': '/main_classes/text_generation', 'optimization': '/perf_train_gpu_one', 'deployment': '/deployment', 'custom': '/custom_models'}},
                'datasets': {'base_url': 'https://huggingface.co/docs/datasets', 'topics': {'loading': '/load_hub', 'load': '/load_hub', 'processing': '/process', 'streaming': '/stream', 'audio': '/audio_process', 'image': '/image_process', 'text': '/nlp_process', 'arrow': '/about_arrow', 'cache': '/cache', 'upload': '/upload_dataset', 'custom': '/dataset_script'}},
                'diffusers': {'base_url': 'https://huggingface.co/docs/diffusers', 'topics': {'pipeline': '/using-diffusers/loading', 'stable diffusion': '/using-diffusers/stable_diffusion', 'controlnet': '/using-diffusers/controlnet', 'inpainting': '/using-diffusers/inpaint', 'training': '/training/overview', 'optimization': '/optimization/fp16', 'schedulers': '/using-diffusers/schedulers'}},
                'hub': {'base_url': 'https://huggingface.co/docs/hub', 'topics': {'repositories': '/repositories', 'git': '/repositories-getting-started', 'spaces': '/spaces', 'models': '/models', 'datasets': '/datasets'}}
            }
            relevant_urls = []
            for section_name, section_data in doc_sections.items():
                base_url = section_data['base_url']
                topics = section_data['topics']
                for topic, path in topics.items():
                    # Lexical relevance: exact match > substring > word overlap.
                    relevance = 0
                    if query_lower == topic.lower(): relevance = 1.0
                    elif query_lower in topic.lower(): relevance = 0.9
                    elif any(word in topic.lower() for word in query_lower.split()): relevance = 0.7
                    elif any(word in query_lower for word in topic.lower().split()): relevance = 0.6
                    if relevance > 0:
                        full_url = base_url + path
                        relevant_urls.append({'url': full_url, 'topic': topic, 'section': section_name, 'relevance': relevance})
            relevant_urls.sort(key=lambda x: x['relevance'], reverse=True)
            relevant_urls = relevant_urls[:max_results]
            if not relevant_urls:
                return f"❌ No documentation found for '{query}'. Try: pipeline, tokenizer, trainer, model, fine-tuning, datasets, diffusers, or peft."
            result = f"# πŸ“š Hugging Face Documentation: {query}\n\n"
            for i, url_info in enumerate(relevant_urls, 1):
                section_emoji = {'transformers': 'πŸ€–', 'datasets': 'πŸ“Š', 'diffusers': '🎨', 'hub': '🌐'}.get(url_info['section'], 'πŸ“„')
                result += f"## {i}. {section_emoji} {url_info['topic'].title()} ({url_info['section'].title()})\n\n"
                content = self._fetch_with_retry(url_info['url'])
                if content:
                    soup = BeautifulSoup(content, 'html.parser')
                    practical_content = self._extract_practical_content(soup, url_info['topic'])
                    if practical_content['overview']: result += f"**πŸ“– Overview:**\n{practical_content['overview']}\n\n"
                    if practical_content['installation']: result += f"**βš™οΈ Installation:**\n{practical_content['installation']}\n\n"
                    if practical_content['code_examples']:
                        result += "**πŸ’» Code Examples:**\n\n"
                        for j, code_block in enumerate(practical_content['code_examples'][:3], 1):
                            lang = code_block.get('language', 'python')
                            code_type = code_block.get('type', 'example')
                            result += f"*{code_type.title()} {j}:*\n```{lang}\n{code_block['code']}\n```\n\n"
                    if practical_content['usage_instructions']:
                        result += "**πŸ› οΈ Usage Instructions:**\n"
                        for idx, instruction in enumerate(practical_content['usage_instructions'][:4], 1):
                            result += f"{idx}. {instruction}\n"
                        result += "\n"
                    if practical_content['parameters']:
                        result += "**βš™οΈ Parameters:**\n"
                        for param in practical_content['parameters'][:6]:
                            param_type = f" (`{param['type']}`)" if param.get('type') else ""
                            default_val = f" *Default: {param['default']}*" if param.get('default') else ""
                            result += f"β€’ **{param['name']}**{param_type}: {param['description']}{default_val}\n"
                        result += "\n"
                    result += f"**πŸ”— Full Documentation:** {url_info['url']}\n\n"
                else:
                    result += f"⚠️ Could not fetch content. Visit directly: {url_info['url']}\n\n"
                result += "---\n\n"
            return result
        except Exception as e:
            logger.error(f"Error in search_documentation: {e}")
            return f"❌ Error searching documentation: {str(e)}\n\nTry a simpler search term or check your internet connection."

    def get_model_info(self, model_name: str) -> str:
        """
        Fetches comprehensive information about a specific model from the Hugging Face Hub.
        Provides statistics like downloads and likes, a description, usage examples, and a quick-start code snippet.
        Args:
            model_name (str): The full identifier of the model on the Hub, such as 'bert-base-uncased' or 'meta-llama/Llama-2-7b-hf'.
        """
        try:
            model_name = model_name.strip()
            if not model_name: return "Please provide a model name."
            api_url = f"{self.api_url}/models/{model_name}"
            response = self.session.get(api_url, timeout=15)
            if response.status_code == 404: return f"❌ Model '{model_name}' not found. Please check the model name."
            elif response.status_code != 200: return f"❌ Error fetching model info (Status: {response.status_code})"
            model_data = response.json()
            result = f"# πŸ€– Model: {model_name}\n\n"
            downloads = model_data.get('downloads', 0)
            likes = model_data.get('likes', 0)
            task = model_data.get('pipeline_tag', 'N/A')
            library = model_data.get('library_name', 'N/A')
            result += f"**πŸ“Š Statistics:**\nβ€’ **Downloads:** {downloads:,}\nβ€’ **Likes:** {likes:,}\nβ€’ **Task:** {task}\nβ€’ **Library:** {library}\nβ€’ **Created:** {model_data.get('createdAt', 'N/A')[:10]}\nβ€’ **Updated:** {model_data.get('lastModified', 'N/A')[:10]}\n\n"
            if 'tags' in model_data and model_data['tags']: result += f"**🏷️ Tags:** {', '.join(model_data['tags'][:10])}\n\n"
            model_url = f"{self.base_url}/{model_name}"
            page_content = self._fetch_with_retry(model_url)
            if page_content:
                soup = BeautifulSoup(page_content, 'html.parser')
                readme_content = soup.find('div', class_=re.compile(r'prose|readme|model-card'))
                if readme_content:
                    paragraphs = readme_content.find_all('p')[:3]
                    description_parts = []
                    for p in paragraphs:
                        text = p.get_text(strip=True)
                        if len(text) > 30 and not any(skip in text.lower() for skip in ['table of contents', 'toc']):
                            description_parts.append(text)
                    if description_parts:
                        description = ' '.join(description_parts)
                        result += f"**πŸ“ Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
                code_examples = self._extract_code_examples(soup)
                if code_examples:
                    result += "**πŸ’» Usage Examples:**\n\n"
                    for i, code_block in enumerate(code_examples[:3], 1):
                        lang = code_block.get('language', 'python')
                        result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
            if task and task != 'N/A':
                result += f"**πŸš€ Quick Start Template:**\n"
                if library == 'transformers':
                    result += f"```python\nfrom transformers import pipeline\n\n# Load the model\nmodel = pipeline('{task}', model='{model_name}')\n\n# Use the model\n# result = model(your_input_here)\n# print(result)\n```\n\n"
                else:
                    result += f"```python\n# Load and use {model_name}\n# Refer to the documentation for specific usage\n```\n\n"
            if 'siblings' in model_data:
                files = [f['rfilename'] for f in model_data['siblings'][:10]]
                if files:
                    result += f"**πŸ“ Model Files:** {', '.join(files)}\n\n"
            result += f"**πŸ”— Model Page:** {model_url}\n"
            return result
        except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error in get_model_info: {e}")
            return f"❌ Error fetching model info: {str(e)}"

    def get_dataset_info(self, dataset_name: str) -> str:
        """
        Retrieves detailed information about a specific dataset from the Hugging Face Hub.
        Includes statistics, a description, and a quick-start code snippet showing how to load the dataset.
        Args:
            dataset_name (str): The full identifier of the dataset on the Hub, for example 'squad' or 'imdb'.
        """
        try:
            dataset_name = dataset_name.strip()
            if not dataset_name: return "Please provide a dataset name."
            api_url = f"{self.api_url}/datasets/{dataset_name}"
            response = self.session.get(api_url, timeout=15)
            if response.status_code == 404: return f"❌ Dataset '{dataset_name}' not found. Please check the dataset name."
            elif response.status_code != 200: return f"❌ Error fetching dataset info (Status: {response.status_code})"
            dataset_data = response.json()
            result = f"# πŸ“Š Dataset: {dataset_name}\n\n"
            downloads = dataset_data.get('downloads', 0)
            likes = dataset_data.get('likes', 0)
            result += f"**πŸ“ˆ Statistics:**\nβ€’ **Downloads:** {downloads:,}\nβ€’ **Likes:** {likes:,}\nβ€’ **Created:** {dataset_data.get('createdAt', 'N/A')[:10]}\nβ€’ **Updated:** {dataset_data.get('lastModified', 'N/A')[:10]}\n\n"
            if 'tags' in dataset_data and dataset_data['tags']: result += f"**🏷️ Tags:** {', '.join(dataset_data['tags'][:10])}\n\n"
            dataset_url = f"{self.base_url}/datasets/{dataset_name}"
            page_content = self._fetch_with_retry(dataset_url)
            if page_content:
                soup = BeautifulSoup(page_content, 'html.parser')
                readme_content = soup.find('div', class_=re.compile(r'prose|readme|dataset-card'))
                if readme_content:
                    paragraphs = readme_content.find_all('p')[:3]
                    description_parts = []
                    for p in paragraphs:
                        text = p.get_text(strip=True)
                        if len(text) > 30: description_parts.append(text)
                    if description_parts:
                        description = ' '.join(description_parts)
                        result += f"**πŸ“ Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
                code_examples = self._extract_code_examples(soup)
                if code_examples:
                    result += "**πŸ’» Usage Examples:**\n\n"
                    for i, code_block in enumerate(code_examples[:3], 1):
                        lang = code_block.get('language', 'python')
                        result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
            result += f"**πŸš€ Quick Start Template:**\n"
            result += f"```python\nfrom datasets import load_dataset\n\n# Load the dataset\ndataset = load_dataset('{dataset_name}')\n\n# Explore the dataset\n# print(dataset)\n# print(f\"Dataset keys: {{list(dataset.keys())}}\")\n\n# Access first example\n# if 'train' in dataset:\n#     print(\"First example:\")\n#     print(dataset['train'][0])\n```\n\n"
            result += f"**πŸ”— Dataset Page:** {dataset_url}\n"
            return result
        except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error in get_dataset_info: {e}")
            return f"❌ Error fetching dataset info: {str(e)}"

    def search_models(self, task: str, limit: str = "5") -> str:
        """
        Searches the Hugging Face Hub for models based on a specified task or keyword and returns a list of top models.
        Each result includes statistics and a quick usage example.
        Args:
            task (str): The task to search for, such as 'text-classification', 'image-generation', or 'question-answering'.
            limit (str): The maximum number of models to return. Defaults to '5'.
        """
        try:
            task = task.strip()
            if not task: return "Please provide a search task or keyword."
            try:
                limit = int(float(limit))  # accept int, float, or numeric string
            except (TypeError, ValueError):
                limit = 5
            limit = min(max(limit, 1), 10)
            params = {'search': task, 'limit': limit * 3, 'sort': 'downloads', 'direction': -1}
            response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
            response.raise_for_status()
            models = response.json()
            if not models: return f"❌ No models found for task: '{task}'. Try different keywords."
            filtered_models = []
            for model in models:
                if (model.get('downloads', 0) > 0 or model.get('likes', 0) > 0 or 'pipeline_tag' in model):
                    filtered_models.append(model)
                    if len(filtered_models) >= limit: break
            if not filtered_models: filtered_models = models[:limit]
            result = f"# πŸ” Top {len(filtered_models)} Models for '{task}'\n\n"
            for i, model in enumerate(filtered_models, 1):
                model_id = model.get('id', 'Unknown')
                downloads = model.get('downloads', 0)
                likes = model.get('likes', 0)
                task_type = model.get('pipeline_tag', 'N/A')
                library = model.get('library_name', 'N/A')
                quality_score = ""
                if downloads > 10000: quality_score = "⭐ Popular"
                elif downloads > 1000: quality_score = "πŸ”₯ Active"
                elif likes > 10: quality_score = "πŸ‘ Liked"
                result += f"## {i}. {model_id} {quality_score}\n\n"
                result += f"**πŸ“Š Stats:**\nβ€’ **Downloads:** {downloads:,}\nβ€’ **Likes:** {likes}\nβ€’ **Task:** {task_type}\nβ€’ **Library:** {library}\n\n"
                if task_type and task_type != 'N/A':
                    result += f"**πŸš€ Quick Usage:**\n"
                    if library == 'transformers':
                        result += f"```python\nfrom transformers import pipeline\n\n# Load model\nmodel = pipeline('{task_type}', model='{model_id}')\n\n# Use model\n# result = model(\"Your input here\")\n# print(result)\n```\n\n"
                    else:
                        result += f"```python\n# Load and use {model_id}\n# Check model page for specific usage instructions\n```\n\n"
                result += f"**πŸ”— Model Page:** {self.base_url}/{model_id}\n\n---\n\n"
            return result
        except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error in search_models: {e}")
            return f"❌ Error searching models: {str(e)}"

    def get_transformers_docs(self, topic: str) -> str:
        """
        Fetches detailed documentation specifically for the Hugging Face Transformers library on a given topic.
        This provides in-depth explanations, code examples, and parameter descriptions for core library components.
        Args:
            topic (str): The Transformers library topic to look up, such as 'pipeline', 'tokenizer', 'trainer', or 'generation'.
        """
        try:
            topic = topic.strip().lower()
            if not topic: return "Please provide a topic to search for."
            docs_url = "https://huggingface.co/docs/transformers"
            topic_map = {'pipeline': f"{docs_url}/main_classes/pipelines", 'pipelines': f"{docs_url}/main_classes/pipelines", 'tokenizer': f"{docs_url}/main_classes/tokenizer", 'tokenizers': f"{docs_url}/main_classes/tokenizer", 'trainer': f"{docs_url}/main_classes/trainer", 'training': f"{docs_url}/training", 'model': f"{docs_url}/main_classes/model", 'models': f"{docs_url}/main_classes/model", 'configuration': f"{docs_url}/main_classes/configuration", 'config': f"{docs_url}/main_classes/configuration", 'quicktour': f"{docs_url}/quicktour", 'quick': f"{docs_url}/quicktour", 'installation': f"{docs_url}/installation", 'install': f"{docs_url}/installation", 'tutorial': f"{docs_url}/tutorials", 'tutorials': f"{docs_url}/tutorials", 'generation': f"{docs_url}/main_classes/text_generation", 'text_generation': f"{docs_url}/main_classes/text_generation", 'preprocessing': f"{docs_url}/preprocessing", 'preprocess': f"{docs_url}/preprocessing", 'peft': f"{docs_url}/peft", 'lora': f"{docs_url}/peft", 'quantization': f"{docs_url}/main_classes/quantization", 'optimization': f"{docs_url}/perf_train_gpu_one", 'performance': f"{docs_url}/perf_train_gpu_one", 'deployment': f"{docs_url}/deployment", 'custom': f"{docs_url}/custom_models", 'fine-tuning': f"{docs_url}/training", 'finetuning': f"{docs_url}/training"}
            url = topic_map.get(topic)
            if not url:
                for key, value in topic_map.items():
                    if topic in key or key in topic:
                        url = value
                        topic = key
                        break
            if not url:
                url = f"{docs_url}/quicktour"
                topic = "quicktour"
            content = self._fetch_with_retry(url)
            if not content: return f"❌ Could not fetch documentation for '{topic}'. Please try again or visit: {url}"
            soup = BeautifulSoup(content, 'html.parser')
            practical_content = self._extract_practical_content(soup, topic)
            result = f"# πŸ“š Transformers Documentation: {topic.replace('_', ' ').title()}\n\n"
            if practical_content['overview']: result += f"**πŸ“– Overview:**\n{practical_content['overview']}\n\n"
            if practical_content['installation']: result += f"**βš™οΈ Installation:**\n{practical_content['installation']}\n\n"
            if practical_content['code_examples']:
                result += "**πŸ’» Code Examples:**\n\n"
                for i, code_block in enumerate(practical_content['code_examples'][:4], 1):
                    lang = code_block.get('language', 'python')
                    code_type = code_block.get('type', 'example')
                    result += f"### {code_type.title()} {i}:\n```{lang}\n{code_block['code']}\n```\n\n"
            if practical_content['usage_instructions']:
                result += "**πŸ› οΈ Step-by-Step Usage:**\n"
                for i, instruction in enumerate(practical_content['usage_instructions'][:6], 1):
                    result += f"{i}. {instruction}\n"
                result += "\n"
            if practical_content['parameters']:
                result += "**βš™οΈ Key Parameters:**\n"
                for param in practical_content['parameters'][:10]:
                    param_type = f" (`{param['type']}`)" if param.get('type') else ""
                    default_val = f" *Default: `{param['default']}`*" if param.get('default') else ""
                    result += f"β€’ **`{param['name']}`**{param_type}: {param['description']}{default_val}\n"
                result += "\n"
            related_topics = [k for k in topic_map.keys() if k != topic][:5]
            if related_topics: result += f"**πŸ”— Related Topics:** {', '.join(related_topics)}\n\n"
            result += f"**πŸ“„ Full Documentation:** {url}\n"
            return result
        except Exception as e:
            logger.error(f"Error in get_transformers_docs: {e}")
            return f"❌ Error fetching Transformers documentation: {str(e)}"

    def get_trending_models(self, limit: str = "10") -> str:
        """
        Fetches a list of the most downloaded models currently trending on the Hugging Face Hub.
        This is useful for discovering popular and widely-used models.
        Args:
            limit (str): The number of trending models to return. Defaults to '10'.
        """
        try:
            try:
                limit = int(float(limit))  # accept int, float, or numeric string
            except (TypeError, ValueError):
                limit = 10
            limit = min(max(limit, 1), 20)
            params = {'sort': 'downloads', 'direction': -1, 'limit': limit}
            response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
            response.raise_for_status()
            models = response.json()
            if not models: return "❌ Could not fetch trending models."
            result = f"# πŸ”₯ Trending Models (Top {len(models)})\n\n"
            for i, model in enumerate(models, 1):
                model_id = model.get('id', 'Unknown')
                downloads = model.get('downloads', 0)
                likes = model.get('likes', 0)
                task = model.get('pipeline_tag', 'N/A')
                if downloads > 1000000: trend = "πŸš€ Mega Popular"
                elif downloads > 100000: trend = "πŸ”₯ Very Popular"
                elif downloads > 10000: trend = "⭐ Popular"
                else: trend = "πŸ“ˆ Trending"
                result += f"## {i}. {model_id} {trend}\n"
                result += f"β€’ **Downloads:** {downloads:,} | **Likes:** {likes} | **Task:** {task}\n"
                result += f"β€’ **Link:** {self.base_url}/{model_id}\n\n"
            return result
        except Exception as e:
            logger.error(f"Error in get_trending_models: {e}")
            return f"❌ Error fetching trending models: {str(e)}"

# Initialize the API server
hf_api = HF_API()
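
# The wrapper can also be exercised directly, without the UI. A minimal
# smoke test, assuming network access to huggingface.co (commented out so
# importing this module stays side-effect free):
#   print(hf_api.get_model_info("bert-base-uncased")[:400])
#   print(hf_api.search_models("text-classification", "3")[:400])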

# --- Named Functions for Gradio UI ---

def clear_output():
    """Clears a Gradio output component."""
    return ""

def set_textbox_value(text):
    """Sets a Gradio Textbox to a specific value."""
    return text
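
# The quick-example buttons below bind their preset strings with
# functools.partial, e.g.:
#   gr.Button("Pipeline").click(
#       functools.partial(set_textbox_value, "pipeline"), outputs=doc_query)
# Gradio then invokes set_textbox_value("pipeline") and writes the returned
# string into the target textbox.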

# --- Doc Search Tab Functions ---
def run_doc_search(query, max_results):
    # gr.Number yields floats (e.g. 2.0), which str(...).isdigit() rejects, so
    # the old coercion always fell back to the default. search_documentation
    # now coerces safely, so pass the raw value straight through.
    return hf_api.search_documentation(query, max_results)

# --- Model Info Tab Functions ---
def run_model_info(model_name):
    return hf_api.get_model_info(model_name)

# --- Dataset Info Tab Functions ---
def run_dataset_info(dataset_name):
    return hf_api.get_dataset_info(dataset_name)

# --- Model Search Tab Functions ---
def run_model_search(task, limit):
    # search_models coerces its limit argument itself.
    return hf_api.search_models(task, limit)

# --- Transformers Docs Tab Functions ---
def run_transformers_docs(topic):
    return hf_api.get_transformers_docs(topic)
    
# --- Trending Models Tab Functions ---
def run_trending_models(limit):
    # get_trending_models coerces its limit argument itself.
    return hf_api.get_trending_models(limit)


# --- Create Gradio Interface ---

with gr.Blocks(
    title="πŸ€— Hugging Face Information Server",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { font-family: 'Inter', sans-serif; }
    .main-header { text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px; }
    """) as demo:
    # Header
    with gr.Row():
        gr.HTML("""
        <div class="main-header">
            <h1>πŸ€— Hugging Face Information Server</h1>
            <p>Get comprehensive documentation with <strong>real code examples</strong>, <strong>usage instructions</strong>, and <strong>practical content</strong></p>
        </div>
        """)
        
    with gr.Tab("πŸ“š Documentation Search", elem_id="docs"):
        gr.Markdown("### Search for documentation with **comprehensive code examples** and **step-by-step instructions**")
        with gr.Row():
            with gr.Column(scale=3):
                doc_query = gr.Textbox(label="πŸ” Search Query", placeholder="e.g., tokenizer, pipeline, fine-tuning, peft, trainer, quantization")
            with gr.Column(scale=1):
                doc_max_results = gr.Number(label="Max Results", value=2, minimum=1, maximum=5)
        doc_output = gr.Textbox(label="πŸ“– Documentation with Examples", lines=25, max_lines=30)
        with gr.Row():
            doc_btn = gr.Button("πŸ” Search Documentation", variant="primary", size="lg")
            doc_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
        gr.Markdown("**Quick Examples:**")
        with gr.Row():
            gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=doc_query)
            gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=doc_query)
            gr.Button("Fine-tuning", size="sm").click(functools.partial(set_textbox_value, "fine-tuning"), outputs=doc_query)
            gr.Button("PEFT", size="sm").click(functools.partial(set_textbox_value, "peft"), outputs=doc_query)
        
        doc_btn.click(run_doc_search, inputs=[doc_query, doc_max_results], outputs=doc_output)
        doc_clear.click(clear_output, outputs=doc_output)

    with gr.Tab("πŸ€– Model Information", elem_id="models"):
        gr.Markdown("### Get detailed model information with **usage examples** and **code snippets**")
        model_name = gr.Textbox(label="πŸ€– Model Name", placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium, meta-llama/Llama-2-7b-hf")
        model_output = gr.Textbox(label="πŸ“Š Model Information + Usage Examples", lines=25, max_lines=30)
        with gr.Row():
            model_btn = gr.Button("πŸ“Š Get Model Info", variant="primary", size="lg")
            model_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
        gr.Markdown("**Popular Models:**")
        with gr.Row():
            gr.Button("BERT", size="sm").click(functools.partial(set_textbox_value, "bert-base-uncased"), outputs=model_name)
            gr.Button("GPT-2", size="sm").click(functools.partial(set_textbox_value, "gpt2"), outputs=model_name)
            gr.Button("T5", size="sm").click(functools.partial(set_textbox_value, "t5-small"), outputs=model_name)
            gr.Button("DistilBERT", size="sm").click(functools.partial(set_textbox_value, "distilbert-base-uncased"), outputs=model_name)
        
        model_btn.click(run_model_info, inputs=model_name, outputs=model_output)
        model_clear.click(clear_output, outputs=model_output)

    with gr.Tab("πŸ“Š Dataset Information", elem_id="datasets"):
        gr.Markdown("### Get dataset information with **loading examples** and **usage code**")
        dataset_name = gr.Textbox(label="πŸ“Š Dataset Name", placeholder="e.g., squad, imdb, glue, common_voice, wikitext")
        dataset_output = gr.Textbox(label="πŸ“ˆ Dataset Information + Usage Examples", lines=25, max_lines=30)
        with gr.Row():
            dataset_btn = gr.Button("πŸ“ˆ Get Dataset Info", variant="primary", size="lg")
            dataset_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
        gr.Markdown("**Popular Datasets:**")
        with gr.Row():
            gr.Button("SQuAD", size="sm").click(functools.partial(set_textbox_value, "squad"), outputs=dataset_name)
            gr.Button("IMDB", size="sm").click(functools.partial(set_textbox_value, "imdb"), outputs=dataset_name)
            gr.Button("GLUE", size="sm").click(functools.partial(set_textbox_value, "glue"), outputs=dataset_name)
            gr.Button("Common Voice", size="sm").click(functools.partial(set_textbox_value, "common_voice"), outputs=dataset_name)
        
        dataset_btn.click(run_dataset_info, inputs=dataset_name, outputs=dataset_output)
        dataset_clear.click(clear_output, outputs=dataset_output)

    with gr.Tab("πŸ” Model Search", elem_id="search"):
        gr.Markdown("### Search models with **quick usage examples** and **quality indicators**")
        with gr.Row():
            with gr.Column(scale=3):
                search_task = gr.Textbox(label="πŸ” Task or Keyword", placeholder="e.g., text-classification, image-generation, question-answering, sentiment-analysis")
            with gr.Column(scale=1):
                search_limit = gr.Number(label="Max Results", value=5, minimum=1, maximum=10)
        search_output = gr.Textbox(label="πŸš€ Models with Usage Examples", lines=25, max_lines=30)
        with gr.Row():
            search_btn = gr.Button("πŸš€ Search Models", variant="primary", size="lg")
            search_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
        gr.Markdown("**Popular Tasks:**")
        with gr.Row():
            gr.Button("Text Classification", size="sm").click(functools.partial(set_textbox_value, "text-classification"), outputs=search_task)
            gr.Button("Question Answering", size="sm").click(functools.partial(set_textbox_value, "question-answering"), outputs=search_task)
            gr.Button("Text Generation", size="sm").click(functools.partial(set_textbox_value, "text-generation"), outputs=search_task)
            gr.Button("Image Classification", size="sm").click(functools.partial(set_textbox_value, "image-classification"), outputs=search_task)
            
        search_btn.click(run_model_search, inputs=[search_task, search_limit], outputs=search_output)
        search_clear.click(clear_output, outputs=search_output)

    with gr.Tab("⚑ Transformers Docs", elem_id="transformers"):
        gr.Markdown("### Get comprehensive Transformers documentation with **detailed examples** and **parameters**")
        transformers_topic = gr.Textbox(label="πŸ“š Topic", placeholder="e.g., pipeline, tokenizer, trainer, model, peft, generation, quantization")
        transformers_output = gr.Textbox(label="πŸ“– Comprehensive Documentation", lines=25, max_lines=30)
        with gr.Row():
            transformers_btn = gr.Button("πŸ“– Get Documentation", variant="primary", size="lg")
            transformers_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
        gr.Markdown("**Core Topics:**")
        with gr.Row():
            gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=transformers_topic)
            gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=transformers_topic)
            gr.Button("Trainer", size="sm").click(functools.partial(set_textbox_value, "trainer"), outputs=transformers_topic)
            gr.Button("Generation", size="sm").click(functools.partial(set_textbox_value, "generation"), outputs=transformers_topic)
        
        transformers_btn.click(run_transformers_docs, inputs=transformers_topic, outputs=transformers_output)
        transformers_clear.click(clear_output, outputs=transformers_output)

    with gr.Tab("πŸ”₯ Trending Models", elem_id="trending"):
        gr.Markdown("### Discover the most popular and trending models")
        trending_limit = gr.Number(label="Number of Models", value=10, minimum=1, maximum=20)
        trending_output = gr.Textbox(label="πŸ”₯ Trending Models", lines=20, max_lines=25)
        with gr.Row():
            trending_btn = gr.Button("πŸ”₯ Get Trending Models", variant="primary", size="lg")
            trending_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
        
        trending_btn.click(run_trending_models, inputs=trending_limit, outputs=trending_output)
        trending_clear.click(clear_output, outputs=trending_output)

    # Footer
    with gr.Row():
        gr.HTML("""
        <div style="text-align: center; padding: 20px; color: #666;">
            <h3>πŸ’‘ Features</h3>
            <p><strong>βœ… Real code examples</strong> β€’ <strong>βœ… Step-by-step instructions</strong> β€’ <strong>βœ… Parameter documentation</strong> β€’ <strong>βœ… Quality indicators</strong></p>
            <p><em>Get practical, actionable information, directly from the source.</em></p>
            <p><a href="https://huggingface.co/spaces/Agents-MCP-Hackathon/HuggingFaceDoc/blob/main/README.md" target="_blank">πŸ“– Read the Guide on Hugging Face Spaces</a></p>
        </div>
        """)

if __name__ == "__main__":
    print("πŸš€ Starting Hugging Face Information Server...")
    print("πŸ“Š Features: Code examples, usage instructions, comprehensive documentation")
    demo.launch(mcp_server=True)
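
# Note (based on Gradio's MCP support; the exact endpoint may vary by
# version): with mcp_server=True the UI functions above are also exposed as
# MCP tools over SSE, typically at http://127.0.0.1:7860/gradio_api/mcp/sse,
# alongside the regular web UI. This generally requires the MCP extra:
#   pip install "gradio[mcp]"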