#!/usr/bin/env python3
"""
Hugging Face Space: GGUF Model Converter
A web interface for converting Hugging Face models to GGUF format

This Space provides:
1. Web interface for model conversion
2. Progress tracking and logging
3. Automatic upload to Hugging Face
4. Resource monitoring
"""

import os
import sys
import subprocess
import shutil
import logging
import tempfile
import queue
import psutil
import gc
from pathlib import Path
from typing import Optional, List, Dict, Any
from datetime import datetime, timezone

import gradio as gr
import torch

# Try importing required packages
try:
    from huggingface_hub import HfApi, login, create_repo, snapshot_download
    from transformers import AutoConfig
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Global variables for progress tracking
conversion_progress = queue.Queue()
current_status = {"status": "idle", "progress": 0, "message": "Ready"}

class SpaceGGUFConverter:
    def __init__(self):
        """Initialize the GGUF converter for Hugging Face Spaces"""
        self.temp_dir = None
        self.llama_cpp_dir = None
        self.hf_token = None
        
    def set_hf_token(self, token: str):
        """Set the Hugging Face token"""
        self.hf_token = token
        if token:
            login(token=token)
            return "✅ HF Token set successfully!"
        return "❌ Invalid token"
    
    def update_progress(self, status: str, progress: int, message: str):
        """Update the global progress status"""
        global current_status
        current_status = {
            "status": status,
            "progress": progress,
            "message": message,
            "timestamp": datetime.now().strftime("%H:%M:%S")
        }
        conversion_progress.put(current_status.copy())
    
    def check_resources(self) -> Dict[str, Any]:
        """Check available system resources"""
        try:
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage('/')
            
            return {
                "memory_total": f"{memory.total / (1024**3):.1f} GB",
                "memory_available": f"{memory.available / (1024**3):.1f} GB",
                "memory_percent": memory.percent,
                "disk_total": f"{disk.total / (1024**3):.1f} GB",
                "disk_free": f"{disk.free / (1024**3):.1f} GB",
                "disk_percent": disk.percent,
                "cpu_count": psutil.cpu_count(),
                "gpu_available": torch.cuda.is_available(),
                "gpu_memory": f"{torch.cuda.get_device_properties(0).total_memory / (1024**3):.1f} GB" if torch.cuda.is_available() else "N/A"
            }
        except Exception as e:
            return {"error": str(e)}
    
    def validate_model(self, model_id: str) -> tuple[bool, str]:
        """Validate if the model exists and get basic info"""
        try:
            if not HF_HUB_AVAILABLE:
                return False, "❌ Required packages not available"
            
            self.update_progress("validating", 10, f"Validating model: {model_id}")
            
            # Try to get model config
            config = AutoConfig.from_pretrained(model_id, trust_remote_code=False)
            
            # Get approximate model size
            try:
                api = HfApi()
                model_info = api.model_info(model_id)
                
                # Calculate approximate size from number of parameters
                if hasattr(config, 'num_parameters'):
                    params = config.num_parameters()
                elif hasattr(config, 'n_params'):
                    params = config.n_params
                else:
                    # Parameter count is not exposed by this config
                    params = "Unknown"
                
                estimated_size = f"~{params/1e9:.1f}B parameters" if isinstance(params, (int, float)) else params
                
                return True, f"✅ Valid model found!\nParameters: {estimated_size}\nArchitecture: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}"
                
            except Exception as e:
                return True, f"✅ Model accessible (size estimation failed: {str(e)})"
                
        except Exception as e:
            return False, f"❌ Model validation failed: {str(e)}"
    
    def setup_environment(self) -> bool:
        """Set up the environment for GGUF conversion"""
        try:
            self.update_progress("setup", 20, "Setting up conversion environment...")
            
            # Create temporary directory
            self.temp_dir = tempfile.mkdtemp(prefix="gguf_space_")
            logger.info(f"Created temporary directory: {self.temp_dir}")
            
            # Clone llama.cpp
            self.llama_cpp_dir = os.path.join(self.temp_dir, "llama.cpp")
            self.update_progress("setup", 30, "Downloading llama.cpp...")
            
            result = subprocess.run([
                "git", "clone", "--depth", "1", 
                "https://github.com/ggerganov/llama.cpp.git", 
                self.llama_cpp_dir
            ], capture_output=True, text=True)
            
            if result.returncode != 0:
                raise Exception(f"Failed to clone llama.cpp: {result.stderr}")
            
            # Build llama.cpp
            self.update_progress("setup", 50, "Building llama.cpp (this may take a few minutes)...")
            
            original_dir = os.getcwd()
            try:
                os.chdir(self.llama_cpp_dir)
                
                # Configure with CMake
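                # Release build with tests disabled; examples are kept on so the
                # llama-quantize tool is built along with the core library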
                configure_result = subprocess.run([
                    "cmake", "-S", ".", "-B", "build",
                    "-DCMAKE_BUILD_TYPE=Release",
                    "-DLLAMA_BUILD_TESTS=OFF",
                    "-DLLAMA_BUILD_EXAMPLES=ON"
                ], capture_output=True, text=True)
                
                if configure_result.returncode != 0:
                    raise Exception(f"CMake configure failed: {configure_result.stderr}")
                
                # Build
                build_result = subprocess.run([
                    "cmake", "--build", "build", "--config", "Release", "-j"
                ], capture_output=True, text=True)
                
                if build_result.returncode != 0:
                    raise Exception(f"CMake build failed: {build_result.stderr}")
                
            finally:
                os.chdir(original_dir)
            
            self.update_progress("setup", 70, "Environment setup complete!")
            return True
            
        except Exception as e:
            self.update_progress("error", 0, f"Setup failed: {str(e)}")
            logger.error(f"Environment setup failed: {e}")
            return False
    
    def convert_model(
        self, 
        model_id: str, 
        output_repo: str, 
        quantizations: List[str],
        hf_token: str,
        private_repo: bool = False
    ) -> tuple[bool, str]:
        """Convert model to GGUF format"""
        try:
            if not hf_token:
                return False, "❌ Hugging Face token is required"
            
            # Set token
            self.set_hf_token(hf_token)
            
            # Validate model first
            valid, validation_msg = self.validate_model(model_id)
            if not valid:
                return False, validation_msg
            
            # Check resources
            resources = self.check_resources()
            if resources.get("memory_percent", 100) > 90:
                return False, "❌ Insufficient memory available (>90% used)"
            
            # Setup environment
            if not self.setup_environment():
                return False, "❌ Failed to setup environment"
            
            # Download model
            self.update_progress("downloading", 80, f"Downloading model: {model_id}")
            model_dir = os.path.join(self.temp_dir, "original_model")
            
            try:
                snapshot_download(
                    repo_id=model_id,
                    local_dir=model_dir,
                    token=hf_token
                )
            except Exception as e:
                return False, f"❌ Failed to download model: {str(e)}"
            
            # Convert to GGUF
            self.update_progress("converting", 85, "Converting to GGUF format...")
            gguf_dir = os.path.join(self.temp_dir, "gguf_output")
            os.makedirs(gguf_dir, exist_ok=True)
            
            # Convert to f16 first
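            # convert_hf_to_gguf.py produces a full-precision (f16) GGUF file that is
            # then used as the input for every quantized variant below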
            convert_script = os.path.join(self.llama_cpp_dir, "convert_hf_to_gguf.py")
            f16_output = os.path.join(gguf_dir, "model-f16.gguf")
            
            convert_result = subprocess.run([
                sys.executable, convert_script,
                model_dir,
                "--outfile", f16_output,
                "--outtype", "f16"
            ], capture_output=True, text=True)
            
            if convert_result.returncode != 0:
                return False, f"❌ F16 conversion failed: {convert_result.stderr}"
            
            # Find quantize binary
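            # The binary's location varies with the CMake generator and platform,
            # so _find_quantize_binary probes a few common build-output paths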
            quantize_binary = self._find_quantize_binary()
            if not quantize_binary:
                return False, "❌ Could not find llama-quantize binary"
            
            # Create quantizations
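            # The f16 intermediate is always kept (and later uploaded), even when the
            # user did not explicitly select it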
            successful_quants = ["f16"]
            for i, quant in enumerate(quantizations):
                if quant == "f16":
                    continue
                
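                # Map per-quantization progress into the 85-95% band of the overall run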
                progress = 85 + (10 * i / len(quantizations))
                self.update_progress("converting", int(progress), f"Creating {quant} quantization...")
                
                quant_output = os.path.join(gguf_dir, f"model-{quant}.gguf")
                
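                # llama-quantize CLI: <input f16 gguf> <output gguf> <type>, with the
                # type name given in upper case (e.g. Q4_0, Q8_0)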
                quant_result = subprocess.run([
                    quantize_binary,
                    f16_output,
                    quant_output,
                    quant.upper()
                ], capture_output=True, text=True)
                
                if quant_result.returncode == 0:
                    successful_quants.append(quant)
                else:
                    logger.warning(f"Failed to create {quant} quantization: {quant_result.stderr}")
            
            # Create model card
            self._create_model_card(model_id, gguf_dir, successful_quants)
            
            # Upload to Hugging Face
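            # Every file under gguf_dir (all GGUF variants plus README.md) is uploaded,
            # preserving paths relative to the output directory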
            self.update_progress("uploading", 95, f"Uploading to {output_repo}...")
            
            try:
                api = HfApi(token=hf_token)
                create_repo(output_repo, private=private_repo, exist_ok=True, token=hf_token)
                
                for file_path in Path(gguf_dir).rglob("*"):
                    if file_path.is_file():
                        relative_path = file_path.relative_to(gguf_dir)
                        api.upload_file(
                            path_or_fileobj=str(file_path),
                            path_in_repo=str(relative_path),
                            repo_id=output_repo,
                            repo_type="model",
                            token=hf_token
                        )
                
            except Exception as e:
                return False, f"❌ Upload failed: {str(e)}"
            
            self.update_progress("complete", 100, "Conversion completed successfully!")
            
            return True, f"""✅ Conversion completed successfully!

📊 **Results:**
- Successfully created: {', '.join(successful_quants)} quantizations
- Uploaded to: https://huggingface.co/{output_repo}
- Files created: {len(successful_quants)} GGUF files + README.md

🔗 **Links:**
- View model: https://huggingface.co/{output_repo}
- Download files: https://huggingface.co/{output_repo}/tree/main
"""
            
        except Exception as e:
            self.update_progress("error", 0, f"Conversion failed: {str(e)}")
            return False, f"❌ Conversion failed: {str(e)}"
        
        finally:
            # Cleanup
            self._cleanup()
            gc.collect()
    
    def _find_quantize_binary(self) -> Optional[str]:
        """Find the llama-quantize binary"""
        possible_locations = [
            os.path.join(self.llama_cpp_dir, "build", "bin", "llama-quantize"),
            os.path.join(self.llama_cpp_dir, "build", "llama-quantize"),
            os.path.join(self.llama_cpp_dir, "build", "llama-quantize.exe"),
            os.path.join(self.llama_cpp_dir, "build", "bin", "llama-quantize.exe")
        ]
        
        for location in possible_locations:
            if os.path.exists(location):
                return location
        
        return None
    
    def _create_model_card(self, original_model_id: str, output_dir: str, quantizations: List[str]):
        """Create a model card for the GGUF model"""
        
        quant_table = []
        for quant in quantizations:
            filename = f"model-{quant}.gguf"
            if quant == "f16":
                desc = "Original precision (largest file)"
            elif "q4" in quant:
                desc = "4-bit quantization (good balance)"
            elif "q5" in quant:
                desc = "5-bit quantization (higher quality)"
            elif "q8" in quant:
                desc = "8-bit quantization (high quality)"
            else:
                desc = "Quantized version"
            
            quant_table.append(f"| {filename} | {quant.upper()} | {desc} |")
        
        model_card_content = f"""---
language:
- en
library_name: gguf
base_model: {original_model_id}
tags:
- gguf
- quantized
- llama.cpp
- converted
---

# {original_model_id} - GGUF

This repository contains GGUF quantizations of [{original_model_id}](https://huggingface.co/{original_model_id}).

**Converted using [HF GGUF Converter Space](https://huggingface.co/spaces/)**

## About GGUF

GGUF is a binary file format used by llama.cpp for storing models. The quantized variants in this repository reduce the precision of the model weights so that large language models can run on consumer hardware.

## Files

| Filename | Quant type | Description |
| -------- | ---------- | ----------- |
{chr(10).join(quant_table)}

## Usage

You can use these models with llama.cpp or any other GGUF-compatible inference engine.

### llama.cpp

```bash
./llama-cli -m model-q4_0.gguf -p "Your prompt here"
```

### Python (using llama-cpp-python)

```python
from llama_cpp import Llama

llm = Llama(model_path="model-q4_0.gguf")
output = llm("Your prompt here", max_tokens=512)
print(output['choices'][0]['text'])
```

## Original Model

This is a quantized version of [{original_model_id}](https://huggingface.co/{original_model_id}). Please refer to the original model card for more information about the model's capabilities, training data, and usage guidelines.

## Conversion Details

- Converted using llama.cpp
- Original model downloaded from Hugging Face
- Multiple quantization levels provided for different use cases
- Conversion completed on: {datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")}

## License

This model inherits the license from the original model. Please check the original model's license for usage terms.
"""
        
        model_card_path = os.path.join(output_dir, "README.md")
        with open(model_card_path, "w", encoding="utf-8") as f:
            f.write(model_card_content)
    
    def _cleanup(self):
        """Clean up temporary files"""
        if self.temp_dir and os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
                logger.info("Cleaned up temporary files")
            except Exception as e:
                logger.warning(f"Failed to cleanup: {e}")

# Initialize converter
converter = SpaceGGUFConverter()

def get_current_status():
    """Get current conversion status"""
    global current_status
    return f"""**Status:** {current_status['status']}
**Progress:** {current_status['progress']}%
**Message:** {current_status['message']}
**Time:** {current_status.get('timestamp', 'N/A')}"""

def validate_model_interface(model_id: str):
    """Interface function for model validation"""
    if not model_id.strip():
        return "❌ Please enter a model ID"
    
    valid, message = converter.validate_model(model_id.strip())
    return message

def check_resources_interface():
    """Interface function for resource checking"""
    resources = converter.check_resources()
    if "error" in resources:
        return f"❌ Error checking resources: {resources['error']}"
    
    return f"""## 💻 System Resources

**Memory:**
- Total: {resources['memory_total']}
- Available: {resources['memory_available']} ({100-resources['memory_percent']:.1f}% free)
- Usage: {resources['memory_percent']:.1f}%

**Storage:**
- Total: {resources['disk_total']}
- Free: {resources['disk_free']} ({100-resources['disk_percent']:.1f}% free)
- Usage: {resources['disk_percent']:.1f}%

**Compute:**
- CPU Cores: {resources['cpu_count']}
- GPU Available: {'✅ Yes' if resources['gpu_available'] else '❌ No'}
- GPU Memory: {resources['gpu_memory']}

**Status:** {'🟢 Good' if resources['memory_percent'] < 80 and resources['disk_percent'] < 80 else '🟡 Limited' if resources['memory_percent'] < 90 and resources['disk_percent'] < 90 else '🔴 Critical'}
"""

def convert_model_interface(
    model_id: str,
    output_repo: str,
    hf_token: str,
    quant_f16: bool,
    quant_q4_0: bool,
    quant_q4_1: bool,
    quant_q5_0: bool,
    quant_q5_1: bool,
    quant_q8_0: bool,
    private_repo: bool
):
    """Interface function for model conversion"""
    
    # Validate inputs
    if not model_id.strip():
        return "❌ Please enter a model ID"
    
    if not output_repo.strip():
        return "❌ Please enter an output repository name"
    
    if not hf_token.strip():
        return "❌ Please enter your Hugging Face token"
    
    # Collect selected quantizations
    quantizations = []
    if quant_f16:
        quantizations.append("f16")
    if quant_q4_0:
        quantizations.append("q4_0")
    if quant_q4_1:
        quantizations.append("q4_1")
    if quant_q5_0:
        quantizations.append("q5_0")
    if quant_q5_1:
        quantizations.append("q5_1")
    if quant_q8_0:
        quantizations.append("q8_0")
    
    if not quantizations:
        return "❌ Please select at least one quantization type"
    
    # Start conversion
    success, message = converter.convert_model(
        model_id.strip(),
        output_repo.strip(),
        quantizations,
        hf_token.strip(),
        private_repo
    )
    
    return message

# Create Gradio interface
def create_interface():
    """Create the Gradio interface"""
    
    with gr.Blocks(
        title="🤗 GGUF Model Converter",
        theme=gr.themes.Soft(),
        css="""
        .status-box {
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
        }
        """
    ) as demo:
        
        gr.Markdown("""
        # 🤗 GGUF Model Converter
        
        Convert Hugging Face models to GGUF format for use with llama.cpp and other inference engines.
        
        ⚠️ **Important Notes:**
        - Large models (>7B parameters) may take a long time and require significant memory
        - Make sure you have sufficient disk space (models can be several GB)
        - You need a Hugging Face token with write access to upload models
        """)
        
        with gr.Tab("🔧 Model Converter"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("### 📋 Model Configuration")
                    
                    model_id_input = gr.Textbox(
                        label="Model ID",
                        placeholder="e.g., microsoft/DialoGPT-small",
                        info="Hugging Face model repository ID"
                    )
                    
                    validate_btn = gr.Button("✅ Validate Model", variant="secondary")
                    validation_output = gr.Markdown()
                    
                    output_repo_input = gr.Textbox(
                        label="Output Repository",
                        placeholder="e.g., your-username/model-name-GGUF",
                        info="Where to upload the converted model"
                    )
                    
                    hf_token_input = gr.Textbox(
                        label="Hugging Face Token",
                        type="password",
                        placeholder="hf_xxxxxxxxxxxxxxxx",
                        info="Get your token from https://huggingface.co/settings/tokens"
                    )
                    
                    private_repo_checkbox = gr.Checkbox(
                        label="Make repository private",
                        value=False
                    )
                
                with gr.Column(scale=1):
                    gr.Markdown("### 🎛️ Quantization Options")
                    
                    quant_f16 = gr.Checkbox(label="F16 (Original precision)", value=True)
                    quant_q4_0 = gr.Checkbox(label="Q4_0 (Small, fast)", value=True)
                    quant_q4_1 = gr.Checkbox(label="Q4_1 (Small, balanced)", value=False)
                    quant_q5_0 = gr.Checkbox(label="Q5_0 (Medium, good quality)", value=False)
                    quant_q5_1 = gr.Checkbox(label="Q5_1 (Medium, better quality)", value=False)
                    quant_q8_0 = gr.Checkbox(label="Q8_0 (Large, high quality)", value=False)
            
            gr.Markdown("### 🚀 Start Conversion")
            convert_btn = gr.Button("🔄 Convert Model", variant="primary", size="lg")
            
            conversion_output = gr.Markdown()
        
        with gr.Tab("📊 System Status"):
            gr.Markdown("### 💻 Resource Monitor")
            
            refresh_btn = gr.Button("🔄 Refresh Resources", variant="secondary")
            resources_output = gr.Markdown()
            
            gr.Markdown("### 📈 Conversion Status")
            status_btn = gr.Button("📊 Check Status", variant="secondary")
            status_output = gr.Markdown(get_current_status())
        
        with gr.Tab("📚 Help & Examples"):
            gr.Markdown("""
            ## 🎯 Quick Start Guide
            
            1. **Enter Model ID**: Use any Hugging Face model ID (e.g., `microsoft/DialoGPT-small`)
            2. **Validate Model**: Click "Validate Model" to check if the model is accessible
            3. **Set Output Repository**: Choose where to upload (e.g., `your-username/model-name-GGUF`)
            4. **Add HF Token**: Get your token from [Hugging Face Settings](https://huggingface.co/settings/tokens)
            5. **Select Quantizations**: Choose which formats to create
            6. **Convert**: Click "Convert Model" and wait for completion
            
            ## 📝 Quantization Guide
            
            - **F16**: Original precision, largest file size, best quality
            - **Q4_0**: 4-bit quantization, smallest size, good for most uses
            - **Q4_1**: 4-bit with better quality than Q4_0
            - **Q5_0/Q5_1**: 5-bit quantization, balance of size and quality
            - **Q8_0**: 8-bit quantization, high quality, larger files
            
            ## 💡 Tips for Success
            
            - Start with small models (< 1B parameters) to test
            - Use Q4_0 for mobile/edge deployment
            - Use Q8_0 or F16 for best quality
            - Monitor system resources in the Status tab
            - Large models may take 30+ minutes to convert
            
            ## 🔧 Supported Models
            
            This converter works with most language models that use standard architectures:
            - LLaMA, LLaMA 2, Code Llama
            - Mistral, Mixtral
            - Phi, Phi-2, Phi-3
            - Qwen, ChatGLM
            - And many others!
            """)
        
        # Event handlers
        validate_btn.click(
            fn=validate_model_interface,
            inputs=[model_id_input],
            outputs=[validation_output]
        )
        
        convert_btn.click(
            fn=convert_model_interface,
            inputs=[
                model_id_input,
                output_repo_input,
                hf_token_input,
                quant_f16,
                quant_q4_0,
                quant_q4_1,
                quant_q5_0,
                quant_q5_1,
                quant_q8_0,
                private_repo_checkbox
            ],
            outputs=[conversion_output]
        )
        
        refresh_btn.click(
            fn=check_resources_interface,
            outputs=[resources_output]
        )
        
        status_btn.click(
            fn=get_current_status,
            outputs=[status_output]
        )
        
        # Populate the resource monitor when the page first loads
        demo.load(fn=check_resources_interface, outputs=[resources_output])
    
    return demo

# Launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )