qwerty45-uiop committed on
Commit 19b0de3 · verified · 1 Parent(s): 7977841

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +236 -197
src/streamlit_app.py CHANGED
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """
-LLM Compatibility Advisor - Enhanced Streamlit Application with Expanded Model List
 Author: Assistant
-Description: Provides device-based LLM recommendations based on RAM capacity
 Requirements: streamlit, pandas, plotly, openpyxl
 """

@@ -26,7 +26,6 @@ st.set_page_config(
 def load_data():
     try:
         df = pd.read_excel("src/BITS_INTERNS.xlsx", sheet_name="Form Responses 1")
-
         df.columns = df.columns.str.strip()
         return df, None
     except FileNotFoundError:

@@ -58,70 +57,120 @@ def extract_numeric_ram(ram) -> Optional[int]:

     return None

-# Comprehensive LLM database with categories
 LLM_DATABASE = {
     "ultra_low": { # ≤2GB
-        "general": ["DistilBERT", "MobileBERT", "TinyBERT", "BERT-Tiny", "DistilRoBERTa"],
-        "specialized": ["TinyLLaMA-1.1B", "PY007/TinyLlama-1.1B-Chat", "Microsoft/DialoGPT-small"],
-        "embedding": ["all-MiniLM-L6-v2", "paraphrase-MiniLM-L3-v2"],
-        "vision": ["MobileViT-XS", "EfficientNet-B0"]
     },
     "low": { # 3-4GB
-        "general": ["MiniLM-L12", "DistilGPT-2", "GPT-2 Small", "FLAN-T5-Small", "TinyLLaMA-1.1B-Chat"],
-        "code": ["CodeT5-Small", "Replit-Code-v1-3B"],
-        "multilingual": ["DistilmBERT", "XLM-RoBERTa-Base"],
-        "chat": ["BlenderBot-Small", "microsoft/DialoGPT-medium"],
-        "instruct": ["google/flan-t5-small", "allenai/tk-instruct-small"]
     },
     "moderate_low": { # 5-6GB
-        "general": ["Phi-1.5", "Gemma-2B", "Alpaca-3B", "RedPajama-3B", "OpenLLaMA-3B"],
-        "code": ["CodeGen-2.5B", "StarCoder-1B", "SantaCoder-1.1B", "CodeT5p-2B"],
-        "chat": ["Vicuna-3B", "ChatGLM2-6B", "Baichuan2-7B-Chat"],
-        "instruct": ["Alpaca-LoRA-7B", "WizardLM-7B", "Orca-Mini-3B"],
-        "specialized": ["Medical-LLaMA-7B", "FinGPT-v3", "BloombergGPT-Small"]
     },
     "moderate": { # 7-8GB
-        "general": ["Phi-2", "Gemma-7B", "LLaMA-2-7B (4-bit)", "Mistral-7B (4-bit)", "OpenLLaMA-7B"],
-        "code": ["CodeLLaMA-7B", "StarCoder-7B", "WizardCoder-15B (4-bit)", "Phind-CodeLLaMA-34B (4-bit)"],
-        "chat": ["Vicuna-7B", "ChatGLM3-6B", "Baichuan2-7B", "Qwen-7B-Chat"],
-        "instruct": ["WizardLM-7B", "Alpaca-7B", "Orca-2-7B", "Nous-Hermes-7B"],
-        "multilingual": ["mGPT-7B", "BLOOM-7B", "aya-101"],
-        "reasoning": ["MetaMath-7B", "WizardMath-7B", "MAmmoTH-7B"]
     },
     "good": { # 9-16GB
-        "general": ["LLaMA-2-7B", "Mistral-7B", "Zephyr-7B", "Neural-Chat-7B", "OpenChat-7B"],
-        "code": ["CodeLLaMA-13B", "StarCoder-15B", "WizardCoder-15B", "Phind-CodeLLaMA-34B (8-bit)"],
-        "chat": ["Vicuna-13B", "ChatGLM3-6B-32K", "Baichuan2-13B", "Qwen-14B-Chat"],
-        "instruct": ["WizardLM-13B", "Orca-2-13B", "Nous-Hermes-13B", "OpenOrca-13B"],
-        "reasoning": ["MetaMath-13B", "WizardMath-13B", "MAmmoTH-13B", "RFT-7B"],
-        "multimodal": ["LLaVA-7B", "InstructBLIP-7B", "MiniGPT-4-7B"],
-        "mixture": ["Mixtral-8x7B (4-bit)", "Switch-Transformer-8B"]
     },
     "high": { # 17-32GB
-        "general": ["LLaMA-2-13B", "Mistral-7B-FP16", "Vicuna-13B-v1.5", "MPT-7B-32K"],
-        "code": ["CodeLLaMA-34B (8-bit)", "StarCoder-40B (8-bit)", "DeepSeek-Coder-33B (8-bit)"],
-        "chat": ["ChatGLM3-6B-128K", "Baichuan2-13B-Chat", "Qwen-72B (8-bit)", "Yi-34B-Chat (8-bit)"],
-        "instruct": ["WizardLM-30B (8-bit)", "Orca-2-13B", "Nous-Hermes-Llama2-70B (8-bit)"],
-        "reasoning": ["MetaMath-70B (8-bit)", "WizardMath-70B (8-bit)", "Goat-7B-FP16"],
-        "multimodal": ["LLaVA-13B", "InstructBLIP-13B", "BLIP-2-T5-XL"],
-        "mixture": ["Mixtral-8x7B", "Switch-Transformer-32B (8-bit)"],
-        "specialized": ["Med-PaLM-2 (8-bit)", "BloombergGPT-50B (8-bit)", "LegalBERT-Large"]
     },
     "ultra_high": { # >32GB
-        "general": ["LLaMA-2-70B (8-bit)", "Falcon-40B", "MPT-30B", "BLOOM-176B (8-bit)"],
-        "code": ["CodeLLaMA-34B", "StarCoder-40B", "DeepSeek-Coder-33B", "WizardCoder-34B"],
-        "chat": ["Vicuna-33B", "ChatGLM2-130B (8-bit)", "Qwen-72B", "Yi-34B"],
-        "instruct": ["WizardLM-70B", "Orca-2-70B", "Nous-Hermes-Llama2-70B"],
-        "reasoning": ["MetaMath-70B", "WizardMath-70B", "MAmmoTH-70B", "Goat-70B"],
-        "multimodal": ["LLaVA-34B", "InstructBLIP-40B", "GPT-4V-equivalent"],
-        "mixture": ["Mixtral-8x22B", "Switch-Transformer-175B"],
-        "research": ["PaLM-540B (extreme quantization)", "GPT-J-6B-FP16", "T5-11B"],
-        "domain_specific": ["BioBERT-Large", "SciBERT-Large", "FinBERT-Large", "LegalBERT-XL"]
     }
 }

 # Enhanced LLM recommendation with performance tiers
-def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[str]]]:
     """Returns (recommendation, performance_tier, additional_info, detailed_models)"""
     ram = extract_numeric_ram(ram_str)

@@ -133,45 +182,45 @@ def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[str]]]:

     if ram <= 2:
         models = LLM_DATABASE["ultra_low"]
-        return ("🔸 Ultra-lightweight models for basic NLP tasks",
                 "Ultra Low",
-                "Suitable for simple NLP tasks, limited context, mobile-optimized",
                 models)
     elif ram <= 4:
         models = LLM_DATABASE["low"]
-        return ("🔸 Small language models with basic capabilities",
                 "Low",
-                "Good for text classification, basic chat, simple reasoning",
                 models)
     elif ram <= 6:
         models = LLM_DATABASE["moderate_low"]
-        return ("🟠 Mid-range models with decent reasoning capabilities",
                 "Moderate-Low",
-                "Decent reasoning, short conversations, basic coding help",
                 models)
     elif ram <= 8:
         models = LLM_DATABASE["moderate"]
-        return ("🟠 Strong 7B models with good general performance",
                 "Moderate",
-                "Good general purpose, coding assistance, mathematical reasoning",
                 models)
     elif ram <= 16:
         models = LLM_DATABASE["good"]
-        return ("🟢 High-quality models with excellent capabilities",
                 "Good",
-                "Strong performance, longer contexts, multimodal support",
                 models)
     elif ram <= 32:
         models = LLM_DATABASE["high"]
-        return ("🔵 Premium models with professional-grade performance",
                 "High",
-                "Professional grade, high accuracy, complex reasoning",
                 models)
     else:
         models = LLM_DATABASE["ultra_high"]
-        return ("🔵 Top-tier models with enterprise capabilities",
                 "Ultra High",
-                "Enterprise-ready, research-grade, domain-specific expertise",
                 models)

 # Enhanced OS detection with better icons
@@ -226,9 +275,9 @@ def create_performance_chart(df):

     return fig

-# Model details display function
-def display_model_categories(models_dict: Dict[str, List[str]], ram_gb: int):
-    """Display models organized by category"""
     if not models_dict:
         return

@@ -237,14 +286,18 @@ def display_model_categories(models_dict: Dict[str, List[str]], ram_gb: int):
     for category, model_list in models_dict.items():
         if model_list:
             with st.expander(f"📂 {category.replace('_', ' ').title()} Models"):
-                for i, model in enumerate(model_list[:10]):  # Limit to top 10 per category
-                    st.markdown(f"• **{model}**")
-                if len(model_list) > 10:
-                    st.markdown(f"*... and {len(model_list) - 10} more models*")

 # Main App
-st.title("🧠 Enhanced LLM Compatibility Advisor")
-st.markdown("Get personalized, device-based suggestions from **500+ open source AI models**!")

 # Load data
 df, error = load_data()
@@ -273,7 +326,7 @@ with st.sidebar:
     st.subheader("Model Categories")
     show_categories = st.multiselect(
         "Show specific categories:",
-        ["general", "code", "chat", "instruct", "reasoning", "multimodal", "multilingual", "specialized"],
         default=["general", "code", "chat"]
     )

@@ -285,7 +338,7 @@ with st.sidebar:
     st.markdown("---")
     st.markdown("### 📊 Quick Stats")
     st.metric("Total Students", len(df))
-    st.metric("Total Models Available", "500+")

     # Calculate average RAM
     avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()
@@ -415,12 +468,12 @@ with tier_col2:
     st.markdown("**Mobile Performance Tiers:**")
     mobile_tier_counts = mobile_tiers.value_counts()
     for tier, count in mobile_tier_counts.items():
-        percentage = (count / len(mobile_tiers)) * 100
         st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

 # Model Explorer Section
 st.markdown("---")
-st.header("🔍 Model Explorer")

 explorer_col1, explorer_col2 = st.columns(2)

@@ -434,8 +487,7 @@ with explorer_col1:

 with explorer_col2:
     selected_category = st.selectbox(
         "Select model category:",
-        ["general", "code", "chat", "instruct", "reasoning", "multimodal",
-         "multilingual", "specialized", "mixture", "embedding", "vision"]
     )

 # Map selection to database key
@@ -455,132 +507,119 @@ if selected_ram_key in LLM_DATABASE and selected_category in LLM_DATABASE[selected_ram_key]:

     st.subheader(f"🎯 {selected_category.title()} Models for {selected_ram_range}")

-    # Display models in a nice grid
-    cols = st.columns(3)
-    for i, model in enumerate(models):
-        with cols[i % 3]:
-            st.markdown(f"**{model}**")
-            # Add some context for popular models
-            if "llama" in model.lower():
-                st.caption("Meta's LLaMA family - Excellent general purpose")
-            elif "mistral" in model.lower():
-                st.caption("Mistral AI - High quality, efficient")
-            elif "phi" in model.lower():
-                st.caption("Microsoft Research - Compact & capable")
-            elif "gemma" in model.lower():
-                st.caption("Google - Lightweight & versatile")
-            elif "wizard" in model.lower():
-                st.caption("Enhanced with instruction tuning")
-            elif "code" in model.lower():
-                st.caption("Specialized for programming tasks")
 else:
     st.info(f"No {selected_category} models available for {selected_ram_range}")

-# Enhanced reference table
-with st.expander("📘 Comprehensive LLM Reference Guide & Categories"):
     st.markdown("""
-    ## 🚀 Model Categories Explained
-
-    ### 🎯 **General Purpose Models**
-    - **Best for**: General conversation, Q&A, writing assistance
-    - **Examples**: LLaMA-2, Mistral, Phi, Gemma series
-    - **Use cases**: Chatbots, content generation, general AI assistance
-
-    ### 💻 **Code-Specialized Models**
-    - **Best for**: Programming, debugging, code explanation
-    - **Examples**: CodeLLaMA, StarCoder, WizardCoder, DeepSeek-Coder
-    - **Use cases**: IDE integration, code completion, bug fixing
-
-    ### 💬 **Chat-Optimized Models**
-    - **Best for**: Conversational AI, dialogue systems
-    - **Examples**: Vicuna, ChatGLM, Baichuan, Qwen-Chat
-    - **Use cases**: Customer service, personal assistants
-
-    ### 📚 **Instruction-Following Models**
-    - **Best for**: Following complex instructions, task completion
-    - **Examples**: WizardLM, Alpaca, Orca, Nous-Hermes
-    - **Use cases**: Task automation, structured responses
-
-    ### 🧮 **Reasoning & Math Models**
-    - **Best for**: Mathematical problem solving, logical reasoning
-    - **Examples**: MetaMath, WizardMath, MAmmoTH, Goat
-    - **Use cases**: Education, research, analytical tasks

     ### 👁️ **Multimodal Models**
-    - **Best for**: Understanding both text and images
-    - **Examples**: LLaVA, InstructBLIP, MiniGPT-4
-    - **Use cases**: Image analysis, visual Q&A, content moderation
-
-    ### 🌐 **Multilingual Models**
-    - **Best for**: Multiple language support
-    - **Examples**: mGPT, BLOOM, XLM-RoBERTa, aya-101
-    - **Use cases**: Translation, global applications
-
-    ### 🏥 **Domain-Specific Models**
-    - **Medical**: Med-PaLM, Medical-LLaMA, BioBERT
-    - **Finance**: BloombergGPT, FinGPT, FinBERT
-    - **Legal**: LegalBERT, Legal-LLaMA
-    - **Science**: SciBERT, Research-focused models
-
-    ## 💾 RAM-to-Performance Matrix
-
-    | RAM Size | Model Examples | Capabilities | Best Use Cases |
-    |----------|----------------|--------------|----------------|
-    | **≤2GB** | DistilBERT, TinyBERT, MobileBERT | Basic NLP, fast inference | Mobile apps, edge devices, simple classification |
-    | **4GB** | TinyLLaMA, DistilGPT-2, MiniLM | Simple chat, basic reasoning | Lightweight chatbots, mobile AI assistants |
-    | **6GB** | Phi-1.5, Gemma-2B, Alpaca-3B | Decent conversation, basic coding | Personal assistants, educational tools |
-    | **8GB** | Phi-2, LLaMA-2-7B (4-bit), Mistral-7B (4-bit) | Good general purpose, coding help | Development tools, content creation |
-    | **16GB** | LLaMA-2-7B, Mistral-7B, CodeLLaMA-7B | High quality responses, complex tasks | Professional applications, research |
-    | **24GB** | LLaMA-2-13B, Mixtral-8x7B (4-bit) | Excellent performance, long context | Enterprise solutions, advanced research |
-    | **32GB+** | LLaMA-2-70B (8-bit), Mixtral-8x7B | Top-tier performance, specialized tasks | Research institutions, large-scale applications |
-
-    ## 🛠️ Optimization Techniques
-
-    ### **Quantization Methods**
-    - **4-bit**: GPTQ, AWQ - 75% memory reduction
-    - **8-bit**: bitsandbytes - 50% memory reduction
-    - **16-bit**: Half precision - 50% memory reduction
-
-    ### **Efficient Formats**
-    - **GGUF**: Optimized for CPU inference
-    - **ONNX**: Cross-platform optimization
-    - **TensorRT**: NVIDIA GPU optimization
-
-    ### **Memory-Saving Tips**
-    - Use CPU offloading for large models
-    - Reduce context window length
-    - Enable gradient checkpointing
-    - Use model sharding for very large models
-
-    ### 🔗 **Popular Platforms & Tools**
-    - **Hugging Face**: Largest model repository
-    - **Ollama**: Easy local model deployment
-    - **LM Studio**: GUI for running models
-    - **llama.cpp**: Efficient CPU inference
-    - **vLLM**: High-throughput inference
-    - **Text Generation WebUI**: Web interface for models
     """)

-# Footer with additional resources
 st.markdown("---")
 st.markdown("""
-### 🔗 Essential Resources & Tools
-
-**📦 Model Repositories:**
-- [Hugging Face Hub](https://huggingface.co/models) – 500,000+ models, including BERT, LLaMA, Mistral, and more.
-- [Ollama Library](https://ollama.ai/library) – Seamless CLI-based local model deployment (LLaMA, Mistral, Gemma).
-- [Together AI](https://www.together.ai/models) – Access to powerful open models via API or hosted inference.
-
-**🛠️ Inference Tools:**
-- [**llama.cpp**](https://github.com/ggerganov/llama.cpp) – CPU/GPU inference for LLaMA models with quantization.
-- [**GGUF format**](https://huggingface.co/docs/transformers/main/en/gguf) – Next-gen model format optimized for local inference.
-- [**vLLM**](https://github.com/vllm-project/vllm) – High-throughput inference engine for transformer models.
-- [**AutoGPTQ**](https://github.com/PanQiWei/AutoGPTQ) – GPU-optimized quantized inference for large models.
-
-**📚 Learning & Deployment:**
-- [Awesome LLMs](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of LLM projects, tools, and papers.
-- [LangChain](https://www.langchain.com/) – Framework for building apps with LLMs and tools.
-- [LlamaIndex](https://www.llamaindex.ai/) – Connect LLMs with external data and documents (RAG).
-
----
-""")
 
 #!/usr/bin/env python3
 """
+LLM Compatibility Advisor - Streamlined with Download Sizes
 Author: Assistant
+Description: Provides device-based LLM recommendations with popular models and download sizes
 Requirements: streamlit, pandas, plotly, openpyxl
 """

 def load_data():
     try:
         df = pd.read_excel("src/BITS_INTERNS.xlsx", sheet_name="Form Responses 1")
         df.columns = df.columns.str.strip()
         return df, None
     except FileNotFoundError:

     return None

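Only the tail of `extract_numeric_ram` appears in this excerpt. For orientation, here is a minimal sketch of what a parser with that signature plausibly does; the regex, the accepted formats, and the helper name are assumptions, not the committed code:

```python
import re
from typing import Optional

def extract_numeric_ram_sketch(ram) -> Optional[int]:
    """Hypothetical stand-in: pull the leading number out of values like
    '8GB', '16 GB', or '12gb DDR4'; return None when nothing parses."""
    if ram is None:
        return None
    match = re.search(r"(\d+(?:\.\d+)?)", str(ram))
    return int(float(match.group(1))) if match else None
```
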
+# Streamlined LLM database with popular models and download sizes
 LLM_DATABASE = {
     "ultra_low": { # ≤2GB
+        "general": [
+            {"name": "TinyLlama-1.1B-Chat", "size": "637MB", "description": "Compact chat model"},
+            {"name": "DistilBERT-base", "size": "268MB", "description": "Efficient BERT variant"},
+            {"name": "all-MiniLM-L6-v2", "size": "91MB", "description": "Sentence embeddings"}
+        ],
+        "code": [
+            {"name": "CodeT5-small", "size": "242MB", "description": "Code generation"},
+            {"name": "Replit-code-v1-3B", "size": "1.2GB", "description": "Code completion"}
+        ]
     },
     "low": { # 3-4GB
+        "general": [
+            {"name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient model"},
+            {"name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact model"},
+            {"name": "OpenLLaMA-3B", "size": "2.1GB", "description": "Open source LLaMA"}
+        ],
+        "code": [
+            {"name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce code model"},
+            {"name": "StarCoder-1B", "size": "1.1GB", "description": "BigCode project"}
+        ],
+        "chat": [
+            {"name": "Alpaca-3B", "size": "2.0GB", "description": "Stanford's instruction model"},
+            {"name": "Vicuna-3B", "size": "2.1GB", "description": "ChatGPT-style training"}
+        ]
     },
     "moderate_low": { # 5-6GB
+        "general": [
+            {"name": "Phi-2", "size": "5.2GB", "description": "Microsoft's 2.7B model"},
+            {"name": "Gemma-7B-it", "size": "4.2GB", "description": "Google instruction tuned"},
+            {"name": "Mistral-7B-v0.1", "size": "4.1GB", "description": "Mistral AI base model"}
+        ],
+        "code": [
+            {"name": "CodeLlama-7B", "size": "3.8GB", "description": "Meta's code specialist"},
+            {"name": "StarCoder-7B", "size": "4.0GB", "description": "Code generation expert"}
+        ],
+        "chat": [
+            {"name": "Zephyr-7B-beta", "size": "4.2GB", "description": "HuggingFace chat model"},
+            {"name": "Neural-Chat-7B", "size": "4.1GB", "description": "Intel optimized"}
+        ]
     },
     "moderate": { # 7-8GB
+        "general": [
+            {"name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model"},
+            {"name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruct"},
+            {"name": "Qwen-7B-Chat", "size": "4.0GB", "description": "Alibaba's multilingual"}
+        ],
+        "code": [
+            {"name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned CodeLlama"},
+            {"name": "WizardCoder-7B", "size": "4.0GB", "description": "Enhanced coding abilities"},
+            {"name": "Phind-CodeLlama-34B-v2", "size": "4.2GB", "description": "4-bit quantized version"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-7B", "size": "4.0GB", "description": "Mathematical reasoning"},
+            {"name": "MetaMath-7B", "size": "3.9GB", "description": "Math problem solving"}
+        ]
     },
     "good": { # 9-16GB
+        "general": [
+            {"name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant"},
+            {"name": "Vicuna-13B-v1.5", "size": "7.2GB", "description": "Enhanced Vicuna"},
+            {"name": "OpenChat-3.5", "size": "7.1GB", "description": "High-quality chat model"}
+        ],
+        "code": [
+            {"name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Larger code model"},
+            {"name": "WizardCoder-15B", "size": "8.2GB", "description": "Advanced coding"},
+            {"name": "StarCoder-15B", "size": "8.5GB", "description": "Large code model"}
+        ],
+        "multimodal": [
+            {"name": "LLaVA-7B", "size": "7.0GB", "description": "Vision + language"},
+            {"name": "MiniGPT-4-7B", "size": "6.8GB", "description": "Multimodal chat"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-13B", "size": "7.3GB", "description": "Advanced math"},
+            {"name": "Orca-2-13B", "size": "7.4GB", "description": "Microsoft reasoning"}
+        ]
     },
     "high": { # 17-32GB
+        "general": [
+            {"name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts"},
+            {"name": "Llama-2-70B-Chat", "size": "38.0GB", "description": "8-bit quantized"},
+            {"name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's large model"}
+        ],
+        "code": [
+            {"name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large code specialist"},
+            {"name": "DeepSeek-Coder-33B", "size": "18.5GB", "description": "DeepSeek's coder"},
+            {"name": "WizardCoder-34B", "size": "19.2GB", "description": "Enterprise coding"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-70B", "size": "38.5GB", "description": "8-bit quantized math"},
+            {"name": "MetaMath-70B", "size": "38.0GB", "description": "8-bit math reasoning"}
+        ]
     },
     "ultra_high": { # >32GB
+        "general": [
+            {"name": "Llama-2-70B", "size": "130GB", "description": "Full precision"},
+            {"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model"},
+            {"name": "Qwen-72B", "size": "145GB", "description": "Alibaba's flagship"}
+        ],
+        "code": [
+            {"name": "CodeLlama-34B", "size": "68GB", "description": "Full precision code"},
+            {"name": "DeepSeek-Coder-33B", "size": "66GB", "description": "Full precision coding"}
+        ],
+        "reasoning": [
+            {"name": "WizardMath-70B", "size": "130GB", "description": "Full precision math"},
+            {"name": "Goat-70B", "size": "132GB", "description": "Arithmetic reasoning"}
+        ]
     }
 }

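Each tier in the new database maps category names to lists of `{"name", "size", "description"}` dicts, so lookups are plain dictionary walks. A small illustrative helper (not part of the commit) showing how the structure reads back:

```python
# Illustrative only: flatten one tier/category of LLM_DATABASE for display.
def list_models(tier: str, category: str) -> list:
    entries = LLM_DATABASE.get(tier, {}).get(category, [])
    return [f"{m['name']} ({m['size']})" for m in entries]

# list_models("moderate", "general")
# -> ['Llama-2-7B-Chat (3.5GB)', 'Mistral-7B-Instruct-v0.2 (4.1GB)', 'Qwen-7B-Chat (4.0GB)']
```
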
 # Enhanced LLM recommendation with performance tiers
+def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[Dict]]]:
     """Returns (recommendation, performance_tier, additional_info, detailed_models)"""
     ram = extract_numeric_ram(ram_str)

     if ram <= 2:
         models = LLM_DATABASE["ultra_low"]
+        return ("🔸 Ultra-lightweight models - basic NLP tasks",
                 "Ultra Low",
+                "Mobile-optimized, simple tasks, limited context",
                 models)
     elif ram <= 4:
         models = LLM_DATABASE["low"]
+        return ("🔸 Small language models - decent capabilities",
                 "Low",
+                "Basic chat, simple reasoning, text classification",
                 models)
     elif ram <= 6:
         models = LLM_DATABASE["moderate_low"]
+        return ("🟠 Mid-range models - good general performance",
                 "Moderate-Low",
+                "Solid reasoning, coding help, longer conversations",
                 models)
     elif ram <= 8:
         models = LLM_DATABASE["moderate"]
+        return ("🟠 Strong 7B models - excellent capabilities",
                 "Moderate",
+                "Professional use, coding assistance, complex reasoning",
                 models)
     elif ram <= 16:
         models = LLM_DATABASE["good"]
+        return ("🟢 High-quality models - premium performance",
                 "Good",
+                "Advanced tasks, multimodal support, research use",
                 models)
     elif ram <= 32:
         models = LLM_DATABASE["high"]
+        return ("🔵 Premium models - professional grade",
                 "High",
+                "Enterprise ready, complex reasoning, specialized tasks",
                 models)
     else:
         models = LLM_DATABASE["ultra_high"]
+        return ("🔵 Top-tier models - enterprise capabilities",
                 "Ultra High",
+                "Research grade, maximum performance, domain expertise",
                 models)

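A usage sketch for the four-tuple returned above; the values follow directly from the `ram <= 8` branch, assuming `extract_numeric_ram("8GB")` yields 8:

```python
# Illustrative call: an "8GB" laptop lands in the ram <= 8 branch.
recommendation, tier, info, detailed = recommend_llm("8GB")
assert tier == "Moderate"
print(recommendation)    # "🟠 Strong 7B models - excellent capabilities"
print(sorted(detailed))  # ['code', 'general', 'reasoning']
```
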
 # Enhanced OS detection with better icons

     return fig

+# Enhanced model details display function
+def display_model_categories(models_dict: Dict[str, List[Dict]], ram_gb: int):
+    """Display models organized by category with download sizes"""
     if not models_dict:
         return

     for category, model_list in models_dict.items():
         if model_list:
             with st.expander(f"📂 {category.replace('_', ' ').title()} Models"):
+                for model in model_list[:8]:  # Limit to top 8 per category
+                    col1, col2, col3 = st.columns([3, 1, 2])
+                    with col1:
+                        st.markdown(f"**{model['name']}**")
+                    with col2:
+                        st.markdown(f"`{model['size']}`")
+                    with col3:
+                        st.markdown(f"*{model['description']}*")

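A sketch of how the function above might be driven from `recommend_llm`; the wiring is an assumption about code outside this excerpt:

```python
# Illustrative wiring: render one expander per category for a 16GB machine.
_, tier, _, detailed = recommend_llm("16GB")  # falls in the "Good" tier
display_model_categories(detailed, ram_gb=16)
```
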
 # Main App
+st.title("🧠 LLM Compatibility Advisor")
+st.markdown("Get personalized recommendations from **150+ popular open source AI models** with download sizes!")

 # Load data
 df, error = load_data()

     st.subheader("Model Categories")
     show_categories = st.multiselect(
         "Show specific categories:",
+        ["general", "code", "chat", "reasoning", "multimodal"],
         default=["general", "code", "chat"]
     )

     st.markdown("---")
     st.markdown("### 📊 Quick Stats")
     st.metric("Total Students", len(df))
+    st.metric("Popular Models", "150+")

     # Calculate average RAM
     avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()

     st.markdown("**Mobile Performance Tiers:**")
     mobile_tier_counts = mobile_tiers.value_counts()
     for tier, count in mobile_tier_counts.items():
+        percentage = (count / len(mobile_tier_counts)) * 100
         st.write(f"• {tier}: {count} students ({percentage:.1f}%)")

 # Model Explorer Section
 st.markdown("---")
+st.header("🔍 Popular Model Explorer")

 explorer_col1, explorer_col2 = st.columns(2)

 with explorer_col2:
     selected_category = st.selectbox(
         "Select model category:",
+        ["general", "code", "chat", "reasoning", "multimodal"]
     )

 # Map selection to database key
 

     st.subheader(f"🎯 {selected_category.title()} Models for {selected_ram_range}")

+    # Display models in a detailed table
+    for model in models:
+        with st.container():
+            col1, col2, col3 = st.columns([3, 1, 3])
+            with col1:
+                st.markdown(f"### {model['name']}")
+            with col2:
+                st.markdown(f"**{model['size']}**")
+                st.caption("Download Size")
+            with col3:
+                st.markdown(f"*{model['description']}*")
+                # Add download suggestion
+                if "Llama" in model['name']:
+                    st.caption("🔗 Available on Hugging Face & Ollama")
+                elif "Mistral" in model['name']:
+                    st.caption("🔗 Available on Hugging Face & Mistral AI")
+                elif "Gemma" in model['name']:
+                    st.caption("🔗 Available on Hugging Face & Google")
+                else:
+                    st.caption("🔗 Available on Hugging Face")
+            st.markdown("---")
 else:
     st.info(f"No {selected_category} models available for {selected_ram_range}")

+# Enhanced reference guide
+with st.expander("📘 Model Guide & Download Information"):
  st.markdown("""
537
+ ## ๐Ÿš€ Popular Models by Category
538
+
539
+ ### ๐ŸŽฏ **General Purpose Champions**
540
+ - **Llama-2 Series**: Meta's flagship models (7B, 13B, 70B)
541
+ - **Mistral Series**: Excellent efficiency and performance
542
+ - **Gemma**: Google's efficient models (2B, 7B)
543
+ - **Phi**: Microsoft's compact powerhouses
544
+
545
+ ### ๐Ÿ’ป **Code Specialists**
546
+ - **CodeLlama**: Meta's dedicated coding models
547
+ - **StarCoder**: BigCode's programming experts
548
+ - **WizardCoder**: Enhanced coding capabilities
549
+ - **DeepSeek-Coder**: Chinese tech giant's coder
550
+
551
+ ### ๐Ÿ’ฌ **Chat Optimized**
552
+ - **Vicuna**: UC Berkeley's ChatGPT alternative
553
+ - **Zephyr**: HuggingFace's chat specialist
554
+ - **OpenChat**: High-quality conversation models
555
+ - **Neural-Chat**: Intel-optimized chat models
556
+
557
+ ### ๐Ÿงฎ **Reasoning Masters**
558
+ - **WizardMath**: Mathematical problem solving
559
+ - **MetaMath**: Advanced arithmetic reasoning
560
+ - **Orca-2**: Microsoft's reasoning specialist
561
+ - **Goat**: Specialized arithmetic model
 
562
 
563
  ### ๐Ÿ‘๏ธ **Multimodal Models**
564
+ - **LLaVA**: Large Language and Vision Assistant
565
+ - **MiniGPT-4**: Multimodal conversational AI
566
+
567
+ ## ๐Ÿ’พ Download Size Reference
568
+
569
+ | Model Size | FP16 | 8-bit | 4-bit | Use Case |
570
+ |------------|------|-------|-------|----------|
571
+ | **1-3B** | 2-6GB | 1-3GB | 0.5-1.5GB | Mobile, Edge |
572
+ | **7B** | 13GB | 7GB | 3.5GB | Desktop, Laptop |
573
+ | **13B** | 26GB | 13GB | 7GB | Workstation |
574
+ | **30-34B** | 60GB | 30GB | 15GB | Server, Cloud |
575
+ | **70B** | 140GB | 70GB | 35GB | High-end Server |
576
+
577
+ ## ๐Ÿ› ๏ธ Where to Download
578
+
579
+ ### **Primary Sources**
580
+ - **๐Ÿค— Hugging Face**: Largest repository with 400,000+ models
581
+ - **๐Ÿฆ™ Ollama**: Simple CLI tool for local deployment
582
+ - **๐Ÿ“ฆ LM Studio**: User-friendly GUI for model management
583
+
584
+ ### **Quantized Formats**
585
+ - **GGUF**: Best for CPU inference (llama.cpp)
586
+ - **GPTQ**: GPU-optimized quantization
587
+ - **AWQ**: Advanced weight quantization
588
+
589
+ ### **Download Tips**
590
+ - Use `git lfs` for large models from Hugging Face
591
+ - Consider bandwidth and storage before downloading
592
+ - Start with 4-bit quantized versions for testing
593
+ - Use `ollama pull model_name` for easiest setup
594
+
595
+ ## ๐Ÿ”ง Optimization Strategies
596
+
597
+ ### **Memory Reduction**
598
+ - **4-bit quantization**: 75% memory reduction
599
+ - **8-bit quantization**: 50% memory reduction
600
+ - **CPU offloading**: Use system RAM for overflow
601
+
602
+ ### **Speed Optimization**
603
+ - **GPU acceleration**: CUDA, ROCm, Metal
604
+ - **Batch processing**: Process multiple requests
605
+ - **Context caching**: Reuse computations
 
 
 
 
 
 
 
 
 
 
606
  """)
 
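The download-size table in the guide follows from bytes-per-parameter arithmetic. A rough sketch of that rule of thumb (the function name and rounding are illustrative, and real files vary with format overhead and exact parameter counts):

```python
# Rule of thumb behind the size table: parameters × bits ÷ 8 bytes each.
def approx_download_gb(params_billion: float, bits: int = 16) -> float:
    """Estimate model file size in GB at a given weight precision."""
    return round(params_billion * 1e9 * bits / 8 / 1e9, 1)

# approx_download_gb(7, 4)  -> 3.5, matching the 4-bit column for 7B models
# approx_download_gb(7, 16) -> 14.0 vs the table's 13GB; GiB-vs-GB and
# per-format overhead account for the gap
```
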
+# Footer with updated resources
 st.markdown("---")
 st.markdown("""
+### 🔗 Essential Download & Deployment Tools
+
+**📦 Easy Model Deployment:**
+- [**Ollama**](https://ollama.ai/) – `curl -fsSL https://ollama.ai/install.sh | sh`
+- [**LM Studio**](https://lmstudio.ai/) – Drag-and-drop GUI for running models locally
+- [**GPT4All**](https://gpt4all.io/) – Cross-platform desktop app for local LLMs
+
+**🤗 Model Repositories:**
+- [**Hugging Face Hub**](https://huggingface.co/models) – Filter by model size, task, and license
+- [**TheBloke's Quantizations**](https://huggingface.co/TheBloke) – Pre-quantized models in GGUF/GPTQ format
+- [**Awesome LLM**](https://github.com/Hannibal046/Awesome-LLMs) – Curated list of models and resources
+
+**⚡ Performance Tools:**
+- [**llama.cpp**](https://github.com/ggerganov/llama.cpp) – High-performance CPU inference
+- [**vLLM**](https://github.com/vllm-project/vllm) – Fast GPU inference