"""Defines available model configurations.

Maps three tiers ("tiny", "small", "medium") to their model filename,
Hugging Face repo, required GPU VRAM, and required system RAM.

get_system_specs() uses psutil to compute total system RAM in GB and
torch.cuda to query GPU VRAM in GB (zero if no CUDA device is available).

select_best_model() prints the detected RAM and GPU VRAM, chooses "small"
if GPU VRAM >= 4 GB or RAM >= 8 GB and "tiny" otherwise, prints the chosen
tier and model name, and returns the model filename and repo id.

ensure_model() resolves a cache directory (Hugging Face Space or local),
reuses a cached copy of the model if one exists, and otherwise downloads it
with hf_hub_download().
"""
import os
from typing import Dict, Optional, Tuple

import psutil
import torch
import torchaudio.transforms as T
from huggingface_hub import hf_hub_download
from llama_index.llms.llama_cpp import LlamaCPP

# Model options mapped to their requirements
MODEL_OPTIONS = {
    "tiny": {
        "name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf",
        "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "vram_req": 2,  # GB
        "ram_req": 4,   # GB
    },
    "small": {
        "name": "phi-2.Q4_K_M.gguf",
        "repo": "TheBloke/phi-2-GGUF",
        "vram_req": 4,
        "ram_req": 8,
    },
    "medium": {
        "name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "vram_req": 6,
        "ram_req": 16,
    },
}


def get_system_specs() -> Dict[str, float]:
    """Return total system RAM and GPU VRAM in GB."""
    # Total physical RAM, converted from bytes to GB
    ram_gb = psutil.virtual_memory().total / (1024**3)

    # GPU VRAM if a CUDA device is available; otherwise stays 0.0
    gpu_vram_gb = 0.0
    if torch.cuda.is_available():
        try:
            # Query GPU memory in bytes and convert to GB
            gpu_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        except Exception as e:
            print(f"Warning: Could not get GPU memory: {e}")

    return {
        "ram_gb": ram_gb,
        "gpu_vram_gb": gpu_vram_gb,
    }


def select_best_model() -> Tuple[str, str]:
    """Select the best model based on system specifications."""
    specs = get_system_specs()
    print("\nSystem specifications:")
    print(f"RAM: {specs['ram_gb']:.1f} GB")
    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")

    # Prefer the GPU when it has enough VRAM for the quantized phi-2 model;
    # otherwise fall back to system RAM, then to the smallest model.
    if specs['gpu_vram_gb'] >= 4:
        model_tier = "small"
    elif specs['ram_gb'] >= 8:
        model_tier = "small"
    else:
        model_tier = "tiny"

    selected = MODEL_OPTIONS[model_tier]
    print(f"\nSelected model tier: {model_tier}")
    print(f"Model: {selected['name']}")

    return selected['name'], selected['repo']


def ensure_model(model_name: Optional[str] = None, repo_id: Optional[str] = None) -> str:
    """Ensure the model is available locally, downloading it only if needed."""
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))

    # Determine environment and set cache directory
    if os.path.exists("/home/user"):
        # Hugging Face Space environment
        cache_dir = "/home/user/.cache/models"
    else:
        # Local development environment
        cache_dir = os.path.join(BASE_DIR, "models")

    # Create the cache directory if it doesn't exist
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        print(f"Warning: Could not create cache directory {cache_dir}: {e}")
        # Fall back to a temporary directory if needed
        cache_dir = os.path.join("/tmp", "models")
        os.makedirs(cache_dir, exist_ok=True)

    # Default to the small model if either argument is missing
    if not model_name or not repo_id:
        model_option = MODEL_OPTIONS["small"]
        model_name = model_option["name"]
        repo_id = model_option["repo"]

    # Ensure model_name and repo_id are not None
    if model_name is None:
        raise ValueError("model_name cannot be None")
    if repo_id is None:
        raise ValueError("repo_id cannot be None")

    # Reuse the model if it already exists in the cache
    model_path = os.path.join(cache_dir, model_name)
    if os.path.exists(model_path):
        print(f"\nUsing cached model: {model_path}")
        return model_path

    print(f"\nDownloading model {model_name} from {repo_id}...")
    # local_dir places the file directly in cache_dir, so the existence check
    # above will find it on the next run
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_name,
        cache_dir=cache_dir,
        local_dir=cache_dir,
    )
    print(f"Model downloaded successfully to {model_path}")
    return model_path