'''Defines available model configurations.
Maps three tiers ("tiny", "small", "medium") to their model filename, Hugging Face repo, required GPU VRAM, and required system RAM.
get_system_specs() uses psutil to compute total system RAM in GB and torch.cuda to query GPU VRAM in GB (zero if no CUDA device).
select_best_model() prints the detected RAM and GPU VRAM, chooses "small" if GPU VRAM ≥ 4 GB or if RAM ≥ 8 GB, otherwise "tiny", prints the chosen tier and model name, and returns the model filename and repo id.
ensure_model() picks a cache directory (Hugging Face Space or local), reuses a previously downloaded GGUF file when one exists, otherwise downloads it from the Hugging Face Hub, and returns the local path.
'''
import os
import psutil
from typing import Dict, Optional, Tuple
import torch
import torchaudio.transforms as T
from huggingface_hub import hf_hub_download
from llama_index.llms.llama_cpp import LlamaCPP

# Model options mapped to their requirements
MODEL_OPTIONS = {
    "tiny": {
        "name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf",
        "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "vram_req": 2,  # GB
        "ram_req": 4    # GB
    },
    "small": {
        "name": "phi-2.Q4_K_M.gguf",
        "repo": "TheBloke/phi-2-GGUF",
        "vram_req": 4,
        "ram_req": 8
    },
    "medium": {
        "name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "vram_req": 6,
        "ram_req": 16
    }
}

def get_system_specs() -> Dict[str, float]:
    """Get system specifications."""
    # Get RAM
    ram_gb = psutil.virtual_memory().total / (1024**3)

    # Get GPU info if available
    gpu_vram_gb = 0
    if torch.cuda.is_available():
        try:
            # Query GPU memory in bytes and convert to GB
            gpu_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        except Exception as e:
            print(f"Warning: Could not get GPU memory: {e}")

    return {
        "ram_gb": ram_gb,
        "gpu_vram_gb": gpu_vram_gb
    }

def select_best_model() -> Tuple[str, str]:
    """Select the best model based on system specifications."""
    specs = get_system_specs()
    print("\nSystem specifications:")
    print(f"RAM: {specs['ram_gb']:.1f} GB")
    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")

    # Prefer the GPU when it has enough VRAM for the quantized phi-2 model
    if specs['gpu_vram_gb'] >= 4:
        model_tier = "small"
    elif specs['ram_gb'] >= 8:
        # Enough system RAM to run phi-2 on CPU
        model_tier = "small"
    else:
        model_tier = "tiny"

    selected = MODEL_OPTIONS[model_tier]
    print(f"\nSelected model tier: {model_tier}")
    print(f"Model: {selected['name']}")
    return selected['name'], selected['repo']

def ensure_model(model_name: Optional[str] = None, repo_id: Optional[str] = None) -> str:
    """Ensures model is available, downloading only if needed."""
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))

    # Determine environment and set cache directory
    if os.path.exists("/home/user"):
        # HF Space environment
        cache_dir = "/home/user/.cache/models"
    else:
        # Local development environment
        cache_dir = os.path.join(BASE_DIR, "models")

    # Create cache directory if it doesn't exist
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        print(f"Warning: Could not create cache directory {cache_dir}: {e}")
        # Fall back to temporary directory if needed
        cache_dir = os.path.join("/tmp", "models")
        os.makedirs(cache_dir, exist_ok=True)

    # Get model details
    if not model_name or not repo_id:
        model_option = MODEL_OPTIONS["small"]  # default to small model
        model_name = model_option["name"]
        repo_id = model_option["repo"]

    # Ensure model_name and repo_id are not None
    if model_name is None:
        raise ValueError("model_name cannot be None")
    if repo_id is None:
        raise ValueError("repo_id cannot be None")

    # Check if model already exists in cache
    model_path = os.path.join(cache_dir, model_name)
    if os.path.exists(model_path):
        print(f"\nUsing cached model: {model_path}")
        return model_path

    print(f"\nDownloading model {model_name} from {repo_id}...")
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_name,
        cache_dir=cache_dir,
        local_dir=cache_dir
    )
    print(f"Model downloaded successfully to {model_path}")
    return model_path
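
# --- Hypothetical usage sketch (not part of the original module) ---
# A minimal example of how these helpers could be exercised when the file is
# run directly: pick a tier, make sure the GGUF file is cached locally, then
# load it with the LlamaCPP wrapper imported above. The LlamaCPP keyword
# arguments below (context_window, n_gpu_layers, etc.) are illustrative
# assumptions, not settings taken from this project.
if __name__ == "__main__":
    name, repo = select_best_model()
    path = ensure_model(model_name=name, repo_id=repo)
    print(f"Model ready at: {path}")

    # Assumed example of wrapping the downloaded GGUF file with llama-index's
    # LlamaCPP; tune context_window and n_gpu_layers for your hardware.
    llm = LlamaCPP(
        model_path=path,
        temperature=0.1,
        max_new_tokens=256,
        context_window=2048,
        model_kwargs={"n_gpu_layers": -1},  # assumption: offload all layers when a GPU is present
        verbose=False,
    )
    print(llm.complete("Hello!"))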