'''Defines the available model configurations and helpers for selecting and
downloading a model.

MODEL_OPTIONS maps three tiers ("tiny", "small", "medium") to a model
filename, its Hugging Face repo, and the GPU VRAM and system RAM it requires.

get_system_specs() uses psutil to report total system RAM in GB and torch.cuda
to query GPU VRAM in GB (zero when no CUDA device is available).

select_best_model() prints the detected RAM and GPU VRAM, chooses the "small"
tier if GPU VRAM >= 4 GB or system RAM >= 8 GB and "tiny" otherwise, prints the
chosen tier and model name, and returns the model filename and repo id.

ensure_model() resolves a cache directory (a Hugging Face Space path or a local
"models" directory), reuses a cached copy of the model if present, and
otherwise downloads it with hf_hub_download().
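
Example (a minimal usage sketch; the module name "model_config" is assumed,
and psutil, torch, and huggingface_hub must be installed):

    from model_config import select_best_model, ensure_model

    model_name, repo_id = select_best_model()
    model_path = ensure_model(model_name, repo_id)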
'''
import os
from typing import Dict, Optional, Tuple

import psutil
import torch
from huggingface_hub import hf_hub_download

# Model options mapped to their requirements
MODEL_OPTIONS = {
    "tiny": {
        "name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf",
        "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "vram_req": 2,  # GB
        "ram_req": 4    # GB
    },
    "small": {
        "name": "phi-2.Q4_K_M.gguf",
        "repo": "TheBloke/phi-2-GGUF",
        "vram_req": 4,
        "ram_req": 8
    },
    "medium": {
        "name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "vram_req": 6,
        "ram_req": 16
    }
}

def get_system_specs() -> Dict[str, float]:
    """Get system specifications."""
    # Get RAM
    ram_gb = psutil.virtual_memory().total / (1024**3)
    
    # Get GPU info if available
    gpu_vram_gb = 0
    if torch.cuda.is_available():
        try:
            # Query GPU memory in bytes and convert to GB
            gpu_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        except Exception as e:
            print(f"Warning: Could not get GPU memory: {e}")
    
    return {
        "ram_gb": ram_gb,
        "gpu_vram_gb": gpu_vram_gb
    }

def select_best_model() -> Tuple[str, str]:
    """Select the best model based on system specifications."""
    specs = get_system_specs()
    print(f"\nSystem specifications:")
    print(f"RAM: {specs['ram_gb']:.1f} GB")
    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")
    
    # Prefer the GPU when it has enough VRAM; otherwise fall back to system RAM.
    # Note: the "medium" tier exists in MODEL_OPTIONS but is not chosen
    # automatically by this heuristic.
    if specs['gpu_vram_gb'] >= 4:
        model_tier = "small"  # quantized phi-2 fits comfortably in 4 GB of VRAM
    elif specs['ram_gb'] >= 8:
        model_tier = "small"
    else:
        model_tier = "tiny"
    
    selected = MODEL_OPTIONS[model_tier]
    print(f"\nSelected model tier: {model_tier}")
    print(f"Model: {selected['name']}")
    
    return selected['name'], selected['repo']

def ensure_model(model_name: Optional[str] = None, repo_id: Optional[str] = None) -> str:
    """Ensures model is available, downloading only if needed."""
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))

    # Determine environment and set cache directory
    if os.path.exists("/home/user"):
        # HF Space environment
        cache_dir = "/home/user/.cache/models"
    else:
        # Local development environment
        cache_dir = os.path.join(BASE_DIR, "models")
    
    # Create cache directory if it doesn't exist
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        print(f"Warning: Could not create cache directory {cache_dir}: {e}")
        # Fall back to temporary directory if needed
        cache_dir = os.path.join("/tmp", "models")
        os.makedirs(cache_dir, exist_ok=True)
    
    # Get model details
    if not model_name or not repo_id:
        model_option = MODEL_OPTIONS["small"]  # default to small model
        model_name = model_option["name"]
        repo_id = model_option["repo"]
    
    # Ensure model_name and repo_id are not None
    if model_name is None:
        raise ValueError("model_name cannot be None")
    if repo_id is None:
        raise ValueError("repo_id cannot be None")
    # Check if model already exists in cache
    model_path = os.path.join(cache_dir, model_name)
    if os.path.exists(model_path):
        print(f"\nUsing cached model: {model_path}")
        return model_path
        
    print(f"\nDownloading model {model_name} from {repo_id}...")

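    # Passing local_dir places the downloaded file directly in cache_dir, so the
    # cached-model check above will find it on subsequent runs.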
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_name,
        cache_dir=cache_dir,
        local_dir=cache_dir
    )
    print(f"Model downloaded successfully to {model_path}")
    return model_path
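

# Minimal manual check when run as a script (a usage sketch, not part of the
# module's API; the first run needs network access to download the model).
if __name__ == "__main__":
    name, repo = select_best_model()
    path = ensure_model(name, repo)
    print(f"Model ready at: {path}")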