'''Defines available model configurations and picks/downloads the best one for this machine.

MODEL_OPTIONS maps three tiers ("tiny", "small", "medium") to their model filename,
Hugging Face repo, required GPU VRAM, and required system RAM.

get_system_specs() uses psutil to compute total system RAM in GB and torch.cuda to
query GPU VRAM in GB (zero if no CUDA device).

select_best_model() prints the detected RAM and GPU VRAM, chooses "small" if GPU VRAM
>= 4 GB or RAM >= 8 GB (otherwise "tiny"), prints the chosen tier and model name, and
returns the model filename and repo string.

ensure_model() resolves a cache directory (Hugging Face Space or local development),
returns the cached model path if the file already exists, and otherwise downloads the
GGUF file from the Hugging Face Hub.

A minimal usage sketch is included at the bottom of this module.
'''
import os
from typing import Dict, Optional, Tuple

import psutil
import torch
import torchaudio.transforms as T
from huggingface_hub import hf_hub_download
from llama_index.llms.llama_cpp import LlamaCPP
# Model options mapped to their hardware requirements
MODEL_OPTIONS = {
    "tiny": {
        "name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf",
        "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "vram_req": 2,  # GB
        "ram_req": 4    # GB
    },
    "small": {
        "name": "phi-2.Q4_K_M.gguf",
        "repo": "TheBloke/phi-2-GGUF",
        "vram_req": 4,
        "ram_req": 8
    },
    "medium": {
        "name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "vram_req": 6,
        "ram_req": 16
    }
}
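
# For example, MODEL_OPTIONS["small"]["name"] is "phi-2.Q4_K_M.gguf" and
# MODEL_OPTIONS["small"]["repo"] is "TheBloke/phi-2-GGUF".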


def get_system_specs() -> Dict[str, float]:
    """Get system specifications: total RAM and GPU VRAM in GB."""
    # Total system RAM in GB
    ram_gb = psutil.virtual_memory().total / (1024**3)

    # GPU VRAM in GB, if a CUDA device is available
    gpu_vram_gb = 0.0
    if torch.cuda.is_available():
        try:
            # Query GPU memory in bytes and convert to GB
            gpu_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        except Exception as e:
            print(f"Warning: Could not get GPU memory: {e}")

    return {
        "ram_gb": ram_gb,
        "gpu_vram_gb": gpu_vram_gb
    }
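
# Illustrative only: on a machine with 16 GB of RAM and a 6 GB CUDA GPU,
# get_system_specs() returns roughly {"ram_gb": 15.9, "gpu_vram_gb": 6.0}.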


def select_best_model() -> Tuple[str, str]:
    """Select the best model tier based on the detected system specifications."""
    specs = get_system_specs()
    print("\nSystem specifications:")
    print(f"RAM: {specs['ram_gb']:.1f} GB")
    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")

    # Prefer the GPU when it has enough VRAM; otherwise fall back on system RAM
    if specs['gpu_vram_gb'] >= 4 or specs['ram_gb'] >= 8:
        model_tier = "small"  # phi-2: needs ~4 GB VRAM or ~8 GB RAM
    else:
        model_tier = "tiny"

    selected = MODEL_OPTIONS[model_tier]
    print(f"\nSelected model tier: {model_tier}")
    print(f"Model: {selected['name']}")
    return selected['name'], selected['repo']
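
# Illustrative only: with at least 4 GB of GPU VRAM (or 8 GB of RAM),
# select_best_model() returns ("phi-2.Q4_K_M.gguf", "TheBloke/phi-2-GGUF").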


def ensure_model(model_name: Optional[str] = None, repo_id: Optional[str] = None) -> str:
    """Ensure the model file is available locally, downloading it only if needed."""
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))

    # Determine environment and set the cache directory
    if os.path.exists("/home/user"):
        # Hugging Face Space environment
        cache_dir = "/home/user/.cache/models"
    else:
        # Local development environment
        cache_dir = os.path.join(BASE_DIR, "models")

    # Create the cache directory if it doesn't exist
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        print(f"Warning: Could not create cache directory {cache_dir}: {e}")
        # Fall back to a temporary directory if needed
        cache_dir = os.path.join("/tmp", "models")
        os.makedirs(cache_dir, exist_ok=True)

    # Default to the small model if no explicit model was requested
    if not model_name or not repo_id:
        model_option = MODEL_OPTIONS["small"]
        model_name = model_option["name"]
        repo_id = model_option["repo"]

    # Ensure model_name and repo_id are not None
    if model_name is None:
        raise ValueError("model_name cannot be None")
    if repo_id is None:
        raise ValueError("repo_id cannot be None")

    # Reuse the model if it already exists in the cache
    model_path = os.path.join(cache_dir, model_name)
    if os.path.exists(model_path):
        print(f"\nUsing cached model: {model_path}")
        return model_path

    print(f"\nDownloading model {model_name} from {repo_id}...")
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_name,
        cache_dir=cache_dir,
        local_dir=cache_dir
    )
    print(f"Model downloaded successfully to {model_path}")
    return model_path
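

# A minimal, illustrative sketch of the end-to-end flow, assuming this module is run
# directly: pick a tier from the detected hardware, then download (or reuse) the GGUF
# file. The returned path is what a loader such as LlamaCPP(model_path=...) from
# llama_index (imported above) would consume.
if __name__ == "__main__":
    chosen_name, chosen_repo = select_best_model()
    model_file = ensure_model(chosen_name, chosen_repo)
    print(f"Model ready at: {model_file}")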