# FILE: modules/config_settings_public.py (Hugging Face Demo)
import logging
import os
import sys

from huggingface_hub import hf_hub_download
# Module-level logger for the demo configuration.
# A stdout handler is attached only when the logger has no handlers yet, so
# importing or reloading this module again does not duplicate log lines.
logger = logging.getLogger("ZOTHEOS_Config")
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s'
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # NOTE(review): the original indentation was lost; setLevel is assumed to
    # belong to this guarded block -- confirm against the upstream file.
    logger.setLevel(logging.INFO)
# --- WEB-OPTIMIZED MODEL SOURCES ---
# These models are smaller and faster, perfect for a CPU-based web demo.
# Each entry maps a short model name to the Hub repository ("repo_id") and the
# GGUF file inside it ("filename") that should be fetched.
# NOTE(review): confirm that every repo_id/filename pair still exists on the
# Hugging Face Hub before deploying.
MODEL_DEFINITIONS = {
    "mistral": {
        "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        "filename": "mistral-7b-instruct-v0.2.Q2_K.gguf",  # Using a smaller Q2_K quantization for speed
    },
    "gemma": {
        "repo_id": "TheBloke/gemma-2b-it-GGUF",
        "filename": "gemma-2b-it.Q4_K_M.gguf",  # Using the 2B parameter version
    },
    "qwen": {
        "repo_id": "TheBloke/Qwen1.5-1.8B-Chat-GGUF",
        "filename": "qwen1.5-1.8b-chat.Q4_K_M.gguf",  # Using the 1.8B parameter version
    },
}
# Maps each model name to the value returned by hf_hub_download for its file.
MODEL_PATHS = {}

# This logic is intended to run only on Hugging Face Spaces.
# NOTE(review): no environment check is visible here; the code below executes
# unconditionally whenever this module is imported.
logger.info("β β β RUNNING IN WEB DEMO MODE (Hugging Face Space) β β β ")
N_GPU_LAYERS_FALLBACK = 0  # Force CPU-only mode for Hugging Face free tier

# Download every configured model at import time. The first failure is logged
# and re-raised, so a missing model aborts the import instead of leaving
# MODEL_PATHS partially filled for later code to trip over.
for name, model_info in MODEL_DEFINITIONS.items():
    logger.info(f"Downloading model for demo: {name} from {model_info['repo_id']}")
    try:
        MODEL_PATHS[name] = hf_hub_download(
            repo_id=model_info["repo_id"],
            filename=model_info["filename"],
        )
    except Exception as e:
        logger.error(f"β FAILED to download {name}: {e}")
        # Bare raise re-raises the active exception with its original traceback.
        raise
    else:
        logger.info(f"β Successfully downloaded {name}")
# --- WEB-OPTIMIZED MODEL PARAMETERS ---
# "_default" holds the shared loader settings; the per-model entries only add
# the chat format for that model.
# NOTE(review): presumably the consumer overlays a model's entry on top of
# "_default" -- confirm in the code that reads this dict.
MODEL_SPECIFIC_PARAMS = {
    "_default": {
        "n_gpu_layers": N_GPU_LAYERS_FALLBACK,  # Ensures CPU usage
        "n_ctx": 4096,  # Smaller context window for lower RAM usage
        "n_batch": 512,  # Standard batch size for CPU
        "verbose": True,
    },
    "mistral": {"chat_format": "mistral-instruct"},
    "gemma": {"chat_format": "gemma"},
    "qwen": {"chat_format": "chatml"},
}
# --- TIER INFERENCE PRESETS ---
# Named bundles of sampling parameters, keyed by preset name.
# NOTE(review): "balanced" defines no "top_k", unlike the other two presets;
# presumably the inference backend's own default applies -- confirm.
INFERENCE_PRESETS = {
    "balanced": {
        "temperature": 0.7,
        "top_p": 0.9,
        "max_tokens": 1024,
        "repeat_penalty": 1.1,
    },
    "precise": {
        "temperature": 0.2,
        "top_p": 0.7,
        "top_k": 20,
        "max_tokens": 1536,
        "repeat_penalty": 1.05,
    },
    "creative": {
        "temperature": 0.9,
        "top_p": 0.95,
        "top_k": 60,
        "max_tokens": 1024,
        "repeat_penalty": 1.15,
    },
}

# Name of the preset to use by default; it is a key of INFERENCE_PRESETS.
DEFAULT_INFERENCE_PRESET = "balanced"
# --- TIER MODEL ROLES AND PROMPTS ---
# System prompt that is not tied to any particular role.
DEFAULT_SYSTEM_PROMPT = "You are ZOTHEOS, an ethical AI developed to help humanity. Be clear, respectful, and helpful. Respond only in English."

# Role assigned to each model name; every role used here has a matching entry
# in MODEL_ROLE_SYSTEM_PROMPTS below.
MODEL_ROLES = {"mistral": "analyst", "gemma": "humanist", "qwen": "skeptic"}

# System prompt for each role, keyed by role name.
MODEL_ROLE_SYSTEM_PROMPTS = {
    "analyst": "You are an impartial analyst. Provide structured, logical insights. Break down complex topics into clear, understandable points. Respond only in English.",
    "humanist": "You are an empathetic and values-driven AI. Focus on the emotional, ethical, and societal impact. Consider the human element above all. Respond only in English.",
    "skeptic": "You are a respectful but rigorous skeptic. Challenge the user's premise, identify potential risks, question assumptions, and explore alternative interpretations. Respond only in English.",
}
# Emitted once all of the settings above have been defined during import.
logger.info("β Hugging Face Demo Configuration Loaded Successfully.")