# FILE: modules/config_settings_public.py (Hugging Face Demo)
import os
import sys
import logging
from huggingface_hub import hf_hub_download
logger = logging.getLogger("ZOTHEOS_Config")
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
# --- ✅ WEB-OPTIMIZED MODEL SOURCES ---
# These models are smaller and faster, perfect for a CPU-based web demo.
MODEL_DEFINITIONS = {
    "mistral": {
        "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        "filename": "mistral-7b-instruct-v0.2.Q2_K.gguf"  # Using a smaller Q2_K quantization for speed
    },
    "gemma": {
        "repo_id": "TheBloke/gemma-2b-it-GGUF",
        "filename": "gemma-2b-it.Q4_K_M.gguf"  # Using the 2B parameter version
    },
    "qwen": {
        "repo_id": "TheBloke/Qwen1.5-1.8B-Chat-GGUF",
        "filename": "qwen1.5-1.8b-chat.Q4_K_M.gguf"  # Using the 1.8B parameter version
    }
}
MODEL_PATHS = {}
# Download the demo models at import time; this module is only loaded on Hugging Face Spaces.
logger.info("β
β
β
RUNNING IN WEB DEMO MODE (Hugging Face Space) β
β
β
")
N_GPU_LAYERS_FALLBACK = 0 # Force CPU-only mode for Hugging Face free tier
for name, model_info in MODEL_DEFINITIONS.items():
    logger.info(f"Downloading model for demo: {name} from {model_info['repo_id']}")
    try:
        MODEL_PATHS[name] = hf_hub_download(repo_id=model_info["repo_id"], filename=model_info["filename"])
        logger.info(f"✅ Successfully downloaded {name}")
    except Exception as e:
        logger.error(f"❌ FAILED to download {name}: {e}")
        raise
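# Note: hf_hub_download caches each file locally and returns its path, so
# MODEL_PATHS ends up mapping each key to a cached .gguf path, e.g.
# (illustrative path only):
#   MODEL_PATHS["mistral"] -> ".../huggingface/hub/.../mistral-7b-instruct-v0.2.Q2_K.gguf"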
# --- ✅ WEB-OPTIMIZED MODEL PARAMETERS ---
MODEL_SPECIFIC_PARAMS = {
    "_default": {
        "n_gpu_layers": N_GPU_LAYERS_FALLBACK,  # Ensures CPU usage
        "n_ctx": 4096,   # Smaller context window for lower RAM usage
        "n_batch": 512,  # Standard batch size for CPU
        "verbose": True
    },
    "mistral": {"chat_format": "mistral-instruct"},
    "gemma": {"chat_format": "gemma"},
    "qwen": {"chat_format": "chatml"}
}
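# Illustrative sketch (not part of this config; the real loader lives elsewhere
# in the app): a per-model dict would typically be merged over "_default" and
# passed to llama-cpp-python's constructor. The variable names below are
# assumptions, not this repo's API:
#
#   from llama_cpp import Llama
#   params = {**MODEL_SPECIFIC_PARAMS["_default"], **MODEL_SPECIFIC_PARAMS.get("mistral", {})}
#   llm = Llama(model_path=MODEL_PATHS["mistral"], **params)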
# --- ✅ TIER INFERENCE PRESETS ---
INFERENCE_PRESETS = {
    "balanced": {"temperature": 0.7, "top_p": 0.9, "max_tokens": 1024, "repeat_penalty": 1.1},
    "precise":  {"temperature": 0.2, "top_p": 0.7, "top_k": 20, "max_tokens": 1536, "repeat_penalty": 1.05},
    "creative": {"temperature": 0.9, "top_p": 0.95, "top_k": 60, "max_tokens": 1024, "repeat_penalty": 1.15}
}
DEFAULT_INFERENCE_PRESET = "balanced"
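# Illustrative sketch (assumed call site, not defined here): each preset is a
# plain kwargs dict for llama-cpp-python's chat completion API, e.g.:
#
#   preset = INFERENCE_PRESETS.get(mode, INFERENCE_PRESETS[DEFAULT_INFERENCE_PRESET])
#   result = llm.create_chat_completion(messages=messages, **preset)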
# --- ✅ TIER MODEL ROLES AND PROMPTS ---
DEFAULT_SYSTEM_PROMPT = "You are ZOTHEOS, an ethical AI developed to help humanity. Be clear, respectful, and helpful. Respond only in English."
MODEL_ROLES = {"mistral": "analyst", "gemma": "humanist", "qwen": "skeptic"}
MODEL_ROLE_SYSTEM_PROMPTS = {
    "analyst": "You are an impartial analyst. Provide structured, logical insights. Break down complex topics into clear, understandable points. Respond only in English.",
    "humanist": "You are an empathetic and values-driven AI. Focus on the emotional, ethical, and societal impact. Consider the human element above all. Respond only in English.",
    "skeptic": "You are a respectful but rigorous skeptic. Challenge the user's premise, identify potential risks, question assumptions, and explore alternative interpretations. Respond only in English.",
}
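# Illustrative sketch (hypothetical helper, not defined in this repo): resolving
# the system prompt for a given model, falling back to the default:
#
#   def system_prompt_for(model_name: str) -> str:
#       role = MODEL_ROLES.get(model_name)
#       return MODEL_ROLE_SYSTEM_PROMPTS.get(role, DEFAULT_SYSTEM_PROMPT)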
logger.info("β
Hugging Face Demo Configuration Loaded Successfully.") |