# FILE: modules/config_settings_public.py (Definitive Final Version)

import os
import sys
import logging
from huggingface_hub import hf_hub_download

logger = logging.getLogger("ZOTHEOS_Config")
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

# --- A more reliable way to detect if we are running in a Hugging Face Space ---
IS_WEB_MODE = os.path.exists("/home/user/app")

# --- Define Model Repositories and Filenames ONCE ---
MODEL_DEFINITIONS = {
    "mistral": {
        "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    },
    "gemma": {
        "repo_id": "google/gemma-2b-it-gguf",  # βœ…βœ…βœ… THE ONLY CHANGE IS THIS LINE (GGUF -> gguf) βœ…βœ…βœ…
        "filename": "gemma-2b-it.Q4_K_M.gguf"
    },
    "qwen": {
        "repo_id": "Qwen/Qwen1.5-1.8B-Chat-GGUF",
        "filename": "qwen1.5-1.8b-chat.Q4_K_M.gguf"
    }
}

# --- Initialize MODEL_PATHS dictionary ---
MODEL_PATHS = {}

# --- Set up paths and GPU layers based on environment ---
if IS_WEB_MODE:
    logger.info("βœ…βœ…βœ… RUNNING IN WEB MODE (Hugging Face Space) βœ…βœ…βœ…")
    logger.info("Model paths will be resolved by hf_hub_download.")
    
    for name, model_info in MODEL_DEFINITIONS.items():
        logger.info(f"Downloading model: {name} from repo: {model_info['repo_id']}")
        try:
            MODEL_PATHS[name] = hf_hub_download(repo_id=model_info["repo_id"], filename=model_info["filename"])
            logger.info(f"βœ… Successfully downloaded {name}.")
        except Exception as e:
            logger.error(f"❌ FAILED to download model {name}. Error: {e}")
            raise  # re-raise with the original traceback intact

    N_GPU_LAYERS_FALLBACK = 0
    logger.info("N_GPU_LAYERS_FALLBACK forced to 0 for CPU-only web environment.")

else: # LOCAL MODE
    logger.info("βœ…βœ…βœ… RUNNING IN LOCAL MODE (Desktop/PC) βœ…βœ…βœ…")
    APP_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    BASE_MODELS_DIR = os.path.join(APP_DIR, "models")
    logger.info(f"Models will be loaded from local directory: {BASE_MODELS_DIR}")
    for name, model_info in MODEL_DEFINITIONS.items():
        MODEL_PATHS[name] = os.path.join(BASE_MODELS_DIR, model_info["filename"])
    N_GPU_LAYERS_FALLBACK = -1
    logger.info("N_GPU_LAYERS_FALLBACK set to -1 for local GPU acceleration.")


# --- Shared Configurations ---
MAX_RAM_MODELS_GB = 23.8
MAX_CONCURRENT_MODELS = 3
N_CTX_FALLBACK = 2048
N_THREADS_FALLBACK = 8
VERBOSE_LLAMA_CPP = True

MODEL_SPECIFIC_PARAMS = {
    "mistral": { "chat_format": "mistral-instruct" },
    "gemma": { "chat_format": "gemma" },
    "qwen": { "chat_format": "chatml" },
    "_default": {
        "f16_kv": True, "use_mmap": True, "use_mlock": False,
        "verbose": VERBOSE_LLAMA_CPP,
        "n_gpu_layers": N_GPU_LAYERS_FALLBACK,
        "n_threads": N_THREADS_FALLBACK,
        "n_ctx": N_CTX_FALLBACK
    }
}
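
# Illustrative sketch (not part of the original config): how a loader module
# might merge the shared "_default" block with a model's specific overrides
# before constructing a llama-cpp-python Llama instance. The function name
# resolve_model_params is hypothetical; ZOTHEOS's actual loader may differ.
def resolve_model_params(model_name: str) -> dict:
    """Merge the _default llama.cpp kwargs with per-model overrides (sketch)."""
    params = dict(MODEL_SPECIFIC_PARAMS["_default"])
    params.update(MODEL_SPECIFIC_PARAMS.get(model_name, {}))
    params["model_path"] = MODEL_PATHS[model_name]
    # A consumer could then do e.g.: llama_cpp.Llama(**resolve_model_params("mistral"))
    return params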

INFERENCE_PRESETS = {
    "balanced": {"temperature": 0.7, "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.1, "mirostat_mode": 0, "max_tokens": 1024},
    "precise": {"temperature": 0.2, "top_p": 0.7, "top_k": 20, "repeat_penalty": 1.05, "mirostat_mode": 0, "max_tokens": 1536},
    "creative": {"temperature": 0.9, "top_p": 0.95, "top_k": 60, "repeat_penalty": 1.15, "mirostat_mode": 2, "mirostat_tau": 4.0, "mirostat_eta": 0.1, "max_tokens": 1024},
    "passthrough": {}
}
DEFAULT_INFERENCE_PRESET = "balanced"

DEFAULT_SYSTEM_PROMPT = "You are ZOTHEOS, an ethical AI developed to help humanity. Provide clear, concise, and helpful responses. Be respectful and avoid harmful content."
SYSTEM_PERSONAS = {
    "default": DEFAULT_SYSTEM_PROMPT, "helpful_assistant": "You are a helpful AI assistant.", "philosopher": "You are an AI philosopher.",
    "coder": "You are an expert AI programmer.", "concise_summarizer": "You are an AI tasked with providing very concise summaries."
}
MODEL_ROLES = {"mistral": "analyst", "gemma": "humanist", "qwen": "skeptic"}
MODEL_ROLE_SYSTEM_PROMPTS = {
    "analyst": "You are an impartial analyst.", "humanist": "You are a human-centered assistant.",
    "skeptic": "You are a critical evaluator and a respectful skeptic.", "general": DEFAULT_SYSTEM_PROMPT
}

ZOTHEOS_VERSION = "Public Beta 1.8 (Live)"
logger.info(f"Config settings loaded. Version: {ZOTHEOS_VERSION}")
logger.info(f"Web Mode: {IS_WEB_MODE}")