Upload 9 files
- modules/config_settings_public.py +143 -0
- modules/knowledge_core_public.py +102 -0
- modules/main_fusion_public.py +379 -0
- modules/memory_bank_public.py +354 -0
- modules/model_manager_public.py +401 -0
- modules/plugin_manager_public.py +80 -0
- modules/query_processor_public.py +135 -0
- modules/response_optimizer_public.py +53 -0
- modules/user_auth.py +136 -0
modules/config_settings_public.py
ADDED
@@ -0,0 +1,143 @@
# FILE: modules/config_settings_public.py

import os
import sys
import logging
from huggingface_hub import hf_hub_download # ✅ IMPORT THE DOWNLOADER

logger = logging.getLogger("ZOTHEOS_Config")
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

# --- ✅ WEB DEPLOYMENT & LOCAL PATH CONFIG ---

# Standard way to detect if we are running in a Hugging Face Space
IS_WEB_MODE = "HF_SPACE_ID" in os.environ

_is_frozen = getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')

if _is_frozen:
    APP_DIR = os.path.dirname(sys.executable)
else:
    try:
        APP_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    except NameError:
        APP_DIR = os.getcwd()
        logger.warning(f"__file__ not defined, APP_DIR set to CWD: {APP_DIR}")

# This directory structure is primarily for local/desktop use.
# On Hugging Face, these paths will be created in the virtual environment.
BASE_MODELS_DIR = os.path.join(APP_DIR, "models")
BASE_DATA_SUBDIR = "zotheos_public_data"
BASE_DATA_PATH = os.path.join(APP_DIR, BASE_DATA_SUBDIR)
CORE_DIRS_TO_VERIFY = {
    "data_base": BASE_DATA_PATH, "memory": os.path.join(BASE_DATA_PATH, "zotheos_memory"),
    "cache": os.path.join(BASE_DATA_PATH, "cache"), "cache_transformers": os.path.join(BASE_DATA_PATH, "cache", "transformers"),
    "cache_huggingface": os.path.join(BASE_DATA_PATH, "cache", "huggingface"), "cache_hf_hub": os.path.join(BASE_DATA_PATH, "cache", "huggingface", "hub"),
    "logs": os.path.join(BASE_DATA_PATH, "logs"), "temp": os.path.join(BASE_DATA_PATH, "temp_files"),
    "models_root": BASE_MODELS_DIR
}

for dir_key, dir_path in CORE_DIRS_TO_VERIFY.items():
    try:
        if dir_key == "models_root" and (_is_frozen or IS_WEB_MODE):
            continue # Skip creating models folder for frozen app or web app
        os.makedirs(dir_path, exist_ok=True)
    except Exception as e:
        logger.error(f"Error creating directory {dir_path}: {e}")

# --- ✅ CONDITIONAL MODEL PATHS & GPU CONFIG ---

if IS_WEB_MODE:
    logger.info("✅✅✅ RUNNING IN WEB MODE (Hugging Face Space) ✅✅✅")
    logger.info("Model paths will be resolved by hf_hub_download.")

    # Download models from the Hub instead of looking for local files
    MODEL_PATHS = {
        "mistral": hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"),
        "gemma": hf_hub_download(repo_id="google/gemma-2b-it-gguf", filename="gemma-2b-it.Q4_K_M.gguf"),
        "qwen": hf_hub_download(repo_id="Qwen/Qwen1.5-1.8B-Chat-GGUF", filename="qwen1.5-1.8b-chat.Q4_K_M.gguf")
    }
    # Free Hugging Face Spaces are CPU-only, so GPU layers MUST be 0
    N_GPU_LAYERS_FALLBACK = 0
    logger.info("N_GPU_LAYERS_FALLBACK forced to 0 for CPU-only web environment.")

else:
    logger.info("✅✅✅ RUNNING IN LOCAL MODE (Desktop/PC) ✅✅✅")
    logger.info(f"Models will be loaded from local directory: {BASE_MODELS_DIR}")

    # Use local file paths
    # Note: I've updated your paths to match the ones from the hf_hub_download for consistency.
    # Please ensure your local folder structure matches these paths.
    MODEL_PATHS = {
        "mistral": os.path.join(BASE_MODELS_DIR, "mistral-7b-instruct-v0.2.Q4_K_M.gguf"),
        "gemma": os.path.join(BASE_MODELS_DIR, "gemma-2b-it.Q4_K_M.gguf"),
        "qwen": os.path.join(BASE_MODELS_DIR, "qwen1.5-1.8b-chat.Q4_K_M.gguf"),
    }
    # On local machine, use your GPU
    N_GPU_LAYERS_FALLBACK = -1 # -1 means offload all to GPU
    logger.info("N_GPU_LAYERS_FALLBACK set to -1 for local GPU acceleration.")


# --- Shared Configurations ---

MAX_RAM_MODELS_GB = 23.8
MAX_CONCURRENT_MODELS = 3
N_CTX_FALLBACK = 2048
N_THREADS_FALLBACK = 8
VERBOSE_LLAMA_CPP = True

MODEL_SPECIFIC_PARAMS = {
    "mistral": { "chat_format": "mistral-instruct", "n_ctx": N_CTX_FALLBACK },
    "gemma": { "chat_format": "gemma", "n_ctx": N_CTX_FALLBACK },
    "qwen": { "chat_format": "chatml", "n_ctx": N_CTX_FALLBACK },
    "_default": {
        "f16_kv": True, "use_mmap": True, "use_mlock": False,
        "verbose": VERBOSE_LLAMA_CPP,
        "n_gpu_layers": N_GPU_LAYERS_FALLBACK, # This now uses the correct value for web or local
        "n_threads": N_THREADS_FALLBACK,
        "n_ctx": N_CTX_FALLBACK
    }
}

INFERENCE_PRESETS = {
    "balanced": {"temperature": 0.7, "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.1, "mirostat_mode": 0, "max_tokens": 1024},
    "precise": {"temperature": 0.2, "top_p": 0.7, "top_k": 20, "repeat_penalty": 1.05, "mirostat_mode": 0, "max_tokens": 1536},
    "creative": {"temperature": 0.9, "top_p": 0.95, "top_k": 60, "repeat_penalty": 1.15, "mirostat_mode": 2, "mirostat_tau": 4.0, "mirostat_eta": 0.1, "max_tokens": 1024},
    "passthrough": {}
}
DEFAULT_INFERENCE_PRESET = "balanced"

DEFAULT_SYSTEM_PROMPT = "You are ZOTHEOS, an ethical AI developed to help humanity. Provide clear, concise, and helpful responses. Be respectful and avoid harmful content."
SYSTEM_PERSONAS = {
    "default": DEFAULT_SYSTEM_PROMPT, "helpful_assistant": "You are a helpful AI assistant. Your goal is to provide accurate and informative answers.",
    "philosopher": "You are an AI philosopher. Engage with complex questions thoughtfully and explore different perspectives.",
    "coder": "You are an expert AI programmer. Provide code examples and explain them clearly. Assume a senior developer audience.",
    "concise_summarizer": "You are an AI tasked with providing very concise summaries. Get straight to the point. Use bullet points where appropriate.",
}

MODEL_ROLES = { "mistral": "analyst", "gemma": "humanist", "qwen": "skeptic" }
MODEL_ROLE_SYSTEM_PROMPTS = {
    "analyst": "You are an impartial analyst. Focus on facts, clarity, and cause-effect logic. Provide structured, evidence-based reasoning.",
    "humanist": "You are a human-centered assistant. Focus on emotion, empathy, ethical considerations, and the potential human impact or experience related to the query.",
    "skeptic": "You are a critical evaluator and a respectful skeptic. Your role is to challenge assumptions, highlight potential risks, point out biases, and explore alternative or less obvious interpretations. Question the premises if necessary.",
    "general": DEFAULT_SYSTEM_PROMPT
}

MODEL_WEIGHTS = { "mistral": 1.0, "gemma": 0.9, "qwen": 1.1 }
LOG_LEVEL = "INFO"
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s'

ENV_VARS_TO_SET = {
    "TRANSFORMERS_CACHE": CORE_DIRS_TO_VERIFY["cache_transformers"], "HF_HOME": CORE_DIRS_TO_VERIFY["cache_huggingface"],
    "HF_HUB_CACHE": CORE_DIRS_TO_VERIFY["cache_hf_hub"], "TOKENIZERS_PARALLELISM": "false",
}

ZOTHEOS_VERSION = "Public Beta 1.4 (Web Enabled)"

logger.info(f"Config settings loaded. Version: {ZOTHEOS_VERSION}")
logger.info(f"APP_DIR: {APP_DIR} (Frozen: {_is_frozen}) | Web Mode: {IS_WEB_MODE}")
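For context on how these settings are consumed: a model loader (the model_manager_public.py file in this same upload, not shown in this excerpt) would typically merge the shared "_default" block with a model's specific overrides before constructing a llama_cpp.Llama instance. The following is a minimal illustrative sketch, not code from the commit; the helper name build_llama_kwargs is hypothetical.

# Hypothetical helper, for illustration only.
from modules.config_settings_public import MODEL_PATHS, MODEL_SPECIFIC_PARAMS

def build_llama_kwargs(model_name: str) -> dict:
    # Start from the shared defaults, then layer on per-model overrides.
    kwargs = dict(MODEL_SPECIFIC_PARAMS["_default"])
    kwargs.update(MODEL_SPECIFIC_PARAMS.get(model_name, {}))
    kwargs["model_path"] = MODEL_PATHS[model_name]  # resolved locally or via hf_hub_download
    return kwargs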
modules/knowledge_core_public.py
ADDED
@@ -0,0 +1,102 @@
import os
import json
import logging
import asyncio
import traceback
from typing import List

logger = logging.getLogger(__name__)

# ✅ Define Knowledge Storage Path
KNOWLEDGE_FILE = os.path.expanduser("~/zotheos_public/zotheos_knowledge.json")

class KnowledgeCore:
    """🚀 Public Version of Knowledge Core (Simplified Storage and Retrieval)"""

    def __init__(self):
        """✅ Initialize Knowledge Storage."""
        self.knowledge_store = {}
        self.load_knowledge()

    def load_knowledge(self):
        """✅ Load Knowledge from JSON."""
        try:
            if os.path.exists(KNOWLEDGE_FILE):
                with open(KNOWLEDGE_FILE, "r", encoding="utf-8") as file:
                    self.knowledge_store = json.load(file)
                logger.info(f"✅ Knowledge loaded from {KNOWLEDGE_FILE}")
            else:
                self.knowledge_store = {}
                logger.info(f"✅ Created new knowledge store at {KNOWLEDGE_FILE}")
        except Exception as e:
            logger.warning(f"⚠️ Error loading knowledge: {e}")
            self.knowledge_store = {}

    def save_knowledge(self):
        """✅ Save Knowledge to JSON."""
        try:
            with open(KNOWLEDGE_FILE, "w", encoding="utf-8") as file:
                json.dump(self.knowledge_store, file, indent=4)
            logger.info(f"✅ Knowledge saved to {KNOWLEDGE_FILE}")
        except Exception as e:
            logger.warning(f"⚠️ Error saving knowledge: {e}")

    async def update(self, new_knowledge: str, category: str = "general") -> bool:
        """✅ Add New Knowledge to the Store."""
        try:
            knowledge_id = str(len(self.knowledge_store) + 1)
            self.knowledge_store[knowledge_id] = {
                'content': new_knowledge,
                'category': category
            }
            self.save_knowledge()
            logger.info(f"✅ Knowledge added with ID {knowledge_id}")
            return True
        except Exception as e:
            logger.error(f"❌ Error updating knowledge: {traceback.format_exc()}")
            return False

    async def retrieve(self, query: str, n_results: int = 5) -> List[dict]:
        """🔎 Retrieve Relevant Knowledge Based on Simple Keyword Match."""
        try:
            matches = []
            for knowledge_id, data in self.knowledge_store.items():
                if query.lower() in data['content'].lower():
                    matches.append({
                        'id': knowledge_id,
                        'content': data['content'],
                        'category': data['category']
                    })
                    if len(matches) >= n_results:
                        break
            logger.info(f"✅ Retrieved {len(matches)} matches for query '{query}'")
            return matches
        except Exception as e:
            logger.error(f"❌ Error retrieving knowledge: {traceback.format_exc()}")
            return []

    async def reset(self) -> bool:
        """🗑️ Reset Knowledge Store (Delete All Stored Data)."""
        try:
            self.knowledge_store = {}
            self.save_knowledge()
            logger.info("✅ Knowledge store reset successfully.")
            return True
        except Exception as e:
            logger.error(f"❌ Error resetting knowledge store: {traceback.format_exc()}")
            return False

    async def delete_knowledge(self, knowledge_id: str) -> bool:
        """🗑️ Delete Specific Knowledge Entry by ID."""
        try:
            if knowledge_id in self.knowledge_store:
                del self.knowledge_store[knowledge_id]
                self.save_knowledge()
                logger.info(f"✅ Deleted knowledge ID: {knowledge_id}")
                return True
            else:
                logger.warning(f"⚠️ Knowledge ID {knowledge_id} not found.")
                return False
        except Exception as e:
            logger.error(f"❌ Error deleting knowledge ID {knowledge_id}: {traceback.format_exc()}")
            return False
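A minimal usage sketch for the class above (not part of the commit). Note that KnowledgeCore persists to ~/zotheos_public/zotheos_knowledge.json and save_knowledge only logs a warning on failure, so that directory must already exist for entries to survive restarts.

# Illustrative only; exercises the async KnowledgeCore API defined above.
import asyncio
from modules.knowledge_core_public import KnowledgeCore

async def demo():
    core = KnowledgeCore()
    await core.update("ZOTHEOS fuses perspectives from multiple local models.", category="docs")
    matches = await core.retrieve("perspectives", n_results=3)
    print(matches)

asyncio.run(demo())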
modules/main_fusion_public.py
ADDED
@@ -0,0 +1,379 @@
# FILE: modules/main_fusion_public.py (Finalized with Tier Logic, Async, Fusion Summary)

import asyncio
import logging
import os
import sys
import time
import json
from typing import Dict, Any, Optional, List, Union

try:
    # Correctly determine project_root assuming this file is in 'modules'
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    if project_root not in sys.path:
        sys.path.insert(0, project_root)
except Exception as e_path:
    # Basic logging if path setup fails, though critical
    logging.basicConfig(level=logging.ERROR)
    logging.error(f"Error setting up sys.path in main_fusion_public.py: {e_path}")

try:
    from modules.config_settings_public import (
        MODEL_PATHS, MAX_CONCURRENT_MODELS, MAX_RAM_MODELS_GB, # Used by ModelManager init
        DEFAULT_SYSTEM_PROMPT, SYSTEM_PERSONAS, INFERENCE_PRESETS, DEFAULT_INFERENCE_PRESET,
        MODEL_ROLES, MODEL_ROLE_SYSTEM_PROMPTS
    )
    from modules.model_manager_public import ModelManager
    from modules.memory_bank_public import MemoryBank
    from modules.user_auth import get_user_tier
except ImportError as e:
    logging.basicConfig(level=logging.CRITICAL, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logging.critical(f"CRITICAL IMPORT ERROR in main_fusion_public.py: {e}. ZOTHEOS may not function.", exc_info=True)
    # Provide fallbacks for critical components if possible, or allow failure
    # For now, if these fail, the __init__ will likely raise an error or log critical status.
    # Making the application fail loudly is often better than silent dysfunction.
    # Consider exiting if critical imports fail: sys.exit(f"Fatal Import Error in main_fusion_public.py: {e}")

# --- Start of FIX: Define LLAMA_CPP_AVAILABLE ---
try:
    # Attempt to import the core Llama class from llama_cpp
    from llama_cpp import Llama # You might also need LlamaCppError if you use it
    LLAMA_CPP_AVAILABLE = True
    # print("DEBUG: llama_cpp imported successfully, LLAMA_CPP_AVAILABLE=True") # Optional debug print
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    # print("DEBUG: llama_cpp import failed, LLAMA_CPP_AVAILABLE=False") # Optional debug print
except Exception as e_llama_import: # Catch other potential errors during import
    LLAMA_CPP_AVAILABLE = False
    # print(f"DEBUG: An unexpected error occurred during llama_cpp import: {e_llama_import}, LLAMA_CPP_AVAILABLE=False") # Optional debug print
# --- End of FIX ---

logger = logging.getLogger("ZOTHEOS_MainFusion")
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)


class MainFusionPublic:
    def __init__(self, device_preference: Optional[str] = "cuda"):
        logger.info("🚀 ZOTHEOS MainFusion Initializing (Tier Logic, Async Fusion Summary Enabled)...")

        self.config = {
            "MODEL_ROLES": MODEL_ROLES,
            "MODEL_ROLE_SYSTEM_PROMPTS": MODEL_ROLE_SYSTEM_PROMPTS,
            "DEFAULT_SYSTEM_PROMPT": DEFAULT_SYSTEM_PROMPT,
            "TIER_CONFIG": {
                "free": {"model_limit": 2, "memory_enabled": False, "display_name": "Free Tier"},
                "starter": {"model_limit": 3, "memory_enabled": True, "display_name": "Starter Tier"},
                # Pro uses max models from MODEL_PATHS, ensuring it uses all available configured models
                "pro": {"model_limit": len(MODEL_PATHS.keys()) if MODEL_PATHS else 3, "memory_enabled": True, "display_name": "Pro Tier"}
            }
        }
        self.models_last_queried_for_perspectives: List[str] = []

        try:
            # Pass global config values for ModelManager initialization
            logger.info(f"Initializing ModelManager with device_preference='{device_preference}', max_count={MAX_CONCURRENT_MODELS}, max_ram_gb={MAX_RAM_MODELS_GB}...")
            self.model_manager = ModelManager(
                device_preference=device_preference,
                max_model_count=MAX_CONCURRENT_MODELS,
                max_ram_models_gb=MAX_RAM_MODELS_GB
            )
            # active_model_names_in_order is the master list of all models ZOTHEOS *could* use
            self.active_model_names_in_order: List[str] = list(MODEL_PATHS.keys()) if MODEL_PATHS else []
            logger.info(f"✅ MainFusion initialized. Max available models for fusion: {self.active_model_names_in_order}")
        except Exception as e_mm_init:
            logger.critical(f"❌ CRITICAL: ModelManager failed to initialize in MainFusion: {e_mm_init}", exc_info=True)
            self.model_manager = None
            self.active_model_names_in_order = []

        try:
            self.memory_bank = MemoryBank()
            logger.info("✅ MemoryBank initialized.")
        except Exception as e_mb_init:
            logger.error(f"❌ MemoryBank failed to initialize: {e_mb_init}. Interactions may not be stored.", exc_info=True)
            self.memory_bank = None

        if not self.model_manager or not LLAMA_CPP_AVAILABLE: # Check if ModelManager itself or Llama backend failed
            logger.critical("MainFusion started in a DEGRADED state: ModelManager or Llama.cpp backend is UNAVAILABLE.")


    async def _get_single_model_response_direct(self, model_name: str, user_query: str, system_prompt_for_call: str, preset_name_for_call: str) -> Dict[str, Any]:
        response_text = f"[Error: Model '{model_name}' generation did not complete or model unavailable]"
        start_time_model = time.perf_counter()
        status = "Model Error or Unavailable"

        if not self.model_manager:
            logger.error(f"[{model_name}] ModelManager not available for generation.")
            return {"model": model_name, "text": "[Error: ModelManager is offline]", "time_ms": 0, "status": "ModelManager Offline"}

        try:
            logger.info(f"⚙️ [{model_name}] Calling ModelManager.generate_with_model. System Prompt: '{system_prompt_for_call[:50]}...'. User Query: '{user_query[:50].replace(chr(10),' ')}...'")
            # generate_with_model handles actual call to Llama instance via async_generation_wrapper
            response_text = await self.model_manager.generate_with_model(
                model_name=model_name,
                prompt=user_query,
                preset_name=preset_name_for_call,
                system_prompt=system_prompt_for_call
            )
            preview = response_text[:100].replace("\n", " ")
            if response_text.startswith(("[Error:", "[No text generated", "[Malformed response")) or not response_text.strip():
                logger.warning(f"⚠️ [{model_name}] Model returned an error string or empty content: {preview}...")
                status = "Model Error or Empty Response"
            else:
                logger.info(f"✅ [{model_name}] Response received ({len(response_text)} chars): {preview}...")
                status = "Success"
        except Exception as e:
            logger.error(f"❌ [{model_name}] Exception in _get_single_model_response_direct: {e}", exc_info=True)
            response_text = f"[Error: MainFusion encountered an exception during {model_name} generation: {type(e).__name__}]"
            status = "MainFusion Exception"
        inference_time_ms = (time.perf_counter() - start_time_model) * 1000
        return {"model": model_name, "text": response_text.strip(), "time_ms": round(inference_time_ms, 2), "status": status}

    async def _generate_true_fusion_summary(self, raw_responses_dict: Dict[str, str], original_query: str, models_actually_used: List[str]) -> str:
        logger.info(f"Attempting to generate a true fusion summary from {len(models_actually_used)} perspective(s).")
        summarizer_model_key = "gemma" # Default summarizer, ensure it's in MODEL_PATHS and loadable

        valid_responses_for_summary = {
            model: text for model, text in raw_responses_dict.items()
            if model in models_actually_used and text and not text.startswith("[Error:")
        }

        if not valid_responses_for_summary: # No valid responses at all
            logger.warning("No valid responses available to generate a fusion summary.")
            return "A summary could not be generated as no valid perspectives were successfully gathered."

        if len(valid_responses_for_summary) == 1:
            single_model_name = list(valid_responses_for_summary.keys())[0]
            logger.info(f"Only one valid perspective from {single_model_name}. Using it as the summary.")
            return f"Based on the single available perspective from **{single_model_name.capitalize()}**:\n\n{list(valid_responses_for_summary.values())[0]}"

        fusion_prompt = f"Original User Question: \"{original_query}\"\n\n"
        fusion_prompt += "You are the Fusion Summary engine for ZOTHEOS. Your task is to read the multiple perspectives provided below and write a concise, balanced, and synthesized summary. Capture the most important points, common themes, and notable differences from all viewpoints. Your tone should be thoughtful, neutral, and respectful. Structure the summary clearly.\n\n"

        for model_name, text in valid_responses_for_summary.items():
            role = self.config["MODEL_ROLES"].get(model_name, "General")
            fusion_prompt += f"--- PERSPECTIVE FROM {model_name.upper()} ({role.capitalize()}) ---\n{text}\n\n"

        fusion_prompt += "--- SYNTHESIZED SUMMARY (Combine the perspectives above into a unified insight) ---\n"

        logger.info(f"Calling summarizer model '{summarizer_model_key}' (from available: {self.active_model_names_in_order}) for fusion summary.")

        if not self.model_manager:
            logger.error("ModelManager not available for generating fusion summary.")
            return "[Error: Summarizer service unavailable - ModelManager offline]"

        try:
            # The system prompt for the summarizer itself
            summarizer_system_prompt = "You are an expert synthesis AI. Your role is to create a coherent and insightful summary from the provided texts."

            summary_text = await self.model_manager.generate_with_model(
                model_name=summarizer_model_key,
                prompt=fusion_prompt, # The full context with all perspectives
                preset_name="precise", # Use a precise preset for summarization
                system_prompt=summarizer_system_prompt
            )
            if summary_text and not summary_text.startswith("[Error:"):
                logger.info("✅ True fusion summary generated successfully.")
                return summary_text.strip()
            else:
                logger.warning(f"Summarizer model '{summarizer_model_key}' returned an error or empty response: {summary_text}")
                return "[Warning: Summary generation was partial or failed. Displaying raw perspectives.]"
        except Exception as e:
            logger.error(f"❌ Exception while generating true fusion summary with '{summarizer_model_key}': {e}", exc_info=True)
            return f"[Error: The summary generation process failed. Exception: {type(e).__name__}]"

    def _analyze_responses_basic(self, responses_dict: Dict[str, str], model_roles: Dict[str, str]) -> Dict[str, Any]:
        valid_responses = {model: text for model, text in responses_dict.items() if text and not text.startswith("[Error:")}
        consensus_points = []
        if len(valid_responses) > 1: consensus_points.append("Multiple perspectives were gathered and synthesized.")
        elif len(valid_responses) == 1: consensus_points.append("A single primary perspective was available for synthesis.")
        else: consensus_points.append("No valid primary perspectives were available for synthesis.")
        return {"consensus_points": consensus_points, "contradictions": [], "unique_insights": valid_responses}

    def _synthesize_fusion_response(self, analysis_result: dict, model_roles: dict, raw_responses_dict: dict, final_summary_text: str, models_used_for_perspectives: List[str]) -> str:
        response_parts = []

        response_parts.append("## ✨ ZOTHEOS Final Synthesized Insight ✨")
        response_parts.append(final_summary_text if final_summary_text and not final_summary_text.startswith(("[Error:", "[Warning:")) else "*Synthesis process encountered an issue or no summary was generated. Please see detailed perspectives below.*")
        response_parts.append("\n---\n")

        response_parts.append("### 💬 Detailed Individual Perspectives")
        has_any_valid_perspectives = False
        for model_name in models_used_for_perspectives:
            text = raw_responses_dict.get(model_name)
            role = model_roles.get(model_name, "General") # Default role
            response_parts.append(f"**Perspective from {model_name.capitalize()} ({role.capitalize()}):**")
            if text and not text.startswith("[Error:"):
                response_parts.append(text.strip())
                has_any_valid_perspectives = True
            else:
                response_parts.append(f"*{text if text else '[No response or error from this model.]'}*") # Display error or placeholder
            response_parts.append("")
        if not has_any_valid_perspectives and not (final_summary_text and not final_summary_text.startswith(("[Error:", "[Warning:"))):
            # If summary also failed/empty and no valid individual perspectives.
            response_parts = ["## ⚠️ ZOTHEOS Alert\n\nUnfortunately, ZOTHEOS encountered issues processing your query with all available AI cores for your tier. No insights could be gathered at this time. Please try rephrasing your query or try again later."]
        elif not has_any_valid_perspectives : # Summary might be there, but no individual details.
            response_parts.append("*No valid individual perspectives were successfully retrieved to display in detail.*")

        return "\n".join(response_parts).strip()

    async def process_query_with_fusion(
        self,
        query: str,
        user_token: Optional[str] = None,
        persona_key: Optional[str] = None,
        fusion_mode_override: str = "balanced",
        **kwargs
    ) -> str:
        process_start_time = time.time()
        current_tier_name = get_user_tier(user_token if user_token else "")
        tier_settings = self.config["TIER_CONFIG"].get(current_tier_name, self.config["TIER_CONFIG"]["free"])
        tier_model_limit = tier_settings["model_limit"]
        tier_memory_enabled = tier_settings["memory_enabled"]
        logger.info(f"User Tier: '{current_tier_name}' ({tier_settings['display_name']}). Model Limit: {tier_model_limit}, Memory: {'Enabled' if tier_memory_enabled else 'Disabled'}.")

        if not self.model_manager or not LLAMA_CPP_AVAILABLE: return "[Error: ZOTHEOS Core Model Manager not ready or Llama.cpp backend unavailable.]"
        if not self.active_model_names_in_order: return "[Error: ZOTHEOS Core not ready. No models configured in MODEL_PATHS.]"
        if not query or not query.strip(): return "[Error: Query is empty. Please provide a question or topic.]"

        current_query_text = query
        current_preset_name = fusion_mode_override if fusion_mode_override in INFERENCE_PRESETS else DEFAULT_INFERENCE_PRESET
        base_persona_prompt = SYSTEM_PERSONAS.get(persona_key or "default", self.config["DEFAULT_SYSTEM_PROMPT"])

        # Determine actual models to use based on tier limit and availability
        models_to_use_for_perspectives = [m for m in self.active_model_names_in_order if m in MODEL_PATHS][:tier_model_limit]
        self.models_last_queried_for_perspectives = models_to_use_for_perspectives # For status report

        if not models_to_use_for_perspectives:
            logger.error(f"No models available for tier '{current_tier_name}' after applying limit of {tier_model_limit}.")
            return f"[Error: No models available for your current tier ('{current_tier_name}').]"

        logger.info(f"🔎 Processing query. Models for perspectives (Tier: {current_tier_name}): {models_to_use_for_perspectives}. Preset: '{current_preset_name}'. Query: '{current_query_text[:60].replace(chr(10),' ')}...'")

        raw_responses_dict: Dict[str, str] = {}
        individual_results_for_memory: List[Dict[str, Any]] = []
        successful_responses = 0

        for model_name in models_to_use_for_perspectives:
            model_role = self.config["MODEL_ROLES"].get(model_name, "general")
            system_prompt_for_model = self.config["MODEL_ROLE_SYSTEM_PROMPTS"].get(model_role, base_persona_prompt)
            query_for_this_model = current_query_text
            if model_name.lower() == "gemma" and system_prompt_for_model:
                query_for_this_model = f"<start_of_turn>user\n{system_prompt_for_model.strip()}\n{current_query_text}<end_of_turn>\n<start_of_turn>model\n"
                system_prompt_for_model = ""
            model_output_data = await self._get_single_model_response_direct(model_name, query_for_this_model, system_prompt_for_model, current_preset_name)
            individual_results_for_memory.append(model_output_data)
            raw_responses_dict[model_name] = model_output_data.get("text", "[Error: No text field in response data]")
            if model_output_data.get("status") == "Success":
                successful_responses += 1

        synthesized_summary_text = await self._generate_true_fusion_summary(raw_responses_dict, current_query_text, models_to_use_for_perspectives)
        analysis_result = self._analyze_responses_basic(raw_responses_dict, self.config["MODEL_ROLES"]) # Basic analysis for now
        final_fused_output_content = self._synthesize_fusion_response(analysis_result, self.config["MODEL_ROLES"], raw_responses_dict, synthesized_summary_text, models_to_use_for_perspectives)

        persona_display = (persona_key or "default").capitalize()
        mode_display = current_preset_name.capitalize()
        tier_display_name = tier_settings.get("display_name", current_tier_name.capitalize())
        final_header = f"## 🧠 ZOTHEOS Fused Perspectives 🧠\n*(Fusion Mode: {mode_display} | Persona: {persona_display} | Tier: {tier_display_name})*\n\n"
        final_fused_output = final_header + final_fused_output_content

        if successful_responses == 0 and not "[Error:" in final_fused_output_content and not "[Warning:" in final_fused_output_content:
            logger.error(f"All models ({len(models_to_use_for_perspectives)}) failed for tier '{current_tier_name}'.")
            final_fused_output = final_header + "[Critical Error: ZOTHEOS was unable to obtain any valid responses from its AI cores for this query.]\n\n" + final_fused_output_content.split("\n\n",1)[-1]

        if tier_memory_enabled:
            if self.memory_bank:
                try:
                    memory_metadata = {
                        "user_token_used_prefix": user_token[:3] + "***" if user_token and len(user_token) > 3 else "N/A (No Token)" if not user_token else user_token,
                        "tier_at_interaction": current_tier_name,
                        "persona_key": persona_key or "default", "fusion_mode_used": current_preset_name,
                        # timestamp_iso is now added within store_memory_async
                        "duration_seconds": round(time.time() - process_start_time, 3),
                        "active_models_queried": models_to_use_for_perspectives, # Models actually used for perspectives
                        "individual_model_outputs": individual_results_for_memory, # Detailed dicts
                        "synthesized_summary_text": synthesized_summary_text, # The AI-generated summary
                        "fused_response_length_chars": len(final_fused_output),
                        "successful_model_responses": successful_responses,
                        "total_models_queried": len(models_to_use_for_perspectives)
                    }
                    await self.memory_bank.store_memory_async(query=current_query_text, response=final_fused_output, metadata=memory_metadata)
                except Exception as e_mem: logger.error(f"Failed to store fusion interaction in MemoryBank (Tier: '{current_tier_name}'): {e_mem}", exc_info=True)
            else: logger.warning(f"MemoryBank not initialized. Skipping storage (Tier: '{current_tier_name}').")
        else: logger.info(f"Memory storage disabled for tier '{current_tier_name}'. Skipping storage.")

        total_processing_time = round(time.time() - process_start_time, 2)
        logger.info(f"🧠 Fusion complete in {total_processing_time}s. Output len: {len(final_fused_output)}. Models used: {len(models_to_use_for_perspectives)} (Tier: {current_tier_name}).")
        return final_fused_output

    async def get_status_report(self) -> Dict[str, Any]:
        report: Dict[str, Any] = {
            "status": "Full Multi-Model Fusion Mode Active",
            "fusion_engine_status": "Online" if self.model_manager and self.active_model_names_in_order and LLAMA_CPP_AVAILABLE else "Degraded/Offline",
            "all_available_models": self.active_model_names_in_order,
            "models_last_queried_for_perspectives": getattr(self, 'models_last_queried_for_perspectives', []),
            "model_manager_status": "Online" if self.model_manager else "Offline/Init Failed",
            "llama_cpp_backend_available": LLAMA_CPP_AVAILABLE,
            "memory_bank_status": "Online" if self.memory_bank else "Offline/Init Failed"
        }
        if self.model_manager and hasattr(self.model_manager, 'get_loaded_model_stats'):
            try: report["model_manager_runtime_stats"] = self.model_manager.get_loaded_model_stats()
            except Exception as e: report["model_manager_runtime_stats"] = f"Error getting MM stats: {e}"
        else: report["model_manager_runtime_stats"] = "ModelManager N/A for stats."

        if self.memory_bank and hasattr(self.memory_bank, 'get_memory_stats'):
            try: report["memory_bank_stats"] = await self.memory_bank.get_memory_stats()
            except Exception as e: report["memory_bank_stats"] = f"Error getting MB stats: {e}"
        else: report["memory_bank_stats"] = "MemoryBank N/A for stats."
        return report

if __name__ == "__main__":
    if os.getenv("ZOTHEOS_DEBUG_DEPS", "false").lower() != "true":
        for lib_logger_name in ["torch", "huggingface_hub", "psutil", "llama_cpp", "httpx", "PIL"]: logging.getLogger(lib_logger_name).setLevel(logging.WARNING)
    logger.setLevel(logging.DEBUG)
    logger.info("--- MainFusionPublic (Tier Logic & Async Summary) CLI Test ---")
    async def run_main_fusion_cli_test_with_token(test_token=None, token_desc="Default (Free Tier)"):
        main_fusion_instance: Optional[MainFusionPublic] = None
        logger.info(f"\n--- Testing with token: '{token_desc}' ---")
        try:
            main_fusion_instance = MainFusionPublic(device_preference="cuda")
            if not main_fusion_instance.model_manager or not main_fusion_instance.active_model_names_in_order or not LLAMA_CPP_AVAILABLE:
                logger.critical("CLI Test Aborted: MainFusion init failed (MM or LlamaCPP unavailable)."); return
            test_query = "What are the core principles of Stoicism and how can they be applied in modern life?"
            logger.info(f"CLI Test: Querying (Token: {test_token[:3] + '...' if test_token and len(test_token)>3 else test_token}): '{test_query}'")
            response = await main_fusion_instance.process_query_with_fusion(query=test_query, user_token=test_token, persona_key="philosopher", fusion_mode_override="balanced")
            print("\n" + "="*25 + f" CLI Test Response ({token_desc}) " + "="*25); print(response); print("="* (50 + len(f" CLI Test Response ({token_desc}) ") + 2))
            status = await main_fusion_instance.get_status_report()
            print("\nSystem Status Report After Query:"); print(json.dumps(status, indent=2, default=str))
        except Exception as e: logger.critical(f"Error during CLI test ({token_desc}): {e}", exc_info=True); print(f"🚨 CLI Test Error ({token_desc}): {e}")
        finally:
            if main_fusion_instance and main_fusion_instance.model_manager and hasattr(main_fusion_instance.model_manager, 'shutdown'):
                logger.info(f"CLI Test ({token_desc}): Shutting down ModelManager...")
                main_fusion_instance.model_manager.shutdown() # This is synchronous
            logger.info(f"🛑 MainFusion CLI test ({token_desc}) shutdown.")
    async def run_all_cli_tests():
        # Ensure stripe_users.json is in project root for this test to work with tokens
        tokens_to_test = {
            None: "No Token (Defaults to Free)",
            "TOKEN_FOR_FREE_TEST": "Free Tier Token", # Add this to your stripe_users.json
            "TOKEN_FOR_STARTER_TEST": "Starter Tier Token", # Add this
            "TOKEN_FOR_PRO_TEST": "Pro Tier Token" # Add this
        }
        # Create dummy stripe_users.json if not exists for test
        if not os.path.exists(os.path.join(project_root, "stripe_users.json")):
            logger.warning("Creating dummy stripe_users.json for CLI test.")
            dummy_users = {t: t.split('_')[1].lower() for t in tokens_to_test if t} # type: ignore
            with open(os.path.join(project_root, "stripe_users.json"), "w") as f:
                json.dump(dummy_users, f, indent=2)

        for token, desc in tokens_to_test.items():
            await run_main_fusion_cli_test_with_token(token, desc)

    asyncio.run(run_all_cli_tests())
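As a rough sketch of how a synchronous front end (for example, the Space's UI callback) might bridge into this async API. This is illustrative only, not code from the commit; the function name answer and the device_preference="cpu" value are assumptions for a CPU-only Space.

# Illustrative integration sketch; names and values here are assumptions.
import asyncio
from modules.main_fusion_public import MainFusionPublic

fusion_engine = MainFusionPublic(device_preference="cpu")

def answer(query: str, user_token: str = "") -> str:
    # Bridge a synchronous UI callback into the async fusion pipeline.
    return asyncio.run(fusion_engine.process_query_with_fusion(query=query, user_token=user_token))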
modules/memory_bank_public.py
ADDED
@@ -0,0 +1,354 @@
# FILE: modules/memory_bank_public.py (Finalized for Beta Checklist & Async Correctness)

import os
import json
import time
import asyncio
import logging
import traceback # For more detailed error logging if needed
from typing import List, Dict, Optional, Any
from json import JSONDecodeError
from datetime import datetime, timezone # For ISO timestamps
from pathlib import Path # For home directory in export
import sys # For logger setup if run standalone

logger = logging.getLogger("ZOTHEOS_MemoryBank")
if not logger.handlers:
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

# --- Define memory file paths and limits ---
try:
    # Path logic for when memory_bank_public.py is in 'modules' folder
    # and data directory is in project root ('../zotheos_public_data')
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    project_root_dir = os.path.dirname(current_file_dir) # This is ZOTHEOS_Release_Package

    # Check if running from PyInstaller bundle
    if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
        # If bundled, data directory is relative to sys._MEIPASS (the temp extraction folder)
        DATA_BASE_DIR = os.path.join(sys._MEIPASS, "zotheos_public_data")
    else:
        # If running as script, data directory is relative to project root
        DATA_BASE_DIR = os.path.join(project_root_dir, "zotheos_public_data")

    if not os.path.exists(DATA_BASE_DIR):
        os.makedirs(DATA_BASE_DIR, exist_ok=True)
        logger.info(f"Created data base directory: {DATA_BASE_DIR}")

    MEMORY_DIR = os.path.join(DATA_BASE_DIR, "zotheos_memory")
    if not os.path.exists(MEMORY_DIR):
        os.makedirs(MEMORY_DIR, exist_ok=True)
        logger.info(f"Created memory directory: {MEMORY_DIR}")

except Exception as e:
    logger.critical(f"❌ Failed to setup base/memory directory for MemoryBank: {e}. Using fallback in user home.")
    MEMORY_DIR = os.path.join(os.path.expanduser("~"), ".zotheos", "zotheos_memory")
    os.makedirs(MEMORY_DIR, exist_ok=True)


MEMORY_FILE_PATH = os.path.join(MEMORY_DIR, "zotheos_memory.json")
MEMORY_FILE_TMP = os.path.join(MEMORY_DIR, "zotheos_memory_tmp.json")
MEMORY_SIZE_LIMIT = 1000
MEMORY_SCHEMA_VERSION = 1.2 # Bumped for metadata structure in entry

class MemoryBank:
    def __init__(self):
        self.memory_list: List[Dict[str, Any]] = []
        self.memory_dict: Dict[str, Dict[str, Any]] = {} # For ID-based lookups, ID should be string
        self.next_id = 0
        logger.info(f"🧠 Initializing Memory Bank. Memory file: {MEMORY_FILE_PATH}")
        self._load_memory() # Load initial state

        if self.memory_list:
            try:
                # Ensure IDs are treated as integers for max() if they are numeric strings
                numeric_ids = [int(m.get('id', -1)) for m in self.memory_list if str(m.get('id', '')).isdigit()]
                if numeric_ids:
                    max_id = max(numeric_ids)
                    self.next_id = max(max_id + 1, len(self.memory_list))
                else: # No numeric IDs found
                    self.next_id = len(self.memory_list)
            except ValueError: # Fallback if conversion to int fails for some reason
                self.next_id = len(self.memory_list)
            logger.info(f"Loaded {len(self.memory_list)} memories. Next ID set to {self.next_id}.")
        else:
            logger.info("Initialized with empty memory.")

    def _reset_memory_state(self):
        self.memory_list = []
        self.memory_dict = {}
        self.next_id = 0
        logger.info("Memory state has been reset.")

    def _load_memory(self):
        logger.info(f"Attempting to load memory from {MEMORY_FILE_PATH}...")
        try:
            if os.path.exists(MEMORY_FILE_PATH):
                with open(MEMORY_FILE_PATH, "r", encoding="utf-8") as file:
                    data = json.load(file)
                if isinstance(data, dict) and "entries" in data and isinstance(data["entries"], list):
                    self.memory_list = data["entries"]
                    # Rebuild dictionary, ensuring IDs are strings for keys
                    self.memory_dict = {str(m['id']): m for m in self.memory_list if m.get('id') is not None}
                    logger.info(f"✅ Successfully loaded {len(self.memory_list)} memory entries (schema version: {data.get('schema_version', 'Unknown')}).")
                elif isinstance(data, list):
                    logger.warning(f"Old memory format (list) detected. Converting and saving in new format.")
                    self.memory_list = data
                    self.memory_dict = {str(m.get('id','')): m for m in self.memory_list if m.get('id') is not None}
                    self._save_memory() # Save immediately in new format
                else:
                    logger.warning(f"⚠️ Memory file {MEMORY_FILE_PATH} has an unexpected main structure. Resetting memory.")
                    self._reset_memory_state()
            else:
                logger.info(f"✅ No existing memory file found at {MEMORY_FILE_PATH}. Starting fresh.")
                self._reset_memory_state()
        except JSONDecodeError as e:
            logger.error(f"❌ Error decoding JSON from memory file {MEMORY_FILE_PATH}: {e}. File might be corrupted. Resetting memory.", exc_info=False)
            self._handle_corrupted_memory_file()
        except FileNotFoundError:
            logger.info(f"✅ Memory file {MEMORY_FILE_PATH} not found. Starting fresh (FileNotFound).")
            self._reset_memory_state()
        except Exception as e:
            logger.error(f"❌ Unexpected error loading memory: {e}. Resetting memory.", exc_info=True)
            self._reset_memory_state()

    def _handle_corrupted_memory_file(self):
        self._reset_memory_state()
        if os.path.exists(MEMORY_FILE_PATH):
            try:
                corrupt_backup_path = f"{MEMORY_FILE_PATH}.corrupt_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                os.rename(MEMORY_FILE_PATH, corrupt_backup_path)
                logger.info(f"Backed up corrupted memory file to {corrupt_backup_path}")
            except OSError as backup_err:
                logger.error(f"Failed to backup corrupted memory file: {backup_err}")

    def _save_memory(self): # This is synchronous
        logger.debug(f"Attempting atomic save to {MEMORY_FILE_PATH}...")
        try:
            data_to_save = {"schema_version": MEMORY_SCHEMA_VERSION, "entries": self.memory_list}
            with open(MEMORY_FILE_TMP, "w", encoding="utf-8") as file:
                json.dump(data_to_save, file, indent=2)
            os.replace(MEMORY_FILE_TMP, MEMORY_FILE_PATH)
            logger.info(f"✅ Memory saved successfully ({len(self.memory_list)} entries).")
        except Exception as e:
            logger.error(f"❌ Error saving memory: {e}", exc_info=True)
            if os.path.exists(MEMORY_FILE_TMP):
                try: os.remove(MEMORY_FILE_TMP)
                except OSError: pass
        finally:
            if os.path.exists(MEMORY_FILE_TMP):
                try: os.remove(MEMORY_FILE_TMP)
                except OSError as e_rem: logger.warning(f"Could not remove temp memory file {MEMORY_FILE_TMP}: {e_rem}")

    async def save_memory_async(self):
        logger.debug("Scheduling asynchronous memory save...")
        await asyncio.to_thread(self._save_memory)

    async def store_memory_async(self, query: str, response: str, metadata: Optional[Dict[str, Any]] = None):
        if not query or not response:
            logger.warning("⚠️ Attempted to store empty query or response. Skipping.")
            return
        try:
            current_id_num = self.next_id
            current_metadata = metadata.copy() if metadata is not None else {} # Work with a copy

            # Ensure a proper ISO-formatted timestamp is in metadata
            current_metadata['timestamp_iso'] = datetime.now(timezone.utc).isoformat()

            memory_entry = {
                'id': str(current_id_num),
                'query': query,
                'response': response, # This is the full_fused_output from main_fusion
                'created_at_unix': time.time(), # Retain for fallback sorting
                'schema_version': MEMORY_SCHEMA_VERSION,
                'metadata': current_metadata # This now contains timestamp_iso and other details
            }
            self.next_id += 1
            self.memory_list.append(memory_entry)
            self.memory_dict[str(current_id_num)] = memory_entry
            logger.info(f"Stored memory entry ID {current_id_num}.")

            removed_count = 0
            while len(self.memory_list) > MEMORY_SIZE_LIMIT:
                oldest_memory = self.memory_list.pop(0)
                oldest_id = str(oldest_memory.get('id', ''))
                if oldest_id and oldest_id in self.memory_dict: del self.memory_dict[oldest_id]
                removed_count += 1
            if removed_count > 0: logger.info(f"Removed {removed_count} oldest entries for size limit.")
            await self.save_memory_async()
        except Exception as e: logger.error(f"❌ Error storing memory entry: {e}", exc_info=True)

    async def retrieve_recent_memories_async(self, limit: int = 5) -> List[Dict[str, Any]]:
        logger.debug(f"Retrieving up to {limit} recent memories, sorted.")
        if not self.memory_list: return []
        try:
            def get_sort_key(entry):
                ts_iso = entry.get('metadata', {}).get('timestamp_iso')
                # Fallback to created_at_unix if timestamp_iso is missing or unparsable
                if ts_iso:
                    try: return datetime.fromisoformat(ts_iso.replace('Z', '+00:00'))
                    except ValueError:
                        logger.warning(f"Could not parse timestamp_iso '{ts_iso}' for entry ID {entry.get('id')}. Falling back to created_at_unix.")
                        pass # Fall through to use created_at_unix
                return datetime.fromtimestamp(entry.get('created_at_unix', 0), timezone.utc)

            # Make a copy for sorting to avoid modifying self.memory_list if other operations occur
            sorted_entries = sorted(list(self.memory_list), key=get_sort_key, reverse=True)

            actual_limit = max(0, min(limit, len(sorted_entries)))
            recent_sorted_memories = sorted_entries[:actual_limit]
            logger.info(f"Retrieved {len(recent_sorted_memories)} recent memories (sorted).")
            return recent_sorted_memories
        except Exception as e:
            logger.error(f"❌ Error retrieving and sorting recent memories: {e}", exc_info=True)
            # Fallback to unsorted last N if sorting fails
            return self.memory_list[-limit:][::-1] if limit > 0 and self.memory_list else []

    def load_all_memory_entries_structured(self) -> List[Dict[str, Any]]:
        logger.info(f"Loading all {len(self.memory_list)} memory entries (structured).")
        return list(self.memory_list)

    async def load_all_memory_entries_structured_async(self) -> List[Dict[str, Any]]:
        logger.info(f"Asynchronously loading all {len(self.memory_list)} memory entries (structured).")
        return list(self.memory_list) # For now, direct copy is fine as it's in-memory

    async def retrieve_memory_by_id(self, memory_id: str) -> Optional[Dict[str, Any]]:
        memory_id_str = str(memory_id)
        try:
            memory = self.memory_dict.get(memory_id_str)
            if memory: logger.info(f"Retrieved memory entry ID {memory_id_str}.")
            else: logger.warning(f"Memory ID {memory_id_str} not found in dictionary.")
            return memory
        except Exception as e: logger.error(f"❌ Error retrieving memory ID {memory_id_str}: {e}", exc_info=True); return None

    async def retrieve_last_response(self) -> Optional[str]:
        if not self.memory_list: logger.info("Retrieve last response: Memory list is empty."); return None
        try:
            last_entry = self.memory_list[-1]; response = last_entry.get('response')
            if isinstance(response, str) and response.strip(): logger.info("Retrieve last response: Found valid response."); return response
            else: logger.warning(f"Retrieve last response: Last entry (ID {last_entry.get('id', 'N/A')}) has empty response."); return None
        except Exception as e: logger.error(f"❌ Error retrieving last response: {e}", exc_info=True); return None

    async def clear_all_memory(self):
        logger.warning("Initiating complete memory wipe...")
        try: self._reset_memory_state(); await self.save_memory_async(); logger.info("✅ All memory cleared successfully."); return True
        except Exception as e: logger.error(f"❌ Error clearing memory: {e}", exc_info=True); return False

    async def delete_memory_by_id(self, memory_id: str):
        logger.warning(f"Attempting to delete memory ID {memory_id}...")
        memory_id_str = str(memory_id)
        try:
            if memory_id_str in self.memory_dict:
                del self.memory_dict[memory_id_str]
                self.memory_list = [m for m in self.memory_list if str(m.get('id', '')) != memory_id_str]
                await self.save_memory_async()
                logger.info(f"✅ Memory with ID {memory_id_str} deleted successfully.")
                return True
            else:
                logger.warning(f"⚠️ Memory ID {memory_id_str} not found for deletion.")
                return False
        except Exception as e:
            logger.error(f"❌ Error deleting memory ID {memory_id_str}: {e}", exc_info=True)
            return False

    async def get_memory_stats(self) -> Dict[str, Any]: # This is now async
        logger.info("Calculating memory statistics...")
        stats: Dict[str, Any] = {'total_entries': len(self.memory_list), 'disk_usage_mb': 0.0, 'memory_limit': MEMORY_SIZE_LIMIT, 'next_id': self.next_id, 'schema_version': MEMORY_SCHEMA_VERSION }
        try:
            if os.path.exists(MEMORY_FILE_PATH):
                file_size_bytes = await asyncio.to_thread(os.path.getsize, MEMORY_FILE_PATH)
                stats['disk_usage_mb'] = round(file_size_bytes / (1024 * 1024), 3)
            logger.info(f"Memory Stats: {stats}")
        except Exception as e: logger.error(f"❌ Error calculating memory file size: {e}", exc_info=True)
        return stats

    def export_memory_to_file_sync(self) -> Optional[str]:
        """Synchronously exports memory file. Returns exported file path or None."""
        # NOTE (Future Personalization): Consider allowing user to choose export location via UI dialog.
        if not os.path.exists(MEMORY_FILE_PATH):
            logger.warning("No memory file to export because it doesn't exist.")
            return None
        if not self.memory_list: # Also check if there are any entries to export
            logger.warning("No memory entries to export, memory file might be empty or just schema.")
            # Decide if you want to export an empty "entries" file or return None
            # For now, let's allow exporting an empty structure.
            # return None

        try:
            export_dir = Path.home() / "Desktop"
            if not (export_dir.exists() and export_dir.is_dir()):
                export_dir = Path.home() / "Downloads"
                if not (export_dir.exists() and export_dir.is_dir()):
                    export_dir = Path(MEMORY_DIR) # Fallback
                    logger.warning(f"Desktop/Downloads not found/accessible, exporting to memory directory: {export_dir}")
            os.makedirs(export_dir, exist_ok=True) # Ensure export_dir exists

            timestamp_str = datetime.now().strftime('%Y%m%d_%H%M%S')
            export_filename = f"zotheos_memory_export_{timestamp_str}.json"
            export_full_path = export_dir / export_filename

            import shutil
            shutil.copy2(MEMORY_FILE_PATH, export_full_path)

            logger.info(f"✅ Memory successfully exported to: {export_full_path}")
            return str(export_full_path)
        except Exception as e:
            logger.error(f"❌ Failed to export memory: {e}", exc_info=True)
            return None

    async def export_memory_to_file_async(self) -> Optional[str]:
        """Asynchronously exports the memory file."""
        logger.info("Scheduling asynchronous memory export...")
        return await asyncio.to_thread(self.export_memory_to_file_sync)


async def main_test():
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    logger.info("--- MemoryBank Test ---")
    mb = MemoryBank()

    logger.info(f"Initial memory file path: {MEMORY_FILE_PATH}")
    if os.path.exists(MEMORY_FILE_PATH):
        logger.info("Memory file exists. Clearing for fresh test.")
        await mb.clear_all_memory()
    else:
        logger.info("No pre-existing memory file found for test.")

    stats1 = await mb.get_memory_stats()
    logger.info(f"Stats after potential clear: {stats1}")
|
324 |
+
|
325 |
+
# Test Store
|
326 |
+
await mb.store_memory_async("Query 1", "Response 1", metadata={"custom_field": "value1", "tier_at_interaction": "free"})
|
327 |
+
await asyncio.sleep(0.01)
|
328 |
+
await mb.store_memory_async("Query 2", "Response 2", metadata={"user_token_used_prefix": "tes***", "synthesized_summary_text": "Summary for Q2"})
|
329 |
+
await asyncio.sleep(0.01)
|
330 |
+
await mb.store_memory_async("Query 3", "Response 3", metadata={"tier_at_interaction": "pro", "synthesized_summary_text": "Summary for Q3"})
|
331 |
+
await asyncio.sleep(0.01)
|
332 |
+
await mb.store_memory_async("Query 4", "Response 4", metadata={}) # No extra metadata
|
333 |
+
await asyncio.sleep(0.01)
|
334 |
+
await mb.store_memory_async("Query 5", "Response 5", metadata={"synthesized_summary_text": "This is summary 5."})
|
335 |
+
await asyncio.sleep(0.01)
|
336 |
+
await mb.store_memory_async("Query 6", "Response 6", metadata={"synthesized_summary_text": "This is summary 6, a bit longer than the preview."})
|
337 |
+
|
338 |
+
recent_for_display = await mb.retrieve_recent_memories_async(limit=5)
|
339 |
+
logger.info(f"Recent 5 (for display, should be newest first - Q6, Q5, Q4, Q3, Q2):")
|
340 |
+
for i, item in enumerate(recent_for_display):
|
341 |
+
ts = item.get('metadata',{}).get('timestamp_iso', item.get('created_at_unix'))
|
342 |
+
logger.info(f" {i+1}. ID: {item.get('id')}, Timestamp: {ts}, Query: {item.get('query')[:20]}..., Summary in meta: {'synthesized_summary_text' in item.get('metadata', {})}")
|
343 |
+
|
344 |
+
stats2 = await mb.get_memory_stats()
|
345 |
+
logger.info(f"Memory Stats after storing: {stats2}")
|
346 |
+
|
347 |
+
exported_file = await mb.export_memory_to_file_async()
|
348 |
+
if exported_file: logger.info(f"Test export successful: {exported_file}")
|
349 |
+
else: logger.error("Test export failed.")
|
350 |
+
|
351 |
+
logger.info("--- MemoryBank Test Complete ---")
|
352 |
+
|
353 |
+
if __name__ == "__main__":
|
354 |
+
asyncio.run(main_test())
|
modules/model_manager_public.py
ADDED
@@ -0,0 +1,401 @@
# FILE: modules/model_manager_public.py

import asyncio
import logging
import os
import sys
import time
import threading
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
from typing import Dict, Any, Optional, List, Callable, Coroutine

# --- Llama.cpp Python Backend Import & Debug ---
logger_import_debug = logging.getLogger("ModelManager_ImportDebug")
if not logger_import_debug.handlers: # Minimal setup for this specific logger
    _h = logging.StreamHandler(sys.stdout)
    _f = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    _h.setFormatter(_f)
    logger_import_debug.addHandler(_h)
    logger_import_debug.setLevel(logging.INFO)

# --- Start of FIX: Define LlamaMock unconditionally and manage Llama variable ---
class LlamaMock:
    def __init__(self, model_path: str = "mock_model_path", *args, **kwargs):
        # Use logger_import_debug as it's defined at this point
        logger_import_debug.error(f"LlamaMock initialized for model_path='{model_path}' with args: {args}, kwargs: {kwargs}. llama_cpp is not installed or importable.")
        self.model_path = model_path
        self.n_ctx_train = kwargs.get('n_ctx', 0) # Mock common attributes
        self.metadata = {}

    def create_chat_completion(self, messages: List[Dict[str,str]], *args, **kwargs) -> Dict[str, Any]:
        logger_import_debug.error(f"LlamaMock: create_chat_completion called for '{self.model_path}', but llama_cpp is unavailable.")
        return {
            "id": "chatcmpl-mock", "object": "chat.completion", "created": int(time.time()),
            "model": self.model_path,
            "choices": [{"index": 0, "message": {"role": "assistant", "content": f"[Error: Llama.cpp backend not available for {self.model_path}]"}, "finish_reason": "stop"}],
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        }

    def __call__(self, prompt: str, *args, **kwargs) -> Dict[str, Any]: # For older direct call style
        logger_import_debug.error(f"LlamaMock: __call__ used for '{self.model_path}', but llama_cpp is unavailable.")
        return {
            "id": "cmpl-mock", "object": "text_completion", "created": int(time.time()),
            "model": self.model_path,
            "choices": [{"text": f"[Error: Llama.cpp backend not available for {self.model_path}]", "index": 0, "logprobs": None, "finish_reason": "stop"}],
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        }

    def get_metadata(self) -> Dict[str, Any]:
        return self.metadata

    def tokenizer(self) -> Any:
        class MockTokenizer:
            def encode(self, text: str) -> List[int]: return [0] * len(text)
            def decode(self, tokens: List[int]) -> str: return "".join([chr(t) for t in tokens if t < 256]) # Simplistic
        return MockTokenizer()

# Initialize variables that will hold the class to be used
Llama_imported_class: Any = LlamaMock # Default to mock
LlamaCppError_imported_class: Any = Exception # Default to base Exception
LLAMA_CPP_AVAILABLE: bool = False

try:
    from llama_cpp import Llama as RealLlama, LlamaCppError as RealLlamaCppSpecificError
    Llama_imported_class = RealLlama
    LlamaCppError_imported_class = RealLlamaCppSpecificError
    LLAMA_CPP_AVAILABLE = True
    logger_import_debug.info("🎉 Final llama-cpp-python status: AVAILABLE. Using REAL Llama class. Error catching for Llama.cpp specifics will use 'LlamaCppSpecificError'.")
except ImportError:
    logger_import_debug.warning("Could not import 'Llama' and 'LlamaCppSpecificError' from 'llama_cpp' directly. Trying simpler import...")
    try:
        from llama_cpp import Llama as RealLlamaOnly # Try again without specific error import (older versions)
        Llama_imported_class = RealLlamaOnly
        # LlamaCppError_imported_class remains Exception
        LLAMA_CPP_AVAILABLE = True
        logger_import_debug.warning("Note: 'LlamaCppError' not found at llama_cpp top-level. Using base 'Exception' for LlamaCpp errors.")
        logger_import_debug.info("🎉 Final llama-cpp-python status: AVAILABLE (LlamaCppError not found). Using REAL Llama class.")
    except ImportError:
        # Llama_imported_class remains LlamaMock, LLAMA_CPP_AVAILABLE remains False
        logger_import_debug.critical("CRITICAL FAILURE: Cannot import 'Llama' from 'llama_cpp': No module named 'llama_cpp'. Using LlamaMock. Model loading will fail for real models.")
except Exception as e_import_general:
    # Llama_imported_class remains LlamaMock, LLAMA_CPP_AVAILABLE remains False
    logger_import_debug.critical(f"CRITICAL FAILURE: Unexpected error during 'llama_cpp' import: {e_import_general}. Using LlamaMock.", exc_info=True)

# Assign to the names used throughout the rest of this module
Llama = Llama_imported_class
LlamaCppError = LlamaCppError_imported_class
# --- End of FIX ---
# --- End Llama.cpp Python Backend Import & Debug ---


try:
    # Assume config_settings_public.py is in the same 'modules' directory or project root is in sys.path
    from modules.config_settings_public import (
        MODEL_PATHS, MODEL_SPECIFIC_PARAMS, INFERENCE_PRESETS, N_CTX_FALLBACK, VERBOSE_LLAMA_CPP
    )
except ImportError as e_conf:
    logging.critical(f"CRITICAL IMPORT ERROR in ModelManager: Cannot import from config_settings_public: {e_conf}")
    # Provide minimal fallbacks if config cannot be loaded, though this is a critical failure
    MODEL_PATHS = {}
    MODEL_SPECIFIC_PARAMS = {"_default": {"n_ctx": 2048, "n_gpu_layers": -1, "verbose": False}}
    INFERENCE_PRESETS = {"default": {"temperature": 0.7}}
    N_CTX_FALLBACK = 2048
    VERBOSE_LLAMA_CPP = False


logger = logging.getLogger("ZOTHEOS_ModelManager")
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

# Timeout for model generation tasks when run in a thread
GENERATION_THREAD_TIMEOUT_SECONDS = 300 # 5 minutes

class ModelManager:
    def __init__(self, device_preference: Optional[str] = "cuda", max_model_count: int = 1, max_ram_models_gb: float = 8.0):
        self.device_preference = device_preference
        self.loaded_models: Dict[str, Any] = {} # Stores Llama instances
        self.model_load_status: Dict[str, str] = {} # "unloaded", "loading", "loaded", "error"
        self.model_ram_estimate_gb: Dict[str, float] = {}
        self.total_estimated_ram_gb: float = 0.0
        self.max_model_count = max_model_count
        self.max_ram_models_gb = max_ram_models_gb
        self.model_load_order: List[str] = [] # Tracks order for LRU
        self.model_locks: Dict[str, asyncio.Lock] = {} # Per-model lock for loading
        self.executor = ThreadPoolExecutor(max_workers=max_model_count + 1, thread_name_prefix="LLM_Gen") # +1 for summarizer

        dev_log = "CPU (default)"
        if self.device_preference == "cuda":
            # Check NVIDIA drivers and CUDA toolkit availability (conceptual)
            try:
                import torch
                if torch.cuda.is_available():
                    dev_log = f"CUDA (preference): {torch.cuda.get_device_name(0)}"
                else:
                    dev_log = "CUDA (preference, but torch.cuda.is_available()=False)"
            except ImportError:
                dev_log = "CUDA (preference, but PyTorch not found for detailed check)"
            except Exception as e_cuda_check:
                dev_log = f"CUDA (preference, error during torch check: {e_cuda_check})"
            # Actual CUDA usage is determined by n_gpu_layers > 0 during Llama init

        logger.info(f"🔥 MMgr init. Dev Preference: {dev_log}. MaxRAM Models: {self.max_ram_models_gb}GB. Max Count: {self.max_model_count}.")
        if not LLAMA_CPP_AVAILABLE:
            logger.critical("❌ Llama.cpp backend CRITICAL FAILURE: The 'Llama' class could not be imported from 'llama_cpp'. ModelManager cannot load GGUF models. Please check installation and logs from 'ModelManager_ImportDebug'.")
        logger.info(f"✅ MMGR Config: Max Models: {self.max_model_count}, Max RAM: {self.max_ram_models_gb} GB.")

    async def _ensure_model_loaded(self, model_name: str) -> bool:
        if model_name not in self.model_locks:
            self.model_locks[model_name] = asyncio.Lock()

        async with self.model_locks[model_name]:
            if self.model_load_status.get(model_name) == "loaded" and model_name in self.loaded_models:
                if model_name in self.model_load_order: self.model_load_order.remove(model_name)
                self.model_load_order.append(model_name) # Move to end of LRU
                return True

            if self.model_load_status.get(model_name) == "loading":
                logger.info(f"Model '{model_name}' is already being loaded by another task. Waiting...")
                while self.model_load_status.get(model_name) == "loading": await asyncio.sleep(0.5)
                return self.model_load_status.get(model_name) == "loaded"

            self.model_load_status[model_name] = "loading"
            logger.info(f"Attempting to load model '{model_name}'...")

            await self._evict_models_if_needed(new_model_name_to_load=model_name)

            model_instance, ram_gb = await asyncio.to_thread(self._load_model_from_disk, model_name)
            if model_instance:
                self.loaded_models[model_name] = model_instance
                self.model_ram_estimate_gb[model_name] = ram_gb
                self.total_estimated_ram_gb += ram_gb
                self.model_load_status[model_name] = "loaded"
                if model_name in self.model_load_order: self.model_load_order.remove(model_name)
                self.model_load_order.append(model_name)
                logger.info(f"✅ Model '{model_name}' loaded successfully. RAM used by this model: {ram_gb:.2f}GB. Total est. RAM: {self.total_estimated_ram_gb:.2f}GB.")
                return True
            else:
                self.model_load_status[model_name] = "error"
                logger.error(f"❌ Failed to load model '{model_name}'.")
                # Clean up RAM if an estimate was added prematurely or if partial load occurred
                if model_name in self.model_ram_estimate_gb:
                    self.total_estimated_ram_gb -= self.model_ram_estimate_gb.pop(model_name)
                return False

    def _load_model_from_disk(self, model_name: str) -> tuple[Optional[Any], float]:
        # The check Llama == LlamaMock now works because LlamaMock is always defined
        if not LLAMA_CPP_AVAILABLE or Llama == LlamaMock: # Llama is the variable pointing to the class
            logger.error(f"Cannot load model '{model_name}': Llama.cpp backend not available (LLAMA_CPP_AVAILABLE={LLAMA_CPP_AVAILABLE}, Llama is LlamaMock: {Llama == LlamaMock}).")
            return None, 0.0

        model_path = MODEL_PATHS.get(model_name)
        if not model_path or not os.path.exists(model_path):
            logger.error(f"Model path for '{model_name}' not found or invalid: {model_path}")
            return None, 0.0

        specific_params = MODEL_SPECIFIC_PARAMS.get(model_name, {})
        default_params = MODEL_SPECIFIC_PARAMS.get("_default", {})

        load_params = default_params.copy()
        load_params.update(specific_params)
        load_params['model_path'] = model_path
        load_params.setdefault('verbose', VERBOSE_LLAMA_CPP) # Use global verbose if not set per model

        if 'n_ctx' not in load_params or load_params['n_ctx'] == 0: # Ensure n_ctx is valid
            load_params['n_ctx'] = N_CTX_FALLBACK
            logger.warning(f"n_ctx for model {model_name} was invalid or not found, using fallback: {N_CTX_FALLBACK}")

        logger.info(f"🔄 Loading '{model_name}' from: {model_path}")
        logger.debug(f"--- FINAL LOAD PARAMS FOR {model_name.upper()}: {load_params} ---")

        start_time = time.perf_counter()
        try:
            model_instance = Llama(**load_params) # Llama here is the variable assigned above
            load_time = time.perf_counter() - start_time
            logger.info(f"✅ Model '{model_name}' (Llama instance) initialized by Llama(...). Load time: {load_time:.2f}s.")

            ram_gb = os.path.getsize(model_path) / (1024**3)
            ram_gb_with_buffer = ram_gb * 1.5 + 1 # Heuristic: file size + 50% + 1GB for KV cache, etc.
            logger.info(f"Model '{model_name}' path: '{model_path}'. Est. RAM for this model (with buffer): {ram_gb_with_buffer:.2f}GB.")
            return model_instance, ram_gb_with_buffer
        except LlamaCppError as e_llama: # Catch specific LlamaCpp errors
            logger.error(f"❌ LlamaCppError loading model '{model_name}': {e_llama}", exc_info=True)
        except Exception as e:
            logger.error(f"❌ Generic error loading model '{model_name}': {e}", exc_info=True)
        return None, 0.0

    async def _evict_models_if_needed(self, new_model_name_to_load: Optional[str] = None):
        new_model_ram_gb_estimate = 0.0
        if new_model_name_to_load:
            model_path = MODEL_PATHS.get(new_model_name_to_load)
            if model_path and os.path.exists(model_path):
                new_model_ram_gb_estimate = (os.path.getsize(model_path) / (1024**3)) * 1.5 + 1 # Same heuristic

        # Check if eviction is needed:
        # 1. If we are at max model count AND the new model isn't already loaded.
        # 2. OR if adding the new model would exceed total RAM limit AND we have models loaded.
        while (len(self.loaded_models) >= self.max_model_count and (new_model_name_to_load not in self.loaded_models)) or \
              (self.total_estimated_ram_gb + new_model_ram_gb_estimate > self.max_ram_models_gb and self.model_load_order):
            if not self.model_load_order:
                logger.warning("Eviction needed but model_load_order is empty. This should not happen if loaded_models is populated.")
                break

            model_to_evict = self.model_load_order.pop(0) # Evict LRU
            if model_to_evict in self.loaded_models:
                logger.warning(f"⚠️ Evicting model '{model_to_evict}' due to resource limits (Count: {len(self.loaded_models)}/{self.max_model_count}, RAM: {self.total_estimated_ram_gb:.2f}/{self.max_ram_models_gb:.2f}GB).")
                del self.loaded_models[model_to_evict] # Release model instance
                evicted_ram = self.model_ram_estimate_gb.pop(model_to_evict, 0)
                self.total_estimated_ram_gb -= evicted_ram
                self.model_load_status[model_to_evict] = "unloaded"
                logger.info(f"Model '{model_to_evict}' unloaded. RAM reclaimed: {evicted_ram:.2f}GB. Total est. RAM: {self.total_estimated_ram_gb:.2f}GB.")
            else:
                logger.warning(f"Model '{model_to_evict}' was in load order but not in loaded_models dict. Inconsistency detected.")


    def _generation_task_sync(self, model_instance: Any, prompt_messages: List[Dict[str,str]], gen_params: Dict[str, Any]) -> str:
        model_log_name = model_instance.model_path.split(os.sep)[-1] if hasattr(model_instance, 'model_path') else 'UnknownModel'
        logger.debug(f"--- [{model_log_name}] ENTERING _generation_task_sync ---")
        output_text = "[Error: Generation failed in thread]"
        try:
            completion = model_instance.create_chat_completion(messages=prompt_messages, **gen_params)
            if completion and "choices" in completion and completion["choices"]:
                message_content = completion["choices"][0].get("message", {}).get("content")
                output_text = message_content.strip() if message_content else "[No content in choice message]"
            else:
                output_text = "[No choices in completion or completion is empty]"
                logger.warning(f"[{model_log_name}] Malformed completion object: {completion}")
            logger.debug(f"✅ [{model_log_name}] Sync generation successful. Preview: {output_text[:100].replace(chr(10),' ')}...")
        except Exception as e:
            logger.error(f"❌ [{model_log_name}] Exception during model generation in thread: {e}", exc_info=True)
            output_text = f"[Error: Exception during generation - {type(e).__name__}: {str(e)[:100]}]"
        logger.debug(f"--- [{model_log_name}] EXITING _generation_task_sync ---")
        return output_text

    async def async_generation_wrapper(self, model_instance: Any, prompt_messages: List[Dict[str,str]], gen_params: Dict[str, Any], model_name_for_log: str) -> str:
        logger.debug(f"--- [{model_name_for_log}] ENTERING async_generation_wrapper ---")
        output_str = f"[Error: Model '{model_name_for_log}' instance not available for generation]"
        if not model_instance:
            logger.error(f"[{model_name_for_log}] Model instance is None in async_generation_wrapper.")
            return output_str
        try:
            output_str = await asyncio.wait_for(
                asyncio.to_thread(self._generation_task_sync, model_instance, prompt_messages, gen_params),
                timeout=GENERATION_THREAD_TIMEOUT_SECONDS + 10
            )
        except asyncio.TimeoutError:
            logger.error(f"❌ [{model_name_for_log}] Generation task TIMED OUT in asyncio.wait_for (>{GENERATION_THREAD_TIMEOUT_SECONDS + 10}s).")
            output_str = f"[Error: Generation timed out for model {model_name_for_log}]"
        except Exception as e:
            logger.error(f"❌ [{model_name_for_log}] Exception in async_generation_wrapper: {e}", exc_info=True)
            output_str = f"[Error: Async wrapper exception for model {model_name_for_log} - {type(e).__name__}]"
        logger.debug(f"--- [{model_name_for_log}] EXITING async_generation_wrapper ---")
        return output_str


    async def generate_with_model(self, model_name: str, prompt: str, preset_name: str = "default", system_prompt: Optional[str] = None) -> str:
        logger.info(f"--- [{model_name}] Received generation request. System prompt: {'Yes' if system_prompt else 'No'}. Preset: {preset_name} ---")
        # Check if Llama class is LlamaMock (meaning llama.cpp is not available)
        if not LLAMA_CPP_AVAILABLE or Llama == LlamaMock:
            logger.error(f"Cannot generate with model '{model_name}': Llama.cpp backend not available (LLAMA_CPP_AVAILABLE={LLAMA_CPP_AVAILABLE}, Llama is LlamaMock: {Llama == LlamaMock}).")
            return "[Error: Llama.cpp backend core ('Llama' class) not available or is mocked]"

        if not await self._ensure_model_loaded(model_name):
            return f"[Error: Model '{model_name}' could not be loaded or is not available.]"

        model_instance = self.loaded_models.get(model_name)
        if not model_instance:
            logger.error(f"Model '{model_name}' instance not found in loaded_models after successful _ensure_model_loaded. This is unexpected.")
            return f"[Error: Model '{model_name}' instance unexpectedly not found after load attempt.]"

        gen_params = INFERENCE_PRESETS.get(preset_name, INFERENCE_PRESETS.get("balanced", {"temperature": 0.7})).copy() # Fallback to "balanced" then hardcoded default

        prompt_messages = []
        if system_prompt and system_prompt.strip():
            prompt_messages.append({"role": "system", "content": system_prompt})
        prompt_messages.append({"role": "user", "content": prompt})

        logger.debug(f"[{model_name}] Calling async_generation_wrapper with messages: {prompt_messages}, params: {gen_params}")
        response_text = await self.async_generation_wrapper(model_instance, prompt_messages, gen_params, model_name)
        logger.info(f"--- [{model_name}] Generation complete. Response length: {len(response_text)} ---")
        return response_text

    def get_loaded_model_stats(self) -> Dict[str, Any]:
        return {
            "loaded_model_count": len(self.loaded_models),
            "total_ram_estimate_gb": round(self.total_estimated_ram_gb, 2),
            "max_ram_models_gb": self.max_ram_models_gb,
            "max_model_count": self.max_model_count,
            "models_in_memory": list(self.loaded_models.keys()),
            "load_order_lru": self.model_load_order, # LRU (oldest) to MRU (newest)
            "model_ram_details_gb": {name: round(ram, 2) for name, ram in self.model_ram_estimate_gb.items()},
            "model_load_status": self.model_load_status,
            "llama_cpp_available": LLAMA_CPP_AVAILABLE,
            "llama_class_is_mock": Llama == LlamaMock
        }

    def shutdown(self):
        logger.info("🔌 Shutting down ModelManager...")
        self.executor.shutdown(wait=True, cancel_futures=True) # Cancel pending futures on shutdown
        self.loaded_models.clear()
        self.model_ram_estimate_gb.clear()
        self.model_load_order.clear()
        self.model_load_status.clear()
        self.total_estimated_ram_gb = 0
        logger.info("✅ ModelManager shutdown complete.")

# Example usage for direct testing
async def main_model_manager_test():
    # Configure logging for the test
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    # Set specific loggers to higher levels if too verbose
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("httpcore").setLevel(logging.WARNING)

    logger.info("--- ModelManager Test ---")

    if not MODEL_PATHS:
        logger.error("MODEL_PATHS is empty. Cannot run test. Check config_settings_public.py")
        return
    if Llama == LlamaMock: # Check if we are using the mock
        logger.warning("Llama.cpp is not available. Running test with LlamaMock. Responses will be mocked.")

    # Example: max 2 models, 10GB RAM limit
    mm = ModelManager(max_model_count=2, max_ram_models_gb=10.0)

    test_models_available = list(MODEL_PATHS.keys())
    if not test_models_available:
        logger.error("No models defined in MODEL_PATHS for testing.")
        mm.shutdown()
        return

    # Select up to 3 models for testing, or fewer if not available
    model1_name = test_models_available[0]
    model2_name = test_models_available[1] if len(test_models_available) > 1 else model1_name
    model3_name = test_models_available[2] if len(test_models_available) > 2 else model1_name

    test_queries = [
        (model1_name, "What is the capital of France?", "precise", "You are a helpful geography expert."),
        (model2_name, "Explain black holes simply.", "creative", None),
        (model1_name, "What is 1+1?", "precise", None), # Query model1 again
    ]
    if len(test_models_available) > 2 and model3_name not in [model1_name, model2_name]: # Only add model3 if it's distinct and available
        test_queries.append((model3_name, "Tell me a short joke.", "default", None))

    for i, (model_name, query, preset, sys_prompt) in enumerate(test_queries):
        logger.info(f"\n--- Test Query {i+1}: Model '{model_name}', Preset '{preset}' ---")
        response = await mm.generate_with_model(model_name, query, preset_name=preset, system_prompt=sys_prompt)
        print(f"Response from '{model_name}': {response[:150]}...") # Print more of the response
        print(f"Stats after '{model_name}': {mm.get_loaded_model_stats()}")
        await asyncio.sleep(1) # Small delay between queries

    mm.shutdown()
    logger.info("--- ModelManager Test Complete ---")

if __name__ == "__main__":
    # This allows testing ModelManager directly if needed
    # Ensure your MODEL_PATHS in config_settings_public.py point to valid GGUF files
    # To run: python -m modules.model_manager_public
    asyncio.run(main_model_manager_test())
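The RAM accounting in ModelManager is a heuristic rather than a measurement: each GGUF model is assumed to need roughly its on-disk size plus 50% plus 1 GB once KV cache and runtime buffers are counted, and the LRU eviction loop compares that estimate against the configured budget. The following standalone sketch of that budgeting logic uses hypothetical file sizes and is not part of the uploaded module:

# Standalone sketch of the eviction heuristic used by ModelManager above.
# The model names and sizes below are invented examples.
def estimate_ram_gb(file_size_gb: float) -> float:
    # file size + 50% + 1 GB for KV cache and buffers (same heuristic as _load_model_from_disk)
    return file_size_gb * 1.5 + 1

loaded = {"mistral": estimate_ram_gb(4.1), "gemma": estimate_ram_gb(2.5)}  # ~7.15 GB, ~4.75 GB
load_order = ["mistral", "gemma"]  # oldest first (LRU)
budget_gb = 10.0
incoming = estimate_ram_gb(4.7)  # ~8.05 GB for a hypothetical new model

while sum(loaded.values()) + incoming > budget_gb and load_order:
    evicted = load_order.pop(0)
    print(f"evict {evicted}, reclaiming {loaded.pop(evicted):.2f} GB")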
modules/plugin_manager_public.py
ADDED
@@ -0,0 +1,80 @@
import logging
import asyncio
from typing import Dict, Any, Type

logger = logging.getLogger(__name__)

# ✅ Define Pre-Approved Plugins (Only Trusted Plugins for Public Use)
ALLOWED_PLUGINS = {
    "basic_math": "BasicMathPlugin", # Example of a basic utility plugin
    "date_time": "DateTimePlugin" # Example of a simple utility plugin
}

class PluginManager:
    """🚀 Public Plugin Manager (Pre-Approved Plugins Only)"""

    def __init__(self):
        self.plugins: Dict[str, Any] = {}

    async def load_plugin(self, plugin_name: str):
        """✅ Load Pre-Approved Plugin."""
        if plugin_name not in ALLOWED_PLUGINS:
            logger.warning(f"⚠️ Plugin '{plugin_name}' is not available in the public version.")
            return f"⚠️ Plugin '{plugin_name}' not allowed in public version."

        if plugin_name in self.plugins:
            logger.info(f"✅ Plugin '{plugin_name}' already loaded.")
            return f"✅ Plugin '{plugin_name}' already loaded."

        try:
            # ✅ Use pre-defined plugin mappings for security
            plugin_class = globals().get(ALLOWED_PLUGINS[plugin_name])
            if plugin_class:
                plugin_instance = plugin_class()
                self.plugins[plugin_name] = plugin_instance
                logger.info(f"✅ Plugin '{plugin_name}' loaded successfully.")
                return f"✅ Plugin '{plugin_name}' loaded successfully."
            else:
                logger.error(f"❌ Plugin class '{plugin_name}' not found.")
                return f"❌ Plugin class '{plugin_name}' not found."
        except Exception as e:
            logger.exception(f"❌ Error loading plugin '{plugin_name}': {e}")
            return f"❌ Error loading plugin '{plugin_name}'."

    async def execute_plugin_command(self, plugin_name: str, command: str, args: Dict[str, Any] = {}):
        """✅ Execute Plugin Command (Predefined Only)."""
        if plugin_name not in self.plugins:
            logger.warning(f"⚠️ Attempt to execute command from unloaded plugin '{plugin_name}'.")
            return f"⚠️ Plugin '{plugin_name}' not loaded."

        plugin = self.plugins[plugin_name]
        if hasattr(plugin, command):
            method = getattr(plugin, command)
            try:
                if asyncio.iscoroutinefunction(method):
                    return await method(**args)
                return method(**args)
            except Exception as e:
                logger.error(f"❌ Error executing command '{command}' in plugin '{plugin_name}': {e}")
                return f"❌ Error executing '{command}' in '{plugin_name}'."
        else:
            logger.warning(f"⚠️ Command '{command}' not found in plugin '{plugin_name}'.")
            return f"⚠️ Command '{command}' not found in '{plugin_name}'."

# ✅ Example Predefined Plugin Classes
class BasicMathPlugin:
    """Basic Math Plugin (Example)"""

    def add(self, x, y):
        return x + y

    def subtract(self, x, y):
        return x - y

class DateTimePlugin:
    """Date/Time Plugin (Example)"""

    async def current_time(self):
        from datetime import datetime
        return datetime.utcnow().isoformat()
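A minimal usage sketch for the PluginManager above. The import path modules.plugin_manager_public is an assumption based on the repository layout shown in this upload; adjust it to however the package is actually importable in your environment:

# Hypothetical usage of PluginManager; the import path below is assumed, not confirmed.
import asyncio
from modules.plugin_manager_public import PluginManager

async def demo():
    pm = PluginManager()
    print(await pm.load_plugin("date_time"))                       # pre-approved plugin, loads
    print(await pm.execute_plugin_command("date_time", "current_time"))  # awaits the async command
    print(await pm.load_plugin("web_scraper"))                     # not in ALLOWED_PLUGINS, refused

asyncio.run(demo())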
modules/query_processor_public.py
ADDED
@@ -0,0 +1,135 @@
import asyncio
import logging
import torch
from difflib import SequenceMatcher
from time import time
from typing import Dict, List, Tuple

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from config_settings_public import MODEL_PATHS, MODEL_CONFIG, MODEL_WEIGHTS
from model_manager_public import ModelManager
from response_optimizer_public import ResponseOptimizer

logger = logging.getLogger(__name__)


class QueryProcessor:
    def __init__(self, model_manager: ModelManager, response_optimizer: ResponseOptimizer):
        self.model_manager = model_manager
        self.response_optimizer = response_optimizer
        self.model_paths = MODEL_PATHS
        self.model_config = MODEL_CONFIG
        self.memory_bank = []

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.enabled = True
            torch.set_float32_matmul_precision('high')
            logger.info(f"✅ QueryProcessor initialized on {self.device}")
        else:
            logger.warning("⚠️ Running on CPU")

        self.model_tiers = {
            "balanced": ["gemma", "mistral", "llama"]
        }

    def select_models(self, query: str) -> List[str]:
        return [m for m in self.model_tiers["balanced"] if m in self.model_paths]

    async def process_query_with_fusion(self, query: str) -> str:
        selected_models = self.select_models(query)
        logger.info(f"🧠 Selected models: {selected_models}")

        tasks = [asyncio.create_task(self.invoke_model(m, query)) for m in selected_models]
        responses = await asyncio.gather(*tasks, return_exceptions=True)

        valid_responses = {
            m: r for m, r in zip(selected_models, responses)
            if isinstance(r, str) and r.strip()
        }

        if not valid_responses:
            logger.error("❌ All models failed or returned empty responses.")
            return "⚠️ No valid responses received from models."

        logger.info(f"✅ {len(valid_responses)} valid responses received.")
        fused = await self.advanced_fusion(valid_responses, query)
        self.memory_bank.append({"query": query, "response": fused, "timestamp": time()})
        return fused

    async def invoke_model(self, model_name: str, query: str) -> str:
        try:
            model = self.model_manager.loaded_models.get(model_name)
            if not model:
                model = self.model_manager.load_model_from_disk(model_name)
                if not model:
                    raise RuntimeError(f"Model '{model_name}' failed to load.")

            config = self.model_config.get(model_name, {})
            max_tokens = config.get("max_tokens", 800)
            temperature = config.get("temperature", 0.7)

            logger.info(f"🔧 Invoking '{model_name}' with temp={temperature}, max_tokens={max_tokens}")

            with torch.no_grad():
                completion = await asyncio.to_thread(
                    model.create_completion,
                    prompt=query,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stop=["</s>", "User:", "Assistant:"]
                )

            response_text = completion.get("choices", [{}])[0].get("text", "").strip()
            return response_text

        except Exception as e:
            logger.error(f"❌ Error in model '{model_name}': {e}")
            return ""

    def clean_response(self, raw) -> str:
        if isinstance(raw, str):
            return raw.strip()

        if isinstance(raw, dict):
            return raw.get("generated_text", "").strip() or raw.get("text", "").strip()

        if isinstance(raw, list):
            if isinstance(raw[0], dict) and "generated_text" in raw[0]:
                return raw[0]["generated_text"].strip()
            return " ".join(str(x).strip() for x in raw if isinstance(x, str))

        return str(raw).strip()

    async def advanced_fusion(self, responses: Dict[str, str], query: str) -> str:
        cleaned = {m: self.clean_response(r) for m, r in responses.items()}
        seen = set()
        unique: List[Tuple[str, float, str]] = []

        for model, text in cleaned.items():
            if not text or any(self.is_similar(text, prev) for prev in seen):
                continue
            seen.add(text)
            score = self.score_response(text, query)
            unique.append((text, score, model))

        if not unique:
            return "⚠️ No high-quality content to merge."

        unique.sort(key=lambda x: x[1], reverse=True)
        best_responses = [await self.response_optimizer.optimize_response(t, query) for t, _, _ in unique[:3]]
        return "\n---\n".join(best_responses).strip()

    def is_similar(self, a: str, b: str, threshold: float = 0.9) -> bool:
        return SequenceMatcher(None, a.strip().lower(), b.strip().lower()).ratio() > threshold

    def score_response(self, response: str, query: str) -> float:
        try:
            tfidf = TfidfVectorizer().fit_transform([response, query])
            return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
        except:
            return 0.5
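The fusion step above ranks candidate answers by TF-IDF cosine similarity to the query and drops near-duplicates with difflib before merging. A self-contained sketch of just that scoring and deduplication logic, with invented example strings and no models involved:

# Standalone illustration of the scoring/deduplication pattern used in advanced_fusion.
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

query = "Explain black holes simply."
candidates = [
    "A black hole is a region where gravity is so strong that light cannot escape.",
    "A black hole is a region where gravity is so strong light can't escape.",  # near-duplicate
    "Paris is the capital of France.",  # off-topic
]

def score(text: str, query: str) -> float:
    tfidf = TfidfVectorizer().fit_transform([text, query])
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

kept = []
for text in candidates:
    # skip near-duplicates, mirroring is_similar with a 0.9 ratio threshold
    if any(SequenceMatcher(None, text.lower(), k.lower()).ratio() > 0.9 for k, _ in kept):
        continue
    kept.append((text, score(text, query)))

for text, s in sorted(kept, key=lambda x: x[1], reverse=True):
    print(f"{s:.2f}  {text}")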
modules/response_optimizer_public.py
ADDED
@@ -0,0 +1,53 @@
import logging
from collections import defaultdict
from typing import Dict

logger = logging.getLogger(__name__)

class ResponseOptimizer:
    """🚀 Public Response Optimizer (Basic Filtering + Length Control)"""

    def __init__(self):
        self.response_cache = {} # Stores past responses to reduce redundant processing
        self.optim_rules = defaultdict(list) # Holds rule-based response refinements
        self.cache_limit = 100 # ✅ Limit cache size to avoid overflow

    async def optimize_response(self, response: str, context: Dict) -> str:
        """✅ Optimizes AI-generated responses (Fast + Secure)."""

        # ✅ Return cached response if available
        if response in self.response_cache:
            logger.info("✅ Returning cached optimized response.")
            return self.response_cache[response]

        # ✅ Apply context-based optimization (length, profanity)
        optimized_response = self.apply_optimizations(response, context)

        # ✅ Store in cache (Respect size limit)
        if len(self.response_cache) >= self.cache_limit:
            oldest_response = next(iter(self.response_cache))
            del self.response_cache[oldest_response]

        self.response_cache[response] = optimized_response
        return optimized_response

    def apply_optimizations(self, response: str, context: Dict) -> str:
        """✅ Applies context-specific response optimization."""
        if "filter_profanity" in context and context["filter_profanity"]:
            response = self.remove_profanity(response)

        if "trim_length" in context:
            response = response[:context["trim_length"]].strip() + "..." # Trims to desired length

        return response

    def remove_profanity(self, response: str) -> str:
        """🚫 Removes flagged words from AI-generated responses."""
        banned_words = [
            "badword1", "badword2", "badword3", "shit", "fuck", "damn", "bitch", "asshole"
        ] # ✅ Add or remove based on testing
        for word in banned_words:
            response = response.replace(word, "***") # ✅ Replace with censorship symbol

        return response
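optimize_response expects a context dict, so callers toggle behaviour by key (filter_profanity, trim_length). A minimal usage sketch; the import path and context values are illustrative assumptions:

# Hypothetical usage of ResponseOptimizer; the import path below is assumed, not confirmed.
import asyncio
from modules.response_optimizer_public import ResponseOptimizer

async def demo():
    opt = ResponseOptimizer()
    raw = "This damn answer is far longer than the UI wants to display."
    cleaned = await opt.optimize_response(raw, {"filter_profanity": True, "trim_length": 40})
    print(cleaned)  # profanity masked first, then trimmed to 40 characters plus "..."

asyncio.run(demo())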
modules/user_auth.py
ADDED
@@ -0,0 +1,136 @@
# FILE: modules/user_auth.py

import json
import os
import sys
import logging

logger = logging.getLogger("ZOTHEOS_UserAuth")
if not logger.handlers:
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

USERS_FILE_PATH = "stripe_users.json" # Default name, path will be determined below

try:
    if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
        # Running in a PyInstaller bundle
        # _MEIPASS is the path to the temporary directory where PyInstaller unpacks files
        application_path = sys._MEIPASS
        logger.debug(f"UserAuth running in PyInstaller bundle. MEIPASS: {application_path}")
        # stripe_users.json should be in the root of _MEIPASS, alongside the .exe's main script
        candidate_path = os.path.join(application_path, "stripe_users.json")
        # Check if it exists there directly
        if os.path.exists(candidate_path):
            USERS_FILE_PATH = candidate_path
        else:
            # Fallback: if user_auth.py is in a 'modules' subdir within _MEIPASS, try one level up for stripe_users.json
            # This handles if --add-data "modules;modules" was used and user_auth.py is at _MEIPASS/modules/user_auth.py
            # and stripe_users.json was added to _MEIPASS/.
            # This might happen if the main script is also at the root of _MEIPASS.
            current_module_dir = os.path.dirname(os.path.abspath(__file__)) # This would be _MEIPASS/modules
            project_root_guess = os.path.dirname(current_module_dir) # This would be _MEIPASS
            alt_candidate_path = os.path.join(project_root_guess, "stripe_users.json")
            if os.path.exists(alt_candidate_path):
                USERS_FILE_PATH = alt_candidate_path
            else:
                # If still not found, log a warning. The get_user_tier will handle the FileNotFoundError.
                logger.warning(f"stripe_users.json not found at primary bundle path '{candidate_path}' or alternate '{alt_candidate_path}'. It must be bundled at the root relative to the main script.")
                # Keep USERS_FILE_PATH as "stripe_users.json" to allow relative loading if script is at root of _MEIPASS
                # This will likely fail if not at root, and get_user_tier will default to 'free'
    else:
        # Running as a normal script (user_auth.py is in modules folder)
        script_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(script_dir)
        USERS_FILE_PATH = os.path.join(project_root, "stripe_users.json")

    logger.info(f"User authentication file path dynamically set to: {os.path.abspath(USERS_FILE_PATH) if os.path.isabs(USERS_FILE_PATH) else USERS_FILE_PATH}")

except Exception as e_path:
    logger.error(f"Critical error setting USERS_FILE_PATH in user_auth.py: {e_path}. Using default 'stripe_users.json'.")
    USERS_FILE_PATH = "stripe_users.json"


def get_user_tier(user_token: str) -> str:
    """
    Retrieves the user's tier based on their token from stripe_users.json.
    Defaults to 'free' if token not found or an error occurs.
    """
    default_tier = "free"
    if not user_token or not isinstance(user_token, str) or not user_token.strip():
        # This log is fine, but can be noisy if called frequently with no token (e.g. UI init)
        # logger.debug("No user token provided or invalid token, defaulting to 'free' tier.")
        return default_tier

    try:
        # Log the path being attempted for diagnosis
        logger.debug(f"Attempting to load users file from: {os.path.abspath(USERS_FILE_PATH)}")
        if not os.path.exists(USERS_FILE_PATH):
            logger.warning(f"Users file '{USERS_FILE_PATH}' not found. Defaulting all users to '{default_tier}' tier. Please ensure this file is correctly bundled at the application root.")
            return default_tier

        with open(USERS_FILE_PATH, "r", encoding="utf-8") as f:
            users = json.load(f)

        tier = users.get(user_token, default_tier)
        log_token_display = user_token[:3] + "***" if len(user_token) > 3 else user_token
        logger.info(f"Token '{log_token_display}' resolved to tier: '{tier}'.")
        return tier
    except json.JSONDecodeError:
        logger.error(f"Error decoding JSON from {USERS_FILE_PATH}. Defaulting to '{default_tier}' tier.", exc_info=True)
        return default_tier
    except Exception as e:
        logger.error(f"An unexpected error occurred in get_user_tier: {e}. Defaulting to '{default_tier}' tier.", exc_info=True)
        return default_tier

if __name__ == '__main__':
    logger.setLevel(logging.DEBUG)
    print(f"--- Testing user_auth.py ---")
    print(f"USERS_FILE_PATH is configured as: {os.path.abspath(USERS_FILE_PATH)}")

    # Create a dummy stripe_users.json in the expected location for testing if it doesn't exist
    # For standalone testing, this means it looks for it relative to where this script would be if it were project root.
    # If this script is in 'modules', project_root is one level up.

    # Determine the correct test path based on whether script is run directly or from project root
    if os.path.basename(os.getcwd()) == "modules": # If CWD is modules folder
        test_stripe_users_path = "../stripe_users.json"
    else: # If CWD is project root (where zotheos_interface_public.py is)
        test_stripe_users_path = "stripe_users.json"

    if not os.path.exists(test_stripe_users_path):
        print(f"'{test_stripe_users_path}' not found relative to CWD ({os.getcwd()}). Creating a dummy file for testing purposes at this location.")
        dummy_users_for_test = {
            "TOKEN_FREE_USER": "free",
            "TOKEN_STARTER_USER": "starter",
            "TOKEN_PRO_USER": "pro"
        }
        try:
            with open(test_stripe_users_path, "w") as f:
                json.dump(dummy_users_for_test, f, indent=2)
            print(f"Dummy '{test_stripe_users_path}' created successfully with test tokens.")
            # Update USERS_FILE_PATH for this test run if it was different
            USERS_FILE_PATH = test_stripe_users_path
            print(f"For this test run, USERS_FILE_PATH is now: {os.path.abspath(USERS_FILE_PATH)}")
        except Exception as e_create:
            print(f"Could not create dummy users file at {test_stripe_users_path}: {e_create}")
            print("Please ensure stripe_users.json exists in the project root for user_auth.py to function correctly.")

    print("\nTest Results:")
    test_tokens = {
        "Pro User Token": "TOKEN_PRO_USER",
        "Starter User Token": "TOKEN_STARTER_USER",
        "Free User Token": "TOKEN_FREE_USER",
        "Non-existent Token": "invalid_dummy_token_123",
        "Empty String Token": "",
    }

    for description, token_to_test in test_tokens.items():
        actual_tier = get_user_tier(token_to_test)
        print(f"- Test '{description}' (Input: '{token_to_test}'): Tier = '{actual_tier}'")

    print(f"\n--- End of user_auth.py test ---")