ZOTHEOS committed (verified)
Commit 4d1849f
1 Parent(s): 13efbf9

Upload 9 files
modules/config_settings_public.py ADDED
@@ -0,0 +1,143 @@
1
+ # FILE: modules/config_settings_public.py
2
+
3
+ import os
4
+ import sys
5
+ import logging
6
+ from huggingface_hub import hf_hub_download # ✅ IMPORT THE DOWNLOADER
7
+
8
+ logger = logging.getLogger("ZOTHEOS_Config")
9
+ if not logger.handlers:
10
+ handler = logging.StreamHandler(sys.stdout)
11
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
12
+ handler.setFormatter(formatter)
13
+ logger.addHandler(handler)
14
+ logger.setLevel(logging.INFO)
15
+
16
+ # --- ✅ WEB DEPLOYMENT & LOCAL PATH CONFIG ---
17
+
18
+ # Standard way to detect if we are running in a Hugging Face Space
19
+ IS_WEB_MODE = "HF_SPACE_ID" in os.environ
20
+
21
+ _is_frozen = getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')
22
+
23
+ if _is_frozen:
24
+ APP_DIR = os.path.dirname(sys.executable)
25
+ else:
26
+ try:
27
+ APP_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
28
+ except NameError:
29
+ APP_DIR = os.getcwd()
30
+ logger.warning(f"__file__ not defined, APP_DIR set to CWD: {APP_DIR}")
31
+
32
+ # This directory structure is primarily for local/desktop use.
33
+ # On Hugging Face, these paths will be created in the virtual environment.
34
+ BASE_MODELS_DIR = os.path.join(APP_DIR, "models")
35
+ BASE_DATA_SUBDIR = "zotheos_public_data"
36
+ BASE_DATA_PATH = os.path.join(APP_DIR, BASE_DATA_SUBDIR)
37
+ CORE_DIRS_TO_VERIFY = {
38
+ "data_base": BASE_DATA_PATH, "memory": os.path.join(BASE_DATA_PATH, "zotheos_memory"),
39
+ "cache": os.path.join(BASE_DATA_PATH, "cache"), "cache_transformers": os.path.join(BASE_DATA_PATH, "cache", "transformers"),
40
+ "cache_huggingface": os.path.join(BASE_DATA_PATH, "cache", "huggingface"), "cache_hf_hub": os.path.join(BASE_DATA_PATH, "cache", "huggingface", "hub"),
41
+ "logs": os.path.join(BASE_DATA_PATH, "logs"), "temp": os.path.join(BASE_DATA_PATH, "temp_files"),
42
+ "models_root": BASE_MODELS_DIR
43
+ }
44
+
45
+ for dir_key, dir_path in CORE_DIRS_TO_VERIFY.items():
46
+ try:
47
+ if dir_key == "models_root" and (_is_frozen or IS_WEB_MODE):
48
+ continue # Skip creating models folder for frozen app or web app
49
+ os.makedirs(dir_path, exist_ok=True)
50
+ except Exception as e:
51
+ logger.error(f"Error creating directory {dir_path}: {e}")
52
+
53
+ # --- ✅ CONDITIONAL MODEL PATHS & GPU CONFIG ---
54
+
55
+ if IS_WEB_MODE:
56
+ logger.info("✅✅✅ RUNNING IN WEB MODE (Hugging Face Space) ✅✅✅")
57
+ logger.info("Model paths will be resolved by hf_hub_download.")
58
+
59
+ # Download models from the Hub instead of looking for local files
60
+ MODEL_PATHS = {
61
+ "mistral": hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"),
62
+ "gemma": hf_hub_download(repo_id="google/gemma-2b-it-gguf", filename="gemma-2b-it.Q4_K_M.gguf"),
63
+ "qwen": hf_hub_download(repo_id="Qwen/Qwen1.5-1.8B-Chat-GGUF", filename="qwen1.5-1.8b-chat.Q4_K_M.gguf")
64
+ }
65
+ # Free Hugging Face Spaces are CPU-only, so GPU layers MUST be 0
66
+ N_GPU_LAYERS_FALLBACK = 0
67
+ logger.info("N_GPU_LAYERS_FALLBACK forced to 0 for CPU-only web environment.")
68
+
69
+ else:
70
+ logger.info("✅✅✅ RUNNING IN LOCAL MODE (Desktop/PC) ✅✅✅")
71
+ logger.info(f"Models will be loaded from local directory: {BASE_MODELS_DIR}")
72
+
73
+ # Use local file paths
74
+ # Note: These local filenames intentionally mirror the ones used by hf_hub_download above for consistency.
75
+ # Please ensure your local folder structure matches these paths.
76
+ MODEL_PATHS = {
77
+ "mistral": os.path.join(BASE_MODELS_DIR, "mistral-7b-instruct-v0.2.Q4_K_M.gguf"),
78
+ "gemma": os.path.join(BASE_MODELS_DIR, "gemma-2b-it.Q4_K_M.gguf"),
79
+ "qwen": os.path.join(BASE_MODELS_DIR, "qwen1.5-1.8b-chat.Q4_K_M.gguf"),
80
+ }
81
+ # On local machine, use your GPU
82
+ N_GPU_LAYERS_FALLBACK = -1 # -1 means offload all to GPU
83
+ logger.info("N_GPU_LAYERS_FALLBACK set to -1 for local GPU acceleration.")
84
+
85
+
86
+ # --- Shared Configurations ---
87
+
88
+ MAX_RAM_MODELS_GB = 23.8
89
+ MAX_CONCURRENT_MODELS = 3
90
+ N_CTX_FALLBACK = 2048
91
+ N_THREADS_FALLBACK = 8
92
+ VERBOSE_LLAMA_CPP = True
93
+
94
+ MODEL_SPECIFIC_PARAMS = {
95
+ "mistral": { "chat_format": "mistral-instruct", "n_ctx": N_CTX_FALLBACK },
96
+ "gemma": { "chat_format": "gemma", "n_ctx": N_CTX_FALLBACK },
97
+ "qwen": { "chat_format": "chatml", "n_ctx": N_CTX_FALLBACK },
98
+ "_default": {
99
+ "f16_kv": True, "use_mmap": True, "use_mlock": False,
100
+ "verbose": VERBOSE_LLAMA_CPP,
101
+ "n_gpu_layers": N_GPU_LAYERS_FALLBACK, # This now uses the correct value for web or local
102
+ "n_threads": N_THREADS_FALLBACK,
103
+ "n_ctx": N_CTX_FALLBACK
104
+ }
105
+ }
106
+
107
+ INFERENCE_PRESETS = {
108
+ "balanced": {"temperature": 0.7, "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.1, "mirostat_mode": 0, "max_tokens": 1024},
109
+ "precise": {"temperature": 0.2, "top_p": 0.7, "top_k": 20, "repeat_penalty": 1.05, "mirostat_mode": 0, "max_tokens": 1536},
110
+ "creative": {"temperature": 0.9, "top_p": 0.95, "top_k": 60, "repeat_penalty": 1.15, "mirostat_mode": 2, "mirostat_tau": 4.0, "mirostat_eta": 0.1, "max_tokens": 1024},
111
+ "passthrough": {}
112
+ }
113
+ DEFAULT_INFERENCE_PRESET = "balanced"
114
+
115
+ DEFAULT_SYSTEM_PROMPT = "You are ZOTHEOS, an ethical AI developed to help humanity. Provide clear, concise, and helpful responses. Be respectful and avoid harmful content."
116
+ SYSTEM_PERSONAS = {
117
+ "default": DEFAULT_SYSTEM_PROMPT, "helpful_assistant": "You are a helpful AI assistant. Your goal is to provide accurate and informative answers.",
118
+ "philosopher": "You are an AI philosopher. Engage with complex questions thoughtfully and explore different perspectives.",
119
+ "coder": "You are an expert AI programmer. Provide code examples and explain them clearly. Assume a senior developer audience.",
120
+ "concise_summarizer": "You are an AI tasked with providing very concise summaries. Get straight to the point. Use bullet points where appropriate.",
121
+ }
122
+
123
+ MODEL_ROLES = { "mistral": "analyst", "gemma": "humanist", "qwen": "skeptic" }
124
+ MODEL_ROLE_SYSTEM_PROMPTS = {
125
+ "analyst": "You are an impartial analyst. Focus on facts, clarity, and cause-effect logic. Provide structured, evidence-based reasoning.",
126
+ "humanist": "You are a human-centered assistant. Focus on emotion, empathy, ethical considerations, and the potential human impact or experience related to the query.",
127
+ "skeptic": "You are a critical evaluator and a respectful skeptic. Your role is to challenge assumptions, highlight potential risks, point out biases, and explore alternative or less obvious interpretations. Question the premises if necessary.",
128
+ "general": DEFAULT_SYSTEM_PROMPT
129
+ }
130
+
131
+ MODEL_WEIGHTS = { "mistral": 1.0, "gemma": 0.9, "qwen": 1.1 }
132
+ LOG_LEVEL = "INFO"
133
+ LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s'
134
+
135
+ ENV_VARS_TO_SET = {
136
+ "TRANSFORMERS_CACHE": CORE_DIRS_TO_VERIFY["cache_transformers"], "HF_HOME": CORE_DIRS_TO_VERIFY["cache_huggingface"],
137
+ "HF_HUB_CACHE": CORE_DIRS_TO_VERIFY["cache_hf_hub"], "TOKENIZERS_PARALLELISM": "false",
138
+ }
139
+
140
+ ZOTHEOS_VERSION = "Public Beta 1.4 (Web Enabled)"
141
+
142
+ logger.info(f"Config settings loaded. Version: {ZOTHEOS_VERSION}")
143
+ logger.info(f"APP_DIR: {APP_DIR} (Frozen: {_is_frozen}) | Web Mode: {IS_WEB_MODE}")
modules/knowledge_core_public.py ADDED
@@ -0,0 +1,102 @@
1
+ import os
2
+ import json
3
+ import logging
4
+ import asyncio
5
+ import traceback
6
+ from typing import List
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ # ✅ Define Knowledge Storage Path
11
+ KNOWLEDGE_FILE = os.path.expanduser("~/zotheos_public/zotheos_knowledge.json")
12
+
13
+ class KnowledgeCore:
14
+ """🚀 Public Version of Knowledge Core (Simplified Storage and Retrieval)"""
15
+
16
+ def __init__(self):
17
+ """✅ Initialize Knowledge Storage."""
18
+ self.knowledge_store = {}
19
+ self.load_knowledge()
20
+
21
+ def load_knowledge(self):
22
+ """✅ Load Knowledge from JSON."""
23
+ try:
24
+ if os.path.exists(KNOWLEDGE_FILE):
25
+ with open(KNOWLEDGE_FILE, "r", encoding="utf-8") as file:
26
+ self.knowledge_store = json.load(file)
27
+ logger.info(f"✅ Knowledge loaded from {KNOWLEDGE_FILE}")
28
+ else:
29
+ self.knowledge_store = {}
30
+ logger.info(f"✅ Created new knowledge store at {KNOWLEDGE_FILE}")
31
+ except Exception as e:
32
+ logger.warning(f"⚠️ Error loading knowledge: {e}")
33
+ self.knowledge_store = {}
34
+
35
+ def save_knowledge(self):
36
+ """✅ Save Knowledge to JSON."""
37
+ try:
38
+ os.makedirs(os.path.dirname(KNOWLEDGE_FILE), exist_ok=True)  # ensure ~/zotheos_public exists before writing
+ with open(KNOWLEDGE_FILE, "w", encoding="utf-8") as file:
39
+ json.dump(self.knowledge_store, file, indent=4)
40
+ logger.info(f"✅ Knowledge saved to {KNOWLEDGE_FILE}")
41
+ except Exception as e:
42
+ logger.warning(f"⚠️ Error saving knowledge: {e}")
43
+
44
+ async def update(self, new_knowledge: str, category: str = "general") -> bool:
45
+ """✅ Add New Knowledge to the Store."""
46
+ try:
47
+ knowledge_id = str(len(self.knowledge_store) + 1)
48
+ self.knowledge_store[knowledge_id] = {
49
+ 'content': new_knowledge,
50
+ 'category': category
51
+ }
52
+ self.save_knowledge()
53
+ logger.info(f"✅ Knowledge added with ID {knowledge_id}")
54
+ return True
55
+ except Exception as e:
56
+ logger.error(f"❌ Error updating knowledge: {traceback.format_exc()}")
57
+ return False
58
+
59
+ async def retrieve(self, query: str, n_results: int = 5) -> List[dict]:
60
+ """🔎 Retrieve Relevant Knowledge Based on Simple Keyword Match."""
61
+ try:
62
+ matches = []
63
+ for knowledge_id, data in self.knowledge_store.items():
64
+ if query.lower() in data['content'].lower():
65
+ matches.append({
66
+ 'id': knowledge_id,
67
+ 'content': data['content'],
68
+ 'category': data['category']
69
+ })
70
+ if len(matches) >= n_results:
71
+ break
72
+ logger.info(f"✅ Retrieved {len(matches)} matches for query '{query}'")
73
+ return matches
74
+ except Exception as e:
75
+ logger.error(f"❌ Error retrieving knowledge: {traceback.format_exc()}")
76
+ return []
77
+
78
+ async def reset(self) -> bool:
79
+ """🗑️ Reset Knowledge Store (Delete All Stored Data)."""
80
+ try:
81
+ self.knowledge_store = {}
82
+ self.save_knowledge()
83
+ logger.info("✅ Knowledge store reset successfully.")
84
+ return True
85
+ except Exception as e:
86
+ logger.error(f"❌ Error resetting knowledge store: {traceback.format_exc()}")
87
+ return False
88
+
89
+ async def delete_knowledge(self, knowledge_id: str) -> bool:
90
+ """🗑️ Delete Specific Knowledge Entry by ID."""
91
+ try:
92
+ if knowledge_id in self.knowledge_store:
93
+ del self.knowledge_store[knowledge_id]
94
+ self.save_knowledge()
95
+ logger.info(f"✅ Deleted knowledge ID: {knowledge_id}")
96
+ return True
97
+ else:
98
+ logger.warning(f"⚠️ Knowledge ID {knowledge_id} not found.")
99
+ return False
100
+ except Exception as e:
101
+ logger.error(f"❌ Error deleting knowledge ID {knowledge_id}: {traceback.format_exc()}")
102
+ return False
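Illustrative usage of the class above (a sketch, not a definitive integration; it assumes the default KNOWLEDGE_FILE location is writable). Retrieval is a plain case-insensitive substring match, so queries should reuse wording from stored entries:

import asyncio
from modules.knowledge_core_public import KnowledgeCore

async def demo() -> None:
    kc = KnowledgeCore()
    await kc.update("ZOTHEOS fuses multiple local GGUF models.", category="architecture")
    matches = await kc.retrieve("gguf", n_results=3)  # case-insensitive substring match
    for m in matches:
        print(m["id"], m["category"], m["content"])

asyncio.run(demo())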
modules/main_fusion_public.py ADDED
@@ -0,0 +1,379 @@
1
+ # FILE: modules/main_fusion_public.py (Finalized with Tier Logic, Async, Fusion Summary)
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ import sys
7
+ import time
8
+ import json
9
+ from typing import Dict, Any, Optional, List, Union
10
+
11
+ try:
12
+ # Correctly determine project_root assuming this file is in 'modules'
13
+ script_dir = os.path.dirname(os.path.abspath(__file__))
14
+ project_root = os.path.dirname(script_dir)
15
+ if project_root not in sys.path:
16
+ sys.path.insert(0, project_root)
17
+ except Exception as e_path:
18
+ # Basic logging if path setup fails, though critical
19
+ logging.basicConfig(level=logging.ERROR)
20
+ logging.error(f"Error setting up sys.path in main_fusion_public.py: {e_path}")
21
+
22
+ try:
23
+ from modules.config_settings_public import (
24
+ MODEL_PATHS, MAX_CONCURRENT_MODELS, MAX_RAM_MODELS_GB, # Used by ModelManager init
25
+ DEFAULT_SYSTEM_PROMPT, SYSTEM_PERSONAS, INFERENCE_PRESETS, DEFAULT_INFERENCE_PRESET,
26
+ MODEL_ROLES, MODEL_ROLE_SYSTEM_PROMPTS
27
+ )
28
+ from modules.model_manager_public import ModelManager
29
+ from modules.memory_bank_public import MemoryBank
30
+ from modules.user_auth import get_user_tier
31
+ except ImportError as e:
32
+ logging.basicConfig(level=logging.CRITICAL, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
33
+ logging.critical(f"CRITICAL IMPORT ERROR in main_fusion_public.py: {e}. ZOTHEOS may not function.", exc_info=True)
34
+ # Provide fallbacks for critical components if possible, or allow failure
35
+ # For now, if these fail, the __init__ will likely raise an error or log critical status.
36
+ # Making the application fail loudly is often better than silent dysfunction.
37
+ # Consider exiting if critical imports fail: sys.exit(f"Fatal Import Error in main_fusion_public.py: {e}")
38
+
39
+ # --- Start of FIX: Define LLAMA_CPP_AVAILABLE ---
40
+ try:
41
+ # Attempt to import the core Llama class from llama_cpp
42
+ from llama_cpp import Llama # You might also need LlamaCppError if you use it
43
+ LLAMA_CPP_AVAILABLE = True
44
+ # print("DEBUG: llama_cpp imported successfully, LLAMA_CPP_AVAILABLE=True") # Optional debug print
45
+ except ImportError:
46
+ LLAMA_CPP_AVAILABLE = False
47
+ # print("DEBUG: llama_cpp import failed, LLAMA_CPP_AVAILABLE=False") # Optional debug print
48
+ except Exception as e_llama_import: # Catch other potential errors during import
49
+ LLAMA_CPP_AVAILABLE = False
50
+ # print(f"DEBUG: An unexpected error occurred during llama_cpp import: {e_llama_import}, LLAMA_CPP_AVAILABLE=False") # Optional debug print
51
+ # --- End of FIX ---
52
+
53
+ logger = logging.getLogger("ZOTHEOS_MainFusion")
54
+ if not logger.handlers:
55
+ handler = logging.StreamHandler(sys.stdout)
56
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
57
+ handler.setFormatter(formatter)
58
+ logger.addHandler(handler)
59
+ logger.setLevel(logging.INFO)
60
+
61
+
62
+ class MainFusionPublic:
63
+ def __init__(self, device_preference: Optional[str] = "cuda"):
64
+ logger.info("🚀 ZOTHEOS MainFusion Initializing (Tier Logic, Async Fusion Summary Enabled)...")
65
+
66
+ self.config = {
67
+ "MODEL_ROLES": MODEL_ROLES,
68
+ "MODEL_ROLE_SYSTEM_PROMPTS": MODEL_ROLE_SYSTEM_PROMPTS,
69
+ "DEFAULT_SYSTEM_PROMPT": DEFAULT_SYSTEM_PROMPT,
70
+ "TIER_CONFIG": {
71
+ "free": {"model_limit": 2, "memory_enabled": False, "display_name": "Free Tier"},
72
+ "starter": {"model_limit": 3, "memory_enabled": True, "display_name": "Starter Tier"},
73
+ # Pro uses max models from MODEL_PATHS, ensuring it uses all available configured models
74
+ "pro": {"model_limit": len(MODEL_PATHS.keys()) if MODEL_PATHS else 3, "memory_enabled": True, "display_name": "Pro Tier"}
75
+ }
76
+ }
77
+ self.models_last_queried_for_perspectives: List[str] = []
78
+
79
+ try:
80
+ # Pass global config values for ModelManager initialization
81
+ logger.info(f"Initializing ModelManager with device_preference='{device_preference}', max_count={MAX_CONCURRENT_MODELS}, max_ram_gb={MAX_RAM_MODELS_GB}...")
82
+ self.model_manager = ModelManager(
83
+ device_preference=device_preference,
84
+ max_model_count=MAX_CONCURRENT_MODELS,
85
+ max_ram_models_gb=MAX_RAM_MODELS_GB
86
+ )
87
+ # active_model_names_in_order is the master list of all models ZOTHEOS *could* use
88
+ self.active_model_names_in_order: List[str] = list(MODEL_PATHS.keys()) if MODEL_PATHS else []
89
+ logger.info(f"✅ MainFusion initialized. Max available models for fusion: {self.active_model_names_in_order}")
90
+ except Exception as e_mm_init:
91
+ logger.critical(f"❌ CRITICAL: ModelManager failed to initialize in MainFusion: {e_mm_init}", exc_info=True)
92
+ self.model_manager = None
93
+ self.active_model_names_in_order = []
94
+
95
+ try:
96
+ self.memory_bank = MemoryBank()
97
+ logger.info("✅ MemoryBank initialized.")
98
+ except Exception as e_mb_init:
99
+ logger.error(f"❌ MemoryBank failed to initialize: {e_mb_init}. Interactions may not be stored.", exc_info=True)
100
+ self.memory_bank = None
101
+
102
+ if not self.model_manager or not LLAMA_CPP_AVAILABLE: # Check if ModelManager itself or Llama backend failed
103
+ logger.critical("MainFusion started in a DEGRADED state: ModelManager or Llama.cpp backend is UNAVAILABLE.")
104
+
105
+
106
+ async def _get_single_model_response_direct(self, model_name: str, user_query: str, system_prompt_for_call: str, preset_name_for_call: str) -> Dict[str, Any]:
107
+ response_text = f"[Error: Model '{model_name}' generation did not complete or model unavailable]"
108
+ start_time_model = time.perf_counter()
109
+ status = "Model Error or Unavailable"
110
+
111
+ if not self.model_manager:
112
+ logger.error(f"[{model_name}] ModelManager not available for generation.")
113
+ return {"model": model_name, "text": "[Error: ModelManager is offline]", "time_ms": 0, "status": "ModelManager Offline"}
114
+
115
+ try:
116
+ logger.info(f"⚙️ [{model_name}] Calling ModelManager.generate_with_model. System Prompt: '{system_prompt_for_call[:50]}...'. User Query: '{user_query[:50].replace(chr(10),' ')}...'")
117
+ # generate_with_model handles actual call to Llama instance via async_generation_wrapper
118
+ response_text = await self.model_manager.generate_with_model(
119
+ model_name=model_name,
120
+ prompt=user_query,
121
+ preset_name=preset_name_for_call,
122
+ system_prompt=system_prompt_for_call
123
+ )
124
+ preview = response_text[:100].replace("\n", " ")
125
+ if response_text.startswith(("[Error:", "[No text generated", "[Malformed response")) or not response_text.strip():
126
+ logger.warning(f"⚠️ [{model_name}] Model returned an error string or empty content: {preview}...")
127
+ status = "Model Error or Empty Response"
128
+ else:
129
+ logger.info(f"✅ [{model_name}] Response received ({len(response_text)} chars): {preview}...")
130
+ status = "Success"
131
+ except Exception as e:
132
+ logger.error(f"❌ [{model_name}] Exception in _get_single_model_response_direct: {e}", exc_info=True)
133
+ response_text = f"[Error: MainFusion encountered an exception during {model_name} generation: {type(e).__name__}]"
134
+ status = "MainFusion Exception"
135
+ inference_time_ms = (time.perf_counter() - start_time_model) * 1000
136
+ return {"model": model_name, "text": response_text.strip(), "time_ms": round(inference_time_ms, 2), "status": status}
137
+
138
+ async def _generate_true_fusion_summary(self, raw_responses_dict: Dict[str, str], original_query: str, models_actually_used: List[str]) -> str:
139
+ logger.info(f"Attempting to generate a true fusion summary from {len(models_actually_used)} perspective(s).")
140
+ summarizer_model_key = "gemma" # Default summarizer, ensure it's in MODEL_PATHS and loadable
141
+
142
+ valid_responses_for_summary = {
143
+ model: text for model, text in raw_responses_dict.items()
144
+ if model in models_actually_used and text and not text.startswith("[Error:")
145
+ }
146
+
147
+ if not valid_responses_for_summary: # No valid responses at all
148
+ logger.warning("No valid responses available to generate a fusion summary.")
149
+ return "A summary could not be generated as no valid perspectives were successfully gathered."
150
+
151
+ if len(valid_responses_for_summary) == 1:
152
+ single_model_name = list(valid_responses_for_summary.keys())[0]
153
+ logger.info(f"Only one valid perspective from {single_model_name}. Using it as the summary.")
154
+ return f"Based on the single available perspective from **{single_model_name.capitalize()}**:\n\n{list(valid_responses_for_summary.values())[0]}"
155
+
156
+ fusion_prompt = f"Original User Question: \"{original_query}\"\n\n"
157
+ fusion_prompt += "You are the Fusion Summary engine for ZOTHEOS. Your task is to read the multiple perspectives provided below and write a concise, balanced, and synthesized summary. Capture the most important points, common themes, and notable differences from all viewpoints. Your tone should be thoughtful, neutral, and respectful. Structure the summary clearly.\n\n"
158
+
159
+ for model_name, text in valid_responses_for_summary.items():
160
+ role = self.config["MODEL_ROLES"].get(model_name, "General")
161
+ fusion_prompt += f"--- PERSPECTIVE FROM {model_name.upper()} ({role.capitalize()}) ---\n{text}\n\n"
162
+
163
+ fusion_prompt += "--- SYNTHESIZED SUMMARY (Combine the perspectives above into a unified insight) ---\n"
164
+
165
+ logger.info(f"Calling summarizer model '{summarizer_model_key}' (from available: {self.active_model_names_in_order}) for fusion summary.")
166
+
167
+ if not self.model_manager:
168
+ logger.error("ModelManager not available for generating fusion summary.")
169
+ return "[Error: Summarizer service unavailable - ModelManager offline]"
170
+
171
+ try:
172
+ # The system prompt for the summarizer itself
173
+ summarizer_system_prompt = "You are an expert synthesis AI. Your role is to create a coherent and insightful summary from the provided texts."
174
+
175
+ summary_text = await self.model_manager.generate_with_model(
176
+ model_name=summarizer_model_key,
177
+ prompt=fusion_prompt, # The full context with all perspectives
178
+ preset_name="precise", # Use a precise preset for summarization
179
+ system_prompt=summarizer_system_prompt
180
+ )
181
+ if summary_text and not summary_text.startswith("[Error:"):
182
+ logger.info("✅ True fusion summary generated successfully.")
183
+ return summary_text.strip()
184
+ else:
185
+ logger.warning(f"Summarizer model '{summarizer_model_key}' returned an error or empty response: {summary_text}")
186
+ return "[Warning: Summary generation was partial or failed. Displaying raw perspectives.]"
187
+ except Exception as e:
188
+ logger.error(f"❌ Exception while generating true fusion summary with '{summarizer_model_key}': {e}", exc_info=True)
189
+ return f"[Error: The summary generation process failed. Exception: {type(e).__name__}]"
190
+
191
+ def _analyze_responses_basic(self, responses_dict: Dict[str, str], model_roles: Dict[str, str]) -> Dict[str, Any]:
192
+ valid_responses = {model: text for model, text in responses_dict.items() if text and not text.startswith("[Error:")}
193
+ consensus_points = []
194
+ if len(valid_responses) > 1: consensus_points.append("Multiple perspectives were gathered and synthesized.")
195
+ elif len(valid_responses) == 1: consensus_points.append("A single primary perspective was available for synthesis.")
196
+ else: consensus_points.append("No valid primary perspectives were available for synthesis.")
197
+ return {"consensus_points": consensus_points, "contradictions": [], "unique_insights": valid_responses}
198
+
199
+ def _synthesize_fusion_response(self, analysis_result: dict, model_roles: dict, raw_responses_dict: dict, final_summary_text: str, models_used_for_perspectives: List[str]) -> str:
200
+ response_parts = []
201
+
202
+ response_parts.append("## ✨ ZOTHEOS Final Synthesized Insight ✨")
203
+ response_parts.append(final_summary_text if final_summary_text and not final_summary_text.startswith(("[Error:", "[Warning:")) else "*Synthesis process encountered an issue or no summary was generated. Please see detailed perspectives below.*")
204
+ response_parts.append("\n---\n")
205
+
206
+ response_parts.append("### 💬 Detailed Individual Perspectives")
207
+ has_any_valid_perspectives = False
208
+ for model_name in models_used_for_perspectives:
209
+ text = raw_responses_dict.get(model_name)
210
+ role = model_roles.get(model_name, "General") # Default role
211
+ response_parts.append(f"**Perspective from {model_name.capitalize()} ({role.capitalize()}):**")
212
+ if text and not text.startswith("[Error:"):
213
+ response_parts.append(text.strip())
214
+ has_any_valid_perspectives = True
215
+ else:
216
+ response_parts.append(f"*{text if text else '[No response or error from this model.]'}*") # Display error or placeholder
217
+ response_parts.append("")
218
+ if not has_any_valid_perspectives and not (final_summary_text and not final_summary_text.startswith(("[Error:", "[Warning:"))):
219
+ # If summary also failed/empty and no valid individual perspectives.
220
+ response_parts = ["## ⚠️ ZOTHEOS Alert\n\nUnfortunately, ZOTHEOS encountered issues processing your query with all available AI cores for your tier. No insights could be gathered at this time. Please try rephrasing your query or try again later."]
221
+ elif not has_any_valid_perspectives : # Summary might be there, but no individual details.
222
+ response_parts.append("*No valid individual perspectives were successfully retrieved to display in detail.*")
223
+
224
+
225
+ return "\n".join(response_parts).strip()
226
+
227
+ async def process_query_with_fusion(
228
+ self,
229
+ query: str,
230
+ user_token: Optional[str] = None,
231
+ persona_key: Optional[str] = None,
232
+ fusion_mode_override: str = "balanced",
233
+ **kwargs
234
+ ) -> str:
235
+ process_start_time = time.time()
236
+ current_tier_name = get_user_tier(user_token if user_token else "")
237
+ tier_settings = self.config["TIER_CONFIG"].get(current_tier_name, self.config["TIER_CONFIG"]["free"])
238
+ tier_model_limit = tier_settings["model_limit"]
239
+ tier_memory_enabled = tier_settings["memory_enabled"]
240
+ logger.info(f"User Tier: '{current_tier_name}' ({tier_settings['display_name']}). Model Limit: {tier_model_limit}, Memory: {'Enabled' if tier_memory_enabled else 'Disabled'}.")
241
+
242
+ if not self.model_manager or not LLAMA_CPP_AVAILABLE: return "[Error: ZOTHEOS Core Model Manager not ready or Llama.cpp backend unavailable.]"
243
+ if not self.active_model_names_in_order: return "[Error: ZOTHEOS Core not ready. No models configured in MODEL_PATHS.]"
244
+ if not query or not query.strip(): return "[Error: Query is empty. Please provide a question or topic.]"
245
+
246
+ current_query_text = query
247
+ current_preset_name = fusion_mode_override if fusion_mode_override in INFERENCE_PRESETS else DEFAULT_INFERENCE_PRESET
248
+ base_persona_prompt = SYSTEM_PERSONAS.get(persona_key or "default", self.config["DEFAULT_SYSTEM_PROMPT"])
249
+
250
+ # Determine actual models to use based on tier limit and availability
251
+ models_to_use_for_perspectives = [m for m in self.active_model_names_in_order if m in MODEL_PATHS][:tier_model_limit]
252
+ self.models_last_queried_for_perspectives = models_to_use_for_perspectives # For status report
253
+
254
+ if not models_to_use_for_perspectives:
255
+ logger.error(f"No models available for tier '{current_tier_name}' after applying limit of {tier_model_limit}.")
256
+ return f"[Error: No models available for your current tier ('{current_tier_name}').]"
257
+
258
+ logger.info(f"🔎 Processing query. Models for perspectives (Tier: {current_tier_name}): {models_to_use_for_perspectives}. Preset: '{current_preset_name}'. Query: '{current_query_text[:60].replace(chr(10),' ')}...'")
259
+
260
+ raw_responses_dict: Dict[str, str] = {}
261
+ individual_results_for_memory: List[Dict[str, Any]] = []
262
+ successful_responses = 0
263
+
264
+ for model_name in models_to_use_for_perspectives:
265
+ model_role = self.config["MODEL_ROLES"].get(model_name, "general")
266
+ system_prompt_for_model = self.config["MODEL_ROLE_SYSTEM_PROMPTS"].get(model_role, base_persona_prompt)
267
+ query_for_this_model = current_query_text
268
+ if model_name.lower() == "gemma" and system_prompt_for_model:
269
+ query_for_this_model = f"<start_of_turn>user\n{system_prompt_for_model.strip()}\n{current_query_text}<end_of_turn>\n<start_of_turn>model\n"
270
+ system_prompt_for_model = ""
271
+ model_output_data = await self._get_single_model_response_direct(model_name, query_for_this_model, system_prompt_for_model, current_preset_name)
272
+ individual_results_for_memory.append(model_output_data)
273
+ raw_responses_dict[model_name] = model_output_data.get("text", "[Error: No text field in response data]")
274
+ if model_output_data.get("status") == "Success":
275
+ successful_responses += 1
276
+
277
+ synthesized_summary_text = await self._generate_true_fusion_summary(raw_responses_dict, current_query_text, models_to_use_for_perspectives)
278
+ analysis_result = self._analyze_responses_basic(raw_responses_dict, self.config["MODEL_ROLES"]) # Basic analysis for now
279
+ final_fused_output_content = self._synthesize_fusion_response(analysis_result, self.config["MODEL_ROLES"], raw_responses_dict, synthesized_summary_text, models_to_use_for_perspectives)
280
+
281
+ persona_display = (persona_key or "default").capitalize()
282
+ mode_display = current_preset_name.capitalize()
283
+ tier_display_name = tier_settings.get("display_name", current_tier_name.capitalize())
284
+ final_header = f"## 🧠 ZOTHEOS Fused Perspectives 🧠\n*(Fusion Mode: {mode_display} | Persona: {persona_display} | Tier: {tier_display_name})*\n\n"
285
+ final_fused_output = final_header + final_fused_output_content
286
+
287
+ if successful_responses == 0 and "[Error:" not in final_fused_output_content and "[Warning:" not in final_fused_output_content:
288
+ logger.error(f"All models ({len(models_to_use_for_perspectives)}) failed for tier '{current_tier_name}'.")
289
+ final_fused_output = final_header + "[Critical Error: ZOTHEOS was unable to obtain any valid responses from its AI cores for this query.]\n\n" + final_fused_output_content.split("\n\n",1)[-1]
290
+
291
+ if tier_memory_enabled:
292
+ if self.memory_bank:
293
+ try:
294
+ memory_metadata = {
295
+ "user_token_used_prefix": user_token[:3] + "***" if user_token and len(user_token) > 3 else "N/A (No Token)" if not user_token else user_token,
296
+ "tier_at_interaction": current_tier_name,
297
+ "persona_key": persona_key or "default", "fusion_mode_used": current_preset_name,
298
+ # timestamp_iso is now added within store_memory_async
299
+ "duration_seconds": round(time.time() - process_start_time, 3),
300
+ "active_models_queried": models_to_use_for_perspectives, # Models actually used for perspectives
301
+ "individual_model_outputs": individual_results_for_memory, # Detailed dicts
302
+ "synthesized_summary_text": synthesized_summary_text, # The AI-generated summary
303
+ "fused_response_length_chars": len(final_fused_output),
304
+ "successful_model_responses": successful_responses,
305
+ "total_models_queried": len(models_to_use_for_perspectives)
306
+ }
307
+ await self.memory_bank.store_memory_async(query=current_query_text, response=final_fused_output, metadata=memory_metadata)
308
+ except Exception as e_mem: logger.error(f"Failed to store fusion interaction in MemoryBank (Tier: '{current_tier_name}'): {e_mem}", exc_info=True)
309
+ else: logger.warning(f"MemoryBank not initialized. Skipping storage (Tier: '{current_tier_name}').")
310
+ else: logger.info(f"Memory storage disabled for tier '{current_tier_name}'. Skipping storage.")
311
+
312
+ total_processing_time = round(time.time() - process_start_time, 2)
313
+ logger.info(f"🧠 Fusion complete in {total_processing_time}s. Output len: {len(final_fused_output)}. Models used: {len(models_to_use_for_perspectives)} (Tier: {current_tier_name}).")
314
+ return final_fused_output
315
+
316
+ async def get_status_report(self) -> Dict[str, Any]:
317
+ report: Dict[str, Any] = {
318
+ "status": "Full Multi-Model Fusion Mode Active",
319
+ "fusion_engine_status": "Online" if self.model_manager and self.active_model_names_in_order and LLAMA_CPP_AVAILABLE else "Degraded/Offline",
320
+ "all_available_models": self.active_model_names_in_order,
321
+ "models_last_queried_for_perspectives": getattr(self, 'models_last_queried_for_perspectives', []),
322
+ "model_manager_status": "Online" if self.model_manager else "Offline/Init Failed",
323
+ "llama_cpp_backend_available": LLAMA_CPP_AVAILABLE,
324
+ "memory_bank_status": "Online" if self.memory_bank else "Offline/Init Failed"
325
+ }
326
+ if self.model_manager and hasattr(self.model_manager, 'get_loaded_model_stats'):
327
+ try: report["model_manager_runtime_stats"] = self.model_manager.get_loaded_model_stats()
328
+ except Exception as e: report["model_manager_runtime_stats"] = f"Error getting MM stats: {e}"
329
+ else: report["model_manager_runtime_stats"] = "ModelManager N/A for stats."
330
+
331
+ if self.memory_bank and hasattr(self.memory_bank, 'get_memory_stats'):
332
+ try: report["memory_bank_stats"] = await self.memory_bank.get_memory_stats()
333
+ except Exception as e: report["memory_bank_stats"] = f"Error getting MB stats: {e}"
334
+ else: report["memory_bank_stats"] = "MemoryBank N/A for stats."
335
+ return report
336
+
337
+ if __name__ == "__main__":
338
+ if os.getenv("ZOTHEOS_DEBUG_DEPS", "false").lower() != "true":
339
+ for lib_logger_name in ["torch", "huggingface_hub", "psutil", "llama_cpp", "httpx", "PIL"]: logging.getLogger(lib_logger_name).setLevel(logging.WARNING)
340
+ logger.setLevel(logging.DEBUG)
341
+ logger.info("--- MainFusionPublic (Tier Logic & Async Summary) CLI Test ---")
342
+ async def run_main_fusion_cli_test_with_token(test_token=None, token_desc="Default (Free Tier)"):
343
+ main_fusion_instance: Optional[MainFusionPublic] = None
344
+ logger.info(f"\n--- Testing with token: '{token_desc}' ---")
345
+ try:
346
+ main_fusion_instance = MainFusionPublic(device_preference="cuda")
347
+ if not main_fusion_instance.model_manager or not main_fusion_instance.active_model_names_in_order or not LLAMA_CPP_AVAILABLE:
348
+ logger.critical("CLI Test Aborted: MainFusion init failed (MM or LlamaCPP unavailable)."); return
349
+ test_query = "What are the core principles of Stoicism and how can they be applied in modern life?"
350
+ logger.info(f"CLI Test: Querying (Token: {test_token[:3] + '...' if test_token and len(test_token)>3 else test_token}): '{test_query}'")
351
+ response = await main_fusion_instance.process_query_with_fusion(query=test_query, user_token=test_token, persona_key="philosopher", fusion_mode_override="balanced")
352
+ print("\n" + "="*25 + f" CLI Test Response ({token_desc}) " + "="*25); print(response); print("="* (50 + len(f" CLI Test Response ({token_desc}) ") + 2))
353
+ status = await main_fusion_instance.get_status_report()
354
+ print("\nSystem Status Report After Query:"); print(json.dumps(status, indent=2, default=str))
355
+ except Exception as e: logger.critical(f"Error during CLI test ({token_desc}): {e}", exc_info=True); print(f"🚨 CLI Test Error ({token_desc}): {e}")
356
+ finally:
357
+ if main_fusion_instance and main_fusion_instance.model_manager and hasattr(main_fusion_instance.model_manager, 'shutdown'):
358
+ logger.info(f"CLI Test ({token_desc}): Shutting down ModelManager...")
359
+ main_fusion_instance.model_manager.shutdown() # This is synchronous
360
+ logger.info(f"🛑 MainFusion CLI test ({token_desc}) shutdown.")
361
+ async def run_all_cli_tests():
362
+ # Ensure stripe_users.json is in project root for this test to work with tokens
363
+ tokens_to_test = {
364
+ None: "No Token (Defaults to Free)",
365
+ "TOKEN_FOR_FREE_TEST": "Free Tier Token", # Add this to your stripe_users.json
366
+ "TOKEN_FOR_STARTER_TEST": "Starter Tier Token", # Add this
367
+ "TOKEN_FOR_PRO_TEST": "Pro Tier Token" # Add this
368
+ }
369
+ # Create dummy stripe_users.json if not exists for test
370
+ if not os.path.exists(os.path.join(project_root, "stripe_users.json")):
371
+ logger.warning("Creating dummy stripe_users.json for CLI test.")
372
+ dummy_users = {t: t.split('_')[2].lower() for t in tokens_to_test if t} # e.g. "TOKEN_FOR_FREE_TEST" -> "free"
373
+ with open(os.path.join(project_root, "stripe_users.json"), "w") as f:
374
+ json.dump(dummy_users, f, indent=2)
375
+
376
+ for token, desc in tokens_to_test.items():
377
+ await run_main_fusion_cli_test_with_token(token, desc)
378
+
379
+ asyncio.run(run_all_cli_tests())
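A minimal caller sketch for the engine above, mirroring the CLI test: it assumes at least one model from MODEL_PATHS is present (or downloadable in web mode) and that llama-cpp-python is available; ask is a hypothetical wrapper name:

import asyncio
from modules.main_fusion_public import MainFusionPublic

async def ask(question: str) -> str:
    fusion = MainFusionPublic(device_preference="cuda")  # starts in a degraded state if models are missing
    try:
        return await fusion.process_query_with_fusion(
            query=question,
            user_token=None,                 # no token -> free tier (2 models, memory disabled)
            persona_key="default",
            fusion_mode_override="balanced",
        )
    finally:
        if fusion.model_manager and hasattr(fusion.model_manager, "shutdown"):
            fusion.model_manager.shutdown()

print(asyncio.run(ask("What are the trade-offs of local versus hosted inference?")))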
modules/memory_bank_public.py ADDED
@@ -0,0 +1,354 @@
1
+ # FILE: modules/memory_bank_public.py (Finalized for Beta Checklist & Async Correctness)
2
+
3
+ import os
4
+ import json
5
+ import time
6
+ import asyncio
7
+ import logging
8
+ import traceback # For more detailed error logging if needed
9
+ from typing import List, Dict, Optional, Any
10
+ from json import JSONDecodeError
11
+ from datetime import datetime, timezone # For ISO timestamps
12
+ from pathlib import Path # For home directory in export
13
+ import sys # For logger setup if run standalone
14
+
15
+ logger = logging.getLogger("ZOTHEOS_MemoryBank")
16
+ if not logger.handlers:
17
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
18
+ handler = logging.StreamHandler(sys.stdout)
19
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
20
+ handler.setFormatter(formatter)
21
+ logger.addHandler(handler)
22
+ logger.setLevel(logging.INFO)
23
+
24
+ # --- Define memory file paths and limits ---
25
+ try:
26
+ # Path logic for when memory_bank_public.py is in 'modules' folder
27
+ # and data directory is in project root ('../zotheos_public_data')
28
+ current_file_dir = os.path.dirname(os.path.abspath(__file__))
29
+ project_root_dir = os.path.dirname(current_file_dir) # This is ZOTHEOS_Release_Package
30
+
31
+ # Check if running from PyInstaller bundle
32
+ if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
33
+ # If bundled, data directory is relative to sys._MEIPASS (the temp extraction folder)
34
+ DATA_BASE_DIR = os.path.join(sys._MEIPASS, "zotheos_public_data")
35
+ else:
36
+ # If running as script, data directory is relative to project root
37
+ DATA_BASE_DIR = os.path.join(project_root_dir, "zotheos_public_data")
38
+
39
+ if not os.path.exists(DATA_BASE_DIR):
40
+ os.makedirs(DATA_BASE_DIR, exist_ok=True)
41
+ logger.info(f"Created data base directory: {DATA_BASE_DIR}")
42
+
43
+ MEMORY_DIR = os.path.join(DATA_BASE_DIR, "zotheos_memory")
44
+ if not os.path.exists(MEMORY_DIR):
45
+ os.makedirs(MEMORY_DIR, exist_ok=True)
46
+ logger.info(f"Created memory directory: {MEMORY_DIR}")
47
+
48
+ except Exception as e:
49
+ logger.critical(f"❌ Failed to setup base/memory directory for MemoryBank: {e}. Using fallback in user home.")
50
+ MEMORY_DIR = os.path.join(os.path.expanduser("~"), ".zotheos", "zotheos_memory")
51
+ os.makedirs(MEMORY_DIR, exist_ok=True)
52
+
53
+
54
+ MEMORY_FILE_PATH = os.path.join(MEMORY_DIR, "zotheos_memory.json")
55
+ MEMORY_FILE_TMP = os.path.join(MEMORY_DIR, "zotheos_memory_tmp.json")
56
+ MEMORY_SIZE_LIMIT = 1000
57
+ MEMORY_SCHEMA_VERSION = 1.2 # Bumped for metadata structure in entry
58
+
59
+ class MemoryBank:
60
+ def __init__(self):
61
+ self.memory_list: List[Dict[str, Any]] = []
62
+ self.memory_dict: Dict[str, Dict[str, Any]] = {} # For ID-based lookups, ID should be string
63
+ self.next_id = 0
64
+ logger.info(f"🧠 Initializing Memory Bank. Memory file: {MEMORY_FILE_PATH}")
65
+ self._load_memory() # Load initial state
66
+
67
+ if self.memory_list:
68
+ try:
69
+ # Ensure IDs are treated as integers for max() if they are numeric strings
70
+ numeric_ids = [int(m.get('id', -1)) for m in self.memory_list if str(m.get('id', '')).isdigit()]
71
+ if numeric_ids:
72
+ max_id = max(numeric_ids)
73
+ self.next_id = max(max_id + 1, len(self.memory_list))
74
+ else: # No numeric IDs found
75
+ self.next_id = len(self.memory_list)
76
+ except ValueError: # Fallback if conversion to int fails for some reason
77
+ self.next_id = len(self.memory_list)
78
+ logger.info(f"Loaded {len(self.memory_list)} memories. Next ID set to {self.next_id}.")
79
+ else:
80
+ logger.info("Initialized with empty memory.")
81
+
82
+ def _reset_memory_state(self):
83
+ self.memory_list = []
84
+ self.memory_dict = {}
85
+ self.next_id = 0
86
+ logger.info("Memory state has been reset.")
87
+
88
+ def _load_memory(self):
89
+ logger.info(f"Attempting to load memory from {MEMORY_FILE_PATH}...")
90
+ try:
91
+ if os.path.exists(MEMORY_FILE_PATH):
92
+ with open(MEMORY_FILE_PATH, "r", encoding="utf-8") as file:
93
+ data = json.load(file)
94
+ if isinstance(data, dict) and "entries" in data and isinstance(data["entries"], list):
95
+ self.memory_list = data["entries"]
96
+ # Rebuild dictionary, ensuring IDs are strings for keys
97
+ self.memory_dict = {str(m['id']): m for m in self.memory_list if m.get('id') is not None}
98
+ logger.info(f"✅ Successfully loaded {len(self.memory_list)} memory entries (schema version: {data.get('schema_version', 'Unknown')}).")
99
+ elif isinstance(data, list):
100
+ logger.warning(f"Old memory format (list) detected. Converting and saving in new format.")
101
+ self.memory_list = data
102
+ self.memory_dict = {str(m.get('id','')): m for m in self.memory_list if m.get('id') is not None}
103
+ self._save_memory() # Save immediately in new format
104
+ else:
105
+ logger.warning(f"⚠️ Memory file {MEMORY_FILE_PATH} has an unexpected main structure. Resetting memory.")
106
+ self._reset_memory_state()
107
+ else:
108
+ logger.info(f"✅ No existing memory file found at {MEMORY_FILE_PATH}. Starting fresh.")
109
+ self._reset_memory_state()
110
+ except JSONDecodeError as e:
111
+ logger.error(f"❌ Error decoding JSON from memory file {MEMORY_FILE_PATH}: {e}. File might be corrupted. Resetting memory.", exc_info=False)
112
+ self._handle_corrupted_memory_file()
113
+ except FileNotFoundError:
114
+ logger.info(f"✅ Memory file {MEMORY_FILE_PATH} not found. Starting fresh (FileNotFound).")
115
+ self._reset_memory_state()
116
+ except Exception as e:
117
+ logger.error(f"❌ Unexpected error loading memory: {e}. Resetting memory.", exc_info=True)
118
+ self._reset_memory_state()
119
+
120
+ def _handle_corrupted_memory_file(self):
121
+ self._reset_memory_state()
122
+ if os.path.exists(MEMORY_FILE_PATH):
123
+ try:
124
+ corrupt_backup_path = f"{MEMORY_FILE_PATH}.corrupt_{datetime.now().strftime('%Y%m%d%H%M%S')}"
125
+ os.rename(MEMORY_FILE_PATH, corrupt_backup_path)
126
+ logger.info(f"Backed up corrupted memory file to {corrupt_backup_path}")
127
+ except OSError as backup_err:
128
+ logger.error(f"Failed to backup corrupted memory file: {backup_err}")
129
+
130
+ def _save_memory(self): # This is synchronous
131
+ logger.debug(f"Attempting atomic save to {MEMORY_FILE_PATH}...")
132
+ try:
133
+ data_to_save = {"schema_version": MEMORY_SCHEMA_VERSION, "entries": self.memory_list}
134
+ with open(MEMORY_FILE_TMP, "w", encoding="utf-8") as file:
135
+ json.dump(data_to_save, file, indent=2)
136
+ os.replace(MEMORY_FILE_TMP, MEMORY_FILE_PATH)
137
+ logger.info(f"✅ Memory saved successfully ({len(self.memory_list)} entries).")
138
+ except Exception as e:
139
+ logger.error(f"❌ Error saving memory: {e}", exc_info=True)
140
+ if os.path.exists(MEMORY_FILE_TMP):
141
+ try: os.remove(MEMORY_FILE_TMP)
142
+ except OSError: pass
143
+ finally:
144
+ if os.path.exists(MEMORY_FILE_TMP):
145
+ try: os.remove(MEMORY_FILE_TMP)
146
+ except OSError as e_rem: logger.warning(f"Could not remove temp memory file {MEMORY_FILE_TMP}: {e_rem}")
147
+
148
+ async def save_memory_async(self):
149
+ logger.debug("Scheduling asynchronous memory save...")
150
+ await asyncio.to_thread(self._save_memory)
151
+
152
+ async def store_memory_async(self, query: str, response: str, metadata: Optional[Dict[str, Any]] = None):
153
+ if not query or not response:
154
+ logger.warning("⚠️ Attempted to store empty query or response. Skipping.")
155
+ return
156
+ try:
157
+ current_id_num = self.next_id
158
+ current_metadata = metadata.copy() if metadata is not None else {} # Work with a copy
159
+
160
+ # Ensure a proper ISO-formatted timestamp is in metadata
161
+ current_metadata['timestamp_iso'] = datetime.now(timezone.utc).isoformat()
162
+
163
+ memory_entry = {
164
+ 'id': str(current_id_num),
165
+ 'query': query,
166
+ 'response': response, # This is the full_fused_output from main_fusion
167
+ 'created_at_unix': time.time(), # Retain for fallback sorting
168
+ 'schema_version': MEMORY_SCHEMA_VERSION,
169
+ 'metadata': current_metadata # This now contains timestamp_iso and other details
170
+ }
171
+ self.next_id += 1
172
+ self.memory_list.append(memory_entry)
173
+ self.memory_dict[str(current_id_num)] = memory_entry
174
+ logger.info(f"Stored memory entry ID {current_id_num}.")
175
+
176
+ removed_count = 0
177
+ while len(self.memory_list) > MEMORY_SIZE_LIMIT:
178
+ oldest_memory = self.memory_list.pop(0)
179
+ oldest_id = str(oldest_memory.get('id', ''))
180
+ if oldest_id and oldest_id in self.memory_dict: del self.memory_dict[oldest_id]
181
+ removed_count += 1
182
+ if removed_count > 0: logger.info(f"Removed {removed_count} oldest entries for size limit.")
183
+ await self.save_memory_async()
184
+ except Exception as e: logger.error(f"❌ Error storing memory entry: {e}", exc_info=True)
185
+
186
+ async def retrieve_recent_memories_async(self, limit: int = 5) -> List[Dict[str, Any]]:
187
+ logger.debug(f"Retrieving up to {limit} recent memories, sorted.")
188
+ if not self.memory_list: return []
189
+ try:
190
+ def get_sort_key(entry):
191
+ ts_iso = entry.get('metadata', {}).get('timestamp_iso')
192
+ # Fallback to created_at_unix if timestamp_iso is missing or unparsable
193
+ if ts_iso:
194
+ try: return datetime.fromisoformat(ts_iso.replace('Z', '+00:00'))
195
+ except ValueError:
196
+ logger.warning(f"Could not parse timestamp_iso '{ts_iso}' for entry ID {entry.get('id')}. Falling back to created_at_unix.")
197
+ pass # Fall through to use created_at_unix
198
+ return datetime.fromtimestamp(entry.get('created_at_unix', 0), timezone.utc)
199
+
200
+ # Make a copy for sorting to avoid modifying self.memory_list if other operations occur
201
+ sorted_entries = sorted(list(self.memory_list), key=get_sort_key, reverse=True)
202
+
203
+ actual_limit = max(0, min(limit, len(sorted_entries)))
204
+ recent_sorted_memories = sorted_entries[:actual_limit]
205
+ logger.info(f"Retrieved {len(recent_sorted_memories)} recent memories (sorted).")
206
+ return recent_sorted_memories
207
+ except Exception as e:
208
+ logger.error(f"❌ Error retrieving and sorting recent memories: {e}", exc_info=True)
209
+ # Fallback to unsorted last N if sorting fails
210
+ return self.memory_list[-limit:][::-1] if limit > 0 and self.memory_list else []
211
+
212
+ def load_all_memory_entries_structured(self) -> List[Dict[str, Any]]:
213
+ logger.info(f"Loading all {len(self.memory_list)} memory entries (structured).")
214
+ return list(self.memory_list)
215
+
216
+ async def load_all_memory_entries_structured_async(self) -> List[Dict[str, Any]]:
217
+ logger.info(f"Asynchronously loading all {len(self.memory_list)} memory entries (structured).")
218
+ return list(self.memory_list) # For now, direct copy is fine as it's in-memory
219
+
220
+ async def retrieve_memory_by_id(self, memory_id: str) -> Optional[Dict[str, Any]]:
221
+ memory_id_str = str(memory_id)
222
+ try:
223
+ memory = self.memory_dict.get(memory_id_str)
224
+ if memory: logger.info(f"Retrieved memory entry ID {memory_id_str}.")
225
+ else: logger.warning(f"Memory ID {memory_id_str} not found in dictionary.")
226
+ return memory
227
+ except Exception as e: logger.error(f"❌ Error retrieving memory ID {memory_id_str}: {e}", exc_info=True); return None
228
+
229
+ async def retrieve_last_response(self) -> Optional[str]:
230
+ if not self.memory_list: logger.info("Retrieve last response: Memory list is empty."); return None
231
+ try:
232
+ last_entry = self.memory_list[-1]; response = last_entry.get('response')
233
+ if isinstance(response, str) and response.strip(): logger.info("Retrieve last response: Found valid response."); return response
234
+ else: logger.warning(f"Retrieve last response: Last entry (ID {last_entry.get('id', 'N/A')}) has empty response."); return None
235
+ except Exception as e: logger.error(f"❌ Error retrieving last response: {e}", exc_info=True); return None
236
+
237
+ async def clear_all_memory(self):
238
+ logger.warning("Initiating complete memory wipe...")
239
+ try: self._reset_memory_state(); await self.save_memory_async(); logger.info("✅ All memory cleared successfully."); return True
240
+ except Exception as e: logger.error(f"❌ Error clearing memory: {e}", exc_info=True); return False
241
+
242
+ async def delete_memory_by_id(self, memory_id: str):
243
+ logger.warning(f"Attempting to delete memory ID {memory_id}...")
244
+ memory_id_str = str(memory_id)
245
+ try:
246
+ if memory_id_str in self.memory_dict:
247
+ del self.memory_dict[memory_id_str]
248
+ self.memory_list = [m for m in self.memory_list if str(m.get('id', '')) != memory_id_str]
249
+ await self.save_memory_async()
250
+ logger.info(f"✅ Memory with ID {memory_id_str} deleted successfully.")
251
+ return True
252
+ else:
253
+ logger.warning(f"⚠️ Memory ID {memory_id_str} not found for deletion.")
254
+ return False
255
+ except Exception as e:
256
+ logger.error(f"❌ Error deleting memory ID {memory_id_str}: {e}", exc_info=True)
257
+ return False
258
+
259
+ async def get_memory_stats(self) -> Dict[str, Any]: # This is now async
260
+ logger.info("Calculating memory statistics...")
261
+ stats: Dict[str, Any] = {'total_entries': len(self.memory_list), 'disk_usage_mb': 0.0, 'memory_limit': MEMORY_SIZE_LIMIT, 'next_id': self.next_id, 'schema_version': MEMORY_SCHEMA_VERSION }
262
+ try:
263
+ if os.path.exists(MEMORY_FILE_PATH):
264
+ file_size_bytes = await asyncio.to_thread(os.path.getsize, MEMORY_FILE_PATH)
265
+ stats['disk_usage_mb'] = round(file_size_bytes / (1024 * 1024), 3)
266
+ logger.info(f"Memory Stats: {stats}")
267
+ except Exception as e: logger.error(f"❌ Error calculating memory file size: {e}", exc_info=True)
268
+ return stats
269
+
270
+ def export_memory_to_file_sync(self) -> Optional[str]:
271
+ """Synchronously exports memory file. Returns exported file path or None."""
272
+ # NOTE (Future Personalization): Consider allowing user to choose export location via UI dialog.
273
+ if not os.path.exists(MEMORY_FILE_PATH):
274
+ logger.warning("No memory file to export because it doesn't exist.")
275
+ return None
276
+ if not self.memory_list: # Also check if there are any entries to export
277
+ logger.warning("No memory entries to export, memory file might be empty or just schema.")
278
+ # Decide if you want to export an empty "entries" file or return None
279
+ # For now, let's allow exporting an empty structure.
280
+ # return None
281
+
282
+ try:
283
+ export_dir = Path.home() / "Desktop"
284
+ if not (export_dir.exists() and export_dir.is_dir()):
285
+ export_dir = Path.home() / "Downloads"
286
+ if not (export_dir.exists() and export_dir.is_dir()):
287
+ export_dir = Path(MEMORY_DIR) # Fallback
288
+ logger.warning(f"Desktop/Downloads not found/accessible, exporting to memory directory: {export_dir}")
289
+ os.makedirs(export_dir, exist_ok=True) # Ensure export_dir exists
290
+
291
+ timestamp_str = datetime.now().strftime('%Y%m%d_%H%M%S')
292
+ export_filename = f"zotheos_memory_export_{timestamp_str}.json"
293
+ export_full_path = export_dir / export_filename
294
+
295
+ import shutil
296
+ shutil.copy2(MEMORY_FILE_PATH, export_full_path)
297
+
298
+ logger.info(f"✅ Memory successfully exported to: {export_full_path}")
299
+ return str(export_full_path)
300
+ except Exception as e:
301
+ logger.error(f"❌ Failed to export memory: {e}", exc_info=True)
302
+ return None
303
+
304
+ async def export_memory_to_file_async(self) -> Optional[str]:
305
+ """Asynchronously exports the memory file."""
306
+ logger.info("Scheduling asynchronous memory export...")
307
+ return await asyncio.to_thread(self.export_memory_to_file_sync)
308
+
309
+
310
+ async def main_test():
311
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
312
+ logger.info("--- MemoryBank Test ---")
313
+ mb = MemoryBank()
314
+
315
+ logger.info(f"Initial memory file path: {MEMORY_FILE_PATH}")
316
+ if os.path.exists(MEMORY_FILE_PATH):
317
+ logger.info("Memory file exists. Clearing for fresh test.")
318
+ await mb.clear_all_memory()
319
+ else:
320
+ logger.info("No pre-existing memory file found for test.")
321
+
322
+ stats1 = await mb.get_memory_stats()
323
+ logger.info(f"Stats after potential clear: {stats1}")
324
+
325
+ # Test Store
326
+ await mb.store_memory_async("Query 1", "Response 1", metadata={"custom_field": "value1", "tier_at_interaction": "free"})
327
+ await asyncio.sleep(0.01)
328
+ await mb.store_memory_async("Query 2", "Response 2", metadata={"user_token_used_prefix": "tes***", "synthesized_summary_text": "Summary for Q2"})
329
+ await asyncio.sleep(0.01)
330
+ await mb.store_memory_async("Query 3", "Response 3", metadata={"tier_at_interaction": "pro", "synthesized_summary_text": "Summary for Q3"})
331
+ await asyncio.sleep(0.01)
332
+ await mb.store_memory_async("Query 4", "Response 4", metadata={}) # No extra metadata
333
+ await asyncio.sleep(0.01)
334
+ await mb.store_memory_async("Query 5", "Response 5", metadata={"synthesized_summary_text": "This is summary 5."})
335
+ await asyncio.sleep(0.01)
336
+ await mb.store_memory_async("Query 6", "Response 6", metadata={"synthesized_summary_text": "This is summary 6, a bit longer than the preview."})
337
+
338
+ recent_for_display = await mb.retrieve_recent_memories_async(limit=5)
339
+ logger.info(f"Recent 5 (for display, should be newest first - Q6, Q5, Q4, Q3, Q2):")
340
+ for i, item in enumerate(recent_for_display):
341
+ ts = item.get('metadata',{}).get('timestamp_iso', item.get('created_at_unix'))
342
+ logger.info(f" {i+1}. ID: {item.get('id')}, Timestamp: {ts}, Query: {item.get('query')[:20]}..., Summary in meta: {'synthesized_summary_text' in item.get('metadata', {})}")
343
+
344
+ stats2 = await mb.get_memory_stats()
345
+ logger.info(f"Memory Stats after storing: {stats2}")
346
+
347
+ exported_file = await mb.export_memory_to_file_async()
348
+ if exported_file: logger.info(f"Test export successful: {exported_file}")
349
+ else: logger.error("Test export failed.")
350
+
351
+ logger.info("--- MemoryBank Test Complete ---")
352
+
353
+ if __name__ == "__main__":
354
+ asyncio.run(main_test())
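For orientation, a read-only sketch of the file layout _save_memory() produces (a top-level dict with "schema_version" and an "entries" list of {id, query, response, created_at_unix, metadata}); it assumes at least one interaction has already been stored at MEMORY_FILE_PATH:

import json
from modules.memory_bank_public import MEMORY_FILE_PATH

with open(MEMORY_FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

print("schema:", data.get("schema_version"), "| entries:", len(data.get("entries", [])))
for entry in data.get("entries", [])[-5:]:  # five most recently appended entries
    ts = entry.get("metadata", {}).get("timestamp_iso", "n/a")
    print(entry["id"], ts, entry["query"][:40])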
modules/model_manager_public.py ADDED
@@ -0,0 +1,401 @@
1
+ # FILE: modules/model_manager_public.py
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ import sys
7
+ import time
8
+ import threading
9
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
10
+ from typing import Dict, Any, Optional, List, Callable, Coroutine
11
+
12
+ # --- Llama.cpp Python Backend Import & Debug ---
13
+ logger_import_debug = logging.getLogger("ModelManager_ImportDebug")
14
+ if not logger_import_debug.handlers: # Minimal setup for this specific logger
15
+ _h = logging.StreamHandler(sys.stdout)
16
+ _f = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
17
+ _h.setFormatter(_f)
18
+ logger_import_debug.addHandler(_h)
19
+ logger_import_debug.setLevel(logging.INFO)
20
+
21
+ # --- Start of FIX: Define LlamaMock unconditionally and manage Llama variable ---
22
+ class LlamaMock:
23
+ def __init__(self, model_path: str = "mock_model_path", *args, **kwargs):
24
+ # Use logger_import_debug as it's defined at this point
25
+ logger_import_debug.error(f"LlamaMock initialized for model_path='{model_path}' with args: {args}, kwargs: {kwargs}. llama_cpp is not installed or importable.")
26
+ self.model_path = model_path
27
+ self.n_ctx_train = kwargs.get('n_ctx', 0) # Mock common attributes
28
+ self.metadata = {}
29
+
30
+ def create_chat_completion(self, messages: List[Dict[str,str]], *args, **kwargs) -> Dict[str, Any]:
31
+ logger_import_debug.error(f"LlamaMock: create_chat_completion called for '{self.model_path}', but llama_cpp is unavailable.")
32
+ return {
33
+ "id": "chatcmpl-mock", "object": "chat.completion", "created": int(time.time()),
34
+ "model": self.model_path,
35
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": f"[Error: Llama.cpp backend not available for {self.model_path}]"}, "finish_reason": "stop"}],
36
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
37
+ }
38
+
39
+ def __call__(self, prompt: str, *args, **kwargs) -> Dict[str, Any]: # For older direct call style
40
+ logger_import_debug.error(f"LlamaMock: __call__ used for '{self.model_path}', but llama_cpp is unavailable.")
41
+ return {
42
+ "id": "cmpl-mock", "object": "text_completion", "created": int(time.time()),
43
+ "model": self.model_path,
44
+ "choices": [{"text": f"[Error: Llama.cpp backend not available for {self.model_path}]", "index": 0, "logprobs": None, "finish_reason": "stop"}],
45
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
46
+ }
47
+
48
+ def get_metadata(self) -> Dict[str, Any]:
49
+ return self.metadata
50
+
51
+ def tokenizer(self) -> Any:
52
+ class MockTokenizer:
53
+ def encode(self, text: str) -> List[int]: return [0] * len(text)
54
+ def decode(self, tokens: List[int]) -> str: return "".join([chr(t) for t in tokens if t < 256]) # Simplistic
55
+ return MockTokenizer()
56
+
57
+ # Initialize variables that will hold the class to be used
58
+ Llama_imported_class: Any = LlamaMock # Default to mock
59
+ LlamaCppError_imported_class: Any = Exception # Default to base Exception
60
+ LLAMA_CPP_AVAILABLE: bool = False
61
+
62
+ try:
63
+ from llama_cpp import Llama as RealLlama, LlamaCppError as RealLlamaCppSpecificError
64
+ Llama_imported_class = RealLlama
65
+ LlamaCppError_imported_class = RealLlamaCppSpecificError
66
+ LLAMA_CPP_AVAILABLE = True
67
+ logger_import_debug.info("🎉 Final llama-cpp-python status: AVAILABLE. Using REAL Llama class. Error catching for Llama.cpp specifics will use 'LlamaCppSpecificError'.")
68
+ except ImportError:
69
+ logger_import_debug.warning("Could not import 'Llama' and 'LlamaCppSpecificError' from 'llama_cpp' directly. Trying simpler import...")
70
+ try:
71
+ from llama_cpp import Llama as RealLlamaOnly # Try again without specific error import (older versions)
72
+ Llama_imported_class = RealLlamaOnly
73
+ # LlamaCppError_imported_class remains Exception
74
+ LLAMA_CPP_AVAILABLE = True
75
+ logger_import_debug.warning("Note: 'LlamaCppError' not found at llama_cpp top-level. Using base 'Exception' for LlamaCpp errors.")
76
+ logger_import_debug.info("🎉 Final llama-cpp-python status: AVAILABLE (LlamaCppError not found). Using REAL Llama class.")
77
+ except ImportError:
78
+ # Llama_imported_class remains LlamaMock, LLAMA_CPP_AVAILABLE remains False
79
+ logger_import_debug.critical("CRITICAL FAILURE: Cannot import 'Llama' from 'llama_cpp': No module named 'llama_cpp'. Using LlamaMock. Model loading will fail for real models.")
80
+ except Exception as e_import_general:
81
+ # Llama_imported_class remains LlamaMock, LLAMA_CPP_AVAILABLE remains False
82
+ logger_import_debug.critical(f"CRITICAL FAILURE: Unexpected error during 'llama_cpp' import: {e_import_general}. Using LlamaMock.", exc_info=True)
83
+
84
+ # Assign to the names used throughout the rest of this module
85
+ Llama = Llama_imported_class
86
+ LlamaCppError = LlamaCppError_imported_class
87
+ # --- End of FIX ---
88
+ # --- End Llama.cpp Python Backend Import & Debug ---
89
+
90
+
91
+ try:
92
+ # Assume config_settings_public.py is in the same 'modules' directory or project root is in sys.path
93
+ from modules.config_settings_public import (
94
+ MODEL_PATHS, MODEL_SPECIFIC_PARAMS, INFERENCE_PRESETS, N_CTX_FALLBACK, VERBOSE_LLAMA_CPP
95
+ )
96
+ except ImportError as e_conf:
97
+ logging.critical(f"CRITICAL IMPORT ERROR in ModelManager: Cannot import from config_settings_public: {e_conf}")
98
+ # Provide minimal fallbacks if config cannot be loaded, though this is a critical failure
99
+ MODEL_PATHS = {}
100
+ MODEL_SPECIFIC_PARAMS = {"_default": {"n_ctx": 2048, "n_gpu_layers": -1, "verbose": False}}
101
+ INFERENCE_PRESETS = {"default": {"temperature": 0.7}}
102
+ N_CTX_FALLBACK = 2048
103
+ VERBOSE_LLAMA_CPP = False
104
+
105
+
106
+ logger = logging.getLogger("ZOTHEOS_ModelManager")
107
+ if not logger.handlers:
108
+ handler = logging.StreamHandler(sys.stdout)
109
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
110
+ handler.setFormatter(formatter)
111
+ logger.addHandler(handler)
112
+ logger.setLevel(logging.INFO)
113
+
114
+ # Timeout for model generation tasks when run in a thread
115
+ GENERATION_THREAD_TIMEOUT_SECONDS = 300 # 5 minutes
116
+
117
+ class ModelManager:
118
+ def __init__(self, device_preference: Optional[str] = "cuda", max_model_count: int = 1, max_ram_models_gb: float = 8.0):
119
+ self.device_preference = device_preference
120
+ self.loaded_models: Dict[str, Any] = {} # Stores Llama instances
121
+ self.model_load_status: Dict[str, str] = {} # "unloaded", "loading", "loaded", "error"
122
+ self.model_ram_estimate_gb: Dict[str, float] = {}
123
+ self.total_estimated_ram_gb: float = 0.0
124
+ self.max_model_count = max_model_count
125
+ self.max_ram_models_gb = max_ram_models_gb
126
+ self.model_load_order: List[str] = [] # Tracks order for LRU
127
+ self.model_locks: Dict[str, asyncio.Lock] = {} # Per-model lock for loading
128
+ self.executor = ThreadPoolExecutor(max_workers=max_model_count + 1, thread_name_prefix="LLM_Gen") # +1 for summarizer
129
+
130
+ dev_log = "CPU (default)"
131
+ if self.device_preference == "cuda":
132
+ # Check NVIDIA drivers and CUDA toolkit availability (conceptual)
133
+ try:
134
+ import torch
135
+ if torch.cuda.is_available():
136
+ dev_log = f"CUDA (preference): {torch.cuda.get_device_name(0)}"
137
+ else:
138
+ dev_log = "CUDA (preference, but torch.cuda.is_available()=False)"
139
+ except ImportError:
140
+ dev_log = "CUDA (preference, but PyTorch not found for detailed check)"
141
+ except Exception as e_cuda_check:
142
+ dev_log = f"CUDA (preference, error during torch check: {e_cuda_check})"
143
+ # Actual CUDA usage is determined by n_gpu_layers > 0 during Llama init
144
+
145
+ logger.info(f"🔥 MMgr init. Dev Preference: {dev_log}. MaxRAM Models: {self.max_ram_models_gb}GB. Max Count: {self.max_model_count}.")
146
+ if not LLAMA_CPP_AVAILABLE:
147
+ logger.critical("❌ Llama.cpp backend CRITICAL FAILURE: The 'Llama' class could not be imported from 'llama_cpp'. ModelManager cannot load GGUF models. Please check installation and logs from 'ModelManager_ImportDebug'.")
148
+ logger.info(f"✅ MMGR Config: Max Models: {self.max_model_count}, Max RAM: {self.max_ram_models_gb} GB.")
149
+
150
+ async def _ensure_model_loaded(self, model_name: str) -> bool:
151
+ if model_name not in self.model_locks:
152
+ self.model_locks[model_name] = asyncio.Lock()
153
+
154
+ async with self.model_locks[model_name]:
155
+ if self.model_load_status.get(model_name) == "loaded" and model_name in self.loaded_models:
156
+ if model_name in self.model_load_order: self.model_load_order.remove(model_name)
157
+ self.model_load_order.append(model_name) # Move to end of LRU
158
+ return True
159
+
160
+ if self.model_load_status.get(model_name) == "loading":
161
+ logger.info(f"Model '{model_name}' is already being loaded by another task. Waiting...")
162
+ while self.model_load_status.get(model_name) == "loading": await asyncio.sleep(0.5)
163
+ return self.model_load_status.get(model_name) == "loaded"
164
+
165
+ self.model_load_status[model_name] = "loading"
166
+ logger.info(f"Attempting to load model '{model_name}'...")
167
+
168
+ await self._evict_models_if_needed(new_model_name_to_load=model_name)
169
+
170
+ model_instance, ram_gb = await asyncio.to_thread(self._load_model_from_disk, model_name)
171
+ if model_instance:
172
+ self.loaded_models[model_name] = model_instance
173
+ self.model_ram_estimate_gb[model_name] = ram_gb
174
+ self.total_estimated_ram_gb += ram_gb
175
+ self.model_load_status[model_name] = "loaded"
176
+ if model_name in self.model_load_order: self.model_load_order.remove(model_name)
177
+ self.model_load_order.append(model_name)
178
+ logger.info(f"✅ Model '{model_name}' loaded successfully. RAM used by this model: {ram_gb:.2f}GB. Total est. RAM: {self.total_estimated_ram_gb:.2f}GB.")
179
+ return True
180
+ else:
181
+ self.model_load_status[model_name] = "error"
182
+ logger.error(f"❌ Failed to load model '{model_name}'.")
183
+ # Clean up RAM if an estimate was added prematurely or if partial load occurred
184
+ if model_name in self.model_ram_estimate_gb:
185
+ self.total_estimated_ram_gb -= self.model_ram_estimate_gb.pop(model_name)
186
+ return False
187
+
188
+ def _load_model_from_disk(self, model_name: str) -> tuple[Optional[Any], float]:
189
+ # The check Llama == LlamaMock now works because LlamaMock is always defined
190
+ if not LLAMA_CPP_AVAILABLE or Llama == LlamaMock: # Llama is the variable pointing to the class
191
+ logger.error(f"Cannot load model '{model_name}': Llama.cpp backend not available (LLAMA_CPP_AVAILABLE={LLAMA_CPP_AVAILABLE}, Llama is LlamaMock: {Llama == LlamaMock}).")
192
+ return None, 0.0
193
+
194
+ model_path = MODEL_PATHS.get(model_name)
195
+ if not model_path or not os.path.exists(model_path):
196
+ logger.error(f"Model path for '{model_name}' not found or invalid: {model_path}")
197
+ return None, 0.0
198
+
199
+ specific_params = MODEL_SPECIFIC_PARAMS.get(model_name, {})
200
+ default_params = MODEL_SPECIFIC_PARAMS.get("_default", {})
201
+
202
+ load_params = default_params.copy()
203
+ load_params.update(specific_params)
204
+ load_params['model_path'] = model_path
205
+ load_params.setdefault('verbose', VERBOSE_LLAMA_CPP) # Use global verbose if not set per model
206
+
207
+ if 'n_ctx' not in load_params or load_params['n_ctx'] == 0: # Ensure n_ctx is valid
208
+ load_params['n_ctx'] = N_CTX_FALLBACK
209
+ logger.warning(f"n_ctx for model {model_name} was invalid or not found, using fallback: {N_CTX_FALLBACK}")
210
+
211
+ logger.info(f"🔄 Loading '{model_name}' from: {model_path}")
212
+ logger.debug(f"--- FINAL LOAD PARAMS FOR {model_name.upper()}: {load_params} ---")
213
+
214
+ start_time = time.perf_counter()
215
+ try:
216
+ model_instance = Llama(**load_params) # Llama here is the variable assigned above
217
+ load_time = time.perf_counter() - start_time
218
+ logger.info(f"✅ Model '{model_name}' (Llama instance) initialized by Llama(...). Load time: {load_time:.2f}s.")
219
+
220
+ ram_gb = os.path.getsize(model_path) / (1024**3)
221
+ ram_gb_with_buffer = ram_gb * 1.5 + 1 # Heuristic: file size + 50% + 1GB for KV cache, etc.
222
+ logger.info(f"Model '{model_name}' path: '{model_path}'. Est. RAM for this model (with buffer): {ram_gb_with_buffer:.2f}GB.")
223
+ return model_instance, ram_gb_with_buffer
224
+ except LlamaCppError as e_llama: # Catch specific LlamaCpp errors
225
+ logger.error(f"❌ LlamaCppError loading model '{model_name}': {e_llama}", exc_info=True)
226
+ except Exception as e:
227
+ logger.error(f"❌ Generic error loading model '{model_name}': {e}", exc_info=True)
228
+ return None, 0.0
229
+
230
+ async def _evict_models_if_needed(self, new_model_name_to_load: Optional[str] = None):
231
+ new_model_ram_gb_estimate = 0.0
232
+ if new_model_name_to_load:
233
+ model_path = MODEL_PATHS.get(new_model_name_to_load)
234
+ if model_path and os.path.exists(model_path):
235
+ new_model_ram_gb_estimate = (os.path.getsize(model_path) / (1024**3)) * 1.5 + 1 # Same heuristic
236
+
237
+ # Check if eviction is needed:
238
+ # 1. If we are at max model count AND the new model isn't already loaded.
239
+ # 2. OR if adding the new model would exceed total RAM limit AND we have models loaded.
240
+ while (len(self.loaded_models) >= self.max_model_count and (new_model_name_to_load not in self.loaded_models)) or \
241
+ (self.total_estimated_ram_gb + new_model_ram_gb_estimate > self.max_ram_models_gb and self.model_load_order):
242
+ if not self.model_load_order:
243
+ logger.warning("Eviction needed but model_load_order is empty. This should not happen if loaded_models is populated.")
244
+ break
245
+
246
+ model_to_evict = self.model_load_order.pop(0) # Evict LRU
247
+ if model_to_evict in self.loaded_models:
248
+ logger.warning(f"⚠️ Evicting model '{model_to_evict}' due to resource limits (Count: {len(self.loaded_models)}/{self.max_model_count}, RAM: {self.total_estimated_ram_gb:.2f}/{self.max_ram_models_gb:.2f}GB).")
249
+ del self.loaded_models[model_to_evict] # Release model instance
250
+ evicted_ram = self.model_ram_estimate_gb.pop(model_to_evict, 0)
251
+ self.total_estimated_ram_gb -= evicted_ram
252
+ self.model_load_status[model_to_evict] = "unloaded"
253
+ logger.info(f"Model '{model_to_evict}' unloaded. RAM reclaimed: {evicted_ram:.2f}GB. Total est. RAM: {self.total_estimated_ram_gb:.2f}GB.")
254
+ else:
255
+ logger.warning(f"Model '{model_to_evict}' was in load order but not in loaded_models dict. Inconsistency detected.")
256
+
257
+
258
+ def _generation_task_sync(self, model_instance: Any, prompt_messages: List[Dict[str,str]], gen_params: Dict[str, Any]) -> str:
259
+ model_log_name = model_instance.model_path.split(os.sep)[-1] if hasattr(model_instance, 'model_path') else 'UnknownModel'
260
+ logger.debug(f"--- [{model_log_name}] ENTERING _generation_task_sync ---")
261
+ output_text = "[Error: Generation failed in thread]"
262
+ try:
263
+ completion = model_instance.create_chat_completion(messages=prompt_messages, **gen_params)
264
+ if completion and "choices" in completion and completion["choices"]:
265
+ message_content = completion["choices"][0].get("message", {}).get("content")
266
+ output_text = message_content.strip() if message_content else "[No content in choice message]"
267
+ else:
268
+ output_text = "[No choices in completion or completion is empty]"
269
+ logger.warning(f"[{model_log_name}] Malformed completion object: {completion}")
270
+ logger.debug(f"✅ [{model_log_name}] Sync generation successful. Preview: {output_text[:100].replace(chr(10),' ')}...")
271
+ except Exception as e:
272
+ logger.error(f"❌ [{model_log_name}] Exception during model generation in thread: {e}", exc_info=True)
273
+ output_text = f"[Error: Exception during generation - {type(e).__name__}: {str(e)[:100]}]"
274
+ logger.debug(f"--- [{model_log_name}] EXITING _generation_task_sync ---")
275
+ return output_text
276
+
277
+ async def async_generation_wrapper(self, model_instance: Any, prompt_messages: List[Dict[str,str]], gen_params: Dict[str, Any], model_name_for_log: str) -> str:
278
+ logger.debug(f"--- [{model_name_for_log}] ENTERING async_generation_wrapper ---")
279
+ output_str = f"[Error: Model '{model_name_for_log}' instance not available for generation]"
280
+ if not model_instance:
281
+ logger.error(f"[{model_name_for_log}] Model instance is None in async_generation_wrapper.")
282
+ return output_str
283
+ try:
284
+ output_str = await asyncio.wait_for(
285
+ asyncio.to_thread(self._generation_task_sync, model_instance, prompt_messages, gen_params),
286
+ timeout=GENERATION_THREAD_TIMEOUT_SECONDS + 10
287
+ )
288
+ except asyncio.TimeoutError:
289
+ logger.error(f"❌ [{model_name_for_log}] Generation task TIMED OUT in asyncio.wait_for (>{GENERATION_THREAD_TIMEOUT_SECONDS + 10}s).")
290
+ output_str = f"[Error: Generation timed out for model {model_name_for_log}]"
291
+ except Exception as e:
292
+ logger.error(f"❌ [{model_name_for_log}] Exception in async_generation_wrapper: {e}", exc_info=True)
293
+ output_str = f"[Error: Async wrapper exception for model {model_name_for_log} - {type(e).__name__}]"
294
+ logger.debug(f"--- [{model_name_for_log}] EXITING async_generation_wrapper ---")
295
+ return output_str
296
+
297
+
298
+ async def generate_with_model(self, model_name: str, prompt: str, preset_name: str = "default", system_prompt: Optional[str] = None) -> str:
299
+ logger.info(f"--- [{model_name}] Received generation request. System prompt: {'Yes' if system_prompt else 'No'}. Preset: {preset_name} ---")
300
+ # Check if Llama class is LlamaMock (meaning llama.cpp is not available)
301
+ if not LLAMA_CPP_AVAILABLE or Llama == LlamaMock:
302
+ logger.error(f"Cannot generate with model '{model_name}': Llama.cpp backend not available (LLAMA_CPP_AVAILABLE={LLAMA_CPP_AVAILABLE}, Llama is LlamaMock: {Llama == LlamaMock}).")
303
+ return "[Error: Llama.cpp backend core ('Llama' class) not available or is mocked]"
304
+
305
+ if not await self._ensure_model_loaded(model_name):
306
+ return f"[Error: Model '{model_name}' could not be loaded or is not available.]"
307
+
308
+ model_instance = self.loaded_models.get(model_name)
309
+ if not model_instance:
310
+ logger.error(f"Model '{model_name}' instance not found in loaded_models after successful _ensure_model_loaded. This is unexpected.")
311
+ return f"[Error: Model '{model_name}' instance unexpectedly not found after load attempt.]"
312
+
313
+ gen_params = INFERENCE_PRESETS.get(preset_name, INFERENCE_PRESETS.get("balanced", {"temperature": 0.7})).copy() # Fallback to "balanced" then hardcoded default
314
+
315
+ prompt_messages = []
316
+ if system_prompt and system_prompt.strip():
317
+ prompt_messages.append({"role": "system", "content": system_prompt})
318
+ prompt_messages.append({"role": "user", "content": prompt})
319
+
320
+ logger.debug(f"[{model_name}] Calling async_generation_wrapper with messages: {prompt_messages}, params: {gen_params}")
321
+ response_text = await self.async_generation_wrapper(model_instance, prompt_messages, gen_params, model_name)
322
+ logger.info(f"--- [{model_name}] Generation complete. Response length: {len(response_text)} ---")
323
+ return response_text
324
+
325
+ def get_loaded_model_stats(self) -> Dict[str, Any]:
326
+ return {
327
+ "loaded_model_count": len(self.loaded_models),
328
+ "total_ram_estimate_gb": round(self.total_estimated_ram_gb, 2),
329
+ "max_ram_models_gb": self.max_ram_models_gb,
330
+ "max_model_count": self.max_model_count,
331
+ "models_in_memory": list(self.loaded_models.keys()),
332
+ "load_order_lru": self.model_load_order, # LRU (oldest) to MRU (newest)
333
+ "model_ram_details_gb": {name: round(ram, 2) for name, ram in self.model_ram_estimate_gb.items()},
334
+ "model_load_status": self.model_load_status,
335
+ "llama_cpp_available": LLAMA_CPP_AVAILABLE,
336
+ "llama_class_is_mock": Llama == LlamaMock
337
+ }
338
+
339
+ def shutdown(self):
340
+ logger.info("🔌 Shutting down ModelManager...")
341
+ self.executor.shutdown(wait=True, cancel_futures=True) # Cancel pending futures on shutdown
342
+ self.loaded_models.clear()
343
+ self.model_ram_estimate_gb.clear()
344
+ self.model_load_order.clear()
345
+ self.model_load_status.clear()
346
+ self.total_estimated_ram_gb = 0
347
+ logger.info("✅ ModelManager shutdown complete.")
348
+
349
+ # Example usage for direct testing
350
+ async def main_model_manager_test():
351
+ # Configure logging for the test
352
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
353
+ # Set specific loggers to higher levels if too verbose
354
+ logging.getLogger("httpx").setLevel(logging.WARNING)
355
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
356
+
357
+ logger.info("--- ModelManager Test ---")
358
+
359
+ if not MODEL_PATHS:
360
+ logger.error("MODEL_PATHS is empty. Cannot run test. Check config_settings_public.py")
361
+ return
362
+ if Llama == LlamaMock: # Check if we are using the mock
363
+ logger.warning("Llama.cpp is not available. Running test with LlamaMock. Responses will be mocked.")
364
+
365
+ # Example: max 2 models, 10GB RAM limit
366
+ mm = ModelManager(max_model_count=2, max_ram_models_gb=10.0)
367
+
368
+ test_models_available = list(MODEL_PATHS.keys())
369
+ if not test_models_available:
370
+ logger.error("No models defined in MODEL_PATHS for testing.")
371
+ mm.shutdown()
372
+ return
373
+
374
+ # Select up to 3 models for testing, or fewer if not available
375
+ model1_name = test_models_available[0]
376
+ model2_name = test_models_available[1] if len(test_models_available) > 1 else model1_name
377
+ model3_name = test_models_available[2] if len(test_models_available) > 2 else model1_name
378
+
379
+ test_queries = [
380
+ (model1_name, "What is the capital of France?", "precise", "You are a helpful geography expert."),
381
+ (model2_name, "Explain black holes simply.", "creative", None),
382
+ (model1_name, "What is 1+1?", "precise", None), # Query model1 again
383
+ ]
384
+ if len(test_models_available) > 2 and model3_name not in [model1_name, model2_name]: # Only add model3 if it's distinct and available
385
+ test_queries.append((model3_name, "Tell me a short joke.", "default", None))
386
+
387
+ for i, (model_name, query, preset, sys_prompt) in enumerate(test_queries):
388
+ logger.info(f"\n--- Test Query {i+1}: Model '{model_name}', Preset '{preset}' ---")
389
+ response = await mm.generate_with_model(model_name, query, preset_name=preset, system_prompt=sys_prompt)
390
+ print(f"Response from '{model_name}': {response[:150]}...") # Print more of the response
391
+ print(f"Stats after '{model_name}': {mm.get_loaded_model_stats()}")
392
+ await asyncio.sleep(1) # Small delay between queries
393
+
394
+ mm.shutdown()
395
+ logger.info("--- ModelManager Test Complete ---")
396
+
397
+ if __name__ == "__main__":
398
+ # This allows testing ModelManager directly if needed
399
+ # Ensure your MODEL_PATHS in config_settings_public.py point to valid GGUF files
400
+ # To run: python -m modules.model_manager_public
401
+ asyncio.run(main_model_manager_test())
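For reference, the RAM budgeting in `_load_model_from_disk` above is a simple heuristic: estimated RAM = GGUF file size × 1.5 + 1 GB for KV cache and runtime overhead. A standalone sketch of that arithmetic, using a hypothetical 4.1 GB quantized model:

```python
# Standalone sketch of the RAM heuristic used above (file size * 1.5 + 1 GB).
def estimated_ram_gb(model_file_size_bytes: int) -> float:
    file_gb = model_file_size_bytes / (1024 ** 3)
    return file_gb * 1.5 + 1.0  # 50% overhead plus ~1 GB for KV cache, matching _load_model_from_disk

if __name__ == "__main__":
    hypothetical_size = int(4.1 * (1024 ** 3))  # a hypothetical 4.1 GB quantized GGUF file
    print(f"Budgeted RAM: {estimated_ram_gb(hypothetical_size):.2f} GB")  # -> 7.15 GB
```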
modules/plugin_manager_public.py ADDED
@@ -0,0 +1,80 @@
1
+ import logging
2
+ import asyncio
3
+ from typing import Dict, Any, Type
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ # ✅ Define Pre-Approved Plugins (Only Trusted Plugins for Public Use)
8
+ ALLOWED_PLUGINS = {
9
+ "basic_math": "BasicMathPlugin", # Example of a basic utility plugin
10
+ "date_time": "DateTimePlugin" # Example of a simple utility plugin
11
+ }
12
+
13
+ class PluginManager:
14
+ """🚀 Public Plugin Manager (Pre-Approved Plugins Only)"""
15
+
16
+ def __init__(self):
17
+ self.plugins: Dict[str, Any] = {}
18
+
19
+ async def load_plugin(self, plugin_name: str):
20
+ """✅ Load Pre-Approved Plugin."""
21
+ if plugin_name not in ALLOWED_PLUGINS:
22
+ logger.warning(f"⚠️ Plugin '{plugin_name}' is not available in the public version.")
23
+ return f"⚠️ Plugin '{plugin_name}' not allowed in public version."
24
+
25
+ if plugin_name in self.plugins:
26
+ logger.info(f"✅ Plugin '{plugin_name}' already loaded.")
27
+ return f"✅ Plugin '{plugin_name}' already loaded."
28
+
29
+ try:
30
+ # ✅ Use pre-defined plugin mappings for security
31
+ plugin_class = globals().get(ALLOWED_PLUGINS[plugin_name])
32
+ if plugin_class:
33
+ plugin_instance = plugin_class()
34
+ self.plugins[plugin_name] = plugin_instance
35
+ logger.info(f"✅ Plugin '{plugin_name}' loaded successfully.")
36
+ return f"✅ Plugin '{plugin_name}' loaded successfully."
37
+ else:
38
+ logger.error(f"❌ Plugin class '{plugin_name}' not found.")
39
+ return f"❌ Plugin class '{plugin_name}' not found."
40
+ except Exception as e:
41
+ logger.exception(f"❌ Error loading plugin '{plugin_name}': {e}")
42
+ return f"❌ Error loading plugin '{plugin_name}'."
43
+
44
+ async def execute_plugin_command(self, plugin_name: str, command: str, args: Dict[str, Any] = {}):
45
+ """✅ Execute Plugin Command (Predefined Only)."""
46
+ if plugin_name not in self.plugins:
47
+ logger.warning(f"⚠️ Attempt to execute command from unloaded plugin '{plugin_name}'.")
48
+ return f"⚠️ Plugin '{plugin_name}' not loaded."
49
+
50
+ plugin = self.plugins[plugin_name]
51
+ if hasattr(plugin, command):
52
+ method = getattr(plugin, command)
53
+ try:
54
+ if asyncio.iscoroutinefunction(method):
55
+ return await method(**args)
56
+ return method(**args)
57
+ except Exception as e:
58
+ logger.error(f"❌ Error executing command '{command}' in plugin '{plugin_name}': {e}")
59
+ return f"❌ Error executing '{command}' in '{plugin_name}'."
60
+ else:
61
+ logger.warning(f"⚠️ Command '{command}' not found in plugin '{plugin_name}'.")
62
+ return f"⚠️ Command '{command}' not found in '{plugin_name}'."
63
+
64
+ # ✅ Example Predefined Plugin Classes
65
+ class BasicMathPlugin:
66
+ """Basic Math Plugin (Example)"""
67
+
68
+ def add(self, x, y):
69
+ return x + y
70
+
71
+ def subtract(self, x, y):
72
+ return x - y
73
+
74
+ class DateTimePlugin:
75
+ """Date/Time Plugin (Example)"""
76
+
77
+ async def current_time(self):
78
+ from datetime import datetime
79
+ return datetime.utcnow().isoformat()
80
+
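A minimal sketch of driving `PluginManager` from async code, assuming the project root is on `sys.path` so the module imports as `modules.plugin_manager_public`; the `web_scraper` name is a hypothetical example of a plugin that is not in `ALLOWED_PLUGINS`:

```python
# Minimal sketch: load a pre-approved plugin and run a command.
import asyncio

from modules.plugin_manager_public import PluginManager

async def demo() -> None:
    pm = PluginManager()
    print(await pm.load_plugin("basic_math"))                                      # allowed plugin
    print(await pm.execute_plugin_command("basic_math", "add", {"x": 2, "y": 3}))  # -> 5
    print(await pm.load_plugin("web_scraper"))  # hypothetical disallowed plugin -> warning string

if __name__ == "__main__":
    asyncio.run(demo())
```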
modules/query_processor_public.py ADDED
@@ -0,0 +1,135 @@
1
+ import asyncio
2
+ import logging
3
+ import torch
4
+ from difflib import SequenceMatcher
5
+ from time import time
6
+ from typing import Dict, List, Tuple
7
+
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ from config_settings_public import MODEL_PATHS, MODEL_CONFIG, MODEL_WEIGHTS
12
+ from model_manager_public import ModelManager
13
+ from response_optimizer_public import ResponseOptimizer
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class QueryProcessor:
19
+ def __init__(self, model_manager: ModelManager, response_optimizer: ResponseOptimizer):
20
+ self.model_manager = model_manager
21
+ self.response_optimizer = response_optimizer
22
+ self.model_paths = MODEL_PATHS
23
+ self.model_config = MODEL_CONFIG
24
+ self.memory_bank = []
25
+
26
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
+ if torch.cuda.is_available():
28
+ torch.cuda.set_device(0)
29
+ torch.backends.cudnn.benchmark = True
30
+ torch.backends.cudnn.enabled = True
31
+ torch.set_float32_matmul_precision('high')
32
+ logger.info(f"✅ QueryProcessor initialized on {self.device}")
33
+ else:
34
+ logger.warning("⚠️ Running on CPU")
35
+
36
+ self.model_tiers = {
37
+ "balanced": ["gemma", "mistral", "llama"]
38
+ }
39
+
40
+ def select_models(self, query: str) -> List[str]:
41
+ return [m for m in self.model_tiers["balanced"] if m in self.model_paths]
42
+
43
+ async def process_query_with_fusion(self, query: str) -> str:
44
+ selected_models = self.select_models(query)
45
+ logger.info(f"🧠 Selected models: {selected_models}")
46
+
47
+ tasks = [asyncio.create_task(self.invoke_model(m, query)) for m in selected_models]
48
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
49
+
50
+ valid_responses = {
51
+ m: r for m, r in zip(selected_models, responses)
52
+ if isinstance(r, str) and r.strip()
53
+ }
54
+
55
+ if not valid_responses:
56
+ logger.error("❌ All models failed or returned empty responses.")
57
+ return "⚠️ No valid responses received from models."
58
+
59
+ logger.info(f"✅ {len(valid_responses)} valid responses received.")
60
+ fused = await self.advanced_fusion(valid_responses, query)
61
+ self.memory_bank.append({"query": query, "response": fused, "timestamp": time()})
62
+ return fused
63
+
64
+ async def invoke_model(self, model_name: str, query: str) -> str:
65
+ try:
66
+ model = self.model_manager.loaded_models.get(model_name)
67
+ if not model:
68
+ model, _ = self.model_manager._load_model_from_disk(model_name)  # ModelManager's loader is _load_model_from_disk and returns (instance, est_ram_gb)
69
+ if not model:
70
+ raise RuntimeError(f"Model '{model_name}' failed to load.")
71
+
72
+ config = self.model_config.get(model_name, {})
73
+ max_tokens = config.get("max_tokens", 800)
74
+ temperature = config.get("temperature", 0.7)
75
+
76
+ logger.info(f"🔧 Invoking '{model_name}' with temp={temperature}, max_tokens={max_tokens}")
77
+
78
+ with torch.no_grad():
79
+ completion = await asyncio.to_thread(
80
+ model.create_completion,
81
+ prompt=query,
82
+ max_tokens=max_tokens,
83
+ temperature=temperature,
84
+ stop=["</s>", "User:", "Assistant:"]
85
+ )
86
+
87
+ response_text = completion.get("choices", [{}])[0].get("text", "").strip()
88
+ return response_text
89
+
90
+ except Exception as e:
91
+ logger.error(f"❌ Error in model '{model_name}': {e}")
92
+ return ""
93
+
94
+ def clean_response(self, raw) -> str:
95
+ if isinstance(raw, str):
96
+ return raw.strip()
97
+
98
+ if isinstance(raw, dict):
99
+ return raw.get("generated_text", "").strip() or raw.get("text", "").strip()
100
+
101
+ if isinstance(raw, list):
102
+ if isinstance(raw[0], dict) and "generated_text" in raw[0]:
103
+ return raw[0]["generated_text"].strip()
104
+ return " ".join(str(x).strip() for x in raw if isinstance(x, str))
105
+
106
+ return str(raw).strip()
107
+
108
+ async def advanced_fusion(self, responses: Dict[str, str], query: str) -> str:
109
+ cleaned = {m: self.clean_response(r) for m, r in responses.items()}
110
+ seen = set()
111
+ unique: List[Tuple[str, float, str]] = []
112
+
113
+ for model, text in cleaned.items():
114
+ if not text or any(self.is_similar(text, prev) for prev in seen):
115
+ continue
116
+ seen.add(text)
117
+ score = self.score_response(text, query)
118
+ unique.append((text, score, model))
119
+
120
+ if not unique:
121
+ return "⚠️ No high-quality content to merge."
122
+
123
+ unique.sort(key=lambda x: x[1], reverse=True)
124
+ best_responses = [await self.response_optimizer.optimize_response(t, {}) for t, _, _ in unique[:3]]  # optimize_response expects a context dict, not the raw query string
125
+ return "\n---\n".join(best_responses).strip()
126
+
127
+ def is_similar(self, a: str, b: str, threshold: float = 0.9) -> bool:
128
+ return SequenceMatcher(None, a.strip().lower(), b.strip().lower()).ratio() > threshold
129
+
130
+ def score_response(self, response: str, query: str) -> float:
131
+ try:
132
+ tfidf = TfidfVectorizer().fit_transform([response, query])
133
+ return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
134
+ except Exception:
135
+ return 0.5
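The `score_response` step above ranks candidate responses by TF-IDF cosine similarity against the query. The same computation, shown standalone with scikit-learn:

```python
# Standalone version of the TF-IDF cosine-similarity scoring used in score_response.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def score(response: str, query: str) -> float:
    tfidf = TfidfVectorizer().fit_transform([response, query])
    return float(cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0])

print(score("Paris is the capital of France.", "What is the capital of France?"))
```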
modules/response_optimizer_public.py ADDED
@@ -0,0 +1,53 @@
1
+ import logging
2
+ from collections import defaultdict
3
+ from typing import Dict
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ class ResponseOptimizer:
8
+ """🚀 Public Response Optimizer (Basic Filtering + Length Control)"""
9
+
10
+ def __init__(self):
11
+ self.response_cache = {} # Stores past responses to reduce redundant processing
12
+ self.optim_rules = defaultdict(list) # Holds rule-based response refinements
13
+ self.cache_limit = 100 # ✅ Limit cache size to avoid overflow
14
+
15
+ async def optimize_response(self, response: str, context: Dict) -> str:
16
+ """✅ Optimizes AI-generated responses (Fast + Secure)."""
17
+
18
+ # ✅ Return cached response if available
19
+ if response in self.response_cache:
20
+ logger.info("✅ Returning cached optimized response.")
21
+ return self.response_cache[response]
22
+
23
+ # ✅ Apply context-based optimization (length, profanity)
24
+ optimized_response = self.apply_optimizations(response, context)
25
+
26
+ # ✅ Store in cache (Respect size limit)
27
+ if len(self.response_cache) >= self.cache_limit:
28
+ oldest_response = next(iter(self.response_cache))
29
+ del self.response_cache[oldest_response]
30
+
31
+ self.response_cache[response] = optimized_response
32
+ return optimized_response
33
+
34
+ def apply_optimizations(self, response: str, context: Dict) -> str:
35
+ """✅ Applies context-specific response optimization."""
36
+ if "filter_profanity" in context and context["filter_profanity"]:
37
+ response = self.remove_profanity(response)
38
+
39
+ if "trim_length" in context:
40
+ response = response[:context["trim_length"]].strip() + "..." # Trims to desired length
41
+
42
+ return response
43
+
44
+ def remove_profanity(self, response: str) -> str:
45
+ """🚫 Removes flagged words from AI-generated responses."""
46
+ banned_words = [
47
+ "badword1", "badword2", "badword3", "shit", "fuck", "damn", "bitch", "asshole"
48
+ ] # ✅ Add or remove based on testing
49
+
50
+ for word in banned_words:
51
+ response = response.replace(word, "***") # ✅ Replace with censorship symbol
52
+
53
+ return response
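A minimal usage sketch for `ResponseOptimizer`, assuming it imports as `modules.response_optimizer_public`; the sample text and the trim length of 60 are arbitrary illustration values:

```python
# Minimal usage sketch with an explicit context dict (illustrative values).
import asyncio

from modules.response_optimizer_public import ResponseOptimizer

async def demo() -> None:
    optimizer = ResponseOptimizer()
    context = {"filter_profanity": True, "trim_length": 60}
    raw = "This damn answer rambles on far longer than anyone asked for."
    print(await optimizer.optimize_response(raw, context))  # censored, then trimmed to 60 chars + "..."

if __name__ == "__main__":
    asyncio.run(demo())
```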
modules/user_auth.py ADDED
@@ -0,0 +1,136 @@
1
+ # FILE: modules/user_auth.py
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ import logging
7
+
8
+ logger = logging.getLogger("ZOTHEOS_UserAuth")
9
+ if not logger.handlers:
10
11
+ handler = logging.StreamHandler(sys.stdout)
12
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s] - %(message)s')
13
+ handler.setFormatter(formatter)
14
+ logger.addHandler(handler)
15
+ logger.setLevel(logging.INFO)
16
+
17
+ USERS_FILE_PATH = "stripe_users.json" # Default name, path will be determined below
18
+
19
+ try:
20
+ if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
21
+ # Running in a PyInstaller bundle
22
+ # _MEIPASS is the path to the temporary directory where PyInstaller unpacks files
23
+ application_path = sys._MEIPASS
24
+ logger.debug(f"UserAuth running in PyInstaller bundle. MEIPASS: {application_path}")
25
+ # stripe_users.json should be in the root of _MEIPASS, alongside the .exe's main script
26
+ candidate_path = os.path.join(application_path, "stripe_users.json")
27
+ # Check if it exists there directly
28
+ if os.path.exists(candidate_path):
29
+ USERS_FILE_PATH = candidate_path
30
+ else:
31
+ # Fallback: if user_auth.py is in a 'modules' subdir within _MEIPASS, try one level up for stripe_users.json
32
+ # This handles if --add-data "modules;modules" was used and user_auth.py is at _MEIPASS/modules/user_auth.py
33
+ # and stripe_users.json was added to _MEIPASS/.
34
+ # This might happen if the main script is also at the root of _MEIPASS.
35
+ current_module_dir = os.path.dirname(os.path.abspath(__file__)) # This would be _MEIPASS/modules
36
+ project_root_guess = os.path.dirname(current_module_dir) # This would be _MEIPASS
37
+ alt_candidate_path = os.path.join(project_root_guess, "stripe_users.json")
38
+ if os.path.exists(alt_candidate_path):
39
+ USERS_FILE_PATH = alt_candidate_path
40
+ else:
41
+ # If still not found, log a warning. The get_user_tier will handle the FileNotFoundError.
42
+ logger.warning(f"stripe_users.json not found at primary bundle path '{candidate_path}' or alternate '{alt_candidate_path}'. It must be bundled at the root relative to the main script.")
43
+ # Keep USERS_FILE_PATH as "stripe_users.json" to allow relative loading if script is at root of _MEIPASS
44
+ # This will likely fail if not at root, and get_user_tier will default to 'free'
45
+ else:
46
+ # Running as a normal script (user_auth.py is in modules folder)
47
+ script_dir = os.path.dirname(os.path.abspath(__file__))
48
+ project_root = os.path.dirname(script_dir)
49
+ USERS_FILE_PATH = os.path.join(project_root, "stripe_users.json")
50
+
51
+ logger.info(f"User authentication file path dynamically set to: {os.path.abspath(USERS_FILE_PATH) if os.path.isabs(USERS_FILE_PATH) else USERS_FILE_PATH}")
52
+
53
+ except Exception as e_path:
54
+ logger.error(f"Critical error setting USERS_FILE_PATH in user_auth.py: {e_path}. Using default 'stripe_users.json'.")
55
+ USERS_FILE_PATH = "stripe_users.json"
56
+
57
+
58
+ def get_user_tier(user_token: str) -> str:
59
+ """
60
+ Retrieves the user's tier based on their token from stripe_users.json.
61
+ Defaults to 'free' if token not found or an error occurs.
62
+ """
63
+ default_tier = "free"
64
+ if not user_token or not isinstance(user_token, str) or not user_token.strip():
65
+ # This log is fine, but can be noisy if called frequently with no token (e.g. UI init)
66
+ # logger.debug("No user token provided or invalid token, defaulting to 'free' tier.")
67
+ return default_tier
68
+
69
+ try:
70
+ # Log the path being attempted for diagnosis
71
+ logger.debug(f"Attempting to load users file from: {os.path.abspath(USERS_FILE_PATH)}")
72
+ if not os.path.exists(USERS_FILE_PATH):
73
+ logger.warning(f"Users file '{USERS_FILE_PATH}' not found. Defaulting all users to '{default_tier}' tier. Please ensure this file is correctly bundled at the application root.")
74
+ return default_tier
75
+
76
+ with open(USERS_FILE_PATH, "r", encoding="utf-8") as f:
77
+ users = json.load(f)
78
+
79
+ tier = users.get(user_token, default_tier)
80
+ log_token_display = user_token[:3] + "***" if len(user_token) > 3 else user_token
81
+ logger.info(f"Token '{log_token_display}' resolved to tier: '{tier}'.")
82
+ return tier
83
+ except json.JSONDecodeError:
84
+ logger.error(f"Error decoding JSON from {USERS_FILE_PATH}. Defaulting to '{default_tier}' tier.", exc_info=True)
85
+ return default_tier
86
+ except Exception as e:
87
+ logger.error(f"An unexpected error occurred in get_user_tier: {e}. Defaulting to '{default_tier}' tier.", exc_info=True)
88
+ return default_tier
89
+
90
+ if __name__ == '__main__':
91
+ logger.setLevel(logging.DEBUG)
92
+ print(f"--- Testing user_auth.py ---")
93
+ print(f"USERS_FILE_PATH is configured as: {os.path.abspath(USERS_FILE_PATH)}")
94
+
95
+ # Create a dummy stripe_users.json in the expected location for testing if it doesn't exist
96
+ # For standalone testing, this means it looks for it relative to where this script would be if it were project root.
97
+ # If this script is in 'modules', project_root is one level up.
98
+
99
+ # Determine the correct test path based on whether script is run directly or from project root
100
+ if os.path.basename(os.getcwd()) == "modules": # If CWD is modules folder
101
+ test_stripe_users_path = "../stripe_users.json"
102
+ else: # If CWD is project root (where zotheos_interface_public.py is)
103
+ test_stripe_users_path = "stripe_users.json"
104
+
105
+ if not os.path.exists(test_stripe_users_path):
106
+ print(f"'{test_stripe_users_path}' not found relative to CWD ({os.getcwd()}). Creating a dummy file for testing purposes at this location.")
107
+ dummy_users_for_test = {
108
+ "TOKEN_FREE_USER": "free",
109
+ "TOKEN_STARTER_USER": "starter",
110
+ "TOKEN_PRO_USER": "pro"
111
+ }
112
+ try:
113
+ with open(test_stripe_users_path, "w") as f:
114
+ json.dump(dummy_users_for_test, f, indent=2)
115
+ print(f"Dummy '{test_stripe_users_path}' created successfully with test tokens.")
116
+ # Update USERS_FILE_PATH for this test run if it was different
117
+ USERS_FILE_PATH = test_stripe_users_path
118
+ print(f"For this test run, USERS_FILE_PATH is now: {os.path.abspath(USERS_FILE_PATH)}")
119
+ except Exception as e_create:
120
+ print(f"Could not create dummy users file at {test_stripe_users_path}: {e_create}")
121
+ print("Please ensure stripe_users.json exists in the project root for user_auth.py to function correctly.")
122
+
123
+ print("\nTest Results:")
124
+ test_tokens = {
125
+ "Pro User Token": "TOKEN_PRO_USER",
126
+ "Starter User Token": "TOKEN_STARTER_USER",
127
+ "Free User Token": "TOKEN_FREE_USER",
128
+ "Non-existent Token": "invalid_dummy_token_123",
129
+ "Empty String Token": "",
130
+ }
131
+
132
+ for description, token_to_test in test_tokens.items():
133
+ actual_tier = get_user_tier(token_to_test)
134
+ print(f"- Test '{description}' (Input: '{token_to_test}'): Tier = '{actual_tier}'")
135
+
136
+ print(f"\n--- End of user_auth.py test ---")