Final_Assignment / health_check.py
tonthatthienvu's picture
🏗️ Priority 2A: Architecture Consolidation & Optimization Complete
1fc2038
raw
history blame
7.81 kB
#!/usr/bin/env python3
"""
Health Check and Monitoring for GAIA Agent HuggingFace Space
Provides system status, capability checks, and performance monitoring.
"""
import os
import sys
import time
import json
from datetime import datetime
from pathlib import Path
class GAIAHealthCheck:
    """Comprehensive health check for the GAIA Agent system.

    Every ``check_*`` method returns a flat ``{name: bool}`` mapping so that
    ``_score_dict`` can grade all categories the same way.  The aggregated
    report is stored in ``self.check_results`` once
    ``run_comprehensive_check`` has run.
    """

    # Importable module names probed by ``check_dependencies``.
    DEPENDENCIES = (
        'gradio',
        'smolagents',
        'litellm',
        'transformers',
        'torch',
        'google.generativeai',
        'pandas',
        'chess',
    )

    # Environment variables inspected by ``check_api_keys``.
    API_KEY_VARS = (
        'GEMINI_API_KEY',
        'HUGGINGFACE_TOKEN',
        'KLUSTER_API_KEY',
    )

    # (report key, module name, attribute name, attribute must be non-empty)
    COMPONENTS = (
        ('main_solver', 'main', 'GAIASolver', False),
        ('hybrid_solver', 'main_hybrid', 'HybridGAIASolver', False),
        ('gaia_tools', 'gaia_tools', 'GAIA_TOOLS', True),
        ('question_classifier', 'question_classifier', 'QuestionClassifier', False),
        ('async_testing', 'async_complete_test_hf', 'run_hf_comprehensive_test', False),
        ('advanced_testing', 'async_complete_test', 'AsyncGAIATestSystem', False),
    )

    # Paths (relative to the current working directory) that must exist.
    REQUIRED_FILES = (
        'main.py',
        'app.py',
        'gaia_tools.py',
        'requirements.txt',
        'CLAUDE.md',
    )

    # Relative weight of each category in the overall health score.
    SCORE_WEIGHTS = {
        'dependencies': 0.3,
        'api_keys': 0.2,
        'components': 0.4,
        'files': 0.1,
    }

    # (minimum score, label), checked from the highest threshold down.
    STATUS_LEVELS = (
        (90, "🟢 EXCELLENT"),
        (75, "🟡 GOOD"),
        (50, "🟠 FAIR"),
    )
    STATUS_FALLBACK = "🔴 POOR"

    def __init__(self):
        # Reference point for the 'uptime_seconds' metric.
        self.start_time = time.time()
        # Filled in by run_comprehensive_check(); empty means "not run yet".
        self.check_results = {}

    def check_dependencies(self) -> dict:
        """Check availability of key dependencies.

        Returns:
            Mapping of module name to ``True`` if it could be imported.
        """
        results = {}
        for name in self.DEPENDENCIES:
            try:
                __import__(name)
            except Exception:
                # A package can fail at import time with something other
                # than ImportError; a health check should report that as
                # "unavailable" rather than crash.
                results[name] = False
            else:
                results[name] = True
        return results

    def check_api_keys(self) -> dict:
        """Check availability of API keys.

        Returns:
            Mapping of environment variable name to ``True`` when the
            variable is set to a non-empty value.  Values are never exposed.
        """
        return {var: bool(os.getenv(var)) for var in self.API_KEY_VARS}

    @staticmethod
    def _import_attr(module_name, attr_name):
        """Import *module_name* and return its attribute *attr_name*."""
        module = __import__(module_name, fromlist=[attr_name])
        return getattr(module, attr_name)

    def check_core_components(self) -> dict:
        """Check availability of core GAIA components.

        Each component is considered available when its module imports and
        exposes the expected attribute; ``gaia_tools`` additionally requires
        ``GAIA_TOOLS`` to be non-empty.

        Returns:
            Mapping of component key to availability.
        """
        components = {}
        for key, module_name, attr_name, must_be_non_empty in self.COMPONENTS:
            try:
                target = self._import_attr(module_name, attr_name)
                components[key] = len(target) > 0 if must_be_non_empty else True
            except Exception:
                # Best-effort probe: any failure means "not available".
                # ``Exception`` (not a bare except) so Ctrl-C and SystemExit
                # still propagate.
                components[key] = False
        return components

    def check_file_system(self) -> dict:
        """Check file system and required files.

        Returns:
            Mapping of file name to ``True`` if the path exists, resolved
            against the current working directory.
        """
        return {name: Path(name).exists() for name in self.REQUIRED_FILES}

    def get_system_metrics(self) -> dict:
        """Get system performance metrics.

        Memory and CPU figures need the optional ``psutil`` package; without
        it (or if it fails to query the process) they stay ``'unknown'``.

        Returns:
            Mapping with ``uptime_seconds``, ``python_version``,
            ``platform``, ``memory_usage`` and ``cpu_usage``.
        """
        metrics = {
            'uptime_seconds': time.time() - self.start_time,
            'python_version': sys.version,
            'platform': sys.platform,
            'memory_usage': 'unknown',
            'cpu_usage': 'unknown',
        }

        try:
            import psutil
        except ImportError:
            return metrics

        try:
            process = psutil.Process()
            rss_mb = process.memory_info().rss / 1024 / 1024
            metrics['memory_usage'] = f"{rss_mb:.1f} MB"
            # Without an interval the first cpu_percent() call has nothing
            # to compare against and returns a meaningless 0.0, so sample
            # over a short window instead.
            metrics['cpu_usage'] = f"{process.cpu_percent(interval=0.1):.1f}%"
        except psutil.Error:
            # Keep the 'unknown' placeholders for whatever could not be read.
            pass

        return metrics

    def run_comprehensive_check(self) -> dict:
        """Run all health checks and return comprehensive report.

        The result is also stored in ``self.check_results``.

        Returns:
            The report dictionary, including ``health_score`` and ``status``.
        """
        # Progress goes to stderr so that ``--json`` output on stdout stays
        # machine-parseable.
        print("🔍 Running comprehensive health check...", file=sys.stderr)

        self.check_results = {
            'timestamp': datetime.now().isoformat(),
            'dependencies': self.check_dependencies(),
            'api_keys': self.check_api_keys(),
            'components': self.check_core_components(),
            'files': self.check_file_system(),
            'metrics': self.get_system_metrics(),
        }

        # Both derived fields read from self.check_results, so they must be
        # computed after the base report is in place, score first.
        self.check_results['health_score'] = self._calculate_health_score()
        self.check_results['status'] = self._get_overall_status()

        return self.check_results

    def _calculate_health_score(self) -> float:
        """Calculate overall health score (0-100).

        Returns:
            Weighted average of the per-category scores, rounded to one
            decimal place.
        """
        total_score = sum(
            self._score_dict(self.check_results[category]) * weight
            for category, weight in self.SCORE_WEIGHTS.items()
        )
        return round(total_score, 1)

    def _score_dict(self, data_dict):
        """Calculate score for a dictionary of boolean values.

        Returns:
            Percentage (0-100) of truthy values, or ``0`` for an empty dict.
        """
        if not data_dict:
            return 0
        passed = sum(1 for value in data_dict.values() if value)
        return (passed / len(data_dict)) * 100

    def _get_overall_status(self) -> str:
        """Get overall system status label for the current health score."""
        score = self.check_results['health_score']
        for threshold, label in self.STATUS_LEVELS:
            if score >= threshold:
                return label
        return self.STATUS_FALLBACK

    def print_report(self):
        """Print formatted health check report.

        Runs the comprehensive check first if no results are cached yet.
        """
        if not self.check_results:
            self.run_comprehensive_check()

        report = self.check_results
        rule = "=" * 60

        print("\n" + rule)
        print("🏥 GAIA AGENT HEALTH CHECK REPORT")
        print(rule)

        print(f"Timestamp: {report['timestamp']}")
        print(f"Overall Status: {report['status']}")
        print(f"Health Score: {report['health_score']}/100")

        # The boolean sections all share the same pass/fail rendering.
        sections = (
            ("\n📦 Dependencies:", 'dependencies'),
            ("\n🔑 API Keys:", 'api_keys'),
            ("\n🧩 Components:", 'components'),
            ("\n📁 Files:", 'files'),
        )
        for heading, key in sections:
            print(heading)
            for name, ok in report[key].items():
                icon = "✅" if ok else "❌"
                print(f" {icon} {name}")

        print("\n📊 System Metrics:")
        for metric, value in report['metrics'].items():
            print(f" 📈 {metric}: {value}")

        print("\n" + rule)

    def get_json_report(self) -> str:
        """Get health check report as JSON.

        Runs the comprehensive check first if no results are cached yet.

        Returns:
            The report serialized with two-space indentation.
        """
        if not self.check_results:
            self.run_comprehensive_check()
        return json.dumps(self.check_results, indent=2)
def main():
    """Command-line entry point for the health check.

    When the first command-line argument is ``--json`` the report is printed
    as JSON; otherwise the formatted text report is printed.
    """
    checker = GAIAHealthCheck()

    # A one-element slice avoids a separate length check on sys.argv.
    wants_json = sys.argv[1:2] == ["--json"]
    if not wants_json:
        checker.print_report()
        return

    print(checker.get_json_report())
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()