Final_Assignment

Running

File size: 8,162 Bytes

c262d1a

#!/usr/bin/env python3
"""
Monitor GAIA test progress and provide real-time status updates
"""

import os
import time
import json
from pathlib import Path
from datetime import datetime
import argparse

def get_latest_log_file():
    """Return the newest classification test log, or None if there is none.

    Looks in the ``logs`` directory (relative to the current working
    directory) for files named ``classification_test_*.log`` and picks the
    one with the most recent modification time.
    """
    logs_root = Path("logs")

    # A missing directory is treated the same as a directory with no matches.
    candidates = (
        list(logs_root.glob("classification_test_*.log"))
        if logs_root.exists()
        else []
    )

    if candidates:
        return max(candidates, key=lambda candidate: candidate.stat().st_mtime)
    return None

def _parse_summary_entry(line):
    """Return ``(agent, count)`` from an ``agent: N questions (...)`` line, else None."""
    parts = line.split(":")
    if len(parts) != 2:
        return None

    count_part = parts[1].strip()
    if "(" not in count_part:
        return None

    try:
        return parts[0].strip(), int(count_part.split()[0])
    except ValueError:
        return None


def _parse_question_marker(line):
    """Return ``(current_number, question_label)`` from a ``[X/Y] ... Testing`` line, else None."""
    try:
        bracket_part = line.split("[")[1].split("]")[0]
        current_num = int(bracket_part.split("/")[0])
    except (IndexError, ValueError):
        return None

    label = line.split("Testing")[1].split("...")[0].strip()
    return current_num, label


def parse_log_progress(log_file):
    """Parse a test log file and summarise the progress it records.

    Lines that match a progress marker but cannot be parsed are skipped, so
    a single unexpected line does not discard everything else in the log.

    Args:
        log_file: ``pathlib.Path`` of the log to read, or a falsy value.

    Returns:
        ``None`` if ``log_file`` is falsy or does not exist;
        ``{'error': message}`` if the file cannot be read; otherwise a dict
        with the keys ``log_file``, ``last_modified``,
        ``classification_summary``, ``current_agent``,
        ``questions_processed``, ``total_questions``, ``current_question``
        and ``progress_percentage``.
    """
    if not log_file or not log_file.exists():
        return None

    try:
        # Decode explicitly as UTF-8 and replace undecodable bytes so the
        # result does not depend on the platform's default encoding.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()
        last_modified = datetime.fromtimestamp(log_file.stat().st_mtime)
    except (OSError, ValueError, OverflowError) as e:
        return {'error': str(e)}

    # Classification summary state
    classification_summary = {}
    in_summary = False

    # Testing progress state
    current_agent = None
    questions_processed = 0
    total_questions = 0
    current_question = None

    for raw_line in lines:
        line = raw_line.strip()

        # Classification summary section
        if "CLASSIFICATION SUMMARY:" in line:
            in_summary = True
            continue
        if in_summary:
            if ":" in line and "questions" in line:
                entry = _parse_summary_entry(line)
                if entry is not None:
                    agent, count = entry
                    classification_summary[agent] = count
            elif "Testing agent types:" in line:
                in_summary = False

        # Current testing progress
        if "TESTING" in line and "AGENT" in line:
            current_agent = line.split("TESTING")[1].split("AGENT")[0].strip()
        elif "Questions to test:" in line:
            try:
                total_questions = int(line.split(":")[-1].strip())
            except ValueError:
                # Keep the last good total rather than failing the whole parse.
                pass
        elif "Testing" in line and "/" in line and "]" in line:
            marker = _parse_question_marker(line)
            if marker is not None:
                current_num, current_question = marker
                # The numbered question is still being processed.
                questions_processed = current_num - 1

    if total_questions > 0:
        progress_percentage = questions_processed / total_questions * 100
    else:
        progress_percentage = 0

    return {
        'log_file': str(log_file),
        'last_modified': last_modified,
        'classification_summary': classification_summary,
        'current_agent': current_agent,
        'questions_processed': questions_processed,
        'total_questions': total_questions,
        'current_question': current_question,
        'progress_percentage': progress_percentage,
    }

def get_latest_results():
    """Load the most recently modified results file from the working directory.

    Considers files matching ``gaia_classification_test_results_*.json`` in
    the current directory.

    Returns:
        ``None`` if no such file exists, or if the newest one cannot be read
        or does not contain a JSON object; otherwise a dict with the keys
        ``file``, ``metadata``, ``overall_stats`` and ``agent_performance``.
    """
    result_files = list(Path(".").glob("gaia_classification_test_results_*.json"))
    if not result_files:
        return None

    try:
        # The mtime lookup is inside the try: a file may vanish between the
        # glob and the stat call.
        latest_file = max(result_files, key=lambda x: x.stat().st_mtime)
        with open(latest_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # KeyboardInterrupt is deliberately not caught so Ctrl+C still works.
        return None

    if not isinstance(data, dict):
        return None

    return {
        'file': str(latest_file),
        'metadata': data.get('test_metadata', {}),
        'overall_stats': data.get('overall_stats', {}),
        'agent_performance': data.get('agent_performance', {}),
    }

def display_status(progress, results, watch_mode=False):
    """Print the monitoring dashboard to stdout.

    Args:
        progress: Dict describing the current log progress (keys
            ``log_file``, ``last_modified``, ``current_agent``,
            ``questions_processed``, ``total_questions``,
            ``progress_percentage``, ``current_question``,
            ``classification_summary``), a dict with an ``error`` key, or a
            falsy value when no log is available.
        results: Dict with ``file``, ``overall_stats`` and
            ``agent_performance`` keys, or a falsy value when there are no
            completed results.
        watch_mode: When true, clear the terminal before drawing.
    """

    if watch_mode:
        # Clear screen in watch mode
        os.system('clear' if os.name == 'posix' else 'cls')

    print("🔍 GAIA TEST MONITORING DASHBOARD")
    print("=" * 60)
    print(f"📅 Last Updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if progress and 'error' not in progress:
        print(f"\n📊 CURRENT PROGRESS:")
        print(f"🗂️  Log File: {Path(progress['log_file']).name}")
        print(f"⏰ Last Modified: {progress['last_modified'].strftime('%H:%M:%S')}")

        if progress['current_agent']:
            print(f"\n🤖 Currently Testing: {progress['current_agent'].upper()} AGENT")
            print(f"📈 Progress: {progress['questions_processed']}/{progress['total_questions']} ({progress['progress_percentage']:.1f}%)")

            # Progress bar; clamp the filled part so a percentage outside
            # 0-100 cannot make the bar wider than bar_length.
            bar_length = 30
            filled_length = int(bar_length * progress['progress_percentage'] / 100)
            filled_length = max(0, min(bar_length, filled_length))
            bar = "█" * filled_length + "░" * (bar_length - filled_length)
            print(f"▓ Progress: [{bar}] {progress['progress_percentage']:.1f}%")

            if progress['current_question']:
                print(f"🧩 Current Question: {progress['current_question']}...")

        if progress['classification_summary']:
            print(f"\n📊 CLASSIFICATION BREAKDOWN:")
            total_questions = sum(progress['classification_summary'].values())
            for agent, count in sorted(progress['classification_summary'].items()):
                percentage = (count / total_questions) * 100 if total_questions > 0 else 0
                print(f"  {agent}: {count} questions ({percentage:.1f}%)")

    elif progress and 'error' in progress:
        print(f"\n❌ ERROR reading log file: {progress['error']}")
    else:
        print(f"\n⚠️  No active test logs found")

    if results:
        print(f"\n📋 LATEST COMPLETED RESULTS:")
        print(f"📄 Results File: {Path(results['file']).name}")

        overall = results.get('overall_stats', {})
        if overall:
            print(f"✅ Success Rate: {overall.get('success_rate', 0):.1f}%")
            print(f"📊 Total Questions: {overall.get('total_questions', 0)}")
            print(f"✅ Successful: {overall.get('successful', 0)}")
            print(f"❌ Errors: {overall.get('errors', 0)}")

        agent_perf = results.get('agent_performance', {})
        if agent_perf:
            print(f"\n🎯 AGENT PERFORMANCE:")
            # Missing per-agent fields default to 0, as the overall stats
            # above do, so partial results do not crash the dashboard.
            ranked = sorted(
                agent_perf.items(),
                key=lambda item: item[1].get('success_rate', 0),
                reverse=True,
            )
            for agent, stats in ranked:
                success_rate = stats.get('success_rate', 0)
                status_emoji = "🟢" if success_rate >= 90 else "🟡" if success_rate >= 70 else "🔴"
                print(f"  {status_emoji} {agent}: {success_rate:.1f}% ({stats.get('successful', 0)}/{stats.get('total_questions', 0)})")

    print(f"\n🔍 MONITORING OPTIONS:")
    print(f"  Watch mode: python tests/monitor_tests.py --watch")
    print(f"  Analyze results: python tests/analyze_test_results.py <results_file>")
    print(f"  Run new test: python tests/test_by_classification.py --agent-types <type>")

def main():
    """Command-line entry point for the monitor.

    Without ``--watch`` the dashboard is rendered once. With ``--watch`` it
    is re-rendered every ``--interval`` seconds until interrupted with
    Ctrl+C. A negative interval is rejected up front in watch mode, because
    ``time.sleep`` would otherwise raise after the first render.
    """
    parser = argparse.ArgumentParser(description="Monitor GAIA test progress")
    parser.add_argument('--watch', action='store_true', help='Watch mode (auto-refresh every 10s)')
    parser.add_argument('--interval', type=int, default=10, help='Refresh interval in seconds for watch mode')

    args = parser.parse_args()

    def refresh(watch_mode):
        # One fetch-and-render cycle, shared by both modes.
        progress = parse_log_progress(get_latest_log_file())
        results = get_latest_results()
        display_status(progress, results, watch_mode=watch_mode)

    if not args.watch:
        refresh(False)
        return

    if args.interval < 0:
        # parser.error prints usage and exits with status 2.
        parser.error("--interval must be a non-negative number of seconds")

    print("👀 Starting watch mode... (Press Ctrl+C to stop)")
    try:
        while True:
            refresh(True)
            print(f"\n⏱️  Refreshing in {args.interval}s... (Ctrl+C to stop)")
            time.sleep(args.interval)
    except KeyboardInterrupt:
        print(f"\n👋 Monitoring stopped.")

if __name__ == "__main__":
    main()