Spaces:

schoolkithub
/

multi-agent-gaia-system

Runtime error

Omachoko committed 23 days ago

Commit

25a9c05

1 Parent(s): e9d5104

🧹 Remove redundant test file

- Remove test_simple.py (redundant functionality)
- Keep test_gaia.py (comprehensive GAIA compliance testing)
- Cleaner repository structure

Files changed (1) hide show

test_simple.py +0 -114

test_simple.py DELETED Viewed

@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple GAIA System Test - No Gradio Dependencies
-"""
-import os
-import sys
-import re
-# Add current directory to path
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-from gaia_system import BasicAgent
def clean_for_api_submission(answer: str) -> str:
    """
    Final cleanup of agent answers for GAIA API submission.

    Strips presentation artifacts (markdown markers, a leading "Answer:"
    label, a trailing period on single-token answers) so the result can be
    compared by exact match.

    Args:
        answer: Raw answer produced by the agent. ``None`` is tolerated and
            non-string values are converted with ``str()``.

    Returns:
        The cleaned answer, or ``"I cannot determine the answer"`` when
        nothing is left after cleanup.
    """
    import re  # function-scope import keeps this block self-contained

    fallback = "I cannot determine the answer"
    if answer is None:
        return fallback
    if not isinstance(answer, str):
        answer = str(answer)

    # Remove markdown emphasis and inline-code markers in a single pass.
    answer = answer.translate(str.maketrans('', '', '*`')).strip()

    # Remove leading "Answer:" / "Final Answer:" labels in any letter case.
    # Anchoring at the start leaves the same words untouched mid-sentence.
    answer = re.sub(
        r'^(?:(?:final\s+)?answer\s*:\s*)+', '', answer, flags=re.IGNORECASE
    ).strip()

    # Drop trailing periods from single-token answers (a word, or a number
    # such as "1,000."), but keep the period that ends a multi-word sentence.
    if len(answer.split()) == 1:
        answer = answer.rstrip('.')

    # Input that was only whitespace, markup, dots or a bare label is empty
    # by now; report it the same way as a missing answer.
    return answer or fallback
def test_gaia_agent():
    """Run the agent over a fixed list of benchmark-style questions.

    Each answer is passed through ``clean_for_api_submission`` and compared
    for exact equality with the expected string. Progress and a final
    summary are printed to stdout.

    Returns:
        True when at least 60% of the questions matched exactly; False
        otherwise, or when anything outside the per-question handling
        raises (for example, constructing the agent).
    """
    print("🎯 GAIA Agent Test")
    print("=" * 50)
    divider = "-" * 30
    # (question, expected exact answer) pairs
    benchmark = (
        ("What is 15 + 27?", "42"),
        ("What is 100 / 4?", "25"),
        ("What is 6 * 7?", "42"),
        ("Calculate 125 * 8", "1000"),
        ("What is 2 to the power of 5?", "32"),
        ("What is the capital of France?", "Paris"),
        ("What is the capital of Germany?", "Berlin"),
        ("What is the capital of Brazil?", "Brasília"),
        ("How many planets are in our solar system?", "8"),
        ("What is the speed of light?", "299792458"),
        ("What is the formula for water?", "H2O"),
    )
    try:
        agent = BasicAgent()
        print("✅ Agent initialized successfully\n")
        hits = 0
        attempts = 0
        for prompt, wanted in benchmark:
            try:
                got = clean_for_api_submission(agent(prompt))
                print(f"Q: {prompt}")
                print(f"A: {got}")
                if got != wanted:
                    print(f"⚠️  Expected: {wanted}")
                else:
                    print("✅ PERFECT MATCH")
                    hits += 1
            except Exception as err:
                # A failing question still counts as an attempt.
                print(f"❌ Error: {err}")
            attempts += 1
            print(divider)
        score = hits / attempts
        print(f"\n📊 Results: {hits}/{attempts} correct ({score*100:.1f}%)")
        if score >= 0.8:
            verdict = "🎉 EXCELLENT! System is GAIA-ready!"
        elif score >= 0.6:
            verdict = "✅ GOOD! Minor improvements needed"
        else:
            verdict = "⚠️  Needs improvement"
        print(verdict)
        return score >= 0.6
    except Exception as err:
        print(f"❌ Test failed: {err}")
        return False
if __name__ == "__main__":
    # Script entry point: run the test and report the outcome through the
    # process exit status (0 on success, 1 otherwise).
    print("🧪 Simple GAIA System Test")
    print("=" * 60)
    passed = test_gaia_agent()
    if not passed:
        print("\n❌ System needs improvements")
        sys.exit(1)
    print("\n🚀 System is ready for GAIA benchmark!")
    sys.exit(0)