Omachoko committed on
Commit
25a9c05
·
1 Parent(s): e9d5104

🧹 Remove redundant test file

Browse files

- Remove test_simple.py (redundant functionality)
- Keep test_gaia.py (comprehensive GAIA compliance testing)
- Cleaner repository structure

Files changed (1) hide show
  1. test_simple.py +0 -114
test_simple.py DELETED
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Simple GAIA System Test - No Gradio Dependencies
4
- """
5
-
6
- import os
7
- import sys
8
- import re
9
-
10
- # Add current directory to path
11
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
12
-
13
- from gaia_system import BasicAgent
14
-
15
def clean_for_api_submission(answer: str) -> str:
    """Normalize an agent answer for exact-match GAIA API submission.

    The cleanup strips surrounding whitespace, removes markdown emphasis
    and code markers (asterisks and backticks), and removes literal
    ``Answer:`` / ``ANSWER:`` labels. A trailing period is dropped when the
    result is a single token or a purely numeric value (digits optionally
    separated by ``.`` or ``,``); multi-word sentences keep their
    punctuation.

    Args:
        answer: Raw answer text produced by the agent. May be empty or None.

    Returns:
        The cleaned answer, or ``"I cannot determine the answer"`` when the
        input is empty or nothing is left once the cleanup has run.
    """
    fallback = "I cannot determine the answer"
    if not answer:
        return fallback

    # Remove any remaining formatting artifacts
    answer = answer.strip()

    # Remove markdown formatting
    answer = answer.replace('**', '').replace('*', '').replace('`', '')

    # Remove any "Answer:" labels that might have slipped through
    answer = answer.replace('Answer:', '').replace('ANSWER:', '').strip()

    # Drop trailing periods for factual answers (but keep them for sentences)
    if len(answer.split()) == 1 or answer.replace('.', '').replace(',', '').isdigit():
        answer = answer.rstrip('.')

    # Whitespace-only, markup-only or label-only input cleans down to an
    # empty string; submit the explicit fallback instead of a blank answer.
    return answer or fallback
37
-
38
def test_gaia_agent():
    """Run the agent over a fixed set of GAIA-style questions.

    Each question is sent to a freshly built ``BasicAgent``; the reply is
    passed through ``clean_for_api_submission`` and compared for exact
    equality with the expected string. Progress and a final score are
    printed to stdout.

    Returns:
        True when at least 60% of the questions match exactly, False
        otherwise (including when setup or scoring raises).
    """
    print("🎯 GAIA Agent Test")
    print("=" * 50)

    # (question, expected exact-match answer) pairs
    test_cases = [
        ("What is 15 + 27?", "42"),
        ("What is 100 / 4?", "25"),
        ("What is 6 * 7?", "42"),
        ("Calculate 125 * 8", "1000"),
        ("What is 2 to the power of 5?", "32"),
        ("What is the capital of France?", "Paris"),
        ("What is the capital of Germany?", "Berlin"),
        ("What is the capital of Brazil?", "Brasília"),
        ("How many planets are in our solar system?", "8"),
        ("What is the speed of light?", "299792458"),
        ("What is the formula for water?", "H2O"),
    ]

    try:
        agent = BasicAgent()
        print("✅ Agent initialized successfully\n")

        n_correct = 0
        n_seen = 0
        separator = "-" * 30

        for question, expected in test_cases:
            try:
                cleaned = clean_for_api_submission(agent(question))

                print(f"Q: {question}")
                print(f"A: {cleaned}")

                if cleaned != expected:
                    print(f"⚠️ Expected: {expected}")
                else:
                    print("✅ PERFECT MATCH")
                    n_correct += 1

                n_seen += 1
                print(separator)
            except Exception as e:
                # A failing question still counts toward the total.
                print(f"❌ Error: {e}")
                print(separator)
                n_seen += 1

        accuracy = n_correct / n_seen
        print(f"\n📊 Results: {n_correct}/{n_seen} correct ({accuracy*100:.1f}%)")

        passed = accuracy >= 0.6
        if accuracy >= 0.8:
            print("🎉 EXCELLENT! System is GAIA-ready!")
        elif passed:
            print("✅ GOOD! Minor improvements needed")
        else:
            print("⚠️ Needs improvement")

        return passed

    except Exception as e:
        print(f"❌ Test failed: {e}")
        return False
102
-
103
if __name__ == "__main__":
    # Script entry point: run the suite and mirror its outcome in the
    # process exit status (0 on success, 1 otherwise).
    print("🧪 Simple GAIA System Test")
    print("=" * 60)

    success = test_gaia_agent()

    print(
        "\n🚀 System is ready for GAIA benchmark!"
        if success
        else "\n❌ System needs improvements"
    )

    sys.exit(int(not success))