Final_Assignment_Template / test_agent.py
schoolkithub's picture
Upload 8 files
64a54ba verified
#!/usr/bin/env python3
"""
Test script to verify GAIA agent setup and functionality.
"""
from agent import GAIAAgent
from tools import web_search, read_file, calculate_simple_math
def test_api_connection():
    """Check that the agent can reach the xAI API.

    Builds a ``GAIAAgent``, calls its ``test_grok()`` method and prints the
    response. The check counts as failed when the response text contains
    "error" (case-insensitive) or when any exception is raised along the way.

    Returns:
        bool: True when the call completed and the response did not mention
        an error, False otherwise.
    """
    print("Testing xAI API connection...")
    try:
        # Build the agent inside the try block so that a constructor failure
        # is reported as a failed check (letting main() print its
        # troubleshooting hint) instead of escaping as a traceback.
        agent = GAIAAgent()
        response = agent.test_grok()
        print(f"API Response: {response}")
        # Heuristic: any mention of "error" in the reply is treated as failure.
        if "error" in response.lower():
            print("❌ API test failed")
            return False
        print("✅ API connection successful")
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"❌ API test error: {e}")
        return False
def test_basic_reasoning():
    """Run two simple question/answer checks through the agent.

    Each test case dict is passed to ``agent.process_task``; the final answer
    is pulled out with ``agent.extract_final_answer`` and the check passes
    when the expected text appears (case-insensitively) anywhere in it.
    Results are printed; nothing is returned.
    """
    print("\nTesting basic reasoning...")
    try:
        # Guard construction too, so a broken agent setup is reported like any
        # other test error rather than aborting the whole suite.
        agent = GAIAAgent()
    except Exception as e:
        print(f"❌ Test error: {e}")
        return

    test_cases = [
        {
            "task_id": "test_math",
            "question": "What is 25 + 17?",
            "expected": "42",
        },
        {
            "task_id": "test_general",
            "question": "What is the capital of Japan?",
            "expected": "tokyo",
        },
    ]

    for test_case in test_cases:
        print(f"\nTest: {test_case['question']}")
        try:
            response = agent.process_task(test_case)
            predicted = agent.extract_final_answer(response)
            print(f"Response: {predicted}")
            # Simple comparison: substring match, ignoring case.
            if test_case['expected'].lower() in predicted.lower():
                print("✅ Test passed")
            else:
                print("❌ Test failed")
        except Exception as e:
            # Keep going so one failing case does not hide the others.
            print(f"❌ Test error: {e}")
def test_tools():
    """Exercise each standalone tool once and print what it returns.

    Covers ``calculate_simple_math``, ``web_search`` and ``read_file``. Each
    tool is checked in its own ``try`` block so that one raising tool does not
    stop the remaining checks (or the rest of the suite), matching how the
    other test functions in this script report errors. Nothing is returned.
    """
    print("\nTesting tools...")

    # Test math calculation
    print("\n1. Testing math calculation:")
    try:
        result = calculate_simple_math("15 + 27")
        print(f"15 + 27 = {result}")
    except Exception as e:
        print(f"❌ Math calculation error: {e}")

    # Test web search (fallback)
    # NOTE(review): None is passed as the second argument; presumably this
    # selects the fallback search path — confirm against tools.web_search.
    print("\n2. Testing web search:")
    try:
        search_result = web_search("capital of France", None)
        # Only the first 100 items/characters are shown to keep output short;
        # a non-sliceable result is reported below instead of crashing.
        print(f"Search result: {search_result[:100]}...")
    except Exception as e:
        print(f"❌ Web search error: {e}")

    # Test file reading (with non-existent file)
    print("\n3. Testing file reading:")
    try:
        file_result = read_file("nonexistent.txt")
        print(f"File read result: {file_result}")
    except Exception as e:
        print(f"❌ File reading error: {e}")
def test_sample_task():
    """Run one GAIA-style word problem through the agent and check the answer.

    The sample task dict (with ``file_name`` set to None) is passed to
    ``agent.process_task``; the final answer is pulled out with
    ``agent.extract_final_answer`` and, after stripping surrounding
    whitespace, must equal the expected answer exactly. Results are printed;
    nothing is returned.
    """
    print("\nTesting sample GAIA task...")
    sample_task = {
        "task_id": "sample_test",
        "question": "If a store has 150 apples and sells 87 of them, how many apples are left?",
        "answer": "63",
        "file_name": None,
    }
    try:
        # Construct the agent inside the try block so a setup failure is
        # reported as a task error instead of escaping as a traceback.
        agent = GAIAAgent()
        print(f"Question: {sample_task['question']}")
        response = agent.process_task(sample_task)
        predicted = agent.extract_final_answer(response)
        expected = sample_task['answer']
        print(f"Expected: {expected}")
        print(f"Predicted: {predicted}")
        # Exact match after trimming whitespace (stricter than the substring
        # comparison used by the basic reasoning checks).
        if predicted.strip() == expected:
            print("✅ Sample task passed")
        else:
            print("❌ Sample task failed")
    except Exception as e:
        print(f"❌ Sample task error: {e}")
def main():
    """Entry point: run the API check, then the remaining checks if it passes.

    The API connection check acts as a gate. When it fails, a short
    troubleshooting hint is printed and the other checks are skipped.
    """
    divider = "=" * 50
    print("GAIA Agent Test Suite")
    print(divider)

    # The API check goes first; everything else depends on it.
    if test_api_connection():
        # Follow-up checks, executed in this order.
        for check in (test_basic_reasoning, test_tools, test_sample_task):
            check()
        print("\n" + divider)
        print("Test suite completed!")
        print("If all tests passed, you can run: python evaluate.py")
    else:
        print("\n❌ API connection failed. Cannot proceed with other tests.")
        print("Please check your API key and internet connection.")


# Run the suite only when executed as a script, not when imported.
if __name__ == "__main__":
    main()