Spaces:
Running
Running
File size: 9,022 Bytes
37cadfb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
#!/usr/bin/env python3
"""
GAIA Question Loader - Web API version
Fetch questions directly from GAIA API instead of local files
"""
import functools
import json
import logging
import os
import random
import re
import time
from pathlib import Path
from typing import Dict, List, Optional

import requests
from dotenv import load_dotenv
# Load environment variables via python-dotenv. GAIAQuestionLoaderWeb reads
# GAIA_API_BASE and GAIA_USERNAME through os.getenv as optional overrides.
load_dotenv()
# Module-level logger named after this module; no handlers or levels are
# configured here.
logger = logging.getLogger(__name__)
def retry_with_backoff(max_retries: int = 3, initial_delay: float = 1.0, backoff_factor: float = 2.0):
    """Build a decorator that retries a function call with exponential backoff.

    A call is retried when it raises ``requests.exceptions.Timeout`` or
    ``requests.exceptions.ConnectionError``, or a
    ``requests.exceptions.HTTPError`` whose response status is 500, 502, 503
    or 504. Any other exception (including other HTTP errors) propagates
    immediately.

    Args:
        max_retries: Total number of attempts before the last error is
            re-raised. With a value <= 0 the function is called exactly once
            and no retry handling is applied.
        initial_delay: Seconds to sleep before the second attempt.
        backoff_factor: Multiplier applied to the delay after every sleep.

    Returns:
        A decorator that wraps the target function with the retry loop.

    Raises:
        The last retryable exception once ``max_retries`` attempts have failed.
    """
    retryable_statuses = (500, 502, 503, 504)

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name/docstring
        def wrapper(*args, **kwargs):
            retries = 0
            delay = initial_delay
            while retries < max_retries:
                try:
                    return func(*args, **kwargs)
                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
                    last_exception = e
                    reason = type(e).__name__
                except requests.exceptions.HTTPError as e:
                    response = e.response
                    # A requests.Response is falsy for 4xx/5xx statuses, so the
                    # presence check must be an identity test, not truthiness.
                    if response is None or response.status_code not in retryable_statuses:
                        raise
                    last_exception = e
                    reason = f"HTTP {response.status_code}"

                # Only retryable failures reach this point.
                retries += 1
                if retries >= max_retries:
                    logger.error(f"Max retries reached for {func.__name__}")
                    raise last_exception
                logger.warning(f"Retry {retries}/{max_retries} for {func.__name__} due to {reason}. Delaying {delay:.2f}s")
                time.sleep(delay)
                delay *= backoff_factor
            # Reached only when max_retries <= 0: make a single plain call.
            return func(*args, **kwargs)
        return wrapper
    return decorator
class GAIAQuestionLoaderWeb:
    """Load and manage GAIA questions from the web API.

    The constructor fetches the question list from the ``questions`` endpoint
    and caches it in ``self.questions``. The lookup, filter and summary
    helpers work on that cached list only; ``get_random_question``,
    ``download_file`` and ``test_api_connection`` issue new requests.
    """

    # Extracts the ``filename`` parameter of a Content-Disposition header,
    # either as a quoted string (group 1) or as a bare token (group 2).
    _DISPOSITION_FILENAME_RE = re.compile(r'filename\s*=\s*(?:"([^"]*)"|([^;]+))', re.IGNORECASE)

    def __init__(self, api_base: Optional[str] = None, username: Optional[str] = None):
        """Store the API settings and load the question list.

        Args:
            api_base: Base URL of the API. Falls back to the ``GAIA_API_BASE``
                environment variable, then to the built-in default.
            username: User name. Falls back to the ``GAIA_USERNAME``
                environment variable, then to the built-in default.
        """
        self.api_base = api_base or os.getenv("GAIA_API_BASE", "https://agents-course-unit4-scoring.hf.space")
        self.username = username or os.getenv("GAIA_USERNAME", "tonthatthienvu")
        self.questions: List[Dict] = []
        self._load_questions()

    def _build_url(self, endpoint: str) -> str:
        """Join ``api_base`` and ``endpoint`` with exactly one slash between them."""
        return f"{self.api_base.rstrip('/')}/{endpoint.lstrip('/')}"

    @staticmethod
    def _safe_filename(content_disposition: Optional[str], fallback: str) -> str:
        """Choose a local file name for a download.

        The name comes from the Content-Disposition header when it carries a
        ``filename`` parameter, otherwise from ``fallback``. Both are
        server- or caller-supplied, so only the final path component is kept;
        this stops values such as ``../../x`` or ``/etc/x`` from escaping the
        download directory.
        """
        header_name = ""
        if content_disposition:
            match = GAIAQuestionLoaderWeb._DISPOSITION_FILENAME_RE.search(content_disposition)
            if match:
                header_name = (match.group(1) or match.group(2) or "").strip()
        for raw in (header_name, fallback or ""):
            # Treat backslashes as separators too, then drop any directory part.
            name = Path(str(raw).replace("\\", "/")).name
            if name and name not in (".", ".."):
                return name
        return "download"

    @retry_with_backoff()
    def _make_request(self, method: str, endpoint: str, params: Optional[Dict] = None,
                      payload: Optional[Dict] = None, timeout: int = 15) -> requests.Response:
        """Make an HTTP request with retry logic.

        Args:
            method: HTTP method name passed to ``requests.request``.
            endpoint: Path relative to ``api_base``.
            params: Optional query-string parameters.
            payload: Optional body, sent as JSON.
            timeout: Request timeout in seconds.

        Returns:
            The response, after ``raise_for_status()`` has passed.

        Raises:
            requests.exceptions.HTTPError, requests.exceptions.Timeout,
            requests.exceptions.ConnectionError: logged here and re-raised so
            the retry decorator and the callers can react.
        """
        url = self._build_url(endpoint)
        logger.info(f"Request: {method.upper()} {url}")
        try:
            response = requests.request(method, url, params=params, json=payload, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.exceptions.HTTPError as e:
            failed = e.response
            # A Response is falsy for error statuses, so compare against None
            # instead of relying on truthiness.
            if failed is not None:
                logger.error(f"HTTPError: {failed.status_code} for {method.upper()} {url}")
                logger.error(f"Response: {failed.text[:200]}")
            else:
                logger.error(f"HTTPError: {e} for {method.upper()} {url}")
            raise
        except requests.exceptions.Timeout:
            logger.error(f"Timeout: Request to {url} timed out after {timeout}s")
            raise
        except requests.exceptions.ConnectionError as e:
            logger.error(f"ConnectionError: Could not connect to {url}. Details: {e}")
            raise

    def _load_questions(self):
        """Fetch all questions from the GAIA API into ``self.questions``.

        On a request failure, a JSON parse failure, or a payload that is not
        a list, the error is logged and printed and ``self.questions`` is set
        to an empty list; nothing is raised.
        """
        try:
            logger.info(f"Fetching questions from GAIA API: {self._build_url('questions')}")
            response = self._make_request("get", "questions", timeout=15)
            loaded = response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to fetch questions from API: {e}")
            print(f"❌ Failed to load questions from web API: {e}")
            self.questions = []
            return
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse JSON response: {e}")
            print(f"❌ Failed to parse questions from web API: {e}")
            self.questions = []
            return

        # The filter helpers iterate over a list of dicts; reject other shapes.
        if not isinstance(loaded, list):
            logger.error(f"Unexpected questions payload type: {type(loaded).__name__}")
            print(f"❌ Failed to parse questions from web API: expected a list, got {type(loaded).__name__}")
            self.questions = []
            return

        self.questions = loaded
        print(f"✅ Loaded {len(self.questions)} GAIA questions from web API")
        logger.info(f"Successfully retrieved {len(self.questions)} questions from API")

    def get_random_question(self) -> Optional[Dict]:
        """Get a random question from the API.

        Returns:
            The question returned by the ``random-question`` endpoint. If the
            request fails, a random entry of the cached list (or ``None`` when
            the cache is empty). ``None`` if the response cannot be parsed.
        """
        try:
            logger.info(f"Getting random question from: {self._build_url('random-question')}")
            response = self._make_request("get", "random-question", timeout=15)
            question = response.json()
            task_id = question.get('task_id', 'Unknown')
            logger.info(f"Successfully retrieved random question: {task_id}")
            return question
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to get random question: {e}")
            # Fallback to local random selection
            return random.choice(self.questions) if self.questions else None
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse random question response: {e}")
            return None

    def get_question_by_id(self, task_id: str) -> Optional[Dict]:
        """Return the first cached question whose ``task_id`` matches, else ``None``."""
        return next((q for q in self.questions if q.get('task_id') == task_id), None)

    def get_questions_by_level(self, level: str) -> List[Dict]:
        """Return all cached questions whose ``Level`` equals ``level``."""
        return [q for q in self.questions if q.get('Level') == level]

    def get_questions_with_files(self) -> List[Dict]:
        """Return all cached questions with a non-empty ``file_name``."""
        return [q for q in self.questions if q.get('file_name')]

    def get_questions_without_files(self) -> List[Dict]:
        """Return all cached questions with a missing or empty ``file_name``."""
        return [q for q in self.questions if not q.get('file_name')]

    def count_by_level(self) -> Dict[str, int]:
        """Count cached questions per ``Level``; a missing level counts as ``'Unknown'``."""
        levels: Dict[str, int] = {}
        for q in self.questions:
            level = q.get('Level', 'Unknown')
            levels[level] = levels.get(level, 0) + 1
        return levels

    def summary(self) -> Dict:
        """Return totals, file/no-file counts, per-level counts and the API settings."""
        return {
            'total_questions': len(self.questions),
            'with_files': len(self.get_questions_with_files()),
            'without_files': len(self.get_questions_without_files()),
            'by_level': self.count_by_level(),
            'api_base': self.api_base,
            'username': self.username,
        }

    def download_file(self, task_id: str, save_dir: str = "./downloads") -> Optional[str]:
        """Download the file associated with a question.

        Args:
            task_id: Task identifier; requested as ``files/<task_id>``.
            save_dir: Directory to write into. It is created, including any
                missing parents, if it does not exist.

        Returns:
            The path of the saved file as a string, or ``None`` if the request
            or the write failed (the error is logged).
        """
        try:
            # Create download directory
            target_dir = Path(save_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Downloading file for task: {task_id}")
            response = self._make_request("get", f"files/{task_id}", timeout=30)
            # Prefer the server-provided name, but never let it (or task_id)
            # point outside the download directory.
            filename = self._safe_filename(response.headers.get('content-disposition'), task_id)
            # Save file
            file_path = target_dir / filename
            with open(file_path, 'wb') as f:
                f.write(response.content)
            logger.info(f"File downloaded successfully: {file_path}")
            return str(file_path)
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to download file for task {task_id}: {e}")
            return None
        except Exception as e:
            # Best-effort contract: any other failure is logged and reported
            # as None instead of being raised.
            logger.error(f"Error saving file for task {task_id}: {e}")
            return None

    def test_api_connection(self) -> bool:
        """Test connectivity to the GAIA API.

        Returns:
            ``True`` if a GET on the ``questions`` endpoint succeeds,
            ``False`` on any exception (which is logged).
        """
        try:
            logger.info(f"Testing API connection to: {self.api_base}")
            self._make_request("get", "questions", timeout=10)
            logger.info("✅ API connection successful")
            return True
        except Exception as e:
            logger.error(f"❌ API connection failed: {e}")
            return False