#!/usr/bin/env python3
"""
Test for YouTube question processing in GAIA system.

The script replaces ``gaia_tools.analyze_youtube_video`` with a mock that
returns a canned analysis, then pushes one YouTube question through:

1. a directly constructed ``CodeAgent`` plus the standard ``GAIASolver`` path
   (``test_direct_youtube_question``), and
2. ``HFAsyncGAIATestSystem`` (``test_async_youtube_question``).

Results are printed rather than asserted.
"""

import asyncio
import importlib
import json
import os
import re
import sys
from pathlib import Path

# Import the module containing the YouTube video analysis tool
import gaia_tools
from async_complete_test_hf import HFAsyncGAIATestSystem
from main import GAIASolver, CodeAgent, GAIA_TOOLS
from question_classifier import QuestionClassifier

# Original analyze_youtube_video function, kept so the tests can restore it.
# NOTE(review): GAIA_TOOLS is imported before any patching happens. If that
# collection holds direct references to the original tool, rebinding the
# module attribute below may not change what the agent calls - confirm.
original_analyze_youtube_video = gaia_tools.analyze_youtube_video

# The single GAIA task exercised by both tests. Each test works on a copy so
# that code under test cannot leak mutations from one test into the other.
BIRD_QUESTION = {
    'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
    'Question': 'In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',
    'Final Answer': '3'  # Assuming this is the correct answer based on GAIA metadata
}

# Canned analysis returned by the mock for the video referenced by BIRD_QUESTION.
_BIRD_VIDEO_ANALYSIS = """
**🎥 Gemini 2.0 Flash Video+Audio Analysis**
**Title:** Bird Identification Challenge: Backyard Birds in Spring
**Duration:** 3:42
**File Size:** 45.2MB
**Question:** What is the highest number of bird species to be on camera simultaneously?

**Analysis Results:**
After careful frame-by-frame analysis of the video, the highest number of different bird species visible simultaneously is 3.
This occurs at approximately 1:23 into the video, where we can see:
1. American Robin
2. Northern Cardinal
3. Blue Jay

These three species are clearly visible in the same frame at this timestamp.
"""

# Canned analysis returned by the mock for every other URL.
_UNKNOWN_VIDEO_ANALYSIS = """
**🎥 Gemini 2.0 Flash Video+Audio Analysis**
**Title:** Unknown Video
**Duration:** Unknown
**File Size:** Unknown
**Question:** Unknown

**Analysis Results:**
Unable to analyze the video content. Please provide a valid YouTube URL.
"""

# YouTube URL regex pattern. The lazy ".+?" runs up to the next whitespace (or
# end of text), so punctuation glued to the URL is part of the raw match.
YOUTUBE_URL_PATTERN = r'(https?://)?(www\.)?(youtube\.com|youtu\.?be)/.+?(?=\s|$)'

# Sentence punctuation that may directly follow a URL in running text.
_TRAILING_PUNCTUATION = ',.;:!?'


def mock_analyze_youtube_video(video_url, question, max_frames=10):
    """Return a predefined analysis instead of processing a real video.

    Args:
        video_url: URL of the video; a trailing comma is tolerated.
        question: Ignored by the mock.
        max_frames: Ignored by the mock.

    Returns:
        The bird-species analysis text when the URL contains the video id
        ``L1vXCYZAYYM``, otherwise a generic "unable to analyze" text.
    """
    print(f"📹 Mock analyzing YouTube video: {video_url}")

    # Clean the URL in case there's a trailing comma
    cleaned_url = video_url.rstrip(',')

    # For the specific URL in the GAIA task
    if "L1vXCYZAYYM" in cleaned_url:
        return _BIRD_VIDEO_ANALYSIS

    # Generic response for other URLs
    return _UNKNOWN_VIDEO_ANALYSIS


def extract_youtube_url(text):
    """Extract the first YouTube URL from ``text``.

    Args:
        text: Free text that may contain a YouTube URL; may be empty or None.

    Returns:
        The matched URL with trailing sentence punctuation (``,.;:!?``)
        removed, or None when ``text`` is empty or holds no YouTube URL.
    """
    if not text:
        return None

    match = re.search(YOUTUBE_URL_PATTERN, text)
    if match is None:
        return None

    # The pattern stops only at whitespace, so "…watch?v=ID," keeps its comma.
    return match.group(0).rstrip(_TRAILING_PUNCTUATION)


def _contains_answer(text, answer):
    """Return True when ``answer`` occurs in ``text`` as a standalone token.

    Unlike a plain substring test, "3" is not found inside "13", "30", "3.5"
    or "2.3", but is found in "The answer is 3.".
    """
    if not text or not answer:
        return False
    pattern = rf"(?<![\w.]){re.escape(answer)}(?!\w|\.\d)"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None


def direct_force_tools_execution(solver, youtube_url, question_text):
    """Run a fresh agent with a prompt that demands the YouTube analysis tool.

    Args:
        solver: Object whose ``model`` attribute is handed to ``CodeAgent``.
        youtube_url: URL inserted into the prompt.
        question_text: Question inserted into the prompt.

    Returns:
        The agent's response converted to ``str``.
    """
    # Create a direct prompt that forces the YouTube analysis
    force_prompt = f"""
You need to analyze a YouTube video to answer a specific question.

YOUTUBE VIDEO URL: {youtube_url}
QUESTION: {question_text}

CRITICAL INSTRUCTIONS:
1. Use the analyze_youtube_video tool with the provided URL
2. Extract the answer from the tool's response
3. Provide ONLY the final numerical answer
"""

    # Create a fresh agent using the same approach as in GAIASolver
    print("🤖 Creating fresh agent for direct execution...")
    agent = CodeAgent(
        model=solver.model,
        tools=GAIA_TOOLS,
        max_steps=12,
        verbosity_level=1  # Lower verbosity for cleaner output
    )

    # Run the agent with the forcing prompt
    print("🔍 Running direct analysis...")
    response = agent.run(force_prompt)

    return str(response)


def test_direct_youtube_question():
    """Process the bird-species question directly and via the standard solver.

    The mock tool is installed for the duration of the test and the original
    is always restored. Failures of either path are printed, not raised, so
    that both paths are always attempted.
    """
    # Create question with the YouTube URL
    question = dict(BIRD_QUESTION)

    # Replace the function in the module with our mock
    print("🔄 Replacing YouTube analysis tool with mock implementation...")
    gaia_tools.analyze_youtube_video = mock_analyze_youtube_video

    try:
        # Initialize components after patching
        solver = GAIASolver()
        classifier = QuestionClassifier()

        # Classify the question
        print("🧩 Classifying question...")
        classification = classifier.classify_question(question['Question'])
        print(f"📋 Classification: {classification['primary_agent']}")
        print(f"🔧 Tools needed: {classification.get('tools_needed', [])}")

        # Extract YouTube URL from question
        youtube_url = extract_youtube_url(question['Question'])

        # Stays None when there is no URL, so the validation below never
        # touches an unbound name.
        direct_result = None
        if youtube_url:
            print(f"🔗 Extracted YouTube URL: {youtube_url}")

            # Use a direct approach to force tool execution
            print("\n🧠 Processing question with direct YouTube analyzer execution...")
            try:
                direct_result = direct_force_tools_execution(
                    solver,
                    youtube_url,
                    "What is the highest number of bird species to be on camera simultaneously?"
                )
                print(f"\n🔍 Direct result: {direct_result}")
            except Exception as e:  # deliberate best-effort: report and continue
                print(f"\n⚠️ Direct test error: {e}")
                direct_result = "Error in direct execution"
        else:
            print("⚠️ No YouTube URL found in question; skipping direct execution")

        # Also try the normal processing path
        print("\n🧠 Processing question with standard solver...")
        try:
            result = solver.solve_question(question)
            print(f"\n✅ Standard result: {result}")
        except Exception as e:  # deliberate best-effort: report and continue
            print(f"\n⚠️ Standard test error: {e}")
            result = "Error in standard execution"

        # Validate result
        expected = str(question['Final Answer']).strip().lower()
        actual = str(result).strip().lower()
        validation_status = "✓ correct" if expected == actual else "✗ incorrect"
        print(f"🔎 Validation: {validation_status}")

        # If direct result contains the answer, check that too. The answer
        # must appear as a standalone token, not merely as a substring.
        if _contains_answer(direct_result, expected):
            print("🔎 Direct validation: ✓ correct")
        else:
            print("🔎 Direct validation: ✗ incorrect")

    finally:
        # Restore original function
        print("🔄 Restoring original YouTube analysis tool...")
        gaia_tools.analyze_youtube_video = original_analyze_youtube_video


async def test_async_youtube_question():
    """Process the bird-species question through ``HFAsyncGAIATestSystem``.

    The system's ``load_gaia_questions`` is replaced so that only the single
    test question is loaded. Both that method and the mocked tool are
    restored even when the run raises.
    """
    # Replace the function in the module with our mock
    print("🔄 Replacing YouTube analysis tool with mock implementation in async test...")
    gaia_tools.analyze_youtube_video = mock_analyze_youtube_video

    try:
        # Create async test system
        system = HFAsyncGAIATestSystem(
            max_concurrent=1,
            timeout_seconds=60,
            output_dir="/tmp/async_youtube_test"
        )

        # Create a single question test
        questions = [dict(BIRD_QUESTION)]

        # Override the load_gaia_questions method to use our single question
        async def mock_load_questions(*args, **kwargs):
            return questions

        # Save the original method and replace it
        original_load_method = system.load_gaia_questions
        system.load_gaia_questions = mock_load_questions

        try:
            # The mock tool was installed above and nothing has replaced it
            # since, so no re-patching is needed before the run.
            print("📹 Mock is active for async test - will analyze YouTube video")

            # Run the test
            print("🚀 Running async test with YouTube question...")
            result = await system.run_comprehensive_test(question_limit=1)

            # Print results
            print("\n📊 Async Test Results:")
            print(f"Total questions processed: {result['total_questions']}")
            print(f"Status counts: {result['status_counts']}")

            # Check answer from the first question
            question_id = questions[0]['task_id']
            if question_id in result['results']:
                question_result = result['results'][question_id]
                answer = question_result.get('answer', 'No answer')
                validation = question_result.get('validation_status', 'unknown')
                print(f"\nQuestion ID: {question_id}")
                print(f"Answer: {answer}")
                print(f"Validation: {validation}")
            else:
                print(f"No results found for question ID {question_id}")
        finally:
            # Restore the original method
            system.load_gaia_questions = original_load_method

    finally:
        # Restore original function
        print("🔄 Restoring original YouTube analysis tool...")
        gaia_tools.analyze_youtube_video = original_analyze_youtube_video


async def main():
    """Run both tests"""
    print("🚀 Starting direct YouTube question test...")
    test_direct_youtube_question()

    print("\n\n🚀 Starting async YouTube question test...")
    await test_async_youtube_question()

    print("\n✅ All tests completed!")


if __name__ == "__main__":
    asyncio.run(main())