File size: 11,839 Bytes
37cadfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#!/usr/bin/env python3
"""
Enhanced Wikipedia research tools for better GAIA question solving
"""

import requests
import re
from typing import Dict, List, Optional
from smolagents import tool

@tool
def wikipedia_featured_articles_search(query: str, date_filter: str = "") -> str:
    """
    Enhanced Wikipedia search specifically for Featured Articles and administrative pages
    
    Args:
        query: Search query for Featured Articles
        date_filter: Optional date filter (e.g., "November 2016")
        
    Returns:
        Search results focused on Featured Article information
    """
    try:
        # Enhanced search targets for Wikipedia Featured Articles
        search_targets = [
            f"Wikipedia:Featured articles {date_filter}",
            f"Wikipedia:Featured article candidates {date_filter}",
            f"Category:Featured articles {date_filter}",
            f"Wikipedia:Today's featured article {date_filter}"
        ]
        
        results = []
        
        for target in search_targets:
            try:
                # Use Wikipedia API for better access
                api_url = "https://en.wikipedia.org/api/rest_v1/page/summary/"
                encoded_target = target.replace(" ", "_").replace(":", "%3A")
                
                response = requests.get(f"{api_url}{encoded_target}", timeout=10)
                if response.status_code == 200:
                    data = response.json()
                    extract = data.get('extract', '')
                    if extract and len(extract) > 50:
                        results.append(f"**{target}:** {extract[:200]}...")
                        
            except Exception as e:
                continue
        
        # Also try direct search on Wikipedia
        search_url = "https://en.wikipedia.org/w/api.php"
        params = {
            'action': 'query',
            'format': 'json',
            'list': 'search',
            'srsearch': f"{query} {date_filter}",
            'srlimit': 5
        }
        
        try:
            response = requests.get(search_url, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                searches = data.get('query', {}).get('search', [])
                
                for item in searches:
                    title = item.get('title', '')
                    snippet = item.get('snippet', '')
                    if 'featured' in title.lower() or 'featured' in snippet.lower():
                        results.append(f"**{title}:** {snippet}")
        except:
            pass
        
        if results:
            return "**Enhanced Wikipedia Featured Articles Search:**\n" + "\n".join(results)
        else:
            return f"No specific Featured Articles information found for: {query} {date_filter}"
            
    except Exception as e:
        return f"Enhanced search error: {str(e)}"

@tool 
def wikipedia_page_history_search(article_name: str) -> str:
    """
    Search for Wikipedia page history and nomination information
    
    Args:
        article_name: Name of the Wikipedia article
        
    Returns:
        History and nomination information for the article
    """
    try:
        # Get article information
        api_url = "https://en.wikipedia.org/w/api.php"
        
        # First, get basic article info
        params = {
            'action': 'query',
            'format': 'json',
            'titles': article_name,
            'prop': 'info|categories|templates',
            'inprop': 'created'
        }
        
        response = requests.get(api_url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Could not access Wikipedia API for {article_name}"
        
        data = response.json()
        pages = data.get('query', {}).get('pages', {})
        
        results = []
        
        for page_id, page_info in pages.items():
            if page_id == '-1':
                return f"Article '{article_name}' not found on Wikipedia"
            
            title = page_info.get('title', '')
            results.append(f"**Article:** {title}")
            
            # Check categories for Featured Article status
            categories = page_info.get('categories', [])
            featured_cats = [cat for cat in categories if 'featured' in cat.get('title', '').lower()]
            
            if featured_cats:
                results.append(f"**Featured Article Categories:** {[cat['title'] for cat in featured_cats]}")
            
            # Check templates for Featured Article templates
            templates = page_info.get('templates', [])
            featured_templates = [tmpl for tmpl in templates if 'featured' in tmpl.get('title', '').lower()]
            
            if featured_templates:
                results.append(f"**Featured Article Templates:** {[tmpl['title'] for tmpl in featured_templates]}")
        
        # Try to get nomination information from talk page
        talk_params = {
            'action': 'query',
            'format': 'json',
            'titles': f"Talk:{article_name}",
            'prop': 'revisions',
            'rvprop': 'content',
            'rvlimit': 1
        }
        
        try:
            talk_response = requests.get(api_url, params=talk_params, timeout=10)
            if talk_response.status_code == 200:
                talk_data = talk_response.json()
                talk_pages = talk_data.get('query', {}).get('pages', {})
                
                for talk_page_id, talk_page_info in talk_pages.items():
                    if talk_page_id != '-1':
                        revisions = talk_page_info.get('revisions', [])
                        if revisions:
                            content = revisions[0].get('*', '')
                            
                            # Look for nomination information
                            nomination_patterns = [
                                r'nominated by\s*:?\s*\[\[User:([^\]]+)',
                                r'nominator\s*=\s*\[\[User:([^\]]+)',
                                r'proposed by\s*\[\[User:([^\]]+)'
                            ]
                            
                            for pattern in nomination_patterns:
                                matches = re.findall(pattern, content, re.IGNORECASE)
                                if matches:
                                    results.append(f"**Nominator Found:** {matches[0]}")
                                    break
        except:
            pass
        
        if results:
            return "**Wikipedia Page History Search:**\n" + "\n".join(results)
        else:
            return f"Limited information found for {article_name}"
            
    except Exception as e:
        return f"Page history search error: {str(e)}"

@tool
def verify_dinosaur_article(article_name: str) -> str:
    """
    Verify if a Wikipedia article is about a dinosaur
    
    Args:
        article_name: Name of the article to verify
        
    Returns:
        Verification result with dinosaur classification
    """
    try:
        api_url = "https://en.wikipedia.org/w/api.php"
        
        # Get article content and categories
        params = {
            'action': 'query',
            'format': 'json',
            'titles': article_name,
            'prop': 'categories|extracts',
            'exintro': True,
            'explaintext': True,
            'exsectionformat': 'plain'
        }
        
        response = requests.get(api_url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Could not verify {article_name}"
        
        data = response.json()
        pages = data.get('query', {}).get('pages', {})
        
        for page_id, page_info in pages.items():
            if page_id == '-1':
                return f"Article '{article_name}' not found"
            
            title = page_info.get('title', '')
            extract = page_info.get('extract', '').lower()
            categories = page_info.get('categories', [])
            
            # Check for dinosaur indicators
            dinosaur_keywords = [
                'dinosaur', 'theropod', 'sauropod', 'ornithopod', 
                'ceratopsian', 'stegosaur', 'ankylosaur', 'cretaceous',
                'jurassic', 'triassic', 'mesozoic', 'extinct reptile'
            ]
            
            # Check in content
            content_match = any(keyword in extract for keyword in dinosaur_keywords)
            
            # Check in categories
            category_names = [cat.get('title', '').lower() for cat in categories]
            category_match = any(
                any(keyword in cat_name for keyword in dinosaur_keywords)
                for cat_name in category_names
            )
            
            if content_match or category_match:
                matching_keywords = [kw for kw in dinosaur_keywords if kw in extract]
                matching_categories = [cat for cat in category_names if any(kw in cat for kw in dinosaur_keywords)]
                
                return f"**VERIFIED DINOSAUR ARTICLE:** {title}\n" + \
                       f"**Keywords found:** {matching_keywords}\n" + \
                       f"**Dinosaur categories:** {matching_categories}"
            else:
                return f"**NOT A DINOSAUR ARTICLE:** {title}\n" + \
                       f"**Content preview:** {extract[:200]}..."
        
        return f"Could not determine if {article_name} is about a dinosaur"
        
    except Exception as e:
        return f"Dinosaur verification error: {str(e)}"

@tool
def multi_step_wikipedia_research(question: str) -> str:
    """
    Multi-step research approach for complex Wikipedia questions
    
    Args:
        question: The research question
        
    Returns:
        Structured research results
    """
    try:
        results = ["**MULTI-STEP WIKIPEDIA RESEARCH:**"]
        
        # Extract key information from question
        if "featured article" in question.lower() and "november 2016" in question.lower():
            
            # Step 1: Search for Featured Articles from November 2016
            results.append("\n**STEP 1: Featured Articles November 2016**")
            fa_search = wikipedia_featured_articles_search("Featured Articles promoted", "November 2016")
            results.append(fa_search)
            
            # Step 2: Look for dinosaur-related articles
            results.append("\n**STEP 2: Identifying Dinosaur Articles**")
            
            # Common dinosaur article names that might be Featured Articles
            potential_dinosaurs = [
                "Giganotosaurus", "Spinosaurus", "Tyrannosaurus", "Allosaurus",
                "Deinocheirus", "Carnotaurus", "Utahraptor", "Therizinosaurus"
            ]
            
            for dinosaur in potential_dinosaurs:
                verification = verify_dinosaur_article(dinosaur)
                if "VERIFIED DINOSAUR" in verification:
                    results.append(f"✅ {verification}")
                    
                    # Step 3: Check nomination information
                    results.append(f"\n**STEP 3: Nomination Info for {dinosaur}**")
                    history = wikipedia_page_history_search(dinosaur)
                    results.append(history)
                    
                    # If we found a nominator, this might be our answer
                    if "Nominator Found" in history:
                        results.append(f"\n**POTENTIAL ANSWER FOUND for {dinosaur}**")
        
        return "\n".join(results)
        
    except Exception as e:
        return f"Multi-step research error: {str(e)}"