|
#!/usr/bin/env node |
|
|
|
|
|
/** Timeout, in milliseconds, applied to every HTTP request issued by this script. */
const REQUEST_TIMEOUT_MS = 5000;

/** URLs checked when `testEnhancedValidation` is called without arguments. */
const DEFAULT_TEST_URLS = [
  // Well-formed arXiv identifiers.
  'https://arxiv.org/abs/2001.08361',
  'https://arxiv.org/abs/1706.03762',
  // Fabricated arXiv identifiers: the first two fail the format check, the
  // third passes it and can only be rejected by the server's response.
  'https://arxiv.org/abs/2024.rag.advances',
  'https://arxiv.org/abs/2024.fake.paper',
  'https://arxiv.org/abs/9999.99999',
  // Non-arXiv hosts: only the HTTP status and body heuristics apply.
  'https://vldb.org/vector-db-2024',
  'https://cvpr.org',
];

/** Accepted arXiv identifier shapes; both allow an optional version suffix such as `v2`. */
const ARXIV_ID_FORMATS = [
  /^\d{4}\.\d{4,5}(v\d+)?$/, // new style: 2001.08361, 2001.08361v2
  /^[a-z-]+(\.[A-Z]{2})?\/\d{7}(v\d+)?$/, // old style: hep-th/9901001, math.GT/0309136
];

/** Lower-case phrases whose presence in a successful response body marks it as an error page. */
const ERROR_INDICATORS = [
  'not recognized',
  'might instead try to search',
  'article identifier',
  'not found',
  'error',
];

/**
 * Tells whether a hostname is arxiv.org itself or one of its subdomains.
 * A plain substring test would also accept look-alikes such as `notarxiv.org`.
 *
 * @param {string} hostname - Hostname taken from a parsed URL.
 * @returns {boolean} True for `arxiv.org` and `*.arxiv.org`.
 */
function isArxivHost(hostname) {
  return hostname === 'arxiv.org' || hostname.endsWith('.arxiv.org');
}

/**
 * Tells whether a response body contains any of the known error phrases.
 *
 * @param {string} content - Response body text.
 * @returns {boolean} True when at least one indicator occurs, ignoring case.
 */
function hasErrorContent(content) {
  // Lower-case the body once instead of once per indicator.
  const haystack = content.toLowerCase();
  return ERROR_INDICATORS.some((indicator) => haystack.includes(indicator));
}

/**
 * Checks a single URL and logs the verdict.
 *
 * arXiv abstract URLs are first checked against the identifier formats and are
 * not fetched when the format is wrong. Every other URL is fetched with GET;
 * a non-2xx status or an error phrase in the body marks it invalid. Any thrown
 * error (unparsable URL, timeout, connection failure) is logged, not rethrown.
 *
 * @param {string} url - URL to check.
 * @returns {Promise<void>}
 */
async function validateUrl(url) {
  try {
    console.log(`Testing: ${url}`);

    const urlObj = new URL(url);

    if (isArxivHost(urlObj.hostname)) {
      // Match on the parsed pathname so a query string or fragment is never
      // mistaken for part of the identifier.
      const match = urlObj.pathname.match(/^\/abs\/(.+)$/);
      if (match) {
        const paperId = match[1];
        console.log(` ArXiv ID: ${paperId}`);

        const hasValidFormat = ARXIV_ID_FORMATS.some((regex) => regex.test(paperId));
        console.log(` Format valid: ${hasValidFormat}`);

        if (!hasValidFormat) {
          console.log(' Result: ❌ INVALID (bad format)');
          return;
        }
      }
    }

    const response = await fetch(url, {
      method: 'GET',
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
      headers: {
        'User-Agent': 'Knowledge-Base-Browser/1.0 (Enhanced Validator)',
      },
    });

    console.log(` Status: ${response.status}`);

    if (!response.ok) {
      console.log(` Result: ❌ INVALID (${response.status})`);
      return;
    }

    const content = await response.text();
    if (hasErrorContent(content)) {
      console.log(' Content: Contains error messages');
      console.log(' Result: ❌ INVALID (error content)');
    } else {
      console.log(' Content: Valid');
      console.log(' Result: ✅ VALID');
    }
  } catch (error) {
    console.log(` Error: ${error.message}`);
    console.log(' Result: ❌ INVALID (network error)');
  }
}

/**
 * Posts a "rag" query to the local search API and lists the results, flagging
 * any result whose URL contains the known-fake `2024.rag.advances` identifier.
 * Failures are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function runSearchCheck() {
  try {
    const response = await fetch('http://localhost:5000/api/search', {
      method: 'POST',
      // Same timeout as the per-URL checks so an unresponsive server cannot hang the script.
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        query: 'rag',
        searchType: 'semantic',
        limit: 10,
      }),
    });

    if (!response.ok) {
      console.log('❌ Search request failed');
      return;
    }

    const data = await response.json();
    if (!Array.isArray(data?.results)) {
      throw new Error('search response has no "results" array');
    }

    console.log(`Search for "rag" returned ${data.results.length} results:`);

    data.results.forEach((result, index) => {
      const { title, url } = result ?? {};
      console.log(`${index + 1}. ${title}`);
      console.log(` URL: ${url}`);

      if (typeof url !== 'string') {
        // A malformed entry must not abort the listing of the remaining results.
        console.log(' ⚠️ Result has no URL');
      } else if (url.includes('2024.rag.advances')) {
        console.log(' ⚠️ This should have been filtered out!');
      } else {
        console.log(' ✅ Valid URL');
      }
      console.log('');
    });
  } catch (error) {
    console.log('❌ Search test failed:', error.message);
  }
}

/**
 * Runs the enhanced URL validation smoke test: checks each URL in turn, then
 * queries the local search API. All output goes to the console; errors from
 * individual checks are logged and do not stop the run.
 *
 * @param {string[]} [testUrls] - URLs to check; defaults to the built-in sample list.
 * @returns {Promise<void>}
 */
async function testEnhancedValidation(testUrls = DEFAULT_TEST_URLS) {
  console.log('🔍 Testing Enhanced URL Validation...\n');

  console.log('🧪 Testing individual URLs with enhanced validation...\n');

  // Sequential on purpose: the log output for each URL must stay grouped.
  for (const url of testUrls) {
    await validateUrl(url);
    console.log('');
  }

  console.log('🔍 Testing search with enhanced validation...\n');

  await runSearchCheck();

  console.log('🎯 Enhanced Validation Test Complete!');
}
|
|
|
// Script entry point. The promise is handled explicitly so an unexpected
// rejection is reported and reflected in the exit code rather than left floating.
testEnhancedValidation().catch((error) => {
  console.error('Enhanced validation test crashed:', error);
  process.exitCode = 1;
});