#!/usr/bin/env node

// Test URL validation system

/** Timeout (ms) for each individual URL probe. */
const PROBE_TIMEOUT_MS = 5000;

/** Timeout (ms) for the search request; generous so a slow search is not cut off. */
const SEARCH_TIMEOUT_MS = 30000;

/** Search endpoint of the locally running server exercised by this script. */
const SEARCH_ENDPOINT = 'http://localhost:5000/api/search';

/**
 * Sends a HEAD request to `url` and logs whether it counts as valid.
 *
 * A URL is reported valid when the response status is in the range 200-399.
 * Any error raised by the request (including the timeout abort) is logged as
 * invalid instead of being thrown, so one bad URL never stops the run.
 *
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} `true` when the URL answered with a status in 200-399.
 */
async function probeUrl(url) {
  try {
    console.log(`Testing: ${url}`);

    const response = await fetch(url, {
      method: 'HEAD',
      signal: AbortSignal.timeout(PROBE_TIMEOUT_MS),
      headers: {
        'User-Agent': 'Knowledge-Base-Browser/1.0 (URL Validator)',
      },
    });

    const isValid = response.status >= 200 && response.status < 400;
    console.log(` Status: ${response.status} - ${isValid ? '✅ VALID' : '❌ INVALID'}`);
    return isValid;
  } catch (error) {
    console.log(` Error: ${error.message} - ❌ INVALID`);
    return false;
  }
}

/**
 * Posts a fixed semantic-search query to the local search endpoint and prints
 * the title, URL and source of every returned result.
 *
 * Failures (request error, timeout, non-OK status, malformed payload) are
 * logged and swallowed on purpose: this is a best-effort diagnostic step.
 *
 * @returns {Promise<void>}
 */
async function runSearchCheck() {
  try {
    const searchQuery = 'vector embedding generation';
    console.log(`Searching for: "${searchQuery}"`);

    const response = await fetch(SEARCH_ENDPOINT, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        query: searchQuery,
        searchType: 'semantic',
        limit: 5,
      }),
      // Without a signal this request could hang forever if the server stalls.
      signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS),
    });

    if (!response.ok) {
      console.log(`❌ Search request failed (HTTP ${response.status})`);
      return;
    }

    const data = await response.json();
    // Guard the payload shape so a malformed body yields a clear message
    // rather than a TypeError about reading `length` of undefined.
    if (!Array.isArray(data?.results)) {
      console.log('❌ Search test failed:', 'response did not contain a "results" array');
      return;
    }

    console.log(`Found ${data.results.length} validated results:`);
    data.results.forEach((result, index) => {
      console.log(`${index + 1}. ${result.title}`);
      console.log(` URL: ${result.url}`);
      console.log(` Source: ${result.source || result.sourceType}`);
      console.log('');
    });

    console.log('✅ All returned URLs should now be accessible!');
  } catch (error) {
    console.log('❌ Search test failed:', error.message);
  }
}

/**
 * Entry point: probes a fixed list of URLs one by one, then runs the search
 * check against the local server, logging progress to stdout throughout.
 *
 * @returns {Promise<void>}
 */
async function testUrlValidation() {
  console.log('🔍 Testing URL Validation System...\n');

  const testUrls = [
    // Known working URLs
    'https://github.com/microsoft/vscode',
    'https://en.wikipedia.org/wiki/Machine_learning',
    'https://arxiv.org/abs/2001.08361',

    // Known broken/problematic URLs
    'https://vldb.org/vector-db-2024', // The URL you mentioned as broken
    'https://cvpr.org', // The URL you mentioned with issues
    'https://nonexistent-domain-12345.com',
    'https://httpstat.us/404', // Returns 404
    'https://httpstat.us/500', // Returns 500
  ];

  console.log('🧪 Testing individual URL validation...\n');

  // Probed sequentially so each URL's log lines stay grouped together.
  for (const url of testUrls) {
    await probeUrl(url);
    console.log('');
  }

  console.log('🔎 Testing search with URL validation...\n');

  // Test the search endpoint
  await runSearchCheck();

  console.log('\n🎯 URL Validation Test Complete!');
  console.log('💡 The system now filters out broken/inaccessible websites');
}

// Never leave the promise floating: report unexpected failures and signal
// them through the exit code instead of relying on the unhandled-rejection path.
testUrlValidation().catch((error) => {
  console.error('❌ URL validation test crashed:', error);
  process.exitCode = 1;
});