File size: 2,630 Bytes
7c012de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env node

// Test URL validation system
// Default URLs to probe: a group labelled as known-working followed by a
// group labelled as known-broken/problematic.
const DEFAULT_TEST_URLS = Object.freeze([
  // Known working URLs
  'https://github.com/microsoft/vscode',
  'https://en.wikipedia.org/wiki/Machine_learning',
  'https://arxiv.org/abs/2001.08361',

  // Known broken/problematic URLs
  'https://vldb.org/vector-db-2024', // previously reported as broken
  'https://cvpr.org', // previously reported as having issues
  'https://nonexistent-domain-12345.com',
  'https://httpstat.us/404', // Returns 404
  'https://httpstat.us/500', // Returns 500
]);

/**
 * Manual smoke test for the URL validation system.
 *
 * 1. Sends a HEAD request to every URL in `urls` and logs whether each one
 *    counts as valid (HTTP status 200-399).
 * 2. POSTs a semantic search query to `searchUrl` and lists the results the
 *    endpoint returns.
 *
 * All output goes to the console. Request failures are logged, not thrown.
 *
 * @param {object} [options]
 * @param {readonly string[]} [options.urls] - URLs to probe individually.
 * @param {string} [options.searchUrl] - Search endpoint that receives the POST.
 * @param {number} [options.timeoutMs=5000] - Timeout for each HEAD probe.
 * @param {number} [options.searchTimeoutMs=30000] - Timeout for the search
 *   request, so a stalled server cannot hang the script forever.
 * @returns {Promise<void>}
 */
async function testUrlValidation({
  urls = DEFAULT_TEST_URLS,
  searchUrl = 'http://localhost:5000/api/search',
  timeoutMs = 5000,
  searchTimeoutMs = 30000,
} = {}) {
  console.log('🔍 Testing URL Validation System...\n');

  console.log('🧪 Testing individual URL validation...\n');

  // Probed one at a time so each URL's log lines stay grouped together.
  for (const url of urls) {
    await probeUrl(url, timeoutMs);
  }

  console.log('🔎 Testing search with URL validation...\n');

  await runSearchCheck(searchUrl, searchTimeoutMs);

  console.log('\n🎯 URL Validation Test Complete!');
  console.log('💡 The system now filters out broken/inaccessible websites');
}

/**
 * Sends a HEAD request to `url` and logs the outcome.
 *
 * A status in the range [200, 400) is logged as VALID. Any other status, a
 * network error, or a timeout is logged as INVALID.
 *
 * @param {string} url - URL to probe.
 * @param {number} timeoutMs - Milliseconds before the request is aborted.
 * @returns {Promise<void>}
 */
async function probeUrl(url, timeoutMs) {
  console.log(`Testing: ${url}`);
  try {
    const response = await fetch(url, {
      method: 'HEAD',
      signal: AbortSignal.timeout(timeoutMs),
      headers: {
        'User-Agent': 'Knowledge-Base-Browser/1.0 (URL Validator)',
      },
    });

    const isValid = response.status >= 200 && response.status < 400;
    console.log(`  Status: ${response.status} - ${isValid ? '✅ VALID' : '❌ INVALID'}`);
  } catch (error) {
    console.log(`  Error: ${error.message} - ❌ INVALID`);
  }
  console.log('');
}

/**
 * POSTs a fixed semantic search query to `searchUrl` and logs every result.
 *
 * A non-OK HTTP response is logged together with its status code. A network
 * error, a timeout, or a response body without a `results` array is logged
 * as a failed search test.
 *
 * @param {string} searchUrl - Search endpoint.
 * @param {number} timeoutMs - Milliseconds before the request is aborted.
 * @returns {Promise<void>}
 */
async function runSearchCheck(searchUrl, timeoutMs) {
  const searchQuery = 'vector embedding generation';
  console.log(`Searching for: "${searchQuery}"`);

  try {
    const response = await fetch(searchUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      signal: AbortSignal.timeout(timeoutMs),
      body: JSON.stringify({
        query: searchQuery,
        searchType: 'semantic',
        limit: 5,
      }),
    });

    if (!response.ok) {
      console.log(`❌ Search request failed (HTTP ${response.status})`);
      return;
    }

    const data = await response.json();
    // Fail with a readable message instead of a TypeError on `.length`.
    if (!Array.isArray(data?.results)) {
      throw new Error('response did not contain a "results" array');
    }

    console.log(`Found ${data.results.length} validated results:`);

    data.results.forEach((result, index) => {
      console.log(`${index + 1}. ${result.title}`);
      console.log(`   URL: ${result.url}`);
      console.log(`   Source: ${result.source || result.sourceType}`);
      console.log('');
    });

    console.log('✅ All returned URLs should now be accessible!');
  } catch (error) {
    console.log('❌ Search test failed:', error.message);
  }
}

// Script entry point. The promise is not left floating: an unexpected
// rejection is reported and the process exits with a non-zero status.
testUrlValidation().catch((error) => {
  console.error('URL validation test crashed:', error);
  process.exitCode = 1;
});