// File size: 2,520 Bytes
// 1a1b3eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Using built-in fetch (Node.js 18+)

/**
 * Generates an embedding for every document returned by the documents API and
 * posts each embedding back to its document.
 *
 * Documents are processed one at a time. A failure on one document is logged
 * and counted, and the run continues with the next document. A failure to
 * list the documents at all is logged and recorded in the returned summary.
 * The returned promise never rejects.
 *
 * @param {object} [options]
 * @param {string} [options.baseUrl='http://localhost:5000'] - API origin
 *   (trailing slashes are ignored).
 * @param {number} [options.delayMs=1000] - Pause between documents, in
 *   milliseconds; `0` disables the pause.
 * @param {number} [options.maxContentLength=8000] - Maximum number of
 *   characters of `doc.content` sent to the embeddings endpoint.
 * @returns {Promise<{total: number, succeeded: number, failed: number,
 *   skipped: number, error: (Error|null)}>} Counts for the run; `error` holds
 *   the failure that prevented the run from completing, if any.
 */
async function generateAllEmbeddings({
  baseUrl = 'http://localhost:5000',
  delayMs = 1000,
  maxContentLength = 8000,
} = {}) {
  const apiRoot = String(baseUrl).replace(/\/+$/, '');
  const summary = { total: 0, succeeded: 0, failed: 0, skipped: 0, error: null };

  console.log('🚀 Starting to generate embeddings for all documents...');

  try {
    // Get all documents
    const response = await fetch(`${apiRoot}/api/documents`);
    if (!response.ok) {
      throw new Error(`Failed to list documents (HTTP ${response.status})`);
    }

    const documents = await response.json();
    if (!Array.isArray(documents)) {
      throw new Error('Expected the documents endpoint to return an array');
    }

    summary.total = documents.length;
    console.log(`📚 Found ${documents.length} documents`);

    // Generate embeddings for each document
    for (const [index, doc] of documents.entries()) {
      // Per-document try/catch: one bad document must not abort the batch.
      try {
        console.log(`\n📄 Processing: ${doc.title} (ID: ${doc.id})`);

        if (typeof doc.content !== 'string' || doc.content.trim() === '') {
          summary.skipped += 1;
          console.log(`⚠️ Skipping document ${doc.id}: no text content to embed`);
        } else {
          await embedAndStoreDocument(doc, apiRoot, maxContentLength);
          summary.succeeded += 1;
        }
      } catch (error) {
        summary.failed += 1;
        console.error(`❌ ${error.message}`);
      }

      // Small delay to avoid overwhelming the API; pointless after the last item.
      const isLast = index === documents.length - 1;
      if (delayMs > 0 && !isLast) {
        await new Promise((resolve) => setTimeout(resolve, delayMs));
      }
    }

    console.log('\n🎉 Embedding generation completed!');
    console.log(
      `   ${summary.succeeded} succeeded, ${summary.failed} failed, ${summary.skipped} skipped`,
    );
    console.log('\n🔍 Now you can test vector search with these queries:');
    console.log('- "attention mechanism transformer architecture"');
    console.log('- "multimodal language model GPT"');
    console.log('- "constitutional AI safety alignment"');
    console.log('- "mixtral mixture of experts"');
    console.log('- "retrieval augmented generation knowledge"');
  } catch (error) {
    summary.error = error;
    console.error('❌ Error:', error.message);
  }

  return summary;
}

/**
 * Requests an embedding for one document's content and posts the embedding
 * back to that document.
 *
 * @param {{id: *, title: *, content: string}} doc - Document to process.
 * @param {string} apiRoot - API origin without a trailing slash.
 * @param {number} maxContentLength - Character limit applied to `doc.content`.
 * @returns {Promise<void>}
 * @throws {Error} When either request is not `ok` or the embeddings response
 *   does not contain an embedding array at `data[0].embedding`.
 */
async function embedAndStoreDocument(doc, apiRoot, maxContentLength) {
  const jsonHeaders = { 'Content-Type': 'application/json' };

  // Generate embedding using the (length-limited) content
  const embeddingResponse = await fetch(`${apiRoot}/api/embeddings`, {
    method: 'POST',
    headers: jsonHeaders,
    body: JSON.stringify({ input: doc.content.substring(0, maxContentLength) }),
  });
  if (!embeddingResponse.ok) {
    throw new Error(
      `Failed to generate embedding for ${doc.title} (HTTP ${embeddingResponse.status})`,
    );
  }

  const embeddingResult = await embeddingResponse.json();
  const embedding = embeddingResult?.data?.[0]?.embedding;
  if (!Array.isArray(embedding)) {
    throw new Error(`Embedding response for ${doc.title} did not contain an embedding`);
  }
  console.log(`✅ Generated embedding with ${embedding.length} dimensions`);

  // Update document with embedding
  const updateResponse = await fetch(
    `${apiRoot}/api/documents/process/${encodeURIComponent(doc.id)}`,
    {
      method: 'POST',
      headers: jsonHeaders,
      body: JSON.stringify({
        operations: ['generate_embedding'],
        embedding,
      }),
    },
  );
  if (!updateResponse.ok) {
    throw new Error(`Failed to update document ${doc.id} (HTTP ${updateResponse.status})`);
  }
  console.log(`✅ Updated document ${doc.id} with embedding`);
}

// Script entry point: start the batch run and print any rejection that escapes it.
generateAllEmbeddings().catch((err) => console.error(err));