fazeel007 committed on
Commit
c55df02
·
1 Parent(s): b5baf9e

generate embeddings

Browse files
Files changed (1) hide show
  1. generate_embeddings.js +69 -0
generate_embeddings.js ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Quick script to generate embeddings for existing documents
2
+ import fs from 'fs';
3
+
4
/**
 * Request an embedding for each of a fixed set of sample documents.
 *
 * Each document's `content` is POSTed as JSON (`{ input: <content> }`) to the
 * embeddings endpoint, one document at a time. The response is expected to
 * look like `{ data: [{ embedding: [...] }] }`; only the embedding length is
 * logged. Nothing is persisted — storing the vectors is left to a real
 * implementation.
 *
 * A failure for one document (network error, non-2xx status, or unexpected
 * response shape) is logged and counted, and processing continues with the
 * next document.
 *
 * @param {object} [options]
 * @param {string} [options.endpoint='http://localhost:5000/api/embeddings']
 *   URL of the embeddings API.
 * @param {number} [options.delayMs=1000]
 *   Pause between consecutive requests, in milliseconds. Values <= 0 disable it.
 * @param {Function} [options.fetchImpl=globalThis.fetch]
 *   fetch-compatible function used to issue the requests.
 * @returns {Promise<{succeeded: number, failed: number}>}
 *   Number of documents that did / did not yield an embedding.
 */
async function generateEmbeddings({
  endpoint = 'http://localhost:5000/api/embeddings',
  delayMs = 1000,
  fetchImpl = globalThis.fetch,
} = {}) {
  // Document contents to generate embeddings for
  const documents = [
    {
      id: 1,
      title: "Attention Is All You Need",
      content: "The Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output. The Transformer allows for significantly more parallelization and can reach a new state of the art in translation quality.",
    },
    {
      id: 2,
      title: "GPT-4 Technical Report",
      content: "We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on various professional and academic benchmarks.",
    },
    {
      id: 3,
      title: "Constitutional AI",
      content: "As AI systems become more capable, we would like to enlist their help to supervise other AI systems. We experiment with methods for training a harmless AI assistant through self-improvement, without any human labels identifying harmful outputs.",
    },
    {
      id: 4,
      title: "Retrieval-Augmented Generation",
      content: "Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited.",
    },
  ];

  console.log('Generating embeddings for documents...');

  let succeeded = 0;
  let failed = 0;

  for (const [index, doc] of documents.entries()) {
    // Throttle BETWEEN requests only: the pause also applies after a failed
    // request, and no time is wasted sleeping after the last document.
    if (index > 0 && delayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }

    try {
      console.log(`Processing document ${doc.id}: ${doc.title}`);

      // Generate embedding
      const response = await fetchImpl(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input: doc.content }),
      });

      if (!response.ok) {
        failed += 1;
        console.log(`❌ Failed to generate embedding for ${doc.title} (HTTP ${response.status})`);
        continue;
      }

      const result = await response.json();
      const embedding = result?.data?.[0]?.embedding;
      if (!Array.isArray(embedding)) {
        // Give a readable reason instead of an opaque TypeError.
        throw new Error('response did not contain data[0].embedding');
      }

      succeeded += 1;
      console.log(`✅ Generated embedding for ${doc.title} (${embedding.length} dimensions)`);

      // Note: In a real implementation, you would update the database here
      // For now, just log success
    } catch (error) {
      failed += 1;
      console.log(`❌ Error processing ${doc.title}: ${error.message}`);
    }
  }

  // Only claim success when every document actually produced an embedding.
  if (failed === 0) {
    console.log('✅ Embedding generation completed!');
  } else {
    console.log(`❌ Embedding generation finished with errors: ${succeeded} succeeded, ${failed} failed`);
  }
  console.log('\n🔍 Now you can test vector search with these queries:');
  console.log('- "attention mechanism transformer architecture"');
  console.log('- "multimodal language model GPT"');
  console.log('- "constitutional AI safety alignment"');
  console.log('- "retrieval augmented generation knowledge"');

  return { succeeded, failed };
}
67
+
68
// Script entry point: kick off the run and print any rejection to stderr.
generateEmbeddings().catch((error) => console.error(error));