import fs from 'fs';

/**
 * Requests an embedding for each built-in sample document and logs the outcome.
 *
 * Documents are processed sequentially. A document that fails — non-OK HTTP
 * response, a thrown network error, or a response body that lacks
 * `data[0].embedding` — is logged and counted as failed; it never aborts the
 * remaining documents.
 *
 * @param {object} [options] Optional overrides; calling with no arguments
 *   uses the defaults listed below.
 * @param {string} [options.endpoint='http://localhost:5000/api/embeddings']
 *   URL that receives one POST request per document.
 * @param {number} [options.delayMs=1000] Pause, in milliseconds, inserted
 *   after each answered request except the last one. Values <= 0 disable it.
 * @param {typeof fetch} [options.fetchImpl=globalThis.fetch] fetch-compatible
 *   function used to send the requests (injectable for testing).
 * @returns {Promise<{succeeded: number, failed: number}>} Number of documents
 *   whose embedding was generated and number that failed.
 */
async function generateEmbeddings({
  endpoint = 'http://localhost:5000/api/embeddings',
  delayMs = 1000,
  fetchImpl = globalThis.fetch,
} = {}) {
  const documents = [
    {
      id: 1,
      title: "Attention Is All You Need",
      content: "The Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output. The Transformer allows for significantly more parallelization and can reach a new state of the art in translation quality.",
    },
    {
      id: 2,
      title: "GPT-4 Technical Report",
      content: "We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on various professional and academic benchmarks.",
    },
    {
      id: 3,
      title: "Constitutional AI",
      content: "As AI systems become more capable, we would like to enlist their help to supervise other AI systems. We experiment with methods for training a harmless AI assistant through self-improvement, without any human labels identifying harmful outputs.",
    },
    {
      id: 4,
      title: "Retrieval-Augmented Generation",
      content: "Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited.",
    },
  ];

  let succeeded = 0;
  let failed = 0;

  console.log('Generating embeddings for documents...');

  for (const [index, doc] of documents.entries()) {
    try {
      console.log(`Processing document ${doc.id}: ${doc.title}`);

      const response = await fetchImpl(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input: doc.content }),
      });

      if (response.ok) {
        const result = await response.json();
        // A body without data[0].embedding throws here and is reported by
        // the catch below as a processing error for this document.
        const dimensions = result.data[0].embedding.length;
        console.log(`✅ Generated embedding for ${doc.title} (${dimensions} dimensions)`);
        succeeded += 1;
      } else {
        console.log(`❌ Failed to generate embedding for ${doc.title}`);
        failed += 1;
      }

      // Space out consecutive requests; nothing follows the last document,
      // so waiting after it would only delay the summary.
      const isLastDocument = index === documents.length - 1;
      if (!isLastDocument && delayMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, delayMs));
      }
    } catch (error) {
      console.log(`❌ Error processing ${doc.title}: ${error.message}`);
      failed += 1;
    }
  }

  console.log('✅ Embedding generation completed!');
  console.log('\n🔍 Now you can test vector search with these queries:');
  console.log('- "attention mechanism transformer architecture"');
  console.log('- "multimodal language model GPT"');
  console.log('- "constitutional AI safety alignment"');
  console.log('- "retrieval augmented generation knowledge"');

  return { succeeded, failed };
}

// Script entry point. Any rejection from the run is logged, and the process
// is flagged with a non-zero exit code so shells and CI can detect the failure.
generateEmbeddings().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});