Spaces:

Agents-MCP-Hackathon
/

KnowledgeBridge

Running

App Files Files Community

fazeel007 committed on Jun 10

Commit

c55df02

1 Parent(s): b5baf9e

generate embeddings

Browse files

Files changed (1) hide show

generate_embeddings.js +69 -0

generate_embeddings.js ADDED Viewed

	@@ -0,0 +1,69 @@

+// Quick script to generate embeddings for existing documents
+import fs from 'fs';
+async function generateEmbeddings() {
+  // Document contents to generate embeddings for
+  const documents = [
+    {
+      id: 1,
+      title: "Attention Is All You Need",
+      content: "The Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output. The Transformer allows for significantly more parallelization and can reach a new state of the art in translation quality."
+    },
+    {
+      id: 2,
+      title: "GPT-4 Technical Report",
+      content: "We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on various professional and academic benchmarks."
+    },
+    {
+      id: 3,
+      title: "Constitutional AI",
+      content: "As AI systems become more capable, we would like to enlist their help to supervise other AI systems. We experiment with methods for training a harmless AI assistant through self-improvement, without any human labels identifying harmful outputs."
+    },
+    {
+      id: 4,
+      title: "Retrieval-Augmented Generation",
+      content: "Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited."
+    }
+  ];
+  console.log('Generating embeddings for documents...');
+  for (const doc of documents) {
+    try {
+      console.log(`Processing document ${doc.id}: ${doc.title}`);
+      // Generate embedding
+      const response = await fetch('http://localhost:5000/api/embeddings', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ input: doc.content })
+      });
+      if (response.ok) {
+        const result = await response.json();
+        console.log(`✅ Generated embedding for ${doc.title} (${result.data[0].embedding.length} dimensions)`);
+        // Note: In a real implementation, you would update the database here
+        // For now, just log success
+      } else {
+        console.log(`❌ Failed to generate embedding for ${doc.title}`);
+      }
+      // Small delay to avoid overwhelming the API
+      await new Promise(resolve => setTimeout(resolve, 1000));
+    } catch (error) {
+      console.log(`❌ Error processing ${doc.title}: ${error.message}`);
+    }
+  }
+  console.log('✅ Embedding generation completed!');
+  console.log('\n🔍 Now you can test vector search with these queries:');
+  console.log('- "attention mechanism transformer architecture"');
+  console.log('- "multimodal language model GPT"');
+  console.log('- "constitutional AI safety alignment"');
+  console.log('- "retrieval augmented generation knowledge"');
+}
+// Run the function
+generateEmbeddings().catch(console.error);