import React, { useState, useEffect } from 'react';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
import { Button } from '@/components/ui/button';
import { Badge } from '@/components/ui/badge';
import { Progress } from '@/components/ui/progress';
import {
Search,
Database,
Brain,
ArrowRight,
FileText,
Zap,
GitBranch,
Target,
Layers,
RotateCcw
} from 'lucide-react';
/**
 * One stage of the pipeline walkthrough rendered by `SystemFlowDiagram`.
 */
interface FlowStep {
  /** Stable key for the stage; compared against to pick stage-specific visualizations. */
  id: string;
  /** Numbered heading shown on the step card. */
  title: string;
  /** One-line summary shown under the title. */
  description: string;
  /** Icon node rendered on the step card. */
  icon: React.ReactNode;
  /** Bullet points listed while the step is the active one. */
  details: Array<string>;
  /** Technology names rendered as tags on the card. */
  tech: Array<string>;
  /** True for the step whose index equals the current step. */
  active: boolean;
  /** True for every step whose index is below the current step. */
  completed: boolean;
}
const SystemFlowDiagram: React.FC = () => {
// Zero-based index of the step currently highlighted in the flow.
const [currentStep, setCurrentStep] = useState(0);
// True while the walkthrough is auto-advancing on a timer.
const [isPlaying, setIsPlaying] = useState(false);
// Progress value expressed as a percentage (the sync effect stores `(step + 1) / total * 100`).
const [progress, setProgress] = useState(0);
// Query text typed by the user; interpolated into step details and fed to the embedding demo.
const [userQuery, setUserQuery] = useState("How does semantic search work?");
/**
 * Builds a deterministic 8-value pseudo-embedding for demonstration purposes.
 *
 * The seed is the sum of the text's UTF-16 code units, so any two strings with
 * the same code-unit sum (e.g. anagrams) yield the same vector. Each component
 * is derived from `Math.sin(seed + i)`, mapped into [-1, 1] and rounded to
 * three decimals. This is not a real model embedding.
 *
 * @param text - Input text to turn into a demo vector.
 * @returns Array of eight numbers in the range [-1, 1].
 */
const generateEmbedding = (text: string) => {
  // Seed: plain sum of the code units of every character.
  let charSum = 0;
  for (let pos = 0; pos < text.length; pos += 1) {
    charSum += text.charCodeAt(pos);
  }

  const components: number[] = [];
  for (let dim = 0; dim < 8; dim += 1) {
    // Fractional part of a scaled sine: a cheap, repeatable pseudo-random in [0, 1).
    const scaled = Math.sin(charSum + dim) * 10000;
    const fraction = scaled - Math.floor(scaled);
    // Stretch to [-1, 1) and keep three decimals.
    components.push(Number((fraction * 2 - 1).toFixed(3)));
  }
  return components;
};
// Definition of the eight pipeline stages shown in the diagram, in display order.
// Every entry starts with `active` and `completed` set to false.
// Two detail strings (in the 'input' and 'enhancement' steps) interpolate `userQuery`,
// so the array contents depend on the current query text.
// NOTE(review): every `icon` value below is empty in this view, which is not valid
// syntax — presumably a JSX icon element was lost; restore it before compiling.
const flowSteps: FlowStep[] = [
{
id: 'input',
title: '1. Document Upload / Query Input',
description: 'Upload documents or enter search query',
icon: ,
details: [
'Upload PDFs, images, text files with drag-and-drop',
'OCR processing via Modal for images and PDFs',
`Search query: "${userQuery}"`,
'Real-time file validation and error handling'
],
tech: ['Modal OCR', 'Multer Upload', 'File Validation', 'React'],
active: false,
completed: false
},
{
id: 'processing',
title: '2. Document Processing',
description: 'Extract text and generate embeddings',
icon: ,
details: [
'Modal serverless functions for heavy processing',
'PyPDF2 for PDF text extraction',
'Tesseract OCR for images',
'Nebius AI embedding generation (BAAI/bge-en-icl)',
'SQLite storage with metadata tracking'
],
tech: ['Modal', 'PyPDF2', 'Tesseract', 'Nebius AI', 'SQLite'],
active: false,
completed: false
},
{
id: 'indexing',
title: '3. Vector Index Building',
description: 'Build FAISS vector index for semantic search',
icon: ,
details: [
'FAISS IndexFlatIP for cosine similarity',
'Sentence Transformers (all-MiniLM-L6-v2)',
'Modal distributed computing for large datasets',
'Persistent storage with fallback paths',
'Batch processing optimization'
],
tech: ['FAISS', 'Modal', 'SentenceTransformers', 'Vector Storage'],
active: false,
completed: false
},
{
id: 'enhancement',
title: '4. AI Query Enhancement',
description: 'Enhance query with AI (optional)',
icon: ,
details: [
`Nebius AI analyzes "${userQuery}"`,
'DeepSeek-R1-0528 model provides query improvements',
'Suggests keywords and alternative phrasings',
'Intent detection and query expansion'
],
tech: ['Nebius AI', 'DeepSeek-R1-0528', 'Query Analysis'],
active: false,
completed: false
},
{
id: 'search',
title: '5. Hybrid Multi-Source Search',
description: 'Search across vector index and external sources',
icon: ,
details: [
'Vector similarity search in uploaded documents',
'Parallel search across GitHub, Wikipedia, ArXiv',
'Smart query routing based on content type',
'Relevance scoring and result ranking'
],
tech: ['Vector Search', 'GitHub API', 'Wikipedia API', 'ArXiv API'],
active: false,
completed: false
},
{
id: 'validation',
title: '6. URL Validation & Filtering',
description: 'Validate and verify result URLs',
icon: ,
details: [
'Smart URL validation with ArXiv format checking',
'Content verification to detect error pages',
'Concurrent processing with rate limits',
'Trusted domain fast-path for reliable sources'
],
tech: ['URL Validation', 'Content Verification', 'Rate Limiting'],
active: false,
completed: false
},
{
id: 'analysis',
title: '7. AI-Powered Analysis',
description: 'Generate insights and explanations',
icon: ,
details: [
'Nebius DeepSeek-R1 analyzes document content',
'Research synthesis across multiple sources',
'Audio-friendly explanations generation',
'Knowledge graph relationship mapping'
],
tech: ['Nebius AI', 'DeepSeek-R1', 'Research Synthesis'],
active: false,
completed: false
},
{
id: 'display',
title: '8. Results & Visualization',
description: 'Present results with interactive features',
icon: ,
details: [
'Interactive knowledge graph visualization',
'Relevance-scored result cards with snippets',
'Citation tracking and source attribution',
'Real-time AI explanations and insights'
],
tech: ['D3.js', 'React', 'Knowledge Graph', 'UI Components'],
active: false,
completed: false
}
];
// Step list kept in state so the `active` / `completed` flags can be rewritten as
// the walkthrough moves; the initial value is the `flowSteps` array of the first render.
const [steps, setSteps] = useState(() => flowSteps);
// Auto-advance: while playing, move to the next step every 2 seconds and stop
// once the last step is showing.
//
// The stop condition is evaluated in the effect body instead of inside the
// `setCurrentStep` updater. React requires updater functions to be pure (they
// may be invoked twice in Strict Mode), so they must not call other setters.
// As a consequence playback now stops as soon as the last step is reached,
// rather than one tick later.
useEffect(() => {
  if (!isPlaying) {
    return undefined;
  }

  const lastIndex = steps.length - 1;
  if (currentStep >= lastIndex) {
    // Nothing left to advance to: end playback without scheduling a timer.
    setIsPlaying(false);
    return undefined;
  }

  // One timer per step; the effect re-runs (and reschedules) whenever the step changes.
  const timer = setTimeout(() => {
    setCurrentStep((prev) => Math.min(prev + 1, lastIndex));
  }, 2000);

  return () => clearTimeout(timer);
}, [isPlaying, currentStep, steps.length]);
// Keep the rendered step list and the progress value in sync with both the
// current step and the current query text.
//
// The list is rebuilt from this render's `flowSteps` rather than mapped from
// the previous state: `flowSteps` interpolates `userQuery` into some detail
// strings, and mapping the old state would keep showing the query that was
// present on the first render.
useEffect(() => {
  setSteps(
    flowSteps.map((step, index) => ({
      ...step,
      active: index === currentStep,
      completed: index < currentStep
    }))
  );
  setProgress(((currentStep + 1) / flowSteps.length) * 100);
  // `flowSteps` is a new array on every render, so listing it would re-run this
  // effect after every render; `userQuery` is the only changing value it reads.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [currentStep, userQuery]);
/**
 * Stops playback and returns the walkthrough to the first step.
 *
 * Progress is set to the first step's share of the total instead of 0. That is
 * the value the step-sync effect computes for step index 0, and that effect
 * does not re-run when `currentStep` was already 0, so writing 0 here would
 * leave the bar empty while the label reads "Step 1 of N".
 */
const resetAnimation = () => {
  setCurrentStep(0);
  setIsPlaying(false);
  setProgress(steps.length > 0 ? (1 / steps.length) * 100 : 0);
};
/**
 * Starts auto-advancing through the steps. When the last step is already
 * showing, the walkthrough is reset first so playback restarts from the top.
 */
const playAnimation = () => {
  const lastIndex = steps.length - 1;
  const alreadyAtEnd = currentStep === lastIndex;
  if (alreadyAtEnd) {
    resetAnimation();
  }
  setIsPlaying(true);
};
// Render output, in order: header, query input, playback controls, progress
// label, the clickable step list (the active step expands its details), the
// live embedding demo with similarity scores, and the key-concept notes.
// The embedding panel inside the step list is keyed on the 'processing' step:
// the flow defines no step with id 'embeddings', so the previous check never matched.
return (
{/* Header */}
KnowledgeBridge System Flow
How your query becomes intelligent multi-source research with AI enhancement
{/* Query Input */}
setUserQuery(e.target.value)}
className="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-800 dark:border-gray-600 dark:text-white"
placeholder="Enter your query to see the process"
/>
{/* Controls */}
{/* Progress Bar */}
Step {currentStep + 1} of {steps.length}
{/* Flow Diagram */}
{steps.map((step, index) => (
{/* Connection Arrow */}
{index < steps.length - 1 && (
)}
{/* Step Card */}
setCurrentStep(index)}
>
{step.icon}
{step.completed && (
)}
{step.title}
{step.description}
{/* Technology Tags */}
{step.tech.map((tech) => (
{tech}
))}
{/* Details (shown when active) */}
{step.active && (
Process Details:
{step.details.map((detail, i) => (
-
•
{detail}
))}
{/* Special visualization for the embedding-generation ('processing') step */}
{step.id === 'processing' && (
Vector: [{generateEmbedding(userQuery).slice(0, 4).join(', ')}, ...]
Dimensions: 1536 | Magnitude: {Math.sqrt(generateEmbedding(userQuery).reduce((sum, val) => sum + val * val, 0)).toFixed(3)}
)}
{/* Special visualization for validation step */}
{step.id === 'validation' && (
{[
{ doc: 'github.com/research/ai', status: 'valid' },
{ doc: 'arxiv.org/abs/2024.12345', status: 'verified' },
{ doc: 'invalid-url.broken', status: 'filtered' }
].map((result, i) => (
{result.doc}
{result.status}
))}
)}
)}
))}
{/* Live Embedding Demo */}
Live Embedding Calculator
{/* Text to Vector */}
Text → Vector Conversion
setUserQuery(e.target.value)}
className="w-full font-mono text-sm bg-gray-100 dark:bg-gray-700 p-2 rounded border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-2 focus:ring-purple-500 dark:text-gray-100"
placeholder="Enter text to generate embeddings..."
/>
Embedding (first 8 dims):
[{generateEmbedding(userQuery).join(', ')}]
Vector magnitude: {Math.sqrt(generateEmbedding(userQuery).reduce((sum, val) => sum + val * val, 0)).toFixed(3)}
{/* Similarity Calculations */}
Similarity Scores
{[
{ doc: 'AI Research Paper', vector: [0.2, 0.8, -0.1, 0.5, 0.3, -0.4, 0.7, 0.1] },
{ doc: 'GitHub Repository', vector: [0.1, 0.6, 0.2, -0.3, 0.8, 0.4, -0.2, 0.5] },
{ doc: 'Wikipedia Article', vector: [-0.3, 0.4, 0.7, 0.2, -0.1, 0.6, 0.3, -0.5] }
].map((doc, i) => {
const queryVec = generateEmbedding(userQuery);
const dotProduct = queryVec.reduce((sum, val, idx) => sum + val * doc.vector[idx], 0);
const queryMag = Math.sqrt(queryVec.reduce((sum, val) => sum + val * val, 0));
const docMag = Math.sqrt(doc.vector.reduce((sum, val) => sum + val * val, 0));
const similarity = dotProduct / (queryMag * docMag);
return (
{doc.doc}
0.3 ? 'bg-green-400' : similarity > 0.1 ? 'bg-yellow-400' : 'bg-gray-300'}`}
style={{width: `${Math.max(20, Math.abs(similarity) * 60)}px`}}>
{similarity.toFixed(2)}
);
})}
{/* Key Concepts */}
Embeddings
Nebius BAAI/bge-en-icl generates semantic vectors. Similar concepts have similar vector values.
"AI research" → [0.1, 0.3, 0.8, ...]
"machine learning" → [0.2, 0.4, 0.7, ...]
Vector Search
Multi-source search across GitHub, ArXiv, Wikipedia with smart URL validation.
GitHub API: repositories + code
ArXiv API: academic papers
Wikipedia: authoritative content
AI Pipeline
KnowledgeBridge combines multi-source search with Nebius AI for intelligent research synthesis.
Query → Enhance → Search → Validate → Analyze
);
};
export default SystemFlowDiagram;