import React, { useState, useEffect, useMemo } from 'react';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
import { Button } from '@/components/ui/button';
import { Badge } from '@/components/ui/badge';
import { Progress } from '@/components/ui/progress';
import { Search, Database, Brain, ArrowRight, FileText, Zap, GitBranch, Target, Layers, RotateCcw } from 'lucide-react';

/** One stage of the pipeline rendered as a card in the flow diagram. */
interface FlowStep {
  id: string;
  title: string;
  description: string;
  icon: React.ReactNode;
  details: string[];
  tech: string[];
  /** True for the step currently selected / being animated. */
  active: boolean;
  /** True for every step that precedes the current one. */
  completed: boolean;
}

/** Number of components in each demo embedding vector. */
const EMBEDDING_DEMO_DIMENSIONS = 8;

/** Delay between automatic step advances while the animation is playing. */
const STEP_INTERVAL_MS = 2000;

/** Shared sizing for the per-step icons. NOTE(review): class name is an assumption — confirm. */
const STEP_ICON_CLASS = 'w-6 h-6';

/**
 * Produces a deterministic, fake embedding for demonstration purposes.
 *
 * The seed is the sum of the UTF-16 code units of `text`; each component is a
 * sine-based pseudo-random value mapped to roughly [-1, 1] and rounded to
 * three decimals. The same text always yields the same vector.
 *
 * @param text - Text to derive the demo vector from.
 * @returns An array of `EMBEDDING_DEMO_DIMENSIONS` numbers.
 */
const generateEmbedding = (text: string): number[] => {
  let seed = 0;
  for (let i = 0; i < text.length; i += 1) {
    seed += text.charCodeAt(i);
  }

  // Fractional part of a scaled sine: cheap, deterministic pseudo-randomness.
  const pseudoRandom = (s: number): number => {
    const x = Math.sin(s) * 10000;
    return x - Math.floor(x);
  };

  return Array.from({ length: EMBEDDING_DEMO_DIMENSIONS }, (_, i) =>
    Number((pseudoRandom(seed + i) * 2 - 1).toFixed(3))
  );
};

/**
 * Interactive walkthrough of the KnowledgeBridge pipeline.
 *
 * Holds the selected step, the play/pause flag and the demo query. The step
 * list (`steps`) and the progress percentage (`progress`) are derived from
 * that state on every render instead of being mirrored into separate state,
 * so they always reflect the current query and current step.
 */
const SystemFlowDiagram: React.FC = () => {
  const [currentStep, setCurrentStep] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [userQuery, setUserQuery] = useState("How does semantic search work?");

  // Static description of the pipeline. Rebuilt only when the query changes,
  // because two steps quote the query in their details.
  // NOTE(review): the icon elements were not recoverable from the reviewed
  // source; the mapping below uses the otherwise-unused lucide imports and
  // should be confirmed against the intended design.
  const flowSteps = useMemo<FlowStep[]>(
    () => [
      {
        id: 'input',
        title: '1. Document Upload / Query Input',
        description: 'Upload documents or enter search query',
        icon: <FileText className={STEP_ICON_CLASS} />,
        details: [
          'Upload PDFs, images, text files with drag-and-drop',
          'OCR processing via Modal for images and PDFs',
          `Search query: "${userQuery}"`,
          'Real-time file validation and error handling'
        ],
        tech: ['Modal OCR', 'Multer Upload', 'File Validation', 'React'],
        active: false,
        completed: false
      },
      {
        id: 'processing',
        title: '2. Document Processing',
        description: 'Extract text and generate embeddings',
        icon: <Zap className={STEP_ICON_CLASS} />,
        details: [
          'Modal serverless functions for heavy processing',
          'PyPDF2 for PDF text extraction',
          'Tesseract OCR for images',
          'Nebius AI embedding generation (BAAI/bge-en-icl)',
          'SQLite storage with metadata tracking'
        ],
        tech: ['Modal', 'PyPDF2', 'Tesseract', 'Nebius AI', 'SQLite'],
        active: false,
        completed: false
      },
      {
        id: 'indexing',
        title: '3. Vector Index Building',
        description: 'Build FAISS vector index for semantic search',
        icon: <Database className={STEP_ICON_CLASS} />,
        details: [
          'FAISS IndexFlatIP for cosine similarity',
          'Sentence Transformers (all-MiniLM-L6-v2)',
          'Modal distributed computing for large datasets',
          'Persistent storage with fallback paths',
          'Batch processing optimization'
        ],
        tech: ['FAISS', 'Modal', 'SentenceTransformers', 'Vector Storage'],
        active: false,
        completed: false
      },
      {
        id: 'enhancement',
        title: '4. AI Query Enhancement',
        description: 'Enhance query with AI (optional)',
        icon: <Brain className={STEP_ICON_CLASS} />,
        details: [
          `Nebius AI analyzes "${userQuery}"`,
          'DeepSeek-R1-0528 model provides query improvements',
          'Suggests keywords and alternative phrasings',
          'Intent detection and query expansion'
        ],
        tech: ['Nebius AI', 'DeepSeek-R1-0528', 'Query Analysis'],
        active: false,
        completed: false
      },
      {
        id: 'search',
        title: '5. Hybrid Multi-Source Search',
        description: 'Search across vector index and external sources',
        icon: <Search className={STEP_ICON_CLASS} />,
        details: [
          'Vector similarity search in uploaded documents',
          'Parallel search across GitHub, Wikipedia, ArXiv',
          'Smart query routing based on content type',
          'Relevance scoring and result ranking'
        ],
        tech: ['Vector Search', 'GitHub API', 'Wikipedia API', 'ArXiv API'],
        active: false,
        completed: false
      },
      {
        id: 'validation',
        title: '6. URL Validation & Filtering',
        description: 'Validate and verify result URLs',
        icon: <Target className={STEP_ICON_CLASS} />,
        details: [
          'Smart URL validation with ArXiv format checking',
          'Content verification to detect error pages',
          'Concurrent processing with rate limits',
          'Trusted domain fast-path for reliable sources'
        ],
        tech: ['URL Validation', 'Content Verification', 'Rate Limiting'],
        active: false,
        completed: false
      },
      {
        id: 'analysis',
        title: '7. AI-Powered Analysis',
        description: 'Generate insights and explanations',
        icon: <GitBranch className={STEP_ICON_CLASS} />,
        details: [
          'Nebius DeepSeek-R1 analyzes document content',
          'Research synthesis across multiple sources',
          'Audio-friendly explanations generation',
          'Knowledge graph relationship mapping'
        ],
        tech: ['Nebius AI', 'DeepSeek-R1', 'Research Synthesis'],
        active: false,
        completed: false
      },
      {
        id: 'display',
        title: '8. Results & Visualization',
        description: 'Present results with interactive features',
        icon: <Layers className={STEP_ICON_CLASS} />,
        details: [
          'Interactive knowledge graph visualization',
          'Relevance-scored result cards with snippets',
          'Citation tracking and source attribution',
          'Real-time AI explanations and insights'
        ],
        tech: ['D3.js', 'React', 'Knowledge Graph', 'UI Components'],
        active: false,
        completed: false
      }
    ],
    [userQuery]
  );

  // Derive the per-step flags from currentStep rather than copying the list
  // into state: a state copy would freeze the query text shown in `details`
  // at its initial value.
  const steps = useMemo<FlowStep[]>(
    () =>
      flowSteps.map((step, index) => ({
        ...step,
        active: index === currentStep,
        completed: index < currentStep
      })),
    [flowSteps, currentStep]
  );

  const lastStepIndex = steps.length - 1;

  // Percentage of the walkthrough reached; always consistent with the
  // "Step N of M" label because both come from currentStep.
  const progress = ((currentStep + 1) / steps.length) * 100;

  // Auto-advance while playing. The state updater stays pure; stopping at the
  // end is decided here, not inside the updater.
  useEffect(() => {
    if (!isPlaying) {
      return;
    }
    if (currentStep >= lastStepIndex) {
      setIsPlaying(false);
      return;
    }
    const timer = setTimeout(() => {
      setCurrentStep((prev) => Math.min(prev + 1, lastStepIndex));
    }, STEP_INTERVAL_MS);
    return () => clearTimeout(timer);
  }, [isPlaying, currentStep, lastStepIndex]);

  /** Returns to the first step and stops playback. */
  const resetAnimation = () => {
    setCurrentStep(0);
    setIsPlaying(false);
  };

  /** Starts playback, restarting from the first step when already at the end. */
  const playAnimation = () => {
    if (currentStep === lastStepIndex) {
      resetAnimation();
    }
    setIsPlaying(true);
  };

  return (
{/* Header: page title and one-line summary of the pipeline */}

KnowledgeBridge System Flow

How your query becomes intelligent multi-source research with AI enhancement

{/* Query Input: edits userQuery, the same value used by the embedding demo further down */}
setUserQuery(e.target.value)} className="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-800 dark:border-gray-600 dark:text-white" placeholder="Enter your query to see the process" />
{/* Controls */}
{/* Progress Bar: shows the 1-based position of the current step */}

Step {currentStep + 1} of {steps.length}

{/* Flow Diagram: one card per step; selecting a card calls setCurrentStep with its index */}
{steps.map((step, index) => (
{/* Connection Arrow */} {index < steps.length - 1 && (
)} {/* Step Card */} setCurrentStep(index)} >
{step.icon}
{step.completed && (
)}
{step.title}

{step.description}

{/* Technology Tags */}
{step.tech.map((tech) => ( {tech} ))}
{/* Details (shown when active) */} {step.active && (

Process Details:

    {step.details.map((detail, i) => (
  • {detail}
  • ))}
{/* Embedding preview: attached to the 'processing' step, the one that generates embeddings (no step uses the id 'embeddings', so the old check never matched) */} {step.id === 'processing' && (
Vector: [{generateEmbedding(userQuery).slice(0, 4).join(', ')}, ...]
Dimensions: 1536 | Magnitude: {Math.sqrt(generateEmbedding(userQuery).reduce((sum, val) => sum + val * val, 0)).toFixed(3)}
)} {/* Special visualization for validation step */} {step.id === 'validation' && (
{[ { doc: 'github.com/research/ai', status: 'valid' }, { doc: 'arxiv.org/abs/2024.12345', status: 'verified' }, { doc: 'invalid-url.broken', status: 'filtered' } ].map((result, i) => (
{result.doc} {result.status}
))}
)}
)}
))}
{/* Live Embedding Demo: recomputes the demo vector from userQuery on every render */}

Live Embedding Calculator

{/* Text to Vector */}

Text → Vector Conversion

setUserQuery(e.target.value)} className="w-full font-mono text-sm bg-gray-100 dark:bg-gray-700 p-2 rounded border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-2 focus:ring-purple-500 dark:text-gray-100" placeholder="Enter text to generate embeddings..." />
Embedding (first 8 dims):
[{generateEmbedding(userQuery).join(', ')}]
Vector magnitude: {Math.sqrt(generateEmbedding(userQuery).reduce((sum, val) => sum + val * val, 0)).toFixed(3)}
{/* Similarity Calculations: cosine similarity between the query vector and three fixed sample vectors */}

Similarity Scores

{[ { doc: 'AI Research Paper', vector: [0.2, 0.8, -0.1, 0.5, 0.3, -0.4, 0.7, 0.1] }, { doc: 'GitHub Repository', vector: [0.1, 0.6, 0.2, -0.3, 0.8, 0.4, -0.2, 0.5] }, { doc: 'Wikipedia Article', vector: [-0.3, 0.4, 0.7, 0.2, -0.1, 0.6, 0.3, -0.5] } ].map((doc, i) => { const queryVec = generateEmbedding(userQuery); const dotProduct = queryVec.reduce((sum, val, idx) => sum + val * doc.vector[idx], 0); const queryMag = Math.sqrt(queryVec.reduce((sum, val) => sum + val * val, 0)); const docMag = Math.sqrt(doc.vector.reduce((sum, val) => sum + val * val, 0)); const similarity = dotProduct / (queryMag * docMag); return (
{doc.doc}
0.3 ? 'bg-green-400' : similarity > 0.1 ? 'bg-yellow-400' : 'bg-gray-300'}`} style={{width: `${Math.max(20, Math.abs(similarity) * 60)}px`}}>
{similarity.toFixed(2)}
); })}
{/* Key Concepts: three static explainer panels (Embeddings, Vector Search, AI Pipeline) */}
Embeddings

Nebius BAAI/bge-en-icl generates semantic vectors. Similar concepts have similar vector values.

"AI research" → [0.1, 0.3, 0.8, ...]
"machine learning" → [0.2, 0.4, 0.7, ...]
Vector Search

Multi-source search across GitHub, ArXiv, Wikipedia with smart URL validation.

GitHub API: repositories + code
ArXiv API: academic papers
Wikipedia: authoritative content
AI Pipeline

KnowledgeBridge combines multi-source search with Nebius AI for intelligent research synthesis.

Query → Enhance → Search → Validate → Analyze
); }; export default SystemFlowDiagram;