""" Test suite for Knowledge Base Browser Gradio Component """ import pytest import json import tempfile from pathlib import Path from unittest.mock import Mock, patch from kb_browser.retriever import KnowledgeRetriever from kb_browser import KnowledgeBrowser class TestKnowledgeRetriever: """Test cases for the KnowledgeRetriever class""" def setup_method(self): """Setup test environment""" self.temp_dir = tempfile.mkdtemp() self.retriever = KnowledgeRetriever(index_path=self.temp_dir) def test_initialization(self): """Test retriever initialization""" assert self.retriever is not None assert len(self.retriever.documents) > 0 assert self.retriever.index_path == Path(self.temp_dir) def test_text_search_functionality(self): """Test text-based search fallback""" results = self.retriever.search( query="retrieval augmented generation", search_type="keyword", k=3 ) assert "documents" in results assert "search_time" in results assert "query" in results assert "total_count" in results assert results["query"] == "retrieval augmented generation" assert results["total_count"] >= 0 assert len(results["documents"]) <= 3 if results["documents"]: doc = results["documents"][0] assert "id" in doc assert "title" in doc assert "content" in doc assert "snippet" in doc assert "relevance_score" in doc def test_semantic_search_with_openai(self): """Test semantic search with OpenAI embeddings""" # This will use the actual OpenAI API if available results = self.retriever.search( query="vector databases", search_type="semantic", k=2 ) assert results["total_count"] >= 0 assert len(results["documents"]) <= 2 def test_snippet_extraction(self): """Test snippet extraction functionality""" content = "This is a long document about retrieval augmented generation and vector databases." query = "retrieval" snippet = self.retriever._extract_snippet(content, query, max_length=50) assert "retrieval" in snippet.lower() assert len(snippet) <= 60 # Accounting for ellipsis def test_text_scoring(self): """Test text relevance scoring""" doc = { "title": "Retrieval Augmented Generation", "content": "This document discusses RAG and retrieval methods." } score = self.retriever._calculate_text_score(doc, "retrieval") assert 0 <= score <= 1 assert score > 0 # Should match the word "retrieval" class TestKnowledgeBrowser: """Test cases for the KnowledgeBrowser Gradio component""" def setup_method(self): """Setup test environment""" self.temp_dir = tempfile.mkdtemp() self.kb_browser = KnowledgeBrowser(index_path=self.temp_dir) def test_component_initialization(self): """Test component initialization""" assert self.kb_browser is not None assert self.kb_browser.query == "" assert self.kb_browser.results == [] assert self.kb_browser.search_type == "semantic" assert self.kb_browser.max_results == 10 def test_preprocess_method(self): """Test payload preprocessing""" payload = { "query": "test query", "search_type": "hybrid", "max_results": 5 } processed = self.kb_browser.preprocess(payload) assert processed["query"] == "test query" assert processed["search_type"] == "hybrid" assert processed["max_results"] == 5 assert "filters" in processed def test_postprocess_method(self): """Test value postprocessing""" value = { "query": "test query", "results": [{"title": "Test Doc", "snippet": "Test content"}], "search_type": "semantic", "total_count": 1, "search_time": 0.1 } processed = self.kb_browser.postprocess(value) assert processed["query"] == "test query" assert len(processed["results"]) == 1 assert processed["search_type"] == "semantic" assert processed["total_count"] == 1 assert processed["search_time"] == 0.1 def test_api_info(self): """Test API information structure""" api_info = self.kb_browser.api_info() assert "info" in api_info assert "type" in api_info["info"] assert "properties" in api_info["info"] properties = api_info["info"]["properties"] assert "query" in properties assert "results" in properties assert "search_type" in properties def test_example_inputs(self): """Test example inputs""" examples = self.kb_browser.example_inputs() assert "query" in examples assert "search_type" in examples assert "max_results" in examples assert examples["query"] == "retrieval augmented generation" assert examples["search_type"] == "semantic" assert examples["max_results"] == 5 def test_search_method(self): """Test component search functionality""" results = self.kb_browser.search( query="vector search", search_type="semantic", max_results=3 ) assert "query" in results assert "results" in results assert "search_type" in results assert "total_count" in results assert "search_time" in results assert results["query"] == "vector search" assert results["search_type"] == "semantic" assert len(results["results"]) <= 3 class TestIntegration: """Integration tests for the complete system""" def test_end_to_end_search(self): """Test complete search workflow""" kb_browser = KnowledgeBrowser() # Perform search results = kb_browser.search("LlamaIndex", search_type="semantic", max_results=2) # Verify structure assert isinstance(results, dict) assert "documents" in results or "results" in results assert "search_time" in results # Verify content if results exist documents = results.get("documents") or results.get("results", []) if documents: doc = documents[0] assert "title" in doc assert "snippet" in doc assert "relevance_score" in doc @patch('kb_browser.retriever.LLAMA_INDEX_AVAILABLE', False) def test_fallback_when_llama_index_unavailable(self): """Test system falls back gracefully when LlamaIndex is unavailable""" retriever = KnowledgeRetriever() results = retriever.search("test query", k=1) assert "documents" in results assert results["total_count"] >= 0 def test_sample_data_integrity(): """Test that sample data is properly structured""" retriever = KnowledgeRetriever() for doc in retriever.documents: assert "id" in doc assert "title" in doc assert "content" in doc assert "source" in doc assert "source_type" in doc # Verify required fields are non-empty assert doc["title"].strip() assert doc["content"].strip() assert doc["source"].strip() assert doc["source_type"] in ["pdf", "web", "academic", "code"] def run_manual_tests(): """Run manual tests for development""" print("Running manual tests...") # Test retriever print("\n1. Testing KnowledgeRetriever...") retriever = KnowledgeRetriever() results = retriever.search("RAG", k=2) print(f" Found {results['total_count']} results in {results['search_time']:.3f}s") # Test component print("\n2. Testing KnowledgeBrowser component...") kb_browser = KnowledgeBrowser() search_results = kb_browser.search("vector databases", max_results=1) print(f" Component search returned {len(search_results.get('results', []))} results") # Test API info print("\n3. Testing API info...") api_info = kb_browser.api_info() print(f" API info has {len(api_info['info']['properties'])} properties") print("\nAll manual tests completed successfully!") if __name__ == "__main__": # Run manual tests if called directly run_manual_tests()