|
""" |
|
Test suite for Knowledge Base Browser Gradio Component |
|
""" |
|
|
|
import json
import shutil
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pytest

from kb_browser import KnowledgeBrowser
from kb_browser.retriever import KnowledgeRetriever
|
|
|
|
|
class TestKnowledgeRetriever:
    """Test cases for the KnowledgeRetriever class.

    Every test gets its own retriever bound to a fresh temporary index
    directory; the directory is deleted again once the test has finished.
    """

    def setup_method(self):
        """Create a temporary index directory and a retriever that uses it."""
        self.temp_dir = tempfile.mkdtemp()
        self.retriever = KnowledgeRetriever(index_path=self.temp_dir)

    def teardown_method(self):
        """Delete the temporary index directory created by setup_method."""
        # mkdtemp() leaves removal to the caller. Removal is best-effort so a
        # cleanup hiccup cannot turn a passing test into an error.
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_initialization(self):
        """The retriever exists, holds documents and records its index path."""
        assert self.retriever is not None
        assert len(self.retriever.documents) > 0
        assert self.retriever.index_path == Path(self.temp_dir)

    def test_text_search_functionality(self):
        """A keyword search returns a well-formed result payload."""
        results = self.retriever.search(
            query="retrieval augmented generation",
            search_type="keyword",
            k=3,
        )

        for key in ("documents", "search_time", "query", "total_count"):
            assert key in results

        assert results["query"] == "retrieval augmented generation"
        assert results["total_count"] >= 0
        assert len(results["documents"]) <= 3

        # Per-document fields can only be checked when something matched.
        if results["documents"]:
            doc = results["documents"][0]
            for key in ("id", "title", "content", "snippet", "relevance_score"):
                assert key in doc

    def test_semantic_search_with_openai(self):
        """A semantic search returns at most ``k`` documents.

        NOTE(review): despite the test name, nothing here configures or
        mocks an OpenAI client, so the backend that actually serves the
        query depends on the environment - confirm the intended coverage.
        """
        results = self.retriever.search(
            query="vector databases",
            search_type="semantic",
            k=2,
        )

        assert results["total_count"] >= 0
        assert len(results["documents"]) <= 2

    def test_snippet_extraction(self):
        """The extracted snippet contains the query term and stays short."""
        content = "This is a long document about retrieval augmented generation and vector databases."
        query = "retrieval"

        snippet = self.retriever._extract_snippet(content, query, max_length=50)

        assert "retrieval" in snippet.lower()
        # The bound leaves 10 characters of slack over max_length, presumably
        # for truncation markers - verify against _extract_snippet.
        assert len(snippet) <= 60

    def test_text_scoring(self):
        """A document matching the query scores strictly within (0, 1]."""
        doc = {
            "title": "Retrieval Augmented Generation",
            "content": "This document discusses RAG and retrieval methods.",
        }

        score = self.retriever._calculate_text_score(doc, "retrieval")

        assert 0 < score <= 1
|
|
|
|
|
class TestKnowledgeBrowser:
    """Test cases for the KnowledgeBrowser Gradio component.

    Every test gets its own component bound to a fresh temporary index
    directory; the directory is deleted again once the test has finished.
    """

    def setup_method(self):
        """Create a temporary index directory and a component that uses it."""
        self.temp_dir = tempfile.mkdtemp()
        self.kb_browser = KnowledgeBrowser(index_path=self.temp_dir)

    def teardown_method(self):
        """Delete the temporary index directory created by setup_method."""
        # mkdtemp() leaves removal to the caller. Removal is best-effort so a
        # cleanup hiccup cannot turn a passing test into an error.
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_component_initialization(self):
        """A new component starts empty with the expected default settings."""
        assert self.kb_browser is not None
        assert self.kb_browser.query == ""
        assert self.kb_browser.results == []
        assert self.kb_browser.search_type == "semantic"
        assert self.kb_browser.max_results == 10

    def test_preprocess_method(self):
        """preprocess keeps the payload values and supplies a filters entry."""
        payload = {
            "query": "test query",
            "search_type": "hybrid",
            "max_results": 5,
        }

        processed = self.kb_browser.preprocess(payload)

        assert processed["query"] == "test query"
        assert processed["search_type"] == "hybrid"
        assert processed["max_results"] == 5
        # The payload carries no filters, so the key must be added for us.
        assert "filters" in processed

    def test_postprocess_method(self):
        """postprocess passes the search value fields through unchanged."""
        value = {
            "query": "test query",
            "results": [{"title": "Test Doc", "snippet": "Test content"}],
            "search_type": "semantic",
            "total_count": 1,
            "search_time": 0.1,
        }

        processed = self.kb_browser.postprocess(value)

        assert processed["query"] == "test query"
        assert len(processed["results"]) == 1
        assert processed["search_type"] == "semantic"
        assert processed["total_count"] == 1
        assert processed["search_time"] == 0.1

    def test_api_info(self):
        """api_info describes a typed schema with the core properties."""
        api_info = self.kb_browser.api_info()

        assert "info" in api_info
        assert "type" in api_info["info"]
        assert "properties" in api_info["info"]

        properties = api_info["info"]["properties"]
        for key in ("query", "results", "search_type"):
            assert key in properties

    def test_example_inputs(self):
        """example_inputs returns the documented sample query settings."""
        examples = self.kb_browser.example_inputs()

        for key in ("query", "search_type", "max_results"):
            assert key in examples

        assert examples["query"] == "retrieval augmented generation"
        assert examples["search_type"] == "semantic"
        assert examples["max_results"] == 5

    def test_search_method(self):
        """search echoes the request and respects the max_results limit."""
        results = self.kb_browser.search(
            query="vector search",
            search_type="semantic",
            max_results=3,
        )

        for key in ("query", "results", "search_type", "total_count", "search_time"):
            assert key in results

        assert results["query"] == "vector search"
        assert results["search_type"] == "semantic"
        assert len(results["results"]) <= 3
|
|
|
|
|
class TestIntegration:
    """Integration checks that drive the component and retriever together."""

    def test_end_to_end_search(self):
        """Run a semantic search through a default component and check the payload."""
        browser = KnowledgeBrowser()
        payload = browser.search("LlamaIndex", search_type="semantic", max_results=2)

        assert isinstance(payload, dict)
        # Either key is accepted as the container for the hits.
        assert any(key in payload for key in ("documents", "results"))
        assert "search_time" in payload

        hits = payload.get("documents") or payload.get("results", [])
        if not hits:
            # Nothing matched, so there is no document shape to verify.
            return

        first_hit = hits[0]
        for field in ("title", "snippet", "relevance_score"):
            assert field in first_hit

    def test_fallback_when_llama_index_unavailable(self):
        """Search still answers while LLAMA_INDEX_AVAILABLE is forced to False."""
        with patch('kb_browser.retriever.LLAMA_INDEX_AVAILABLE', False):
            fallback_retriever = KnowledgeRetriever()
            outcome = fallback_retriever.search("test query", k=1)

            assert "documents" in outcome
            assert outcome["total_count"] >= 0
|
|
|
|
|
def test_sample_data_integrity():
    """Check that each document of a default retriever is well-formed.

    Every document must carry the required keys, have non-blank title,
    content and source strings, and use one of the known source types.
    """
    required_keys = ("id", "title", "content", "source", "source_type")
    non_blank_fields = ("title", "content", "source")
    known_source_types = ["pdf", "web", "academic", "code"]

    sample_retriever = KnowledgeRetriever()

    for entry in sample_retriever.documents:
        for key in required_keys:
            assert key in entry

        # Whitespace-only values count as missing.
        for field in non_blank_fields:
            assert entry[field].strip()

        assert entry["source_type"] in known_source_types
|
|
|
|
|
def run_manual_tests():
    """Print a short smoke-test report for the retriever and the component.

    Intended for ad-hoc runs during development: it performs one retriever
    search, one component search and one api_info call, and prints a
    summary line for each.
    """
    print("Running manual tests...")

    # Step 1: query the retriever directly.
    print("\n1. Testing KnowledgeRetriever...")
    manual_retriever = KnowledgeRetriever()
    rag_results = manual_retriever.search("RAG", k=2)
    hit_count = rag_results['total_count']
    elapsed = rag_results['search_time']
    print(f" Found {hit_count} results in {elapsed:.3f}s")

    # Step 2: run a search through the Gradio component.
    print("\n2. Testing KnowledgeBrowser component...")
    browser = KnowledgeBrowser()
    component_results = browser.search("vector databases", max_results=1)
    returned = len(component_results.get('results', []))
    print(f" Component search returned {returned} results")

    # Step 3: inspect the schema the component advertises.
    print("\n3. Testing API info...")
    schema = browser.api_info()
    property_count = len(schema['info']['properties'])
    print(f" API info has {property_count} properties")

    print("\nAll manual tests completed successfully!")
|
|
|
|
|
if __name__ == "__main__":
    # Running the module directly performs the printed smoke checks
    # instead of going through pytest.
    run_manual_tests()