Markit_v2 / src /ui /components /query_ranker.py
AnseMin's picture
Refactor UI components for modular architecture and enhance functionality
6ea41ec
"""Query ranker UI component and logic."""
import gradio as gr
import logging
from src.core.logging_config import get_logger
from src.rag.vector_store import vector_store_manager
from src.rag import document_ingestion_service
logger = get_logger(__name__)
def handle_query_search(query, method, k_value):
"""Handle query search and return formatted results."""
if not query or not query.strip():
return """
<div class="ranker-container">
<div class="ranker-placeholder">
<h3>πŸ” Query Ranker</h3>
<p>Enter a search query to find relevant document chunks with similarity scores.</p>
</div>
</div>
"""
try:
logger.info(f"Query search: '{query[:50]}...' using method: {method}")
# Get results based on method
results = []
if method == "similarity":
retriever = vector_store_manager.get_retriever("similarity", {"k": k_value})
docs = retriever.invoke(query)
# Try to get actual similarity scores
try:
vector_store = vector_store_manager.get_vector_store()
if hasattr(vector_store, 'similarity_search_with_score'):
docs_with_scores = vector_store.similarity_search_with_score(query, k=k_value)
for i, (doc, score) in enumerate(docs_with_scores):
similarity_score = max(0, 1 - score) if score is not None else 0.8
results.append(_format_ranker_result(doc, similarity_score, i + 1))
else:
# Fallback without scores
for i, doc in enumerate(docs):
score = 0.85 - (i * 0.05)
results.append(_format_ranker_result(doc, score, i + 1))
except Exception as e:
logger.warning(f"Could not get similarity scores: {e}")
for i, doc in enumerate(docs):
score = 0.85 - (i * 0.05)
results.append(_format_ranker_result(doc, score, i + 1))
elif method == "mmr":
retriever = vector_store_manager.get_retriever("mmr", {"k": k_value, "fetch_k": k_value * 2, "lambda_mult": 0.5})
docs = retriever.invoke(query)
for i, doc in enumerate(docs):
results.append(_format_ranker_result(doc, None, i + 1)) # No score for MMR
elif method == "bm25":
retriever = vector_store_manager.get_bm25_retriever(k=k_value)
docs = retriever.invoke(query)
for i, doc in enumerate(docs):
results.append(_format_ranker_result(doc, None, i + 1)) # No score for BM25
elif method == "hybrid":
retriever = vector_store_manager.get_hybrid_retriever(k=k_value, semantic_weight=0.7, keyword_weight=0.3)
docs = retriever.invoke(query)
for i, doc in enumerate(docs):
results.append(_format_ranker_result(doc, None, i + 1)) # No score for hybrid
logger.info(f"Retrieved {len(results)} results for query using {method}")
return _format_ranker_results_html(results, query, method)
except Exception as e:
error_msg = f"Error during search: {str(e)}"
logger.error(error_msg)
return f"""
<div class="ranker-container">
<div class="ranker-error">
<h3>❌ Search Error</h3>
<p>{error_msg}</p>
<p class="error-hint">Make sure documents are uploaded and the system is ready.</p>
</div>
</div>
"""
def _format_ranker_result(doc, score, rank):
"""Format a single search result."""
# Extract metadata
metadata = doc.metadata
source = metadata.get("source", "Unknown")
page = metadata.get("page", "N/A")
chunk_id = metadata.get("chunk_id", "Unknown")
# Calculate content length and create indicator
content_length = len(doc.page_content)
if content_length < 200:
length_indicator = f"πŸ“ {content_length} chars"
elif content_length < 500:
length_indicator = f"πŸ“„ {content_length} chars"
else:
length_indicator = f"πŸ“š {content_length} chars"
# Calculate confidence based on rank (high confidence for top results)
if rank <= 2:
confidence = "High"
confidence_color = "#28a745"
confidence_icon = "πŸ”₯"
elif rank <= 4:
confidence = "Medium"
confidence_color = "#ffc107"
confidence_icon = "⭐"
else:
confidence = "Low"
confidence_color = "#6c757d"
confidence_icon = "πŸ’‘"
result = {
"rank": rank,
"content": doc.page_content,
"source": source,
"page": page,
"chunk_id": chunk_id,
"length_indicator": length_indicator,
"has_score": score is not None,
"confidence": confidence,
"confidence_color": confidence_color,
"confidence_icon": confidence_icon
}
# Only add score if we have a real score (similarity search only)
if score is not None:
result["score"] = round(score, 3)
return result
def _format_ranker_results_html(results, query, method):
"""Format search results as HTML."""
if not results:
return """
<div class="ranker-container">
<div class="ranker-no-results">
<h3>πŸ” No Results Found</h3>
<p>No relevant documents found for your query.</p>
<p class="no-results-hint">Try different keywords or check if documents are uploaded.</p>
</div>
</div>
"""
# Method display names
method_labels = {
"similarity": "🎯 Similarity Search",
"mmr": "πŸ”€ MMR (Diverse)",
"bm25": "πŸ” BM25 (Keywords)",
"hybrid": "πŸ”— Hybrid (Recommended)"
}
method_display = method_labels.get(method, method)
# Start building HTML
html_parts = [f"""
<div class="ranker-container">
<div class="ranker-header">
<div class="ranker-title">
<h3>πŸ” Search Results</h3>
<div class="query-display">"{query}"</div>
</div>
<div class="ranker-meta">
<span class="method-badge">{method_display}</span>
<span class="result-count">{len(results)} results</span>
</div>
</div>
"""]
# Add results
for result in results:
rank_emoji = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"][result["rank"] - 1] if result["rank"] <= 3 else f"#{result['rank']}"
# Escape content for safe HTML inclusion and JavaScript
escaped_content = result['content'].replace('"', '&quot;').replace("'", "&#39;").replace('\n', '\\n')
# Build score info - always show confidence, only show score for similarity search
score_info_parts = [f"""
<span class="confidence-badge" style="color: {result['confidence_color']}">
{result['confidence_icon']} {result['confidence']}
</span>"""]
# Only add score value if we have real scores (similarity search)
if result.get('has_score', False):
score_info_parts.append(f'<span class="score-value">🎯 {result["score"]}</span>')
score_info_html = f"""
<div class="score-info">
{''.join(score_info_parts)}
</div>"""
html_parts.append(f"""
<div class="result-card">
<div class="result-header">
<div class="rank-info">
<span class="rank-badge">{rank_emoji} Rank {result['rank']}</span>
<span class="source-info">πŸ“„ {result['source']}</span>
{f"<span class='page-info'>Page {result['page']}</span>" if result['page'] != 'N/A' else ""}
<span class="length-info">{result['length_indicator']}</span>
</div>
{score_info_html}
</div>
<div class="result-content">
<div class="content-text">{result['content']}</div>
</div>
</div>
""")
html_parts.append("</div>")
return "".join(html_parts)
def get_ranker_status():
"""Get current ranker system status."""
try:
# Get collection info
collection_info = vector_store_manager.get_collection_info()
document_count = collection_info.get("document_count", 0)
# Get available methods
available_methods = ["similarity", "mmr", "bm25", "hybrid"]
# Check if system is ready
ingestion_status = document_ingestion_service.get_ingestion_status()
system_ready = ingestion_status.get('system_ready', False)
status_html = f"""
<div class="status-card">
<div class="status-header">
<h3>πŸ” Query Ranker Status</h3>
<div class="status-indicator {'status-ready' if system_ready else 'status-not-ready'}">
{'🟒 READY' if system_ready else 'πŸ”΄ NOT READY'}
</div>
</div>
<div class="status-grid">
<div class="status-item">
<div class="status-label">Available Documents</div>
<div class="status-value">{document_count}</div>
</div>
<div class="status-item">
<div class="status-label">Retrieval Methods</div>
<div class="status-value">{len(available_methods)}</div>
</div>
<div class="status-item">
<div class="status-label">Vector Store</div>
<div class="status-value">{'Ready' if system_ready else 'Not Ready'}</div>
</div>
</div>
<div class="ranker-methods">
<div class="methods-label">Available Methods:</div>
<div class="methods-list">
<span class="method-tag">🎯 Similarity</span>
<span class="method-tag">πŸ”€ MMR</span>
<span class="method-tag">πŸ” BM25</span>
<span class="method-tag">πŸ”— Hybrid</span>
</div>
</div>
</div>
"""
return status_html
except Exception as e:
error_msg = f"Error getting ranker status: {str(e)}"
logger.error(error_msg)
return f"""
<div class="status-card status-error">
<div class="status-header">
<h3>❌ System Error</h3>
</div>
<p class="error-message">{error_msg}</p>
</div>
"""
def create_query_ranker_tab():
"""Create the query ranker tab UI."""
with gr.TabItem("πŸ” Query Ranker"):
with gr.Column(elem_classes=["ranker-container"]):
# Header
gr.HTML("""
<div class="chat-header">
<h2>πŸ” Query Ranker</h2>
<p>Search and rank document chunks with transparency into retrieval methods</p>
</div>
""")
# Status display
status_display = gr.HTML(value=get_ranker_status())
# Control buttons
with gr.Row(elem_classes=["control-buttons"]):
refresh_ranker_status_btn = gr.Button("πŸ”„ Refresh Status", elem_classes=["control-btn", "btn-refresh"])
clear_results_btn = gr.Button("πŸ—‘οΈ Clear Results", elem_classes=["control-btn", "btn-clear-data"])
# Search controls
with gr.Column(elem_classes=["ranker-controls"]):
with gr.Row(elem_classes=["ranker-input-row"]):
query_input = gr.Textbox(
placeholder="Enter your search query...",
show_label=False,
elem_classes=["ranker-query-input"],
scale=4
)
search_btn = gr.Button("πŸ” Search", elem_classes=["ranker-search-btn"], scale=0)
with gr.Row(elem_classes=["ranker-options-row"]):
method_dropdown = gr.Dropdown(
choices=[
("🎯 Similarity Search", "similarity"),
("πŸ”€ MMR (Diverse)", "mmr"),
("πŸ” BM25 (Keywords)", "bm25"),
("πŸ”— Hybrid (Recommended)", "hybrid")
],
value="hybrid",
label="Retrieval Method",
scale=2
)
k_slider = gr.Slider(
minimum=1,
maximum=10,
value=5,
step=1,
label="Number of Results",
scale=1
)
# Results display
results_display = gr.HTML(
value=handle_query_search("", "hybrid", 5), # Initial placeholder
elem_classes=["ranker-results-container"]
)
# Event handlers
query_input.submit(
handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
search_btn.click(
handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
# Control button handlers
def clear_ranker_results():
"""Clear the search results and reset to placeholder."""
return handle_query_search("", "hybrid", 5), ""
def refresh_ranker_status():
"""Refresh the ranker status display."""
return get_ranker_status()
refresh_ranker_status_btn.click(
fn=refresh_ranker_status,
inputs=[],
outputs=[status_display]
)
clear_results_btn.click(
fn=clear_ranker_results,
inputs=[],
outputs=[results_display, query_input]
)
# Update results when method or k changes
method_dropdown.change(
fn=handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
k_slider.change(
fn=handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)