Spaces:
Running
on
Zero
Running
on
Zero
"""Query ranker UI component and logic.""" | |
import html
import logging

import gradio as gr

from src.core.logging_config import get_logger
from src.rag.vector_store import vector_store_manager
from src.rag import document_ingestion_service
# Module-level logger, obtained from the project's logging helper and keyed by this module's name.
logger = get_logger(__name__)
def _scored_similarity_results(query, k):
    """Return similarity hits with real scores, or None if scored search is unavailable."""
    vector_store = vector_store_manager.get_vector_store()
    if not hasattr(vector_store, "similarity_search_with_score"):
        return None
    scored = vector_store.similarity_search_with_score(query, k=k)
    # NOTE(review): `1 - score` presumes the store returns a distance
    # (lower is closer) — confirm for the configured vector store.
    return [
        _format_ranker_result(
            doc,
            max(0, 1 - score) if score is not None else 0.8,
            position + 1,
        )
        for position, (doc, score) in enumerate(scored)
    ]


def _synthetic_similarity_results(query, k):
    """Return similarity hits with placeholder scores (0.85, 0.80, ...) by rank."""
    retriever = vector_store_manager.get_retriever("similarity", {"k": k})
    return [
        _format_ranker_result(doc, 0.85 - (position * 0.05), position + 1)
        for position, doc in enumerate(retriever.invoke(query))
    ]


def _unscored_results(docs):
    """Format retrieved documents in rank order without any score."""
    return [
        _format_ranker_result(doc, None, position + 1)
        for position, doc in enumerate(docs)
    ]


def handle_query_search(query, method, k_value):
    """Handle a query search and return the results rendered as HTML.

    Args:
        query: Search text; empty or whitespace-only input yields the
            placeholder panel without touching the retrievers.
        method: One of "similarity", "mmr", "bm25" or "hybrid". Any other
            value is logged and produces the "no results" panel.
        k_value: Number of results requested; coerced to ``int`` because UI
            sliders may deliver floats.

    Returns:
        An HTML string: the placeholder, the formatted results, or an error
        panel if retrieval raised.
    """
    if not query or not query.strip():
        return """
        <div class="ranker-container">
            <div class="ranker-placeholder">
                <h3>π Query Ranker</h3>
                <p>Enter a search query to find relevant document chunks with similarity scores.</p>
            </div>
        </div>
        """
    try:
        k = int(k_value)
        logger.info(f"Query search: '{query[:50]}...' using method: {method}")
        if method == "similarity":
            # Prefer real scores; only run the plain retriever when the
            # scored search is missing or fails, so the store is queried once
            # and partial scored results can never be mixed with fallbacks.
            try:
                results = _scored_similarity_results(query, k)
            except Exception as e:
                logger.warning(f"Could not get similarity scores: {e}")
                results = None
            if results is None:
                results = _synthetic_similarity_results(query, k)
        elif method == "mmr":
            retriever = vector_store_manager.get_retriever(
                "mmr", {"k": k, "fetch_k": k * 2, "lambda_mult": 0.5}
            )
            results = _unscored_results(retriever.invoke(query))  # No score for MMR
        elif method == "bm25":
            retriever = vector_store_manager.get_bm25_retriever(k=k)
            results = _unscored_results(retriever.invoke(query))  # No score for BM25
        elif method == "hybrid":
            retriever = vector_store_manager.get_hybrid_retriever(
                k=k, semantic_weight=0.7, keyword_weight=0.3
            )
            results = _unscored_results(retriever.invoke(query))  # No score for hybrid
        else:
            logger.warning(f"Unknown retrieval method: {method}")
            results = []
        logger.info(f"Retrieved {len(results)} results for query using {method}")
        return _format_ranker_results_html(results, query, method)
    except Exception as e:
        error_msg = f"Error during search: {str(e)}"
        logger.exception(error_msg)
        # Exception text may contain '<', '>' or '&' (object reprs, echoed
        # user input); escape it so it renders as text, not markup.
        return f"""
        <div class="ranker-container">
            <div class="ranker-error">
                <h3>β Search Error</h3>
                <p>{html.escape(error_msg)}</p>
                <p class="error-hint">Make sure documents are uploaded and the system is ready.</p>
            </div>
        </div>
        """
def _format_ranker_result(doc, score, rank): | |
"""Format a single search result.""" | |
# Extract metadata | |
metadata = doc.metadata | |
source = metadata.get("source", "Unknown") | |
page = metadata.get("page", "N/A") | |
chunk_id = metadata.get("chunk_id", "Unknown") | |
# Calculate content length and create indicator | |
content_length = len(doc.page_content) | |
if content_length < 200: | |
length_indicator = f"π {content_length} chars" | |
elif content_length < 500: | |
length_indicator = f"π {content_length} chars" | |
else: | |
length_indicator = f"π {content_length} chars" | |
# Calculate confidence based on rank (high confidence for top results) | |
if rank <= 2: | |
confidence = "High" | |
confidence_color = "#28a745" | |
confidence_icon = "π₯" | |
elif rank <= 4: | |
confidence = "Medium" | |
confidence_color = "#ffc107" | |
confidence_icon = "β" | |
else: | |
confidence = "Low" | |
confidence_color = "#6c757d" | |
confidence_icon = "π‘" | |
result = { | |
"rank": rank, | |
"content": doc.page_content, | |
"source": source, | |
"page": page, | |
"chunk_id": chunk_id, | |
"length_indicator": length_indicator, | |
"has_score": score is not None, | |
"confidence": confidence, | |
"confidence_color": confidence_color, | |
"confidence_icon": confidence_icon | |
} | |
# Only add score if we have a real score (similarity search only) | |
if score is not None: | |
result["score"] = round(score, 3) | |
return result | |
def _format_ranker_results_html(results, query, method): | |
"""Format search results as HTML.""" | |
if not results: | |
return """ | |
<div class="ranker-container"> | |
<div class="ranker-no-results"> | |
<h3>π No Results Found</h3> | |
<p>No relevant documents found for your query.</p> | |
<p class="no-results-hint">Try different keywords or check if documents are uploaded.</p> | |
</div> | |
</div> | |
""" | |
# Method display names | |
method_labels = { | |
"similarity": "π― Similarity Search", | |
"mmr": "π MMR (Diverse)", | |
"bm25": "π BM25 (Keywords)", | |
"hybrid": "π Hybrid (Recommended)" | |
} | |
method_display = method_labels.get(method, method) | |
# Start building HTML | |
html_parts = [f""" | |
<div class="ranker-container"> | |
<div class="ranker-header"> | |
<div class="ranker-title"> | |
<h3>π Search Results</h3> | |
<div class="query-display">"{query}"</div> | |
</div> | |
<div class="ranker-meta"> | |
<span class="method-badge">{method_display}</span> | |
<span class="result-count">{len(results)} results</span> | |
</div> | |
</div> | |
"""] | |
# Add results | |
for result in results: | |
rank_emoji = ["π₯", "π₯", "π₯"][result["rank"] - 1] if result["rank"] <= 3 else f"#{result['rank']}" | |
# Escape content for safe HTML inclusion and JavaScript | |
escaped_content = result['content'].replace('"', '"').replace("'", "'").replace('\n', '\\n') | |
# Build score info - always show confidence, only show score for similarity search | |
score_info_parts = [f""" | |
<span class="confidence-badge" style="color: {result['confidence_color']}"> | |
{result['confidence_icon']} {result['confidence']} | |
</span>"""] | |
# Only add score value if we have real scores (similarity search) | |
if result.get('has_score', False): | |
score_info_parts.append(f'<span class="score-value">π― {result["score"]}</span>') | |
score_info_html = f""" | |
<div class="score-info"> | |
{''.join(score_info_parts)} | |
</div>""" | |
html_parts.append(f""" | |
<div class="result-card"> | |
<div class="result-header"> | |
<div class="rank-info"> | |
<span class="rank-badge">{rank_emoji} Rank {result['rank']}</span> | |
<span class="source-info">π {result['source']}</span> | |
{f"<span class='page-info'>Page {result['page']}</span>" if result['page'] != 'N/A' else ""} | |
<span class="length-info">{result['length_indicator']}</span> | |
</div> | |
{score_info_html} | |
</div> | |
<div class="result-content"> | |
<div class="content-text">{result['content']}</div> | |
</div> | |
</div> | |
""") | |
html_parts.append("</div>") | |
return "".join(html_parts) | |
def get_ranker_status():
    """Build the HTML status card for the query ranker.

    Reads the document count from the vector store's collection info and the
    ``system_ready`` flag from the ingestion service status.

    Returns:
        An HTML string: the status card, or an error card if either lookup
        raised. The error text is HTML-escaped so exception messages that
        contain angle brackets stay visible.
    """
    try:
        # Treat a missing info/status mapping as empty rather than failing.
        collection_info = vector_store_manager.get_collection_info() or {}
        document_count = collection_info.get("document_count", 0)

        available_methods = ["similarity", "mmr", "bm25", "hybrid"]

        ingestion_status = document_ingestion_service.get_ingestion_status() or {}
        system_ready = ingestion_status.get('system_ready', False)

        indicator_class = 'status-ready' if system_ready else 'status-not-ready'
        indicator_text = 'π’ READY' if system_ready else 'π΄ NOT READY'
        store_state = 'Ready' if system_ready else 'Not Ready'

        return f"""
        <div class="status-card">
            <div class="status-header">
                <h3>π Query Ranker Status</h3>
                <div class="status-indicator {indicator_class}">
                    {indicator_text}
                </div>
            </div>
            <div class="status-grid">
                <div class="status-item">
                    <div class="status-label">Available Documents</div>
                    <div class="status-value">{html.escape(str(document_count))}</div>
                </div>
                <div class="status-item">
                    <div class="status-label">Retrieval Methods</div>
                    <div class="status-value">{len(available_methods)}</div>
                </div>
                <div class="status-item">
                    <div class="status-label">Vector Store</div>
                    <div class="status-value">{store_state}</div>
                </div>
            </div>
            <div class="ranker-methods">
                <div class="methods-label">Available Methods:</div>
                <div class="methods-list">
                    <span class="method-tag">π― Similarity</span>
                    <span class="method-tag">π MMR</span>
                    <span class="method-tag">π BM25</span>
                    <span class="method-tag">π Hybrid</span>
                </div>
            </div>
        </div>
        """
    except Exception as e:
        error_msg = f"Error getting ranker status: {str(e)}"
        # Log with traceback; the UI only shows the one-line message.
        logger.exception(error_msg)
        return f"""
        <div class="status-card status-error">
            <div class="status-header">
                <h3>β System Error</h3>
            </div>
            <p class="error-message">{html.escape(error_msg)}</p>
        </div>
        """
def create_query_ranker_tab():
    """Build the "Query Ranker" tab: status card, controls, results and events.

    Must be called while a Gradio Blocks/Tabs context is open. Returns nothing;
    the components and their event listeners are registered as a side effect.

    NOTE(review): the layout nesting below is reconstructed from a listing that
    had lost its indentation — confirm against the rendered UI.
    """
    with gr.TabItem("π Query Ranker"):
        with gr.Column(elem_classes=["ranker-container"]):
            # Header
            gr.HTML("""
            <div class="chat-header">
                <h2>π Query Ranker</h2>
                <p>Search and rank document chunks with transparency into retrieval methods</p>
            </div>
            """)

            # Status card, populated once at build time
            status_display = gr.HTML(value=get_ranker_status())

            # Refresh / clear buttons
            with gr.Row(elem_classes=["control-buttons"]):
                refresh_ranker_status_btn = gr.Button(
                    "π Refresh Status",
                    elem_classes=["control-btn", "btn-refresh"],
                )
                clear_results_btn = gr.Button(
                    "ποΈ Clear Results",
                    elem_classes=["control-btn", "btn-clear-data"],
                )

            # Query box, search button and retrieval options
            with gr.Column(elem_classes=["ranker-controls"]):
                with gr.Row(elem_classes=["ranker-input-row"]):
                    query_input = gr.Textbox(
                        placeholder="Enter your search query...",
                        show_label=False,
                        elem_classes=["ranker-query-input"],
                        scale=4,
                    )
                    search_btn = gr.Button(
                        "π Search",
                        elem_classes=["ranker-search-btn"],
                        scale=0,
                    )
                with gr.Row(elem_classes=["ranker-options-row"]):
                    retrieval_choices = [
                        ("π― Similarity Search", "similarity"),
                        ("π MMR (Diverse)", "mmr"),
                        ("π BM25 (Keywords)", "bm25"),
                        ("π Hybrid (Recommended)", "hybrid"),
                    ]
                    method_dropdown = gr.Dropdown(
                        choices=retrieval_choices,
                        value="hybrid",
                        label="Retrieval Method",
                        scale=2,
                    )
                    k_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Number of Results",
                        scale=1,
                    )

            # Results panel starts out showing the empty-query placeholder
            results_display = gr.HTML(
                value=handle_query_search("", "hybrid", 5),
                elem_classes=["ranker-results-container"],
            )

            def _search_wiring():
                """Return fresh listener kwargs that run a search into the results panel."""
                return {
                    "fn": handle_query_search,
                    "inputs": [query_input, method_dropdown, k_slider],
                    "outputs": [results_display],
                }

            # Search on Enter in the textbox and on button click
            query_input.submit(**_search_wiring())
            search_btn.click(**_search_wiring())

            def clear_ranker_results():
                """Clear the search results and reset to placeholder."""
                placeholder_html = handle_query_search("", "hybrid", 5)
                return placeholder_html, ""

            def refresh_ranker_status():
                """Refresh the ranker status display."""
                return get_ranker_status()

            refresh_ranker_status_btn.click(
                fn=refresh_ranker_status,
                inputs=[],
                outputs=[status_display],
            )
            clear_results_btn.click(
                fn=clear_ranker_results,
                inputs=[],
                outputs=[results_display, query_input],
            )

            # Re-run the search whenever the method or result count changes
            method_dropdown.change(**_search_wiring())
            k_slider.change(**_search_wiring())