Markit_v2 / src /ui /ui_backup.py
AnseMin's picture
Refactor UI components for modular architecture and enhance functionality
6ea41ec
import gradio as gr
import markdown
import threading
import time
import logging
from pathlib import Path
from src.core.converter import convert_file, set_cancellation_flag, is_conversion_in_progress
from src.parsers.parser_registry import ParserRegistry
from src.core.config import config
from src.core.exceptions import (
DocumentProcessingError,
UnsupportedFileTypeError,
FileSizeLimitError,
ConfigurationError
)
from src.core.logging_config import get_logger
from src.rag import rag_chat_service, document_ingestion_service
from src.rag.vector_store import vector_store_manager
from src.services.data_clearing_service import data_clearing_service
# Use centralized logging
logger = get_logger(__name__)
# Import MarkItDown to check if it's available
try:
from markitdown import MarkItDown
HAS_MARKITDOWN = True
logger.info("MarkItDown is available for use")
except ImportError:
HAS_MARKITDOWN = False
logger.warning("MarkItDown is not available")
# Add a global variable to track cancellation state
conversion_cancelled = threading.Event()
# Pass the cancellation flag to the converter module
set_cancellation_flag(conversion_cancelled)
# Add a background thread to monitor cancellation
def monitor_cancellation():
"""Background thread to monitor cancellation and update UI if needed"""
logger.info("Starting cancellation monitor thread")
while is_conversion_in_progress():
if conversion_cancelled.is_set():
logger.info("Cancellation detected by monitor thread")
time.sleep(0.1) # Check every 100ms
logger.info("Cancellation monitor thread ending")
def update_ui_for_file_count(files):
"""Update UI components based on the number of files uploaded."""
if not files or len(files) == 0:
return (
gr.update(visible=False), # processing_type_selector
"<div style='color: #666; font-style: italic;'>Upload documents to begin</div>" # file_status_text
)
if len(files) == 1:
file_name = files[0].name if hasattr(files[0], 'name') else str(files[0])
return (
gr.update(visible=False), # processing_type_selector (hidden for single file)
f"<div style='color: #2563eb; font-weight: 500;'>πŸ“„ Single document: {file_name}</div>"
)
else:
# Calculate total size for validation display
total_size = 0
try:
for file in files:
if hasattr(file, 'size'):
total_size += file.size
elif hasattr(file, 'name'):
# For file paths, get size from filesystem
total_size += Path(file.name).stat().st_size
except:
pass # Size calculation is optional for display
size_display = f" ({total_size / (1024*1024):.1f}MB)" if total_size > 0 else ""
# Check if within limits
if len(files) > 5:
status_color = "#dc2626" # red
status_text = f"⚠️ Too many files: {len(files)}/5 (max 5 files allowed)"
elif total_size > 20 * 1024 * 1024: # 20MB
status_color = "#dc2626" # red
status_text = f"⚠️ Files too large{size_display} (max 20MB combined)"
else:
status_color = "#059669" # green
status_text = f"πŸ“‚ Batch mode: {len(files)} files{size_display}"
return (
gr.update(visible=True), # processing_type_selector (visible for multiple files)
f"<div style='color: {status_color}; font-weight: 500;'>{status_text}</div>"
)
def validate_file_for_parser(file_path, parser_name):
"""Validate if the file type is supported by the selected parser."""
if not file_path:
return True, "" # No file selected yet
try:
file_path_obj = Path(file_path)
file_ext = file_path_obj.suffix.lower()
# Check file size
if file_path_obj.exists():
file_size = file_path_obj.stat().st_size
if file_size > config.app.max_file_size:
size_mb = file_size / (1024 * 1024)
max_mb = config.app.max_file_size / (1024 * 1024)
return False, f"File size ({size_mb:.1f}MB) exceeds maximum allowed size ({max_mb:.1f}MB)"
# Check file extension
if file_ext not in config.app.allowed_extensions:
return False, f"File type '{file_ext}' is not supported. Allowed types: {', '.join(config.app.allowed_extensions)}"
# Parser-specific validation
if "GOT-OCR" in parser_name:
if file_ext not in ['.jpg', '.jpeg', '.png']:
return False, "GOT-OCR only supports JPG and PNG formats."
return True, ""
except Exception as e:
logger.error(f"Error validating file: {e}")
return False, f"Error validating file: {e}"
def format_markdown_content(content):
if not content:
return content
# Convert the content to HTML using markdown library
html_content = markdown.markdown(str(content), extensions=['tables'])
return html_content
def render_latex_to_html(latex_content):
"""Convert LaTeX content to HTML using Mathpix Markdown like GOT-OCR demo."""
import json
# Clean up the content similar to GOT-OCR demo
content = latex_content.strip()
if content.endswith("<|im_end|>"):
content = content[:-len("<|im_end|>")]
# Fix unbalanced delimiters exactly like GOT-OCR demo
right_num = content.count("\\right")
left_num = content.count("\\left")
if right_num != left_num:
content = (
content.replace("\\left(", "(")
.replace("\\right)", ")")
.replace("\\left[", "[")
.replace("\\right]", "]")
.replace("\\left{", "{")
.replace("\\right}", "}")
.replace("\\left|", "|")
.replace("\\right|", "|")
.replace("\\left.", ".")
.replace("\\right.", ".")
)
# Process content like GOT-OCR demo: remove $ signs and replace quotes
content = content.replace('"', "``").replace("$", "")
# Split into lines and create JavaScript string like GOT-OCR demo
outputs_list = content.split("\n")
js_text_parts = []
for line in outputs_list:
# Escape backslashes and add line break
escaped_line = line.replace("\\", "\\\\")
js_text_parts.append(f'"{escaped_line}\\n"')
# Join with + like in GOT-OCR demo
js_text = " + ".join(js_text_parts)
# Create HTML using Mathpix Markdown like GOT-OCR demo
html_content = f"""<!DOCTYPE html>
<html lang="en" data-lt-installed="true">
<head>
<meta charset="UTF-8">
<title>LaTeX Content</title>
<script>
const text = {js_text};
</script>
<style>
#content {{
max-width: 800px;
margin: auto;
padding: 20px;
}}
body {{
font-family: 'Times New Roman', serif;
line-height: 1.6;
background-color: #ffffff;
color: #333;
}}
table {{
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}}
td, th {{
border: 1px solid #333;
padding: 8px 12px;
text-align: center;
vertical-align: middle;
}}
</style>
<script>
let script = document.createElement('script');
script.src = "https://cdn.jsdelivr.net/npm/mathpix-markdown-it@1.3.6/es5/bundle.js";
document.head.append(script);
script.onload = function() {{
const isLoaded = window.loadMathJax();
if (isLoaded) {{
console.log('Styles loaded!')
}}
const el = window.document.getElementById('content-text');
if (el) {{
const options = {{
htmlTags: true
}};
const html = window.render(text, options);
el.outerHTML = html;
}}
}};
</script>
</head>
<body>
<div id="content">
<div id="content-text"></div>
</div>
</body>
</html>"""
return html_content
def format_latex_content(content):
"""Format LaTeX content for display in UI using MathJax rendering like GOT-OCR demo."""
if not content:
return content
try:
# Generate rendered HTML
rendered_html = render_latex_to_html(content)
# Encode for iframe display (similar to GOT-OCR demo)
import base64
encoded_html = base64.b64encode(rendered_html.encode("utf-8")).decode("utf-8")
iframe_src = f"data:text/html;base64,{encoded_html}"
# Create the display with both rendered and raw views
formatted_content = f"""
<div style="background-color: #f8f9fa; border-radius: 8px; border: 1px solid #e9ecef; margin: 10px 0;">
<div style="background-color: #e9ecef; padding: 10px; border-radius: 8px 8px 0 0; font-weight: bold; color: #495057;">
πŸ“„ LaTeX Content (Rendered with MathJax)
</div>
<div style="padding: 0;">
<iframe src="{iframe_src}" width="100%" height="500px" style="border: none; border-radius: 0 0 8px 8px;"></iframe>
</div>
<div style="background-color: #e9ecef; padding: 8px 15px; border-radius: 0; font-size: 12px; color: #6c757d; border-top: 1px solid #dee2e6;">
πŸ’‘ LaTeX content rendered with MathJax. Tables and formulas are displayed as they would appear in a LaTeX document.
</div>
<details style="margin: 0; border-top: 1px solid #dee2e6;">
<summary style="padding: 8px 15px; background-color: #e9ecef; cursor: pointer; font-size: 12px; color: #6c757d;">
πŸ“ View Raw LaTeX Source
</summary>
<div style="padding: 15px; background-color: #f8f9fa;">
<pre style="background-color: transparent; margin: 0; padding: 0;
font-family: 'Courier New', monospace; font-size: 12px; line-height: 1.4;
white-space: pre-wrap; word-wrap: break-word; color: #2c3e50; max-height: 200px; overflow-y: auto;">
{content}
</pre>
</div>
</details>
</div>
"""
except Exception as e:
# Fallback to simple formatting if rendering fails
import html
escaped_content = html.escape(str(content))
formatted_content = f"""
<div style="background-color: #f8f9fa; border-radius: 8px; border: 1px solid #e9ecef; margin: 10px 0;">
<div style="background-color: #e9ecef; padding: 10px; border-radius: 8px 8px 0 0; font-weight: bold; color: #495057;">
πŸ“„ LaTeX Content (Fallback View)
</div>
<div style="padding: 15px;">
<pre style="background-color: transparent; margin: 0; padding: 0;
font-family: 'Courier New', monospace; font-size: 14px; line-height: 1.4;
white-space: pre-wrap; word-wrap: break-word; color: #2c3e50;">
{escaped_content}
</pre>
</div>
<div style="background-color: #e9ecef; padding: 8px 15px; border-radius: 0 0 8px 8px; font-size: 12px; color: #6c757d;">
⚠️ Rendering failed, showing raw LaTeX. Error: {str(e)}
</div>
</div>
"""
return formatted_content
# Function to run conversion in a separate thread
def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format):
"""Run the conversion in a separate thread and return the thread object"""
global conversion_cancelled
# Reset the cancellation flag
conversion_cancelled.clear()
# Create a container for the results
results = {"content": None, "download_file": None, "error": None}
def conversion_worker():
try:
content, download_file = convert_file(file_path, parser_name, ocr_method_name, output_format)
results["content"] = content
results["download_file"] = download_file
except Exception as e:
logger.error(f"Error during conversion: {str(e)}")
results["error"] = str(e)
# Create and start the thread
thread = threading.Thread(target=conversion_worker)
thread.daemon = True
thread.start()
return thread, results
def run_conversion_thread_multi(file_paths, parser_name, ocr_method_name, output_format, processing_type):
"""Run the conversion in a separate thread for multiple files."""
import threading
from src.services.document_service import DocumentService
# Results will be shared between threads
results = {"content": None, "download_file": None, "error": None}
def conversion_worker():
try:
logger.info(f"Starting multi-file conversion thread for {len(file_paths)} files")
# Use the new document service unified method
document_service = DocumentService()
document_service.set_cancellation_flag(conversion_cancelled)
# Call the unified convert_documents method
content, output_file = document_service.convert_documents(
file_paths=file_paths,
parser_name=parser_name,
ocr_method_name=ocr_method_name,
output_format=output_format,
processing_type=processing_type
)
logger.info(f"Multi-file conversion completed successfully for {len(file_paths)} files")
results["content"] = content
results["download_file"] = output_file
except Exception as e:
logger.error(f"Error during multi-file conversion: {str(e)}")
results["error"] = str(e)
# Create and start the thread
thread = threading.Thread(target=conversion_worker)
thread.daemon = True
thread.start()
return thread, results
def handle_convert(files, parser_name, ocr_method_name, output_format, processing_type, is_cancelled):
"""Handle file conversion for single or multiple files."""
global conversion_cancelled
# Check if we should cancel before starting
if is_cancelled:
logger.info("Conversion cancelled before starting")
return "Conversion cancelled.", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
# Validate files input
if not files or len(files) == 0:
error_msg = "No files uploaded. Please upload at least one document."
logger.error(error_msg)
return f"Error: {error_msg}", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
# Convert Gradio file objects to file paths
file_paths = []
for file in files:
if hasattr(file, 'name'):
file_paths.append(file.name)
else:
file_paths.append(str(file))
# Validate file types for the selected parser
for file_path in file_paths:
is_valid, error_msg = validate_file_for_parser(file_path, parser_name)
if not is_valid:
logger.error(f"File validation error: {error_msg}")
return f"Error: {error_msg}", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
logger.info(f"Starting conversion of {len(file_paths)} file(s) with cancellation flag cleared")
# Start the conversion in a separate thread
thread, results = run_conversion_thread_multi(file_paths, parser_name, ocr_method_name, output_format, processing_type)
# Start the monitoring thread
monitor_thread = threading.Thread(target=monitor_cancellation)
monitor_thread.daemon = True
monitor_thread.start()
# Wait for the thread to complete or be cancelled
while thread.is_alive():
# Check if cancellation was requested
if conversion_cancelled.is_set():
logger.info("Cancellation detected, waiting for thread to finish")
# Give the thread a chance to clean up
thread.join(timeout=0.5)
if thread.is_alive():
logger.warning("Thread did not finish within timeout")
return "Conversion cancelled.", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
# Sleep briefly to avoid busy waiting
time.sleep(0.1)
# Thread has completed, check results
if results["error"]:
return f"Error: {results['error']}", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
content = results["content"]
download_file = results["download_file"]
# If conversion returned a cancellation message
if content == "Conversion cancelled.":
logger.info("Converter returned cancellation message")
return content, None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
# Format the content based on parser type
if "GOT-OCR" in parser_name:
# For GOT-OCR, display as LaTeX
formatted_content = format_latex_content(str(content))
html_output = f"<div class='output-container'>{formatted_content}</div>"
else:
# For other parsers, display as Markdown
formatted_content = format_markdown_content(str(content))
html_output = f"<div class='output-container'>{formatted_content}</div>"
logger.info("Conversion completed successfully")
# Auto-ingest the converted document for RAG
try:
# Read original file content for proper deduplication hashing
original_file_content = None
if file_path and Path(file_path).exists():
try:
with open(file_path, 'rb') as f:
original_file_content = f.read().decode('utf-8', errors='ignore')
except Exception as e:
logger.warning(f"Could not read original file content: {e}")
conversion_result = {
"markdown_content": content,
"original_filename": Path(file_path).name if file_path else "unknown",
"conversion_method": parser_name,
"file_size": Path(file_path).stat().st_size if file_path and Path(file_path).exists() else 0,
"conversion_time": 0, # Could be tracked if needed
"original_file_content": original_file_content
}
success, ingestion_msg, stats = document_ingestion_service.ingest_from_conversion_result(conversion_result)
if success:
logger.info(f"Document auto-ingested for RAG: {ingestion_msg}")
else:
logger.warning(f"Document ingestion failed: {ingestion_msg}")
except Exception as e:
logger.error(f"Error during auto-ingestion: {e}")
return html_output, download_file, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
def handle_chat_message(message, history):
"""Handle a new chat message with streaming response."""
if not message or not message.strip():
return "", history, gr.update()
try:
# Add user message to history
history = history or []
history.append({"role": "user", "content": message})
# Add assistant message placeholder
history.append({"role": "assistant", "content": ""})
# Get response from RAG service
response_text = ""
for chunk in rag_chat_service.chat_stream(message):
response_text += chunk
# Update the last message in history with the current response
history[-1]["content"] = response_text
# Update status in real-time during streaming
updated_status = get_chat_status()
yield "", history, updated_status
logger.info(f"Chat response completed for message: {message[:50]}...")
# Final status update after message completion
final_status = get_chat_status()
yield "", history, final_status
except Exception as e:
error_msg = f"Error generating response: {str(e)}"
logger.error(error_msg)
if history and len(history) > 0:
history[-1]["content"] = f"❌ {error_msg}"
else:
history = [
{"role": "user", "content": message},
{"role": "assistant", "content": f"❌ {error_msg}"}
]
# Update status even on error
error_status = get_chat_status()
yield "", history, error_status
def start_new_chat_session():
"""Start a new chat session."""
try:
session_id = rag_chat_service.start_new_session()
logger.info(f"Started new chat session: {session_id}")
return [], f"βœ… New chat session started: {session_id}"
except Exception as e:
error_msg = f"Error starting new session: {str(e)}"
logger.error(error_msg)
return [], f"❌ {error_msg}"
def handle_clear_all_data():
"""Handle clearing all RAG data (vector store + chat history)."""
try:
# Clear all data using the data clearing service
success, message, stats = data_clearing_service.clear_all_data()
if success:
# Reset chat session after clearing data
session_id = rag_chat_service.start_new_session()
# Get updated status
updated_status = get_chat_status()
# Create success message with stats
if stats.get("total_cleared_documents", 0) > 0 or stats.get("total_cleared_files", 0) > 0:
clear_msg = f"βœ… {message}"
session_msg = f"πŸ†• Started new session: {session_id}"
combined_msg = f'{clear_msg}<br/><div class="session-info">{session_msg}</div>'
else:
combined_msg = f'ℹ️ {message}<br/><div class="session-info">πŸ†• Started new session: {session_id}</div>'
logger.info(f"Data cleared successfully: {message}")
return [], combined_msg, updated_status
else:
error_msg = f"❌ {message}"
logger.error(f"Data clearing failed: {message}")
# Still get updated status even on error
updated_status = get_chat_status()
return None, f'<div class="session-info">{error_msg}</div>', updated_status
except Exception as e:
error_msg = f"Error clearing data: {str(e)}"
logger.error(error_msg)
# Get current status
current_status = get_chat_status()
return None, f'<div class="session-info">❌ {error_msg}</div>', current_status
def handle_query_search(query, method, k_value):
"""Handle query search and return formatted results."""
if not query or not query.strip():
return """
<div class="ranker-container">
<div class="ranker-placeholder">
<h3>πŸ” Query Ranker</h3>
<p>Enter a search query to find relevant document chunks with similarity scores.</p>
</div>
</div>
"""
try:
logger.info(f"Query search: '{query[:50]}...' using method: {method}")
# Get results based on method
results = []
if method == "similarity":
retriever = vector_store_manager.get_retriever("similarity", {"k": k_value})
docs = retriever.invoke(query)
# Try to get actual similarity scores
try:
vector_store = vector_store_manager.get_vector_store()
if hasattr(vector_store, 'similarity_search_with_score'):
docs_with_scores = vector_store.similarity_search_with_score(query, k=k_value)
for i, (doc, score) in enumerate(docs_with_scores):
similarity_score = max(0, 1 - score) if score is not None else 0.8
results.append(_format_ranker_result(doc, similarity_score, i + 1))
else:
# Fallback without scores
for i, doc in enumerate(docs):
score = 0.85 - (i * 0.05)
results.append(_format_ranker_result(doc, score, i + 1))
except Exception as e:
logger.warning(f"Could not get similarity scores: {e}")
for i, doc in enumerate(docs):
score = 0.85 - (i * 0.05)
results.append(_format_ranker_result(doc, score, i + 1))
elif method == "mmr":
retriever = vector_store_manager.get_retriever("mmr", {"k": k_value, "fetch_k": k_value * 2, "lambda_mult": 0.5})
docs = retriever.invoke(query)
for i, doc in enumerate(docs):
results.append(_format_ranker_result(doc, None, i + 1)) # No score for MMR
elif method == "bm25":
retriever = vector_store_manager.get_bm25_retriever(k=k_value)
docs = retriever.invoke(query)
for i, doc in enumerate(docs):
results.append(_format_ranker_result(doc, None, i + 1)) # No score for BM25
elif method == "hybrid":
retriever = vector_store_manager.get_hybrid_retriever(k=k_value, semantic_weight=0.7, keyword_weight=0.3)
docs = retriever.invoke(query)
# Explicitly limit results to k_value since EnsembleRetriever may return more
docs = docs[:k_value]
for i, doc in enumerate(docs):
results.append(_format_ranker_result(doc, None, i + 1)) # No score for Hybrid
return _format_ranker_results_html(results, query, method)
except Exception as e:
error_msg = f"Error during search: {str(e)}"
logger.error(error_msg)
return f"""
<div class="ranker-container">
<div class="ranker-error">
<h3>❌ Search Error</h3>
<p>{error_msg}</p>
<p class="error-hint">Please check if documents are uploaded and the system is ready.</p>
</div>
</div>
"""
def _format_ranker_result(doc, score, rank):
"""Format a single document result for the ranker."""
metadata = doc.metadata or {}
# Extract metadata
source = metadata.get("source", "Unknown Document")
page = metadata.get("page", "N/A")
chunk_id = metadata.get("chunk_id", f"chunk_{rank}")
# Content length indicator
content_length = len(doc.page_content)
if content_length < 200:
length_indicator = "πŸ“„ Short"
elif content_length < 500:
length_indicator = "πŸ“„ Medium"
else:
length_indicator = "πŸ“„ Long"
# Rank-based confidence levels (applies to all methods)
if rank <= 3:
confidence = "High"
confidence_color = "#22c55e"
confidence_icon = "🟒"
elif rank <= 6:
confidence = "Medium"
confidence_color = "#f59e0b"
confidence_icon = "🟑"
else:
confidence = "Low"
confidence_color = "#ef4444"
confidence_icon = "πŸ”΄"
result = {
"rank": rank,
"content": doc.page_content,
"source": source,
"page": page,
"chunk_id": chunk_id,
"length_indicator": length_indicator,
"has_score": score is not None,
"confidence": confidence,
"confidence_color": confidence_color,
"confidence_icon": confidence_icon
}
# Only add score if we have a real score (similarity search only)
if score is not None:
result["score"] = round(score, 3)
return result
def _format_ranker_results_html(results, query, method):
"""Format search results as HTML."""
if not results:
return """
<div class="ranker-container">
<div class="ranker-no-results">
<h3>πŸ” No Results Found</h3>
<p>No relevant documents found for your query.</p>
<p class="no-results-hint">Try different keywords or check if documents are uploaded.</p>
</div>
</div>
"""
# Method display names
method_labels = {
"similarity": "🎯 Similarity Search",
"mmr": "πŸ”€ MMR (Diverse)",
"bm25": "πŸ” BM25 (Keywords)",
"hybrid": "πŸ”— Hybrid (Recommended)"
}
method_display = method_labels.get(method, method)
# Start building HTML
html_parts = [f"""
<div class="ranker-container">
<div class="ranker-header">
<div class="ranker-title">
<h3>πŸ” Search Results</h3>
<div class="query-display">"{query}"</div>
</div>
<div class="ranker-meta">
<span class="method-badge">{method_display}</span>
<span class="result-count">{len(results)} results</span>
</div>
</div>
"""]
# Add results
for result in results:
rank_emoji = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"][result["rank"] - 1] if result["rank"] <= 3 else f"#{result['rank']}"
# Escape content for safe HTML inclusion and JavaScript
escaped_content = result['content'].replace('"', '&quot;').replace("'", "&#39;").replace('\n', '\\n')
# Build score info - always show confidence, only show score for similarity search
score_info_parts = [f"""
<span class="confidence-badge" style="color: {result['confidence_color']}">
{result['confidence_icon']} {result['confidence']}
</span>"""]
# Only add score value if we have real scores (similarity search)
if result.get('has_score', False):
score_info_parts.append(f'<span class="score-value">🎯 {result["score"]}</span>')
score_info_html = f"""
<div class="score-info">
{''.join(score_info_parts)}
</div>"""
html_parts.append(f"""
<div class="result-card">
<div class="result-header">
<div class="rank-info">
<span class="rank-badge">{rank_emoji} Rank {result['rank']}</span>
<span class="source-info">πŸ“„ {result['source']}</span>
{f"<span class='page-info'>Page {result['page']}</span>" if result['page'] != 'N/A' else ""}
<span class="length-info">{result['length_indicator']}</span>
</div>
{score_info_html}
</div>
<div class="result-content">
<div class="content-text">{result['content']}</div>
</div>
</div>
""")
html_parts.append("</div>")
return "".join(html_parts)
def get_ranker_status():
"""Get current ranker system status."""
try:
# Get collection info
collection_info = vector_store_manager.get_collection_info()
document_count = collection_info.get("document_count", 0)
# Get available methods
available_methods = ["similarity", "mmr", "bm25", "hybrid"]
# Check if system is ready
ingestion_status = document_ingestion_service.get_ingestion_status()
system_ready = ingestion_status.get('system_ready', False)
status_html = f"""
<div class="status-card">
<div class="status-header">
<h3>πŸ” Query Ranker Status</h3>
<div class="status-indicator {'status-ready' if system_ready else 'status-not-ready'}">
{'🟒 READY' if system_ready else 'πŸ”΄ NOT READY'}
</div>
</div>
<div class="status-grid">
<div class="status-item">
<div class="status-label">Available Documents</div>
<div class="status-value">{document_count}</div>
</div>
<div class="status-item">
<div class="status-label">Retrieval Methods</div>
<div class="status-value">{len(available_methods)}</div>
</div>
<div class="status-item">
<div class="status-label">Vector Store</div>
<div class="status-value">{'Ready' if system_ready else 'Not Ready'}</div>
</div>
</div>
<div class="ranker-methods">
<div class="methods-label">Available Methods:</div>
<div class="methods-list">
<span class="method-tag">🎯 Similarity</span>
<span class="method-tag">πŸ”€ MMR</span>
<span class="method-tag">πŸ” BM25</span>
<span class="method-tag">πŸ”— Hybrid</span>
</div>
</div>
</div>
"""
return status_html
except Exception as e:
error_msg = f"Error getting ranker status: {str(e)}"
logger.error(error_msg)
return f"""
<div class="status-card status-error">
<div class="status-header">
<h3>❌ System Error</h3>
</div>
<p class="error-message">{error_msg}</p>
</div>
"""
def get_chat_status():
"""Get current chat system status."""
try:
# Check ingestion status
ingestion_status = document_ingestion_service.get_ingestion_status()
# Check usage stats
usage_stats = rag_chat_service.get_usage_stats()
# Get data status for additional context
data_status = data_clearing_service.get_data_status()
# Modern status card design with better styling
status_html = f"""
<div class="status-card">
<div class="status-header">
<h3>πŸ’¬ Chat System Status</h3>
<div class="status-indicator {'status-ready' if ingestion_status.get('system_ready', False) else 'status-not-ready'}">
{'🟒 READY' if ingestion_status.get('system_ready', False) else 'πŸ”΄ NOT READY'}
</div>
</div>
<div class="status-grid">
<div class="status-item">
<div class="status-label">Vector Store Docs</div>
<div class="status-value">{data_status.get('vector_store', {}).get('document_count', 0)}</div>
</div>
<div class="status-item">
<div class="status-label">Chat History Files</div>
<div class="status-value">{data_status.get('chat_history', {}).get('file_count', 0)}</div>
</div>
<div class="status-item">
<div class="status-label">Session Usage</div>
<div class="status-value">{usage_stats.get('session_messages', 0)}/{usage_stats.get('session_limit', 50)}</div>
</div>
<div class="status-item">
<div class="status-label">Environment</div>
<div class="status-value">{'HF Space' if data_status.get('environment') == 'hf_space' else 'Local'}</div>
</div>
</div>
<div class="status-services">
<div class="service-status {'service-ready' if ingestion_status.get('embedding_model_available', False) else 'service-error'}">
<span class="service-icon">🧠</span>
<span>Embedding Model</span>
<span class="service-indicator">{'βœ…' if ingestion_status.get('embedding_model_available', False) else '❌'}</span>
</div>
<div class="service-status {'service-ready' if ingestion_status.get('vector_store_available', False) else 'service-error'}">
<span class="service-icon">πŸ—„οΈ</span>
<span>Vector Store</span>
<span class="service-indicator">{'βœ…' if ingestion_status.get('vector_store_available', False) else '❌'}</span>
</div>
</div>
</div>
"""
return status_html
except Exception as e:
error_msg = f"Error getting chat status: {str(e)}"
logger.error(error_msg)
return f"""
<div class="status-card status-error">
<div class="status-header">
<h3>❌ System Error</h3>
</div>
<p class="error-message">{error_msg}</p>
</div>
"""
def create_ui():
with gr.Blocks(css="""
/* Global styles */
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
/* Document converter styles */
.output-container {
max-height: 420px;
overflow-y: auto;
border: 1px solid #ddd;
padding: 10px;
}
.gradio-container .prose {
overflow: visible;
}
.processing-controls {
display: flex;
justify-content: center;
gap: 10px;
margin-top: 10px;
}
.provider-options-row {
margin-top: 15px;
margin-bottom: 15px;
}
/* Chat Tab Styles - Complete redesign */
.chat-tab-container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.chat-header {
text-align: center;
margin-bottom: 30px;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 15px;
color: white;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
.chat-header h2 {
margin: 0;
font-size: 1.8em;
font-weight: 600;
}
.chat-header p {
margin: 10px 0 0 0;
opacity: 0.9;
font-size: 1.1em;
}
/* Status Card Styling */
.status-card {
background: #ffffff;
border: 1px solid #e1e5e9;
border-radius: 12px;
padding: 20px;
margin-bottom: 25px;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
transition: all 0.3s ease;
}
.status-card:hover {
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
}
.status-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 2px solid #f0f2f5;
}
.status-header h3 {
margin: 0;
color: #2c3e50;
font-size: 1.3em;
font-weight: 600;
}
.status-indicator {
padding: 8px 16px;
border-radius: 25px;
font-weight: 600;
font-size: 0.9em;
letter-spacing: 0.5px;
}
.status-ready {
background: #d4edda;
color: #155724;
border: 1px solid #c3e6cb;
}
.status-not-ready {
background: #f8d7da;
color: #721c24;
border: 1px solid #f5c6cb;
}
.status-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 15px;
margin-bottom: 20px;
}
.status-item {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
text-align: center;
border: 1px solid #e9ecef;
}
.status-label {
font-size: 0.85em;
color: #6c757d;
margin-bottom: 5px;
font-weight: 500;
}
.status-value {
font-size: 1.4em;
font-weight: 700;
color: #495057;
}
.status-services {
display: flex;
gap: 15px;
flex-wrap: wrap;
}
.service-status {
display: flex;
align-items: center;
gap: 8px;
padding: 10px 15px;
border-radius: 8px;
font-weight: 500;
flex: 1;
min-width: 200px;
color: #2c3e50 !important;
}
.service-status span {
color: #2c3e50 !important;
}
.service-ready {
background: #d4edda;
color: #2c3e50 !important;
border: 1px solid #c3e6cb;
}
.service-ready span {
color: #2c3e50 !important;
}
.service-error {
background: #f8d7da;
color: #2c3e50 !important;
border: 1px solid #f5c6cb;
}
.service-error span {
color: #2c3e50 !important;
}
.service-icon {
font-size: 1.2em;
}
.service-indicator {
margin-left: auto;
}
.status-error {
border-color: #dc3545;
background: #f8d7da;
}
.error-message {
color: #721c24;
margin: 0;
font-weight: 500;
}
/* Control buttons styling */
.control-buttons {
display: flex;
gap: 12px;
justify-content: flex-end;
margin-bottom: 25px;
}
.control-btn {
padding: 10px 20px;
border-radius: 8px;
font-weight: 500;
transition: all 0.3s ease;
border: none;
cursor: pointer;
}
.btn-refresh {
background: #17a2b8;
color: white;
}
.btn-refresh:hover {
background: #138496;
transform: translateY(-1px);
}
.btn-new-session {
background: #28a745;
color: white;
}
.btn-new-session:hover {
background: #218838;
transform: translateY(-1px);
}
.btn-clear-data {
background: #dc3545;
color: white;
}
.btn-clear-data:hover {
background: #c82333;
transform: translateY(-1px);
}
.btn-primary {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.btn-primary:hover {
transform: translateY(-1px);
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
/* Chat interface styling */
.chat-main-container {
background: #ffffff;
border-radius: 15px;
box-shadow: 0 4px 20px rgba(0,0,0,0.08);
overflow: hidden;
margin-bottom: 25px;
}
.chat-container {
background: #ffffff;
border-radius: 12px;
border: 1px solid #e1e5e9;
overflow: hidden;
}
/* Custom chatbot styling */
.gradio-chatbot {
border: none !important;
background: #ffffff;
}
.gradio-chatbot .message {
padding: 15px 20px;
margin: 10px;
border-radius: 12px;
}
.gradio-chatbot .message.user {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
margin-left: 50px;
}
.gradio-chatbot .message.assistant {
background: #f8f9fa;
border: 1px solid #e9ecef;
margin-right: 50px;
}
/* Input area styling */
.chat-input-container {
background: #ffffff;
padding: 20px;
border-top: 1px solid #e1e5e9;
border-radius: 0 0 15px 15px;
}
.input-row {
display: flex;
gap: 12px;
align-items: center;
}
.message-input {
flex: 1;
border: 2px solid #e1e5e9;
border-radius: 25px;
padding: 12px 20px;
font-size: 1em;
transition: all 0.3s ease;
resize: none;
max-height: 120px;
min-height: 48px;
}
.message-input:focus {
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
outline: none;
}
.send-button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
border-radius: 12px;
padding: 12px 24px;
min-width: 80px;
height: 48px;
margin-right: 10px;
cursor: pointer;
transition: all 0.3s ease;
display: flex;
align-items: center;
justify-content: center;
font-size: 1em;
font-weight: 600;
letter-spacing: 0.5px;
}
.send-button:hover {
transform: scale(1.05);
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
/* Session info styling */
.session-info {
background: #e7f3ff;
border: 1px solid #b3d9ff;
border-radius: 8px;
padding: 15px;
color: #0056b3;
font-weight: 500;
text-align: center;
}
/* Responsive design */
@media (max-width: 768px) {
.chat-tab-container {
padding: 10px;
}
.status-grid {
grid-template-columns: repeat(2, 1fr);
}
.service-status {
min-width: 100%;
}
.control-buttons {
flex-direction: column;
gap: 8px;
}
.gradio-chatbot .message.user {
margin-left: 20px;
}
.gradio-chatbot .message.assistant {
margin-right: 20px;
}
}
/* Query Ranker Styles */
.ranker-container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.ranker-placeholder {
text-align: center;
padding: 40px;
background: #f8f9fa;
border-radius: 12px;
border: 1px solid #e9ecef;
color: #6c757d;
}
.ranker-placeholder h3 {
color: #495057;
margin-bottom: 10px;
}
.ranker-error {
text-align: center;
padding: 30px;
background: #f8d7da;
border: 1px solid #f5c6cb;
border-radius: 12px;
color: #721c24;
}
.ranker-error h3 {
margin-bottom: 15px;
}
.error-hint {
font-style: italic;
margin-top: 10px;
opacity: 0.8;
}
.ranker-no-results {
text-align: center;
padding: 40px;
background: #ffffff;
border: 1px solid #e1e5e9;
border-radius: 12px;
color: #6c757d;
}
.ranker-no-results h3 {
color: #495057;
margin-bottom: 15px;
}
.no-results-hint {
font-style: italic;
margin-top: 10px;
opacity: 0.8;
}
.ranker-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 15px;
margin-bottom: 25px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
.ranker-title h3 {
margin: 0 0 10px 0;
font-size: 1.4em;
font-weight: 600;
}
.query-display {
font-size: 1.1em;
opacity: 0.9;
font-style: italic;
margin-bottom: 15px;
}
.ranker-meta {
display: flex;
gap: 15px;
align-items: center;
flex-wrap: wrap;
}
.method-badge {
background: rgba(255, 255, 255, 0.2);
padding: 6px 12px;
border-radius: 20px;
font-weight: 500;
font-size: 0.9em;
}
.result-count {
background: rgba(255, 255, 255, 0.15);
padding: 6px 12px;
border-radius: 20px;
font-weight: 500;
font-size: 0.9em;
}
.result-card {
background: #ffffff;
border: 1px solid #e1e5e9;
border-radius: 12px;
margin-bottom: 20px;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
transition: all 0.3s ease;
overflow: hidden;
}
.result-card:hover {
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
transform: translateY(-2px);
}
.result-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 15px 20px;
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
}
.rank-info {
display: flex;
gap: 10px;
align-items: center;
flex-wrap: wrap;
}
.rank-badge {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 4px 10px;
border-radius: 15px;
font-weight: 600;
font-size: 0.85em;
}
.source-info {
background: #e9ecef;
color: #495057;
padding: 4px 8px;
border-radius: 10px;
font-size: 0.85em;
font-weight: 500;
}
.page-info {
background: #d1ecf1;
color: #0c5460;
padding: 4px 8px;
border-radius: 10px;
font-size: 0.85em;
}
.length-info {
background: #f8f9fa;
color: #6c757d;
padding: 4px 8px;
border-radius: 10px;
font-size: 0.85em;
}
.score-info {
display: flex;
gap: 10px;
align-items: center;
}
.confidence-badge {
padding: 4px 8px;
border-radius: 10px;
font-weight: 600;
font-size: 0.85em;
}
.score-value {
background: #2c3e50;
color: white;
padding: 6px 12px;
border-radius: 15px;
font-weight: 600;
font-size: 0.9em;
}
.result-content {
padding: 20px;
}
.content-text {
line-height: 1.6;
color: #2c3e50;
border-left: 3px solid #667eea;
padding-left: 15px;
background: #f8f9fa;
padding: 15px;
border-radius: 0 8px 8px 0;
max-height: 300px;
overflow-y: auto;
}
.result-actions {
display: flex;
gap: 10px;
padding: 15px 20px;
background: #f8f9fa;
border-top: 1px solid #e9ecef;
}
.action-btn {
padding: 8px 16px;
border: none;
border-radius: 8px;
font-weight: 500;
cursor: pointer;
transition: all 0.3s ease;
font-size: 0.9em;
display: flex;
align-items: center;
gap: 5px;
}
.copy-btn {
background: #17a2b8;
color: white;
}
.copy-btn:hover {
background: #138496;
transform: translateY(-1px);
}
.info-btn {
background: #6c757d;
color: white;
}
.info-btn:hover {
background: #5a6268;
transform: translateY(-1px);
}
.ranker-methods {
margin-top: 20px;
padding-top: 15px;
border-top: 1px solid #e9ecef;
}
.methods-label {
font-weight: 600;
color: #495057;
margin-bottom: 10px;
font-size: 0.9em;
}
.methods-list {
display: flex;
gap: 8px;
flex-wrap: wrap;
}
.method-tag {
background: #e9ecef;
color: #495057;
padding: 4px 10px;
border-radius: 12px;
font-size: 0.8em;
font-weight: 500;
}
/* Ranker controls styling */
.ranker-controls {
background: #ffffff;
border: 1px solid #e1e5e9;
border-radius: 12px;
padding: 20px;
margin-bottom: 25px;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
}
.ranker-input-row {
display: flex;
gap: 15px;
align-items: end;
margin-bottom: 15px;
}
.ranker-query-input {
flex: 1;
border: 2px solid #e1e5e9;
border-radius: 25px;
padding: 12px 20px;
font-size: 1em;
transition: all 0.3s ease;
}
.ranker-query-input:focus {
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
outline: none;
}
.ranker-search-btn {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
border-radius: 12px;
padding: 12px 24px;
min-width: 100px;
cursor: pointer;
transition: all 0.3s ease;
font-weight: 600;
font-size: 1em;
}
.ranker-search-btn:hover {
transform: scale(1.05);
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
.ranker-options-row {
display: flex;
gap: 15px;
align-items: center;
}
/* Responsive design for ranker */
@media (max-width: 768px) {
.ranker-container {
padding: 10px;
}
.ranker-input-row {
flex-direction: column;
gap: 10px;
}
.ranker-options-row {
flex-direction: column;
gap: 10px;
align-items: stretch;
}
.ranker-meta {
justify-content: center;
}
.rank-info {
flex-direction: column;
gap: 5px;
align-items: flex-start;
}
.result-header {
flex-direction: column;
gap: 10px;
align-items: flex-start;
}
.score-info {
align-self: flex-end;
}
.result-actions {
flex-direction: column;
gap: 8px;
}
}
""") as demo:
# Modern title with better styling
gr.Markdown("""
# πŸš€ Markit
## Document to Markdown Converter with RAG Chat
""")
with gr.Tabs():
# Document Converter Tab
with gr.TabItem("πŸ“„ Document Converter"):
with gr.Column(elem_classes=["chat-tab-container"]):
# Modern header matching other tabs
gr.HTML("""
<div class="chat-header">
<h2>πŸ“„ Document Converter</h2>
<p>Convert documents to Markdown format with advanced OCR and AI processing</p>
</div>
""")
# State to track if cancellation is requested
cancel_requested = gr.State(False)
# State to store the conversion thread
conversion_thread = gr.State(None)
# State to store the output format (fixed to Markdown)
output_format_state = gr.State("Markdown")
# Multi-file input (supports single and multiple files)
files_input = gr.Files(
label="Upload Document(s) - Single file or up to 5 files (20MB max combined)",
file_count="multiple",
file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".webp", ".docx", ".doc", ".pptx", ".ppt", ".xlsx", ".xls", ".txt", ".md", ".html", ".htm"]
)
# Processing type selector (visible only for multiple files)
processing_type_selector = gr.Radio(
choices=["combined", "individual", "summary", "comparison"],
value="combined",
label="Multi-Document Processing Type",
info="How to process multiple documents together",
visible=False
)
# Status text to show file count and processing mode
file_status_text = gr.HTML(
value="<div style='color: #666; font-style: italic;'>Upload documents to begin</div>",
label=""
)
# Provider and OCR options below the file input
with gr.Row(elem_classes=["provider-options-row"]):
with gr.Column(scale=1):
parser_names = ParserRegistry.get_parser_names()
# Make MarkItDown the default parser if available
default_parser = next((p for p in parser_names if p == "MarkItDown"), parser_names[0] if parser_names else "PyPdfium")
provider_dropdown = gr.Dropdown(
label="Provider",
choices=parser_names,
value=default_parser,
interactive=True
)
with gr.Column(scale=1):
default_ocr_options = ParserRegistry.get_ocr_options(default_parser)
default_ocr = default_ocr_options[0] if default_ocr_options else "No OCR"
ocr_dropdown = gr.Dropdown(
label="OCR Options",
choices=default_ocr_options,
value=default_ocr,
interactive=True
)
# Processing controls row with consistent styling
with gr.Row(elem_classes=["control-buttons"]):
convert_button = gr.Button("πŸš€ Convert", elem_classes=["control-btn", "btn-primary"])
cancel_button = gr.Button("⏹️ Cancel", elem_classes=["control-btn", "btn-clear-data"], visible=False)
# Simple output container with just one scrollbar
file_display = gr.HTML(
value="<div class='output-container'></div>",
label="Converted Content"
)
file_download = gr.File(label="Download File")
# Event handlers for document converter
# Update UI when files are uploaded/changed
files_input.change(
fn=update_ui_for_file_count,
inputs=[files_input],
outputs=[processing_type_selector, file_status_text]
)
provider_dropdown.change(
lambda p: gr.Dropdown(
choices=["Plain Text", "Formatted Text"] if "GOT-OCR" in p else ParserRegistry.get_ocr_options(p),
value="Plain Text" if "GOT-OCR" in p else (ParserRegistry.get_ocr_options(p)[0] if ParserRegistry.get_ocr_options(p) else None)
),
inputs=[provider_dropdown],
outputs=[ocr_dropdown]
)
# Reset cancel flag when starting conversion
def start_conversion():
global conversion_cancelled
conversion_cancelled.clear()
logger.info("Starting conversion with cancellation flag cleared")
return gr.update(visible=False), gr.update(visible=True), False
# Set cancel flag and terminate thread when cancel button is clicked
def request_cancellation(thread):
global conversion_cancelled
conversion_cancelled.set()
logger.info("Cancel button clicked, cancellation flag set")
# Try to join the thread with a timeout
if thread is not None:
logger.info(f"Attempting to join conversion thread: {thread}")
thread.join(timeout=0.5)
if thread.is_alive():
logger.warning("Thread did not finish within timeout")
# Add immediate feedback to the user
return gr.update(visible=True), gr.update(visible=False), True, None
# Start conversion sequence
convert_button.click(
fn=start_conversion,
inputs=[],
outputs=[convert_button, cancel_button, cancel_requested],
queue=False # Execute immediately
).then(
fn=handle_convert,
inputs=[files_input, provider_dropdown, ocr_dropdown, output_format_state, processing_type_selector, cancel_requested],
outputs=[file_display, file_download, convert_button, cancel_button, conversion_thread]
)
# Handle cancel button click
cancel_button.click(
fn=request_cancellation,
inputs=[conversion_thread],
outputs=[convert_button, cancel_button, cancel_requested, conversion_thread],
queue=False # Execute immediately
)
# Chat Tab - Completely redesigned
with gr.TabItem("πŸ’¬ Chat with Documents"):
with gr.Column(elem_classes=["chat-tab-container"]):
# Modern header
gr.HTML("""
<div class="chat-header">
<h2>πŸ’¬ Chat with your converted documents</h2>
<p>Ask questions about your documents using advanced RAG technology</p>
</div>
""")
# Status section with modern design
status_display = gr.HTML(value=get_chat_status())
# Control buttons
with gr.Row(elem_classes=["control-buttons"]):
refresh_status_btn = gr.Button("πŸ”„ Refresh Status", elem_classes=["control-btn", "btn-refresh"])
new_session_btn = gr.Button("πŸ†• New Session", elem_classes=["control-btn", "btn-new-session"])
clear_data_btn = gr.Button("πŸ—‘οΈ Clear All Data", elem_classes=["control-btn", "btn-clear-data"], variant="stop")
# Main chat interface
with gr.Column(elem_classes=["chat-main-container"]):
chatbot = gr.Chatbot(
elem_classes=["chat-container"],
height=500,
show_label=False,
show_share_button=False,
bubble_full_width=False,
type="messages",
placeholder="Start a conversation by asking questions about your documents..."
)
# Input area
with gr.Row(elem_classes=["input-row"]):
msg_input = gr.Textbox(
placeholder="Ask questions about your documents...",
show_label=False,
scale=5,
lines=1,
max_lines=3,
elem_classes=["message-input"]
)
send_btn = gr.Button("Submit", elem_classes=["send-button"], scale=0)
# Session info with better styling
session_info = gr.HTML(
value='<div class="session-info">No active session - Click "New Session" to start</div>'
)
# Event handlers for chat
def clear_input():
return ""
# Send message when button clicked or Enter pressed
msg_input.submit(
fn=handle_chat_message,
inputs=[msg_input, chatbot],
outputs=[msg_input, chatbot, status_display]
)
send_btn.click(
fn=handle_chat_message,
inputs=[msg_input, chatbot],
outputs=[msg_input, chatbot, status_display]
)
# New session handler with improved feedback
def enhanced_new_session():
history, info = start_new_chat_session()
session_html = f'<div class="session-info">{info}</div>'
updated_status = get_chat_status()
return history, session_html, updated_status
new_session_btn.click(
fn=enhanced_new_session,
inputs=[],
outputs=[chatbot, session_info, status_display]
)
# Refresh status handler
refresh_status_btn.click(
fn=get_chat_status,
inputs=[],
outputs=[status_display]
)
# Clear all data handler
clear_data_btn.click(
fn=handle_clear_all_data,
inputs=[],
outputs=[chatbot, session_info, status_display]
)
# Query Ranker Tab
with gr.TabItem("πŸ” Query Ranker"):
with gr.Column(elem_classes=["ranker-container"]):
# Modern header
gr.HTML("""
<div class="chat-header">
<h2>πŸ” Query Ranker</h2>
<p>Search and rank document chunks with similarity scores</p>
</div>
""")
# Status section
ranker_status_display = gr.HTML(value=get_ranker_status())
# Control buttons
with gr.Row(elem_classes=["control-buttons"]):
refresh_ranker_status_btn = gr.Button("πŸ”„ Refresh Status", elem_classes=["control-btn", "btn-refresh"])
clear_results_btn = gr.Button("πŸ—‘οΈ Clear Results", elem_classes=["control-btn", "btn-clear-data"])
# Search controls
with gr.Column(elem_classes=["ranker-controls"]):
with gr.Row(elem_classes=["ranker-input-row"]):
query_input = gr.Textbox(
placeholder="Enter your search query...",
show_label=False,
elem_classes=["ranker-query-input"],
scale=4
)
search_btn = gr.Button("πŸ” Search", elem_classes=["ranker-search-btn"], scale=0)
with gr.Row(elem_classes=["ranker-options-row"]):
method_dropdown = gr.Dropdown(
choices=[
("🎯 Similarity Search", "similarity"),
("πŸ”€ MMR (Diverse)", "mmr"),
("πŸ” BM25 (Keywords)", "bm25"),
("πŸ”— Hybrid (Recommended)", "hybrid")
],
value="hybrid",
label="Retrieval Method",
scale=2
)
k_slider = gr.Slider(
minimum=1,
maximum=10,
value=5,
step=1,
label="Number of Results",
scale=1
)
# Results display
results_display = gr.HTML(
value=handle_query_search("", "hybrid", 5), # Initial placeholder
elem_classes=["ranker-results-container"]
)
# Event handlers for Query Ranker
def clear_ranker_results():
"""Clear the search results and reset to placeholder."""
return handle_query_search("", "hybrid", 5), ""
def refresh_ranker_status():
"""Refresh the ranker status display."""
return get_ranker_status()
# Search functionality
query_input.submit(
fn=handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
search_btn.click(
fn=handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
# Control button handlers
refresh_ranker_status_btn.click(
fn=refresh_ranker_status,
inputs=[],
outputs=[ranker_status_display]
)
clear_results_btn.click(
fn=clear_ranker_results,
inputs=[],
outputs=[results_display, query_input]
)
# Update results when method or k changes
method_dropdown.change(
fn=handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
k_slider.change(
fn=handle_query_search,
inputs=[query_input, method_dropdown, k_slider],
outputs=[results_display]
)
return demo
def launch_ui(server_name="0.0.0.0", server_port=7860, share=False):
demo = create_ui()
demo.launch(
server_name=server_name,
server_port=server_port,
root_path="",
show_error=True,
share=share
)