import json
import re

# Both marker spellings are supported. DOTALL lets a highlight span several
# lines, which happens in free-text (non-table) context chunks.
_HIGHLIGHT_PATTERNS = (
    re.compile(r'\[\[highlight_start\]\](.*?)\[\[highlight_end\]\]', re.DOTALL),
    re.compile(r'\[\[start_highlight\]\](.*?)\[\[end_highlight\]\]', re.DOTALL),
)
_HIGHLIGHT_REPLACEMENT = r'<span class="highlight">\1</span>'

# A backslash immediately followed by a single or double quote.
_ESCAPED_QUOTE_RE = re.compile(r"""\\(['"])""")

_DEFAULT_INSUFFICIENT_REASON = (
    "The context may not contain enough information to fully answer the "
    "question, or the question might be ambiguous. Models should ideally "
    "indicate this limitation or refuse to answer."
)


def debug_text(text, label="Text"):
    """Print a short diagnostic summary of *text* to stdout.

    Parameters:
    - text: The string being inspected.
    - label: Name shown in the banner line of the debug output.
    """
    print(f"\n--- DEBUG {label} ---")
    print(f"Length: {len(text)}")
    print(f"First 100 chars: {text[:100]}")
    print(f"Contains highlight_start: {'[[highlight_start]]' in text}")
    print(f"Contains start_highlight: {'[[start_highlight]]' in text}")
    print("-------------------------\n")


def clean_json_text(text):
    r"""Remove leftover backslash escapes in front of quotes.

    Text that went through JSON serialisation can still carry escaped
    quotes, e.g. ``the sky isn\'t falling``. Only ``\'`` and ``\"`` are
    rewritten (to ``'`` and ``"``); every other backslash sequence is left
    untouched so that things like ``\theta`` or Windows paths survive.

    Parameters:
    - text: The string to clean.

    Returns the cleaned string (the input itself when it has no backslash).
    """
    if '\\' not in text:
        return text
    return _ESCAPED_QUOTE_RE.sub(r'\1', text)


def process_highlights(text):
    """Convert highlight markers in *text* into HTML ``<span>`` elements.

    Both ``[[highlight_start]]...[[highlight_end]]`` and
    ``[[start_highlight]]...[[end_highlight]]`` are recognised, and the
    highlighted region may span multiple lines. Escaped quotes are cleaned
    first via ``clean_json_text``.

    Parameters:
    - text: Raw text possibly containing highlight markers.

    Returns the text with each marked region wrapped in
    ``<span class="highlight">``.
    """
    highlighted_text = clean_json_text(text)
    # Applied one after the other (not as a single alternation) so a region
    # of one marker style nested inside the other still yields nested spans.
    for pattern in _HIGHLIGHT_PATTERNS:
        highlighted_text = pattern.sub(_HIGHLIGHT_REPLACEMENT, highlighted_text)
    return highlighted_text


def process_table_with_highlights(markdown_table):
    """Render a markdown table that may contain highlight markers as HTML.

    Highlights are applied line by line so a marker pair can never swallow
    a row boundary, then the result is converted to an HTML table.

    Parameters:
    - markdown_table: The markdown table source.

    Returns the HTML produced by ``convert_markdown_table_to_html``.
    """
    lines = markdown_table.strip().split('\n')
    highlighted = '\n'.join(process_highlights(line) for line in lines)
    return convert_markdown_table_to_html(highlighted)


def _split_table_row(line):
    """Return the stripped cell texts of one pipe-delimited table row."""
    cells = line.split('|')
    # The text before the first pipe is never a cell; the text after the
    # last pipe is only a cell when the row has no closing pipe.
    cells = cells[1:-1] if line.strip().endswith('|') else cells[1:]
    return [cell.strip() for cell in cells]


def _table_row_html(line, cell_tag):
    """Render one table row as ``<tr>`` using *cell_tag* (``th``/``td``)."""
    cells = ''.join(
        f'<{cell_tag}>{process_highlights(cell)}</{cell_tag}>'
        for cell in _split_table_row(line)
    )
    return f'<tr>{cells}</tr>'


def convert_markdown_table_to_html(markdown_text):
    """Convert a markdown table to an HTML table.

    Only lines starting with ``|`` (after stripping) are treated as table
    rows; other lines are ignored. If the second row contains ``---`` the
    first row becomes the header and the second is skipped as separator;
    otherwise every row is a body row.

    Parameters:
    - markdown_text: The markdown source containing the table.

    Returns the HTML table, or the (quote-cleaned) input text unchanged
    when fewer than two table rows are present.
    """
    markdown_text = clean_json_text(markdown_text)

    table_lines = [
        line for line in markdown_text.strip().split('\n')
        if line.strip().startswith('|')
    ]
    if len(table_lines) < 2:
        # Not a proper table: hand the text back untouched.
        return markdown_text

    parts = ['<table class="md-table">']
    if '---' in table_lines[1]:
        parts.append('<thead>')
        parts.append(_table_row_html(table_lines[0], 'th'))
        parts.append('</thead>')
        body_lines = table_lines[2:]  # skip header and separator rows
    else:
        body_lines = table_lines

    parts.append('<tbody>')
    parts.extend(_table_row_html(line, 'td') for line in body_lines)
    parts.append('</tbody>')
    parts.append('</table>')
    return ''.join(parts)


def _insufficient_alert_html(reason):
    """Build the warning box shown when the context is insufficient."""
    reason_html = f"<p>{reason or _DEFAULT_INSUFFICIENT_REASON}</p>"
    return f"""
        <div class="insufficient-alert">
            <strong>
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
                    <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                    <line x1="12" y1="9" x2="12" y2="13"></line>
                    <line x1="12" y1="17" x2="12.01" y2="17"></line>
                </svg>
                Insufficient Context
            </strong>
            {reason_html}
        </div>
        """


def _context_item_html(context_text, extra_class=""):
    """Render one context chunk as a ``context-item`` div."""
    # A missing/None content is rendered as an empty item instead of
    # raising on the membership tests below.
    context_text = context_text or ''
    looks_like_table = '|' in context_text and (
        '\n|' in context_text or '\n-' in context_text
    )
    if looks_like_table:
        inner = process_table_with_highlights(context_text)
    else:
        inner = process_highlights(context_text)
    return f'<div class="context-item{extra_class}">{inner}</div>'


def get_context_html(example, show_full=False):
    """Format the context chunks of *example* as an HTML string.

    Prepends an alert when the example is flagged as having insufficient
    context, then renders each chunk (as a table when it looks like a
    markdown table, otherwise as highlighted text) inside a container div.

    Parameters:
    - example: The example data containing contexts. Keys read here:
      ``insufficient``, ``insufficient_reason``, ``full_contexts`` and
      ``contexts``; each context item is a dict with ``content`` and
      optionally ``is_primary``.
    - show_full: Boolean indicating whether to show full context. Falls
      back to ``contexts`` when ``full_contexts`` is missing or empty.

    Returns the assembled HTML string.
    """
    # NOTE(review): context text and insufficient_reason are interpolated
    # without HTML escaping; confirm these inputs are trusted.
    parts = []

    if example.get("insufficient", False):
        parts.append(
            _insufficient_alert_html(example.get("insufficient_reason", ""))
        )

    parts.append('<div class="context-items-container">')

    if show_full and example.get("full_contexts"):
        # Full view: every chunk, no primary-chunk styling.
        parts.extend(
            _context_item_html(item.get('content', ''))
            for item in example["full_contexts"]
        )
    elif example.get("contexts"):
        for item in example["contexts"]:
            extra_class = " primary-context" if item.get('is_primary', False) else ""
            parts.append(_context_item_html(item.get('content', ''), extra_class))
    else:
        parts.append(
            '<div class="context-item">No context available. '
            'Try toggling to full context view.</div>'
        )

    parts.append('</div>')
    return ''.join(parts)