"""Content formatting and rendering utilities for the Markit application.""" import markdown import json import base64 import html import logging from src.core.logging_config import get_logger logger = get_logger(__name__) def format_markdown_content(content): """Convert markdown content to HTML.""" if not content: return content # Convert the content to HTML using markdown library html_content = markdown.markdown(str(content), extensions=['tables']) return html_content def render_latex_to_html(latex_content): """Convert LaTeX content to HTML using Mathpix Markdown like GOT-OCR demo.""" # Clean up the content similar to GOT-OCR demo content = latex_content.strip() if content.endswith("<|im_end|>"): content = content[:-len("<|im_end|>")] # Fix unbalanced delimiters exactly like GOT-OCR demo right_num = content.count("\\right") left_num = content.count("\\left") if right_num != left_num: content = ( content.replace("\\left(", "(") .replace("\\right)", ")") .replace("\\left[", "[") .replace("\\right]", "]") .replace("\\left{", "{") .replace("\\right}", "}") .replace("\\left|", "|") .replace("\\right|", "|") .replace("\\left.", ".") .replace("\\right.", ".") ) # Process content like GOT-OCR demo: remove $ signs and replace quotes content = content.replace('"', "``").replace("$", "") # Split into lines and create JavaScript string like GOT-OCR demo outputs_list = content.split("\n") js_text_parts = [] for line in outputs_list: # Escape backslashes and add line break escaped_line = line.replace("\\", "\\\\") js_text_parts.append(f'"{escaped_line}\\n"') # Join with + like in GOT-OCR demo js_text = " + ".join(js_text_parts) # Create HTML using Mathpix Markdown like GOT-OCR demo html_content = f"""
{content}
{escaped_content}