# Hugging Face Space: Hoctar77/DocumentCheckerTool
# Space status at capture time: Sleeping
# File size: 62,631 bytes

import gradio as gr
import logging
import re
import json
import time
from typing import Dict, List, Any, Tuple, Optional
from dataclasses import dataclass
from functools import wraps
from docx import Document
import io
import os
import traceback
from datetime import datetime
import textwrap
from colorama import init, Fore, Style

# Core data structures
@dataclass
class DocumentCheckResult:
    """Structured result for document checks.

    The check methods in this file return one of these objects so callers can
    treat every check uniformly.
    """
    # True when the check found nothing to report; False when issues were found
    # or the input was rejected.
    success: bool
    # One dict per finding. The keys differ from check to check (for example
    # 'error', 'sentence', 'missing_headings').
    issues: List[Dict[str, Any]]
    # Optional extra context about what was examined; None when a check
    # supplies nothing beyond its issues.
    details: Optional[Dict[str, Any]] = None

def profile_performance(func):
    """Decorator that logs how long each call to ``func`` takes.

    The elapsed time is logged at INFO level. When the first positional
    argument exposes a ``logger`` attribute (the usual case for a decorated
    method, where it is ``self``), that logger is used; otherwise the
    module-level logger is used. The wrapped function's return value and
    exceptions pass through unchanged.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same signature and metadata as ``func``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measurement cannot go negative if
        # the system clock is adjusted during the call.
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.perf_counter() - start_time
            # Resolve the logger after the call, and tolerate calls made with
            # keyword arguments only (no args[0] to inspect).
            logger = getattr(args[0], 'logger', None) if args else None
            if logger is None:
                logger = logging.getLogger(__name__)
            logger.info(
                f"Performance: {func.__name__} took {elapsed:.4f} seconds"
            )
    return wrapper

class DocumentCheckerConfig:
    """Configuration management for document checks.

    Builds the effective configuration (built-in defaults, optionally
    overridden by a JSON file) and configures the module logger from it.

    Attributes:
        config: The merged configuration dictionary.
        logger: The module logger, configured from ``config['logging']``.
    """
    def __init__(self, config_path: Optional[str] = None):
        """Load the configuration and set up logging.

        Args:
            config_path: Optional path to a JSON file whose values override
                the defaults. A missing or unreadable file is ignored.
        """
        self.config = self._load_config(config_path)
        self.logger = self._setup_logger()

    def _load_config(self, config_path: Optional[str] = None) -> Dict[str, Any]:
        """Load configuration from JSON file or use default settings.

        Args:
            config_path: Optional path to a JSON override file.

        Returns:
            The default configuration with any user overrides merged in.
        """
        default_config = {
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            },
            "checks": {
                "acronyms": True,
                "terminology_check": True,
                "headings": True
            },
            "document_types": {
                "Advisory Circular": {
                    "required_headings": [
                        "Purpose.",
                        "Applicability.",
                        "Cancellation.",
                        "Related Material.",
                        "Definition of Key Terms."
                    ],
                    "skip_title_check": False
                },
                "Federal Register Notice": {
                    "required_headings": [
                        "Purpose of This Notice",
                        "Audience",
                        "Where can I Find This Notice"
                    ],
                    "skip_title_check": False
                },
                "Order": {
                    "required_headings": [
                        "Purpose of This Order.",
                        "Audience.",
                        "Where to Find This Order."
                    ],
                    "skip_title_check": False
                },
                "Policy Statement": {
                    "required_headings": [
                        "SUMMARY",
                        "CURRENT REGULATORY AND ADVISORY MATERIAL",
                        "RELEVANT PAST PRACTICE",
                        "POLICY",
                        "EFFECT OF POLICY",
                        "CONCLUSION"
                    ],
                    "skip_title_check": False
                },
                "Technical Standard Order": {
                    "required_headings": [
                        "PURPOSE.",
                        "APPLICABILITY.",
                        "REQUIREMENTS.",
                        "MARKING.",
                        "APPLICATION DATA REQUIREMENTS.",
                        "MANUFACTURER DATA REQUIREMENTS.",
                        "FURNISHED DATA REQUIREMENTS.",
                        "HOW TO GET REFERENCED DOCUMENTS."
                    ],
                    "skip_title_check": False
                },
                "Airworthiness Criteria": {
                    "required_headings": [],
                    "skip_title_check": True
                },
                "Deviation Memo": {
                    "required_headings": [],
                    "skip_title_check": True
                },
                "Exemption": {
                    "required_headings": [],
                    "skip_title_check": True
                },
                "Rule": {
                    "required_headings": [],
                    "skip_title_check": True
                },
                "Special Condition": {
                    "required_headings": [],
                    "skip_title_check": True
                },
                "Other": {
                    "required_headings": [],
                    "skip_title_check": True
                }
            }
        }

        if config_path and os.path.exists(config_path):
            try:
                # JSON is UTF-8 by specification; do not depend on the
                # platform's default encoding.
                with open(config_path, 'r', encoding='utf-8') as f:
                    user_config = json.load(f)
            except (ValueError, OSError) as e:
                # ValueError covers both malformed JSON and undecodable bytes.
                logging.warning(f"Error loading config: {e}. Using default config.")
            else:
                if isinstance(user_config, dict):
                    self._deep_merge(default_config, user_config)
                else:
                    # A JSON list or scalar at the root cannot be merged.
                    logging.warning(
                        "Error loading config: top-level JSON value must be an object. "
                        "Using default config."
                    )

        return default_config

    def _deep_merge(self, base: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge ``update`` into ``base`` in place.

        Nested dictionaries are merged key by key; any other value in
        ``update`` replaces the corresponding value in ``base``.

        Args:
            base: Dictionary that receives the merged values (mutated).
            update: Dictionary whose values take precedence.

        Returns:
            ``base``, for convenience.
        """
        for key, value in update.items():
            if isinstance(value, dict) and key in base and isinstance(base[key], dict):
                self._deep_merge(base[key], value)
            else:
                base[key] = value
        return base

    def _setup_logger(self) -> logging.Logger:
        """Set up and configure logging based on configuration.

        The console handler is attached to the module logger only once and is
        reused (and reconfigured) by later instances, so creating several
        configuration objects does not duplicate every log line.

        Returns:
            The configured module logger.
        """
        logger = logging.getLogger(__name__)

        level_name = str(self.config['logging']['level']).upper()
        log_level = getattr(logging, level_name, None)
        if not isinstance(log_level, int):
            # Unknown level name in the config: fall back instead of crashing.
            logging.warning(f"Unknown logging level '{level_name}'. Using INFO.")
            log_level = logging.INFO

        formatter = logging.Formatter(self.config['logging']['format'])

        # Find the handler a previous instance installed, if any.
        console_handler = next(
            (h for h in logger.handlers if getattr(h, '_document_checker_handler', False)),
            None
        )
        if console_handler is None:
            console_handler = logging.StreamHandler()
            console_handler._document_checker_handler = True
            logger.addHandler(console_handler)

        console_handler.setFormatter(formatter)
        console_handler.setLevel(log_level)
        logger.setLevel(log_level)
        return logger

class DocumentChecker:
    """Common foundation for checkers: holds the configuration manager and its logger."""
    def __init__(self, config_path: Optional[str] = None):
        """Create the configuration manager and reuse its logger.

        Args:
            config_path: Optional path to a JSON configuration file.
        """
        manager = DocumentCheckerConfig(config_path)
        self.config_manager = manager
        self.logger = manager.logger

    @staticmethod
    def validate_input(doc: List[str]) -> bool:
        """Return True only when ``doc`` is a non-empty list."""
        if not isinstance(doc, list):
            return False
        return len(doc) > 0

    @classmethod
    def extract_paragraphs(cls, doc_path: str) -> List[str]:
        """Return the text of every non-blank paragraph in the document.

        Any failure while opening or reading the document is logged and an
        empty list is returned instead.
        """
        collected: List[str] = []
        try:
            for para in Document(doc_path).paragraphs:
                text = para.text
                if text.strip():
                    collected.append(text)
        except Exception as e:
            logging.error(f"Error extracting paragraphs: {e}")
            return []
        return collected

class FAADocumentChecker(DocumentChecker):
    """Main document checker implementation."""
    def __init__(self, config_path: Optional[str] = None):
        """Initialise the checker by delegating all setup to the base class.

        Args:
            config_path: Optional path to a JSON configuration file.
        """
        super().__init__(config_path=config_path)

    @profile_performance
    def heading_title_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
        """Check that every heading required for ``doc_type`` is present.

        A paragraph counts as a heading only when, after stripping surrounding
        whitespace, it equals one of the configured required headings exactly.

        Args:
            doc: The document as a list of paragraph strings.
            doc_type: Key into the ``document_types`` configuration. An
                unknown type has no required headings and therefore passes.

        Returns:
            A DocumentCheckResult. On failure ``issues`` holds a single dict
            with ``missing_headings`` listed in configured order; ``details``
            reports the headings found and the headings required.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        checks = self.config_manager.config['document_types'].get(doc_type, {})
        required_headings = checks.get('required_headings', [])
        required_headings_set = set(required_headings)

        headings_found = [
            stripped
            for stripped in (para.strip() for para in doc)
            if stripped in required_headings_set
        ]
        found_set = set(headings_found)

        # Walk the configured list rather than a set difference so the report
        # is in a stable, human-meaningful order on every run.
        missing_headings = [
            heading for heading in dict.fromkeys(required_headings)
            if heading not in found_set
        ]
        issues = [{'missing_headings': missing_headings}] if missing_headings else []

        return DocumentCheckResult(
            success=not missing_headings,
            issues=issues,
            details={
                'found_headings': headings_found,
                'required_headings': required_headings
            }
        )

    @profile_performance
    def heading_title_period_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
        """Check if headings end with periods according to document type requirements.

        Headings are recognised by comparing text with any single trailing
        period removed, on both the paragraph and the configured heading.
        Matching on the exact configured text would only ever find headings
        that already have the right punctuation, so nothing could be reported.

        Args:
            doc: The document as a list of paragraph strings.
            doc_type: Document type; selects both the required headings and
                whether they must end with a period. Unknown types are
                treated as not requiring a period.

        Returns:
            A DocumentCheckResult whose ``issues`` list one entry per heading
            with a missing or unexpected period, and whose ``details`` record
            every heading that was examined.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        period_required = {
            "Advisory Circular": True,
            "Airworthiness Criteria": False,
            "Deviation Memo": False,
            "Exemption": False,
            "Federal Register Notice": False,
            "Order": True,
            "Policy Statement": False,
            "Rule": False,
            "Special Condition": False,
            "Technical Standard Order": True,
            "Other": False
        }

        should_have_period = period_required.get(doc_type, False)
        checks = self.config_manager.config['document_types'].get(doc_type, {})
        required_headings = checks.get('required_headings', [])

        def without_trailing_period(text: str) -> str:
            """Return ``text`` stripped, minus one trailing period if present."""
            text = text.strip()
            return text[:-1].rstrip() if text.endswith('.') else text

        required_bases = {without_trailing_period(heading) for heading in required_headings}

        issues = []
        checked_headings = []

        for para in doc:
            para_strip = para.strip()
            if not para_strip or without_trailing_period(para_strip) not in required_bases:
                continue

            ends_with_period = para_strip.endswith('.')
            checked_headings.append({
                'heading': para_strip,
                'has_period': ends_with_period,
                'needs_period': should_have_period
            })

            if should_have_period and not ends_with_period:
                issues.append({
                    'heading': para_strip,
                    'issue': 'missing_period',
                    'message': f"Heading should end with a period: '{para_strip}'"
                })
            elif not should_have_period and ends_with_period:
                issues.append({
                    'heading': para_strip,
                    'issue': 'unexpected_period',
                    'message': f"Heading should not end with a period: '{para_strip}'"
                })

        return DocumentCheckResult(
            success=len(issues) == 0,
            issues=issues,
            details={
                'document_type': doc_type,
                'periods_required': should_have_period,
                'checked_headings': checked_headings
            }
        )

    @profile_performance
    def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
        """Report acronyms that are used without a "Full Term (ACRONYM)" definition.

        Paragraphs that look like all-caps headings are skipped. For each
        remaining acronym that is not predefined, not a heading word and not
        yet defined, the first sentence containing it is recorded. A
        definition that appears later removes the recorded occurrence.

        Args:
            doc: The document as a list of paragraph strings.

        Returns:
            A DocumentCheckResult whose ``issues`` hold one
            ``{'acronym', 'sentence'}`` dict per undefined acronym.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        heading_words = {
            'INFORMATION', 'GENERAL', 'SUMMARY', 'INTRODUCTION', 'BACKGROUND', 
            'DISCUSSION', 'CONCLUSION', 'APPENDIX', 'CHAPTER', 'SECTION',
            'PURPOSE', 'APPLICABILITY', 'CANCELLATION', 'DEFINITION', 'REQUIREMENTS',
            'AUTHORITY', 'POLICY', 'SCOPE', 'RELATED', 'MATERIAL', 'DISTRIBUTION',
            'EXPLANATION', 'PROCEDURES', 'NOTE', 'WARNING', 'CAUTION', 'EXCEPTION',
            'GROUPS', 'PARTS', 'TABLE', 'FIGURE', 'REFERENCES', 'DEFINITIONS'
        }

        # Start from the acronyms that never need a definition.
        defined_acronyms = {
            'CFR', 'U.S.', 'USA', 'US', 'U.S.C', 'e.g.', 'i.e.', 'FAQ', 'No.', 'ZIP', 'PDF', 'SSN',
            'DC', 'MA', 'WA', 'TX', 'MO'
        }
        first_occurrences = {}

        defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
        acronym_pattern = re.compile(r'\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
        sentence_splitter = re.compile(r'(?<=[.!?])\s+')

        for paragraph in doc:
            tokens = paragraph.strip().split()
            looks_like_heading = (
                all(token.isupper() for token in tokens)
                and any(token in heading_words for token in tokens)
            )
            if looks_like_heading:
                continue

            # Definitions are registered before usages in the same paragraph
            # are examined.
            for _term, acronym in defined_pattern.findall(paragraph):
                defined_acronyms.add(acronym)
                first_occurrences.pop(acronym, None)

            for found in acronym_pattern.finditer(paragraph):
                acronym = found.group()

                if acronym in defined_acronyms or acronym in heading_words:
                    continue
                if not acronym.isalpha() or len(acronym) > 10:
                    continue
                if acronym in first_occurrences:
                    continue

                # Only the first sentence mentioning the acronym is considered.
                for sentence in sentence_splitter.split(paragraph):
                    if acronym not in sentence:
                        continue
                    heading_sentence = (
                        sentence.isupper()
                        and any(word in heading_words for word in sentence.split())
                    )
                    if not heading_sentence:
                        first_occurrences[acronym] = {
                            'acronym': acronym,
                            'sentence': sentence.strip()
                        }
                    break

        undefined_acronyms = list(first_occurrences.values())
        return DocumentCheckResult(success=not undefined_acronyms, issues=undefined_acronyms)

    @profile_performance
    def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
        """Check document terminology for consistency and preferred terms.

        Each sentence is scanned for discouraged terms (every occurrence is
        reported with its preferred replacement) and for prohibited phrases
        (at most one report per phrase per sentence).

        Args:
            doc: The document as a list of paragraph strings.

        Returns:
            A DocumentCheckResult whose ``issues`` contain
            ``incorrect_term`` and ``prohibited_phrase`` entries.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        # Compiled once, in reporting order.
        term_replacements = [
            (re.compile(r'\bUSC\b'), 'U.S.C.'),
            (re.compile(r'\bCFR Part\b'), 'CFR part'),
            # No trailing \b: after the final "." a word boundary exists only
            # when a word character follows, so "C.F.R. part" and a
            # sentence-final "C.F.R." could never match.
            (re.compile(r'\bC\.F\.R\.'), 'CFR'),
            (re.compile(r'\b14 CFR\s*§'), '14 CFR'),
            (re.compile(r'\bWe\b'), 'The FAA'),
            (re.compile(r'\bwe\b'), 'the FAA'),
            (re.compile(r'\bcancelled\b'), 'canceled'),
            (re.compile(r'\bshall\b'), 'must'),
            # "&" is not a word character, so \b&\b only matched when letters
            # touched both sides ("R&D") and missed the usual "A & B" form.
            (re.compile(r'&'), 'and'),
            (re.compile(r'\bflight crew\b'), 'flightcrew')
        ]

        prohibited_phrases = [
            re.compile(r'\babove\b', re.IGNORECASE),
            re.compile(r'\bbelow\b', re.IGNORECASE),
            re.compile(r'(?:^|(?<=[.!?]\s))There\s+(?:is|are)\b', re.IGNORECASE)
        ]

        sentence_splitter = re.compile(r'(?<=[.!?])\s+')
        issues = []

        for paragraph in doc:
            for sentence in sentence_splitter.split(paragraph):
                stripped_sentence = sentence.strip()

                for incorrect_pattern, correct_term in term_replacements:
                    for match in incorrect_pattern.finditer(sentence):
                        issues.append({
                            'type': 'incorrect_term',
                            'incorrect_term': match.group(),
                            'correct_term': correct_term,
                            'sentence': stripped_sentence
                        })

                for phrase_pattern in prohibited_phrases:
                    match = phrase_pattern.search(sentence)
                    if match:
                        issues.append({
                            'type': 'prohibited_phrase',
                            'phrase': match.group().strip(),
                            'sentence': stripped_sentence
                        })

        return DocumentCheckResult(success=len(issues) == 0, issues=issues)

    @profile_performance
    def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
        """Look for questionable uses of the section symbol (§).

        Four kinds of finding are collected across the whole document and
        reported as at most one issue entry each: sentences that begin with
        "§", "14 CFR §" citations, a single "§" followed by several section
        numbers joined by and/or/through, and "or" lists of sections.

        Args:
            doc: The document as a list of paragraph strings.

        Returns:
            A DocumentCheckResult with one issue dict per non-empty category.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        sentence_splitter = re.compile(r'(?<=[.!?])\s+')

        leading_symbol_sentences = []
        cfr_symbol_matches = []
        single_symbol_matches = []
        missing_symbol_matches = []

        # Each pattern feeds one result bucket; the order here is the order in
        # which matches from one paragraph are appended.
        # NOTE(review): the last pattern makes the second "§" optional, so it
        # also matches "§ x or § y" and overlaps the plain "or" pattern -
        # confirm this is intended.
        paragraph_rules = (
            (re.compile(r'\b14 CFR §\s*\d+\.\d+\b'), cfr_symbol_matches),
            (re.compile(r'§\s*\d+\.\d+\s+and\s+\d+\.\d+'), single_symbol_matches),
            (re.compile(r'§\s*\d+\.\d+\s+or\s+\d+\.\d+'), single_symbol_matches),
            (re.compile(r'§\s*\d+\.\d+\s+through\s+\d+\.\d+'), single_symbol_matches),
            (re.compile(r'§\s*\d+\.\d+\s+or\s+§?\s*\d+\.\d+'), missing_symbol_matches),
        )

        for paragraph in doc:
            for sentence in sentence_splitter.split(paragraph):
                trimmed = sentence.strip()
                if trimmed.startswith('§'):
                    leading_symbol_sentences.append(trimmed)

            for pattern, bucket in paragraph_rules:
                bucket.extend(pattern.findall(paragraph))

        categories = (
            ('sentences_starting_with_section_symbol', 'sentences', leading_symbol_sentences),
            ('incorrect_14_CFR_section_symbol_usage', 'matches', cfr_symbol_matches),
            ('single_section_symbol_multiple_sections', 'matches', single_symbol_matches),
            ('missing_section_symbol_in_multiple_sections', 'matches', missing_symbol_matches),
        )
        issues = [
            {'issue': label, payload_key: found}
            for label, payload_key, found in categories
            if found
        ]

        return DocumentCheckResult(success=not issues, issues=issues)

    @profile_performance
    def caption_check(self, doc: List[str], doc_type: str, caption_type: str) -> DocumentCheckResult:
        """Check for correctly formatted captions.

        A paragraph that starts with ``caption_type`` (case-insensitive) is
        treated as a caption. Advisory Circulars and Orders must number
        captions "X-Y"; every other document type uses a single number "X".

        Args:
            doc: The document as a list of paragraph strings.
            doc_type: Document type; selects the numbering scheme.
            caption_type: The caption word, taken literally (for example
                "Table" or "Figure").

        Returns:
            A DocumentCheckResult whose ``issues`` list each malformed
            caption together with the expected format.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        # Escape the caption word so regex metacharacters in it cannot alter
        # (or break) the pattern; it is matched literally everywhere else.
        caption_word = re.escape(caption_type)

        if doc_type in ["Advisory Circular", "Order"]:
            caption_pattern = re.compile(rf'^{caption_word}\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
            correct_format = f"{caption_type} X-Y"
        else:
            caption_pattern = re.compile(rf'^{caption_word}\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
            correct_format = f"{caption_type} X"

        caption_prefix = caption_type.lower()
        incorrect_captions = []
        in_toc = False

        for paragraph in doc:
            # "Contents" also covers "Table of Contents".
            # NOTE(review): the skip ends only at an empty paragraph. The
            # extract_paragraphs helper in this file drops empty paragraphs,
            # so with its output everything after a "Contents" paragraph may
            # be skipped - confirm against the caller.
            if "Contents" in paragraph:
                in_toc = True
                continue
            elif in_toc and paragraph.strip() == "":
                in_toc = False

            if in_toc:
                continue

            paragraph_strip = paragraph.strip()
            if paragraph_strip.lower().startswith(caption_prefix):
                if not caption_pattern.match(paragraph_strip):
                    incorrect_captions.append({
                        'incorrect_caption': paragraph_strip,
                        'correct_format': correct_format
                    })

        return DocumentCheckResult(success=len(incorrect_captions) == 0, issues=incorrect_captions)

    @profile_performance
    def table_figure_reference_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
        """Verify the capitalisation of in-text references to tables and figures.

        A reference that opens a sentence must be capitalised ("Table 1");
        a reference anywhere else must be lower case ("table 1"). Paragraphs
        that themselves start with "table" or "figure" are skipped.
        Advisory Circulars and Orders use "X-Y" numbering; other document
        types use a single number.

        Args:
            doc: The document as a list of paragraph strings.
            doc_type: Document type; selects the numbering scheme.

        Returns:
            A DocumentCheckResult whose ``issues`` describe every
            mis-capitalised reference.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        if doc_type in ["Advisory Circular", "Order"]:
            number_regex = r'\d+-\d+'
            placeholder = "X-Y"
        else:
            number_regex = r'\d+'
            placeholder = "X"

        # (pattern, capitalised word, lower-case word,
        #  start-of-sentence format, mid-sentence format,
        #  start-of-sentence message, mid-sentence message)
        reference_rules = (
            (
                re.compile(r'\b[Tt]able\s+' + number_regex + r'\b'),
                'Table',
                'table',
                f"Table {placeholder}",
                f"table {placeholder}",
                "Table reference at sentence start should be capitalized",
                "Table reference within sentence should be lowercase",
            ),
            (
                re.compile(r'\b[Ff]igure\s+' + number_regex + r'\b'),
                'Figure',
                'figure',
                f"Figure {placeholder}",
                f"figure {placeholder}",
                "Figure reference at sentence start should be capitalized",
                "Figure reference within sentence should be lowercase",
            ),
        )

        sentence_splitter = re.compile(r'(?<=[.!?])\s+')
        incorrect_references = []

        for paragraph in doc:
            # Paragraphs that begin with the caption word are not scanned.
            if paragraph.strip().lower().startswith(('table', 'figure')):
                continue

            for raw_sentence in sentence_splitter.split(paragraph):
                sentence = raw_sentence.strip()

                for pattern, upper_word, lower_word, start_fmt, mid_fmt, start_msg, mid_msg in reference_rules:
                    for found in pattern.finditer(sentence):
                        ref = found.group()
                        opens_sentence = sentence[:found.start()].strip() == ""

                        if opens_sentence:
                            if not ref.startswith(upper_word):
                                incorrect_references.append({
                                    'incorrect_ref': ref,
                                    'correct_format': start_fmt,
                                    'sentence': sentence,
                                    'issue': start_msg
                                })
                        elif not ref.startswith(lower_word):
                            incorrect_references.append({
                                'incorrect_ref': ref,
                                'correct_format': mid_fmt,
                                'sentence': sentence,
                                'issue': mid_msg
                            })

        return DocumentCheckResult(success=not incorrect_references, issues=incorrect_references)

    @profile_performance
    def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
        """Check for correct formatting of document titles.

        Finds "AC <number> <title>" citations in each paragraph and verifies
        that the title is italicised and/or quoted as the formatting rules
        for ``doc_type`` require. Italics are judged from the run that
        contains the first character of the title.

        Args:
            doc_path: Path (or bytes / BytesIO object) handed to ``Document``.
            doc_type: Document type; selects the italics/quotes rules.
                Unsupported types are logged and pass the check.

        Returns:
            A DocumentCheckResult whose ``issues`` describe each badly
            formatted title. Any exception is logged and reported as a
            failed result carrying the error text.
        """
        try:
            if isinstance(doc_path, (str, bytes, io.BytesIO)):
                doc = Document(doc_path)
            else:
                return DocumentCheckResult(
                    success=False, 
                    issues=[{'error': 'Invalid document input type'}]
                )
            
            incorrect_titles = []
            formatting_rules = {
                "Advisory Circular": {"italics": True, "quotes": False},
                "Airworthiness Criteria": {"italics": False, "quotes": True},
                "Deviation Memo": {"italics": False, "quotes": True},
                "Exemption": {"italics": False, "quotes": True},
                "Federal Register Notice": {"italics": False, "quotes": True},
                "Order": {"italics": False, "quotes": True},
                "Policy Statement": {"italics": False, "quotes": False},
                "Rule": {"italics": False, "quotes": True},
                "Special Condition": {"italics": False, "quotes": True},
                "Technical Standard Order": {"italics": False, "quotes": True},
                "Other": {"italics": False, "quotes": False}
            }

            if doc_type not in formatting_rules:
                self.logger.warning(f"Unsupported document type: {doc_type}. Skipping title check.")
                return DocumentCheckResult(success=True, issues=[])

            required_format = formatting_rules[doc_type]
            ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')

            # Straight quotes plus the curly quotes word processors insert.
            # Written as escapes so the characters cannot be mangled into
            # ASCII look-alikes by an editor or copy/paste.
            quote_chars = ('"', "'", '\u201c', '\u201d', '\u2018', '\u2019')

            for paragraph in doc.paragraphs:
                text = paragraph.text
                matches = ac_pattern.finditer(text)

                for match in matches:
                    title_text = match.group(2).strip()
                    title_start = match.start(2)
                    title_in_quotes = any(q in title_text for q in quote_chars)

                    # Locate the run that covers the first title character by
                    # accumulating run lengths.
                    title_is_italicized = False
                    current_pos = 0
                    for run in paragraph.runs:
                        run_length = len(run.text)
                        run_start = current_pos
                        run_end = current_pos + run_length
                        if run_start <= title_start < run_end:
                            title_is_italicized = run.italic
                            break
                        current_pos += run_length

                    formatting_incorrect = False
                    issue_message = []

                    if required_format["italics"] and not title_is_italicized:
                        formatting_incorrect = True
                        issue_message.append("should be italicized")
                    elif not required_format["italics"] and title_is_italicized:
                        formatting_incorrect = True
                        issue_message.append("should not be italicized")

                    if required_format["quotes"] and not title_in_quotes:
                        formatting_incorrect = True
                        issue_message.append("should be in quotes")
                    elif not required_format["quotes"] and title_in_quotes:
                        formatting_incorrect = True
                        issue_message.append("should not be in quotes")

                    if formatting_incorrect:
                        incorrect_titles.append({
                            'text': title_text,
                            'issue': ', '.join(issue_message),
                            'sentence': text.strip()
                        })

            return DocumentCheckResult(
                success=len(incorrect_titles) == 0,
                issues=incorrect_titles
            )

        except Exception as e:
            self.logger.error(f"Error in document_title_check: {e}")
            return DocumentCheckResult(
                success=False,
                issues=[{'error': str(e)}]
            )

    @profile_performance
    def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
        """Find sentences whose text ends in "..".

        Args:
            doc: The document as a list of paragraph strings.

        Returns:
            A DocumentCheckResult whose ``issues`` hold one
            ``{'sentence': ...}`` dict per offending sentence.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        # Sentences are separated by one or more spaces after ., ! or ?.
        sentence_splitter = re.compile(r'(?<=[.!?]) +')

        flagged = [
            {'sentence': piece.strip()}
            for paragraph in doc
            for piece in sentence_splitter.split(paragraph)
            if piece.endswith('..')
        ]

        return DocumentCheckResult(success=not flagged, issues=flagged)

    @profile_performance
    def spacing_check(self, doc: List[str]) -> DocumentCheckResult:
        """Check for correct spacing in the document.

        Each sentence is tested against a list of spacing patterns; every
        pattern that matches adds one issue for that sentence.

        Args:
            doc: The document as a list of paragraph strings.

        Returns:
            A DocumentCheckResult whose ``issues`` pair an
            ``issue_description`` with the offending sentence.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        incorrect_spacing = []

        # The first three patterns look for a missing space AFTER a token. A
        # "(?<!\s)" guard in front of them would reject the ordinary case
        # where the token follows a space (" AC25-7", " §25.1", " Part25"),
        # so they only require that the token is not the tail of a longer
        # word.
        patterns = [
            (re.compile(r'(?<![A-Za-z])(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE), 
             "Missing space between document type and number"),
            (re.compile(r'§\d+\.\d+'), 
             "Missing space after section symbol (§)"),
            (re.compile(r'(?<![A-Za-z])Part(\d+)', re.IGNORECASE), 
             "Missing space between 'Part' and number"),
            # Here the missing space is BEFORE the token, so the lookbehind
            # is the right tool and is kept as is.
            (re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE), 
             "Missing space before paragraph indication"),
            (re.compile(r'\s{2,}'), 
             "Double spaces between words")
        ]

        sentence_splitter = re.compile(r'(?<=[.!?])\s+')

        for paragraph in doc:
            for sentence in sentence_splitter.split(paragraph):
                for pattern, issue in patterns:
                    if pattern.search(sentence):
                        incorrect_spacing.append({
                            'issue_description': issue,
                            'sentence': sentence.strip()
                        })

        success = len(incorrect_spacing) == 0

        return DocumentCheckResult(success=success, issues=incorrect_spacing)

    @profile_performance
    def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
        """Flag full terms that are spelled out again after being abbreviated.

        A "Full Term (ACRONYM)" occurrence registers the term. The first
        sentence containing the registered full term is accepted; every later
        sentence containing it is reported.

        Args:
            doc: The document as a list of paragraph strings.

        Returns:
            A DocumentCheckResult whose ``issues`` give the full term, its
            acronym and the sentence that repeats the full term.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        definition_pattern = re.compile(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)')
        sentence_splitter = re.compile(r'(?<=[.!?])\s+')

        # acronym -> {"full_term": str, "defined": bool}; "defined" stays True
        # until the first sentence containing the full term has been seen.
        known = {}
        issues = []

        for paragraph in doc:
            for sentence in sentence_splitter.split(paragraph):
                # Only the first definition of an acronym is kept.
                for captured_term, acronym in definition_pattern.findall(sentence):
                    known.setdefault(
                        acronym,
                        {"full_term": captured_term.strip(), "defined": True}
                    )

                for acronym, entry in known.items():
                    term = entry["full_term"]
                    if term not in sentence:
                        continue
                    if entry["defined"]:
                        entry["defined"] = False
                        continue
                    issues.append({
                        'full_term': term,
                        'acronym': acronym,
                        'sentence': sentence.strip()
                    })

        return DocumentCheckResult(success=not issues, issues=issues)

    @profile_performance
    def check_date_formats(self, doc: List[str]) -> DocumentCheckResult:
        """Check for inconsistent date formats.

        Numeric dates written with slashes or hyphens are reported so they can
        be rewritten as 'Month Day, Year'. Reference numbers made of a short
        capital-letter prefix followed by hyphenated digits (for example
        ``AD 2020-12-05``) are masked first so they are not mistaken for dates.

        Args:
            doc: Paragraph strings to inspect.

        Returns:
            A DocumentCheckResult whose ``issues`` entries carry the keys
            ``date``, ``issue`` and ``sentence``. ``success`` is True when no
            numeric date was found. When ``self.validate_input`` rejects
            ``doc`` a failed result with a single ``error`` entry is returned.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        ignore_patterns = [
            r'\bAD \d{4}-\d{2}-\d{2}\b',
            r'\bSWPM \d{2}-\d{2}-\d{2}\b',
            r'\bAMM \d{2}-\d{2}-\d{2}\b',
            r'\bSOPM \d{2}-\d{2}-\d{2}\b',
            r'\b[A-Z]{2,4} \d{2}-\d{2}-\d{2}\b'
        ]
        ignore_pattern = re.compile('|'.join(ignore_patterns))

        date_patterns = [
            (re.compile(r'(?<![\w/-])\d{1,2}/\d{1,2}/\d{2,4}(?![\w/-])'),
             "Use 'Month Day, Year' format instead of 'MM/DD/YYYY'"),
            (re.compile(r'(?<![\w/-])\d{1,2}-\d{1,2}-\d{2,4}(?![\w/-])'),
             "Use 'Month Day, Year' format instead of 'MM-DD-YYYY'"),
            (re.compile(r'(?<![\w/-])\d{4}-\d{1,2}-\d{1,2}(?![\w/-])'),
             "Use 'Month Day, Year' format instead of 'YYYY-MM-DD'")
        ]

        sentence_splitter = re.compile(r'(?<=[.!?])\s+')

        def mask(match):
            # Same-length replacement keeps every offset aligned with the
            # original sentence, so match spans can be used on it directly.
            return 'X' * len(match.group())

        date_issues = []
        for paragraph in doc:
            for sentence in sentence_splitter.split(paragraph):
                working_sentence = ignore_pattern.sub(mask, sentence)

                for pattern, issue in date_patterns:
                    for match in pattern.finditer(working_sentence):
                        date_issues.append({
                            'date': sentence[match.start():match.end()],
                            'issue': issue,
                            'sentence': sentence.strip()
                        })

        return DocumentCheckResult(success=not date_issues, issues=date_issues)

    @profile_performance
    def check_placeholders(self, doc: List[str]) -> DocumentCheckResult:
        """Report leftover placeholder text such as "TBD".

        Each sentence is searched, case-insensitively, for every known
        placeholder phrase; the first occurrence of each phrase in a sentence
        produces one issue.

        Args:
            doc: Paragraph strings to inspect.

        Returns:
            A DocumentCheckResult whose ``issues`` entries carry the keys
            ``placeholder`` and ``sentence``. ``success`` is True when no
            placeholder was found. When ``self.validate_input`` rejects
            ``doc`` a failed result with a single ``error`` entry is returned.
        """
        if not self.validate_input(doc):
            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

        matchers = [
            re.compile(expression, re.IGNORECASE)
            for expression in (
                r'\bTBD\b',
                r'\bTo be determined\b',
                r'\bTo be added\b',
            )
        ]

        found = []
        for paragraph in doc:
            for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
                for matcher in matchers:
                    hit = matcher.search(sentence)
                    if hit is None:
                        continue
                    found.append({
                        'placeholder': hit.group().strip(),
                        'sentence': sentence.strip()
                    })

        return DocumentCheckResult(success=len(found) == 0, issues=found)

    def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
        """Run every document check and collect the results by check name.

        Args:
            doc_path: Document source handed to ``self.extract_paragraphs``
                and to the title check.
            doc_type: Document type; selects the per-type settings under
                ``document_types`` in the configuration.
            template_type: Accepted for interface compatibility; not used
                by this method.

        Returns:
            A dict mapping check name to its DocumentCheckResult, in the
            order the checks are executed.
        """
        paragraphs = self.extract_paragraphs(doc_path)

        # Per-type settings; an unknown type simply has no overrides.
        type_settings = self.config_manager.config['document_types'].get(doc_type, {})
        title_check_disabled = type_settings.get('skip_title_check', False)

        # Dict literals evaluate top to bottom, so checks run in listed order.
        results = {
            'heading_title_check': self.heading_title_check(paragraphs, doc_type),
            'heading_title_period_check': self.heading_title_period_check(paragraphs, doc_type),
            'acronym_check': self.acronym_check(paragraphs),
            'terminology_check': self.check_terminology(paragraphs),
            'section_symbol_usage_check': self.check_section_symbol_usage(paragraphs),
            'caption_check_table': self.caption_check(paragraphs, doc_type, 'Table'),
            'caption_check_figure': self.caption_check(paragraphs, doc_type, 'Figure'),
            'table_figure_reference_check': self.table_figure_reference_check(paragraphs, doc_type),
        }

        if title_check_disabled:
            # A skipped title check is reported as passing.
            results['document_title_check'] = DocumentCheckResult(success=True, issues=[])
        else:
            results['document_title_check'] = self.document_title_check(doc_path, doc_type)

        results.update({
            'double_period_check': self.double_period_check(paragraphs),
            'spacing_check': self.spacing_check(paragraphs),
            'abbreviation_usage_check': self.check_abbreviation_usage(paragraphs),
            'date_formats_check': self.check_date_formats(paragraphs),
            'placeholders_check': self.check_placeholders(paragraphs),
        })

        return results

def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
    """Run all checks on an uploaded document and return a Markdown report.

    Args:
        file_obj: The document; raw ``bytes`` are wrapped in a ``BytesIO``
            before being handed to the checker, anything else is passed on
            unchanged.
        doc_type: Document type forwarded to the checker and the report.
        template_type: Optional template name forwarded to the checker.

    Returns:
        The Markdown report, or a Markdown error message if any step raised.
    """
    try:
        checker = FAADocumentChecker()
        source = io.BytesIO(file_obj) if isinstance(file_obj, bytes) else file_obj
        check_results = checker.run_all_checks(source, doc_type, template_type)
        return format_markdown_results(check_results, doc_type)
    except Exception as exc:
        logging.error("Error processing document: %s", exc)
        traceback.print_exc()
        # Leading and trailing empty entries reproduce the blank first line
        # and the final newline of the message.
        message_lines = [
            "",
            "# ❌ Error Processing Document",
            "",
            f"**Error Details:** {exc}",
            "",
            "Please ensure:",
            "1. The file is a valid .docx document",
            "2. The file is not corrupted or password protected",
            "3. The file is properly formatted",
            "",
            "Try again after checking these issues. If the problem persists, contact support.",
            "",
        ]
        return "\n".join(message_lines)
    
def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
    """Render check results as a Markdown report for Gradio.

    Failed checks are listed in ascending priority (checks without a known
    priority come last), each with at most five issues spelled out. A static
    summary with recommended next steps closes the report.

    Args:
        results: Mapping of check name to a result object exposing
            ``success`` and ``issues``.
        doc_type: Document type shown in the report header.

    Returns:
        The report as a single Markdown string.
    """
    shown_per_check = 5

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        f"# Document Check Results - {timestamp}",
        f"## Document Type: {doc_type}",
        "---\n",
    ]

    failed_count = len([outcome for outcome in results.values() if not outcome.success])
    if failed_count == 0:
        lines.append("✅ **All checks passed successfully!**\n")
        return "\n".join(lines)

    lines.append(f"❗ Found issues in {failed_count} check categories\n")

    catalog = {
        'heading_title_check': {'title': '📋 Required Headings', 'priority': 1},
        'heading_title_period_check': {'title': '🔍 Heading Period Usage', 'priority': 1},
        'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 2},
        'terminology_check': {'title': '📖 Terminology Usage', 'priority': 2},
        'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
        'caption_check_table': {'title': '📊 Table Captions', 'priority': 3},
        'caption_check_figure': {'title': '🖼️ Figure Captions', 'priority': 3},
        'table_figure_reference_check': {'title': '🔗 Table/Figure References', 'priority': 3},
        'document_title_check': {'title': '📑 Document Title Format', 'priority': 1},
        'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
        'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
        'abbreviation_usage_check': {'title': '📎 Abbreviation Usage', 'priority': 3},
        'date_formats_check': {'title': '📅 Date Formats', 'priority': 3},
        'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 1}
    }

    def priority_of(entry):
        # Unknown checks sort after every catalogued one.
        return catalog.get(entry[0], {'priority': 999})['priority']

    for check_name, outcome in sorted(results.items(), key=priority_of):
        if outcome.success:
            continue

        if check_name in catalog:
            heading = catalog[check_name]['title']
        else:
            heading = check_name.replace('_', ' ').title()
        lines.append(f"### {heading}")

        found = outcome.issues
        if isinstance(found, list):
            for issue in found[:shown_per_check]:
                if not isinstance(issue, dict):
                    lines.append(f"- {issue}")
                    continue
                for key, value in issue.items():
                    if isinstance(value, list):
                        # List values are expanded into one bullet per item.
                        lines += [f"- {item}" for item in value]
                    else:
                        lines.append(f"- {key}: {value}")

            hidden = len(found) - shown_per_check
            if hidden > 0:
                lines.append(f"\n*...and {hidden} more similar issues*")

        lines.append("")

    lines += [
        "## 📋 Summary and Recommendations",
        "",
        "### Priority Order for Fixes:",
        "1. 🔴 Critical: Heading formats, required content, and document structure",
        "2. 🟡 Important: Terminology, acronyms, and references",
        "3. 🟢 Standard: Formatting, spacing, and style consistency",
        "",
        "### Next Steps:",
        "1. Address issues in priority order",
        "2. Use search/replace for consistent fixes",
        "3. Re-run checker after making changes",
        "4. Update your document template if needed",
        ""
    ]

    return "\n".join(lines)

class DocumentCheckResultsFormatter:
    """Formats document check results in a user-friendly way with detailed examples and fixes.

    The plain-text report has a fixed layout per failed check: a line starting
    with "■" and the title, a description line, a "How to fix:" line, an
    "Example Fix:" part and an "Issues found in your document:" part whose
    entries start with "•".
    """

    # Maximum number of individual issues spelled out per failed check.
    MAX_ISSUES_SHOWN = 3

    def __init__(self):
        """Initialize the formatter with colorama for cross-platform color support."""
        init()  # Initialize colorama

        # Enhanced issue categories with examples and specific fixes
        self.issue_categories = {
            'acronym_check': {
                'title': 'Acronym Definition Issues',
                'description': 'Acronyms must be defined at their first use in the document.',
                'solution': 'Define each acronym at its first use, e.g., "Federal Aviation Administration (FAA)"',
                'example_fix': {
                    'before': 'The FAA published new guidelines.',
                    'after': 'The Federal Aviation Administration (FAA) published new guidelines.'
                }
            },
            'terminology_check': {
                'title': 'Incorrect Terminology',
                'description': 'Non-standard or prohibited terms and phrases detected. Avoid relative position references.',
                'solution': 'Use explicit references to paragraphs, sections, tables, and figures',
                'example_fix': {
                    'before': 'See above section for details | Refer to below table | shall comply with',
                    'after': 'See paragraph 3.2 for details | Refer to table 2-1 | must comply with'
                }
            },
            'section_symbol_usage_check': {
                'title': 'Section Symbol (§) Format Issues',
                'description': 'Incorrect formatting of section symbols in references.',
                'solution': 'Format section symbols correctly and never start sentences with them',
                'example_fix': {
                    'before': '§ 25.25 states | 14 CFR § 21.21',
                    'after': 'Section 25.25 states | 14 CFR 21.21'
                }
            },
            'caption_check_table': {
                'title': 'Table Caption Format Issues',
                'description': 'Table captions do not follow the required format.',
                'solution': 'Use consistent table numbering format',
                'example_fix': {
                    'before': 'Table 5. | Table A | Tables',
                    'after': 'Table 5-1. | Table 1-1 | Table 2-1'
                }
            },
            'caption_check_figure': {
                'title': 'Figure Caption Format Issues',
                'description': 'Figure captions do not follow the required format.',
                'solution': 'Use consistent figure numbering format',
                'example_fix': {
                    'before': 'Figure 5. | Figure A | Figures',
                    'after': 'Figure 5-1. | Figure 1-1 | Figure 2-1'
                }
            },
            'document_title_check': {
                'title': 'Document Title Format Issues',
                'description': 'Document titles are not properly formatted.',
                'solution': 'Format titles according to document type requirements',
                'example_fix': {
                    'before': '"AC 20-114" | "Advisory Circular"',
                    'after': 'AC 20-114 | Advisory Circular'
                }
            },
            'double_period_check': {
                'title': 'Multiple Period Issues',
                'description': 'Sentences ending with multiple periods.',
                'solution': 'Use single period to end sentences',
                'example_fix': {
                    'before': 'The following ACs are related to the guidance in this document..',
                    'after': 'The following ACs are related to the guidance in this document.'
                }
            },
            'spacing_check': {
                'title': 'Spacing Issues',
                'description': 'Incorrect spacing in text.',
                'solution': 'Maintain consistent spacing throughout the document',
                'example_fix': {
                    'before': 'AC25.25 | The  following ACs (double spaces)',
                    'after': 'AC 25.25 | The following ACs (single space)'
                }
            },
            'date_formats_check': {
                'title': 'Date Format Issues',
                'description': 'Dates not in the required format.',
                'solution': 'Use the format "Month Day, Year"',
                'example_fix': {
                    'before': '01/15/2024 | 2024-01-15',
                    'after': 'January 15, 2024'
                }
            },
            'placeholders_check': {
                'title': 'Placeholder Content',
                'description': 'Placeholder text remains in the document.',
                'solution': 'Replace all placeholder content with actual content',
                'example_fix': {
                    'before': 'TBD | To be determined | [Insert text]',
                    'after': 'Actual content specific to the context'
                }
            }
        }

    def _category_for(self, check_name: str) -> Dict[str, Any]:
        """Return display metadata for a check, with a generic fallback.

        Checks without an entry in ``issue_categories`` get a title derived
        from the check name so their failures still appear in the report.
        """
        known = self.issue_categories.get(check_name)
        if known is not None:
            return known
        return {
            'title': check_name.replace('_', ' ').title(),
            'description': 'This check reported issues.',
            'solution': 'Review the items listed below and correct them.',
        }

    @staticmethod
    def _format_issue(issue: Any) -> str:
        """Render one issue as an indented bullet entry."""
        if not isinstance(issue, dict):
            return f"    • {issue}"
        if 'sentence' in issue:
            # Long sentences wrap; continuation lines are indented further
            # than the bullet so they stay visually attached to it.
            return textwrap.fill(
                issue['sentence'],
                width=76,
                initial_indent='    • ',
                subsequent_indent='      '
            )
        if 'missing_headings' in issue:
            missing = ', '.join(issue['missing_headings'])
            return f"    • Missing required headings: {missing}"
        issue_str = ', '.join(f"{k}: {v}" for k, v in issue.items())
        return f"    • {issue_str}"

    def format_results(self, results: Dict[str, Any], doc_type: str) -> str:
        """Format check results into a plain-text report for display.

        Every failed check is rendered, including checks that have no entry
        in ``issue_categories``, so the number announced in the summary line
        always equals the number of sections that follow.

        Args:
            results: Mapping of check name to a result object exposing
                ``success`` and ``issues``.
            doc_type: Document type; accepted for interface compatibility
                and not used in the report text.

        Returns:
            The report as a single string.
        """
        output = [
            "================================================================================",
            "Document Check Results Summary",
            "================================================================================\n"
        ]

        failed = [(name, result) for name, result in results.items() if not result.success]

        if not failed:
            output.append("✅ All checks passed successfully!\n")
            return "\n".join(output)

        output.append(f"Found {len(failed)} categories of issues that need attention:\n")

        for check_name, result in failed:
            category = self._category_for(check_name)

            # Section header
            output.append(f"■ {category['title']}")
            output.append(f"  {category['description']}")
            output.append(f"  How to fix: {category['solution']}\n")

            # Example part; the heading is always emitted so that every
            # section keeps the same layout.
            output.append("  Example Fix:")
            example = category.get('example_fix')
            if example:
                output.append(f"    ❌ Incorrect: {example['before']}")
                output.append(f"    ✓ Correct  : {example['after']}\n")
            else:
                output.append("    (no example available for this check)\n")

            # Actual issues
            output.append("  Issues found in your document:")
            issues = result.issues
            for issue in issues[:self.MAX_ISSUES_SHOWN]:
                output.append(self._format_issue(issue))

            hidden = len(issues) - self.MAX_ISSUES_SHOWN
            if hidden > 0:
                output.append(f"\n    ... and {hidden} more similar issues.")

            output.append("")  # Spacing between categories

        return "\n".join(output)

    def save_report(self, results: Dict[str, Any], filepath: str, doc_type: str):
        """Save the formatted results to a file as plain text.

        The report is built before the file is opened, so a formatting
        failure does not leave an empty file behind. Errors are reported on
        stdout rather than raised.

        Args:
            results: Mapping of check name to result object.
            filepath: Destination path; an existing file is overwritten.
            doc_type: Document type forwarded to ``format_results``.
        """
        try:
            report = self.format_results(results, doc_type)

            # Strip color codes
            for color in [Fore.CYAN, Fore.GREEN, Fore.YELLOW, Fore.RED, Style.RESET_ALL]:
                report = report.replace(str(color), '')

            # Convert markdown-style italics to alternative formatting for plain text
            report = report.replace('*', '_')

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(report)
        except Exception as e:
            print(f"Error saving report: {e}")

def create_interface():
    """Create and configure the Gradio interface.

    Builds a ``gr.Blocks`` app with a file upload, a document-type dropdown,
    a template-type radio that is only visible for "Advisory Circular", a
    submit button and an HTML results pane, and wires up their handlers.

    Returns:
        The configured ``gr.Blocks`` instance; the caller launches it.
    """
    # Function-scope import: HTML escaping is only needed inside this block.
    from html import escape

    document_types = [
        "Advisory Circular",
        "Airworthiness Criteria",
        "Deviation Memo",
        "Exemption",
        "Federal Register Notice",
        "Order",
        "Policy Statement",
        "Rule",
        "Special Condition",
        "Technical Standard Order",
        "Other"
    ]

    template_types = ["Short AC template AC", "Long AC template AC"]

    def parse_example_lines(examples):
        """Classify the lines after "Example Fix:" into (kind, text) entries.

        Kinds are 'bad', 'good', 'label', 'issue' and 'more'. A line without
        a marker that follows an issue bullet is a wrapped continuation of
        that bullet and is appended to it.
        """
        entries = []
        for raw in examples.strip().split('\n'):
            line = raw.strip()
            if not line:
                continue
            if line.startswith('❌'):
                entries.append(['bad', line.replace('❌ Incorrect:', '').strip()])
            elif line.startswith('✓'):
                entries.append(['good', line.replace('✓ Correct  :', '').strip()])
            elif line.startswith('•'):
                # Drop only the leading marker; bullets inside the quoted
                # document text are kept.
                entries.append(['issue', line[1:].strip()])
            elif line == 'Issues found in your document:':
                entries.append(['label', line])
            elif line.startswith('...') and 'more similar issues' in line:
                entries.append(['more', line])
            elif entries and entries[-1][0] == 'issue':
                entries[-1][1] += ' ' + line
        return entries

    def format_results_as_html(text_results):
        """Convert the text results into styled HTML.

        The text is split into a header and one section per line that starts
        with "■". All text taken from the report is HTML-escaped because it
        can contain sentences quoted from the uploaded document.
        """
        if not text_results:
            return """
                <div class="p-4 text-gray-600">
                    Results will appear here after processing...
                </div>
            """

        # Split on section markers at the start of a line only, so a "■"
        # inside quoted document text does not start a new section.
        sections = re.split(r'(?m)^■', text_results)
        header = sections[0]
        issue_sections = sections[1:]

        # The heading below already shows the title, so the text-report
        # rule lines and the duplicate title are left out of the summary.
        summary_lines = []
        for line in header.splitlines():
            line = line.strip()
            if not line or set(line) == {'='} or line == 'Document Check Results Summary':
                continue
            summary_lines.append(escape(line))
        summary_html = '<br>'.join(summary_lines)
        status_class = 'text-green-600' if 'All checks passed' in header else 'text-amber-600'

        header_html = f"""
            <div class="max-w-4xl mx-auto p-4 bg-white rounded-lg shadow-sm mb-6">
                <h1 class="text-2xl font-bold text-gray-800 mb-4">Document Check Results Summary</h1>
                <div class="text-lg {status_class}">
                    {summary_html}
                </div>
            </div>
        """

        issues_html = ""
        for section in issue_sections:
            if not section.strip():
                continue

            title, _, content = section.strip().partition('\n')
            description, _, examples = content.partition('Example Fix:')
            # Separate the description from the fix hint so the hint is
            # shown once, in its own box.
            about, _, fix_hint = description.partition('How to fix:')

            issues_html += f"""
                <div class="bg-white rounded-lg shadow-sm mb-6 overflow-hidden">
                    <div class="bg-gray-50 px-6 py-4 border-b">
                        <h2 class="text-lg font-semibold text-gray-800">{escape(title.strip())}</h2>
                    </div>
                    
                    <div class="px-6 py-4">
                        <div class="text-gray-600 mb-4">
                            {escape(about.strip())}
                        </div>
                        
                        <div class="bg-green-50 rounded p-4 mb-4">
                            <div class="text-green-800">
                                <span class="font-medium">How to fix:</span>
                                {escape(fix_hint.strip())}
                            </div>
                        </div>
                    """

            if examples.strip():
                issues_html += """
                    <div class="mb-4">
                        <h3 class="font-medium text-gray-800 mb-2">Examples:</h3>
                        <div class="space-y-2 ml-4">
                """

                for kind, text in parse_example_lines(examples):
                    safe_text = escape(text)
                    if kind == 'bad':
                        issues_html += f"""
                            <div class="text-red-600">
                                <span class="inline-block w-4">❌</span>
                                {safe_text}
                            </div>
                        """
                    elif kind == 'good':
                        issues_html += f"""
                            <div class="text-green-600">
                                <span class="inline-block w-4">✓</span>
                                {safe_text}
                            </div>
                        """
                    elif kind == 'label':
                        issues_html += f"""
                            <div class="font-medium text-gray-800 mt-2">
                                {safe_text}
                            </div>
                        """
                    elif kind == 'issue':
                        issues_html += f"""
                            <div class="text-gray-600 ml-4">
                                • {safe_text}
                            </div>
                        """
                    elif kind == 'more':
                        issues_html += f"""
                            <div class="text-gray-500 italic mt-2">
                                {safe_text}
                            </div>
                        """

                issues_html += """
                        </div>
                    </div>
                """

            issues_html += """
                    </div>
                </div>
            """

        # Combine all HTML
        full_html = f"""
        <div class="mx-auto p-4" style="font-family: system-ui, -apple-system, sans-serif;">
            <style>
                .text-2xl {{ font-size: 1.5rem; }}
                .text-lg {{ font-size: 1.125rem; }}
                .font-bold {{ font-weight: 700; }}
                .font-semibold {{ font-weight: 600; }}
                .font-medium {{ font-weight: 500; }}
                .text-gray-800 {{ color: #1f2937; }}
                .text-gray-600 {{ color: #4b5563; }}
                .text-gray-500 {{ color: #6b7280; }}
                .text-green-600 {{ color: #059669; }}
                .text-green-800 {{ color: #065f46; }}
                .text-red-600 {{ color: #dc2626; }}
                .text-amber-600 {{ color: #d97706; }}
                .bg-white {{ background-color: #ffffff; }}
                .bg-gray-50 {{ background-color: #f9fafb; }}
                .bg-green-50 {{ background-color: #ecfdf5; }}
                .rounded-lg {{ border-radius: 0.5rem; }}
                .shadow-sm {{ box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05); }}
                .mb-6 {{ margin-bottom: 1.5rem; }}
                .mb-4 {{ margin-bottom: 1rem; }}
                .mb-2 {{ margin-bottom: 0.5rem; }}
                .ml-4 {{ margin-left: 1rem; }}
                .mt-2 {{ margin-top: 0.5rem; }}
                .p-4 {{ padding: 1rem; }}
                .px-6 {{ padding-left: 1.5rem; padding-right: 1.5rem; }}
                .py-4 {{ padding-top: 1rem; padding-bottom: 1rem; }}
                .space-y-2 > * + * {{ margin-top: 0.5rem; }}
                .italic {{ font-style: italic; }}
                .border-b {{ border-bottom: 1px solid #e5e7eb; }}
                .overflow-hidden {{ overflow: hidden; }}
            </style>
            {header_html}
            {issues_html}
        </div>
        """

        return full_html

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # 📑 Document Checker Tool
            
            ### Purpose
            This tool checks Word documents for compliance with U.S. federal documentation standards.
            
            ### How to Use
            1. Upload your Word document (.docx format)
            2. Select the document type
            3. Click "Check Document"
            
            > **Note:** Please ensure your document is clean (no track changes or comments)
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="📎 Upload Word Document (.docx)",
                    file_types=[".docx"],
                    type="binary"
                )

                doc_type = gr.Dropdown(
                    choices=document_types,
                    label="📋 Document Type",
                    value="Advisory Circular",
                    info="Select the type of document you're checking"
                )

                template_type = gr.Radio(
                    choices=template_types,
                    label="📑 Template Type",
                    visible=False,
                    info="Only applicable for Advisory Circulars"
                )

                submit_btn = gr.Button(
                    "🔍 Check Document",
                    variant="primary"
                )

            with gr.Column(scale=2):
                results = gr.HTML()

        def process_and_format(file_obj, doc_type, template_type):
            """Process document and format results as HTML."""
            if file_obj is None:
                # Submitting without an upload is a user error, not a crash.
                return """
                    <div style="color: red; padding: 1rem;">
                        ❌ No document uploaded.
                        <br><br>
                        Please upload a .docx document and try again.
                    </div>
                """

            try:
                checker = FAADocumentChecker()
                if isinstance(file_obj, bytes):
                    file_obj = io.BytesIO(file_obj)
                check_results = checker.run_all_checks(file_obj, doc_type, template_type)

                # Format results using DocumentCheckResultsFormatter
                formatter = DocumentCheckResultsFormatter()
                text_results = formatter.format_results(check_results, doc_type)

                # Convert to HTML
                return format_results_as_html(text_results)

            except Exception as e:
                logging.error(f"Error processing document: {str(e)}")
                traceback.print_exc()
                return f"""
                    <div style="color: red; padding: 1rem;">
                        ❌ Error processing document: {escape(str(e))}
                        <br><br>
                        Please ensure the file is a valid .docx document and try again.
                    </div>
                """

        # Update template type visibility based on document type
        def update_template_visibility(doc_type):
            return gr.update(visible=doc_type == "Advisory Circular")

        doc_type.change(
            fn=update_template_visibility,
            inputs=[doc_type],
            outputs=[template_type]
        )

        # Handle document processing
        submit_btn.click(
            fn=process_and_format,
            inputs=[file_input, doc_type, template_type],
            outputs=[results]
        )

        gr.Markdown(
            """
            ### 📌 Important Notes
            - This tool is in development; you may encounter false positives
            - For questions or feedback, contact Eric Putnam
            - Results are not stored or saved
            """
        )

    return demo

# Script entry point: configure logging, build the UI and serve it.
if __name__ == "__main__":
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)

    launch_options = {
        "share": False,            # Set to True if you want to generate a public link
        "server_name": "0.0.0.0",  # Allows external access
        "server_port": 7860,       # Default Gradio port
        "debug": True,
    }

    demo = create_interface()
    demo.launch(**launch_options)