Spaces:

Hoctar77
/

DocumentCheckerTool

Sleeping

App Files Community

Hoctar77 committed on Nov 7, 2024

Commit

0656ea9

verified ·

1 Parent(s): bb6584a

Update app.py

Browse files

Files changed (1) hide show

app.py +429 -459

app.py CHANGED Viewed

@@ -11,7 +11,12 @@ import io
 import os
 import traceback
 from datetime import datetime
 @dataclass
 class DocumentCheckResult:
     """Structured result for document checks."""
@@ -26,7 +31,6 @@ def profile_performance(func):
         start_time = time.time()
         result = func(*args, **kwargs)
         end_time = time.time()
-        # Get logger from the class instance (first argument)
         logger = args[0].logger if hasattr(args[0], 'logger') else logging.getLogger(__name__)
         logger.info(
             f"Performance: {func.__name__} took {end_time - start_time:.4f} seconds"
@@ -37,7 +41,6 @@ def profile_performance(func):
 class DocumentCheckerConfig:
     """Configuration management for document checks."""
     def __init__(self, config_path: Optional[str] = None):
-        """Initialize configuration with optional config file."""
         self.config = self._load_config(config_path)
         self.logger = self._setup_logger()
@@ -104,6 +107,26 @@ class DocumentCheckerConfig:
                     ],
                     "skip_title_check": False
                 },
                 "Other": {
                     "required_headings": [],
                     "skip_title_check": True
@@ -164,35 +187,27 @@ class DocumentChecker:
             return []
 class FAADocumentChecker(DocumentChecker):
     def __init__(self, config_path: Optional[str] = None):
         super().__init__(config_path)
-    @profile_performance  # Use the decorator directly
     def heading_title_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
         """Check headings for a specific document type."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
-        # Use configuration for document-specific headings
-        checks = self.config_manager.config['document_types'].get(
-            doc_type, {}
-        )
         required_headings = checks.get('required_headings', [])
         headings_found = []
-        # Create a set for faster lookup
         required_headings_set = set(required_headings)
         for para in doc:
             para_strip = para.strip()
-            # Check if the paragraph is in the required headings list
             if para_strip in required_headings_set:
                 headings_found.append(para_strip)
-        # Check if all required headings are found
         all_headings_present = set(headings_found) == required_headings_set
         issues = []
         if not all_headings_present:
             missing_headings = required_headings_set - set(headings_found)
@@ -206,23 +221,13 @@ class FAADocumentChecker(DocumentChecker):
                 'required_headings': required_headings
             }
         )
     @profile_performance
     def heading_title_period_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
-        """
-        Check if headings end with periods according to document type requirements.
-        Args:
-            doc (List[str]): List of document paragraphs
-            doc_type (str): Type of document being checked
-        Returns:
-            DocumentCheckResult: Result of the heading period check
-        """
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
-        # Define document types requiring periods in headings
         period_required = {
             "Advisory Circular": True,
             "Airworthiness Criteria": False,
@@ -237,10 +242,7 @@ class FAADocumentChecker(DocumentChecker):
             "Other": False
         }
-        # Get whether periods are required for this document type
         should_have_period = period_required.get(doc_type, False)
-        # Get the headings configuration for this document type
         checks = self.config_manager.config['document_types'].get(doc_type, {})
         required_headings = checks.get('required_headings', [])
         required_headings_set = set(required_headings)
@@ -250,7 +252,6 @@ class FAADocumentChecker(DocumentChecker):
         for para in doc:
             para_strip = para.strip()
-            # Check only if paragraph is a heading
             if para_strip in required_headings_set:
                 ends_with_period = para_strip.endswith('.')
@@ -283,78 +284,85 @@ class FAADocumentChecker(DocumentChecker):
                         'needs_period': should_have_period
                     })
-        success = len(issues) == 0
         return DocumentCheckResult(
-        success=success,
-        issues=issues,
-        details={
-            'document_type': doc_type,
-            'periods_required': should_have_period,
-            'checked_headings': checked_headings
-        }
-    )
     @profile_performance
     def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
-        """Check if acronyms are defined at their first use, only flagging the first instance of undefined acronyms."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         defined_acronyms = set()
-        first_occurrences = {}  # Track first occurrence of each acronym
-        undefined_acronyms = []
-        acronym_pattern = re.compile(r'\b[A-Z]{2,}\b')
-        defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
-        # Predefined acronyms
-        defined_acronyms.add("14 CFR")
         for paragraph in doc:
-            # Check for definitions first
             defined_matches = defined_pattern.findall(paragraph)
             for full_term, acronym in defined_matches:
                 defined_acronyms.add(acronym)
-                # If this was previously marked as undefined, remove it since we found its definition
                 if acronym in first_occurrences:
                     del first_occurrences[acronym]
-            # Check for acronyms in the paragraph
-            usage_matches = acronym_pattern.findall(paragraph)
-            for acronym in usage_matches:
-                if acronym not in defined_acronyms:
-                    # Only process if we haven't seen this acronym before
                     if acronym not in first_occurrences:
-                        # Find the sentence containing the first undefined acronym
                         sentences = re.split(r'(?<=[.!?])\s+', paragraph)
                         for sentence in sentences:
                             if acronym in sentence:
-                                first_occurrences[acronym] = {
-                                    'acronym': acronym,
-                                    'sentence': sentence.strip()
-                                }
                                 break
-        # Convert first occurrences to list of issues
         undefined_acronyms = list(first_occurrences.values())
         success = len(undefined_acronyms) == 0
         issues = undefined_acronyms if not success else []
-        return DocumentCheckResult(success=success, issues=issues)
     @profile_performance
     def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
-        """
-        Check document terminology for:
-        1. Legal reference formatting and preferred terms
-        2. Prohibited phrases and constructions
-        """
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
-        # Dictionary of terms that should be replaced with preferred alternatives
         term_replacements = {
             r'\bUSC\b': 'U.S.C.',
             r'\bCFR Part\b': 'CFR part',
@@ -368,11 +376,10 @@ class FAADocumentChecker(DocumentChecker):
             r'\bflight crew\b': 'flightcrew'
         }
-        # Prohibited phrases that should be flagged
         prohibited_phrases = [
             r'\babove\b',
             r'\bbelow\b',
-            r'(?:^|(?<=[.!?]\s))There\s+(?:is|are)\b'  # Matches 'There is/are' at start of sentences
         ]
         issues = []
@@ -380,7 +387,6 @@ class FAADocumentChecker(DocumentChecker):
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
-                # Check for incorrect terms that need replacement
                 for incorrect_pattern, correct_term in term_replacements.items():
                     matches = re.finditer(incorrect_pattern, sentence)
                     for match in matches:
@@ -392,7 +398,6 @@ class FAADocumentChecker(DocumentChecker):
                             'sentence': sentence.strip()
                         })
-                # Check for prohibited phrases
                 for phrase_pattern in prohibited_phrases:
                     match = re.search(phrase_pattern, sentence, re.IGNORECASE)
                     if match:
@@ -402,65 +407,46 @@ class FAADocumentChecker(DocumentChecker):
                             'sentence': sentence.strip()
                         })
-        success = len(issues) == 0
-        return DocumentCheckResult(success=success, issues=issues)
     @profile_performance
     def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
-        """Check for various section symbol (§) usage issues."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         issues = []
-        # Patterns to identify issues
         sentences_starting_with_section_symbol = []
         incorrect_14_CFR_section_symbol_usage = []
         single_section_symbol_multiple_sections = []
         missing_section_symbol_in_multiple_sections = []
-        # Pattern to find '14 CFR §25.25'
         pattern_14_CFR_section = re.compile(r'\b14 CFR §\s*\d+\.\d+\b')
-        # Patterns for multiple sections with single '§'
         pattern_single_section_symbol_and = re.compile(r'§\s*\d+\.\d+\s+and\s+\d+\.\d+')
         pattern_single_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+\d+\.\d+')
         pattern_single_section_symbol_through = re.compile(r'§\s*\d+\.\d+\s+through\s+\d+\.\d+')
-        # Pattern for missing '§' before subsequent sections with 'or'
         pattern_missing_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+§?\s*\d+\.\d+')
         for paragraph in doc:
-            # Check for sentences starting with '§'
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
                 if sentence.strip().startswith('§'):
                     sentences_starting_with_section_symbol.append(sentence.strip())
-            # Check for '14 CFR §25.25' usage
             matches_14_CFR = pattern_14_CFR_section.findall(paragraph)
-            for match in matches_14_CFR:
-                incorrect_14_CFR_section_symbol_usage.append(match)
-            # Check for single '§' with multiple sections using 'and'
             matches_and = pattern_single_section_symbol_and.findall(paragraph)
-            for match in matches_and:
-                single_section_symbol_multiple_sections.append(match)
-            # Check for single '§' with multiple sections using 'or'
             matches_or = pattern_single_section_symbol_or.findall(paragraph)
-            for match in matches_or:
-                single_section_symbol_multiple_sections.append(match)
-            # Check for single '§' with multiple sections using 'through'
             matches_through = pattern_single_section_symbol_through.findall(paragraph)
-            for match in matches_through:
-                single_section_symbol_multiple_sections.append(match)
-            # Check for missing '§' before subsequent sections with 'or'
             matches_missing_or = pattern_missing_section_symbol_or.findall(paragraph)
-            for match in matches_missing_or:
-                missing_section_symbol_in_multiple_sections.append(match)
         if sentences_starting_with_section_symbol:
             issues.append({
@@ -483,17 +469,14 @@ class FAADocumentChecker(DocumentChecker):
                 'matches': missing_section_symbol_in_multiple_sections
             })
-        success = len(issues) == 0
-        return DocumentCheckResult(success=success, issues=issues)
     @profile_performance
     def caption_check(self, doc: List[str], doc_type: str, caption_type: str) -> DocumentCheckResult:
-        """Check for correctly formatted captions (Table or Figure)."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
-        # Determine the caption pattern based on document type
         if doc_type in ["Advisory Circular", "Order"]:
             caption_pattern = re.compile(rf'^{caption_type}\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
             correct_format = f"{caption_type} X-Y"
@@ -505,18 +488,15 @@ class FAADocumentChecker(DocumentChecker):
         in_toc = False
         for paragraph in doc:
-            # Check for start or end of Table of Contents (TOC)
             if "Table of Contents" in paragraph or "Contents" in paragraph:
                 in_toc = True
                 continue
             elif in_toc and paragraph.strip() == "":
-                in_toc = False  # Assume blank line marks the end of TOC
-            # If within TOC, skip this paragraph
             if in_toc:
                 continue
-            # Only check paragraphs that start with "Table" or "Figure" for proper caption format
             paragraph_strip = paragraph.strip()
             if paragraph_strip.lower().startswith(caption_type.lower()):
                 if not caption_pattern.match(paragraph_strip):
@@ -525,24 +505,17 @@ class FAADocumentChecker(DocumentChecker):
                         'correct_format': correct_format
                     })
-        success = len(incorrect_captions) == 0
-        return DocumentCheckResult(success=success, issues=incorrect_captions)
     @profile_performance
     def table_figure_reference_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
-        """
-        Check for incorrect references to tables and figures in the document.
-        References should be lowercase within sentences and capitalized at sentence start.
-        """
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         incorrect_references = []
-        # Define patterns based on document type
         if doc_type in ["Advisory Circular", "Order"]:
-            # Matches both capitalized and lowercase variations
             table_pattern = r'\b[Tt]able\s+\d+-\d+\b'
             figure_pattern = r'\b[Ff]igure\s+\d+-\d+\b'
             correct_mid_table_format = "table X-Y"
@@ -562,11 +535,9 @@ class FAADocumentChecker(DocumentChecker):
         for paragraph in doc:
             paragraph_strip = paragraph.strip()
-            # Exclude captions
             starts_with_table_or_figure = paragraph_strip.lower().startswith('table') or paragraph_strip.lower().startswith('figure')
             if not starts_with_table_or_figure:
-                # Split into sentences while preserving the original text
                 sentences = re.split(r'(?<=[.!?])\s+', paragraph)
                 for sentence in sentences:
@@ -576,13 +547,9 @@ class FAADocumentChecker(DocumentChecker):
                     matches = table_ref_pattern.finditer(sentence)
                     for match in matches:
                         ref = match.group()
-                        # Get the text before the reference
                         text_before = sentence[:match.start()].strip()
-                        # Determine if reference is at start of sentence
                         is_sentence_start = text_before == ""
-                        # Check if capitalization is correct
                         if is_sentence_start and not ref.startswith('Table'):
                             incorrect_references.append({
                                 'incorrect_ref': ref,
@@ -602,13 +569,9 @@ class FAADocumentChecker(DocumentChecker):
                     matches = figure_ref_pattern.finditer(sentence)
                     for match in matches:
                         ref = match.group()
-                        # Get the text before the reference
                         text_before = sentence[:match.start()].strip()
-                        # Determine if reference is at start of sentence
                         is_sentence_start = text_before == ""
-                        # Check if capitalization is correct
                         if is_sentence_start and not ref.startswith('Figure'):
                             incorrect_references.append({
                                 'incorrect_ref': ref,
@@ -624,14 +587,12 @@ class FAADocumentChecker(DocumentChecker):
                                 'issue': "Figure reference within sentence should be lowercase"
                             })
-        success = len(incorrect_references) == 0
-        return DocumentCheckResult(success=success, issues=incorrect_references)
     @profile_performance
-    def document_title_check(self, doc_path, doc_type: str) -> DocumentCheckResult:
         """Check for correct formatting of document titles."""
         try:
-            # Handle both file paths and BytesIO objects
             if isinstance(doc_path, (str, bytes, io.BytesIO)):
                 doc = Document(doc_path)
             else:
@@ -639,11 +600,8 @@ class FAADocumentChecker(DocumentChecker):
                     success=False,
                     issues=[{'error': 'Invalid document input type'}]
                 )
-            # Rest of the method remains the same
-            incorrect_titles = []
-            # Define formatting rules for different document types
             formatting_rules = {
                 "Advisory Circular": {"italics": True, "quotes": False},
                 "Airworthiness Criteria": {"italics": False, "quotes": True},
@@ -657,25 +615,23 @@ class FAADocumentChecker(DocumentChecker):
                 "Technical Standard Order": {"italics": False, "quotes": True},
                 "Other": {"italics": False, "quotes": False}
             }
             if doc_type not in formatting_rules:
                 self.logger.warning(f"Unsupported document type: {doc_type}. Skipping title check.")
                 return DocumentCheckResult(success=True, issues=[])
             required_format = formatting_rules[doc_type]
             ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')
             for paragraph in doc.paragraphs:
                 text = paragraph.text
                 matches = ac_pattern.finditer(text)
                 for match in matches:
-                    full_match = match.group(0)
                     title_text = match.group(2).strip()
                     title_start = match.start(2)
-                    title_end = match.end(2)
                     title_in_quotes = any(q in title_text for q in ['"', "'", '"', '"', ''', '''])
                     title_is_italicized = False
                     current_pos = 0
                     for run in paragraph.runs:
@@ -686,36 +642,36 @@ class FAADocumentChecker(DocumentChecker):
                             title_is_italicized = run.italic
                             break
                         current_pos += run_length
                     formatting_incorrect = False
                     issue_message = []
                     if required_format["italics"] and not title_is_italicized:
                         formatting_incorrect = True
                         issue_message.append("should be italicized")
                     elif not required_format["italics"] and title_is_italicized:
                         formatting_incorrect = True
                         issue_message.append("should not be italicized")
                     if required_format["quotes"] and not title_in_quotes:
                         formatting_incorrect = True
                         issue_message.append("should be in quotes")
                     elif not required_format["quotes"] and title_in_quotes:
                         formatting_incorrect = True
                         issue_message.append("should not be in quotes")
                     if formatting_incorrect:
                         incorrect_titles.append({
                             'text': title_text,
                             'issue': ', '.join(issue_message),
                             'sentence': text.strip()
                         })
             return DocumentCheckResult(
                 success=len(incorrect_titles) == 0,
                 issues=incorrect_titles
             )
         except Exception as e:
             self.logger.error(f"Error in document_title_check: {e}")
             return DocumentCheckResult(
@@ -732,7 +688,6 @@ class FAADocumentChecker(DocumentChecker):
         incorrect_sentences = []
         for paragraph in doc:
-            # Split the paragraph into sentences based on common sentence-ending punctuation
             sentences = re.split(r'(?<=[.!?]) +', paragraph)
             for sentence in sentences:
                 if sentence.endswith('..'):
@@ -750,13 +705,17 @@ class FAADocumentChecker(DocumentChecker):
         incorrect_spacing = []
-        # Regex patterns to find incorrect spacing
         patterns = [
-            (re.compile(r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE), "Missing space between document type and number"),
-            (re.compile(r'(?<!\s)(§|§§)(\d+\.\d+)', re.IGNORECASE), "Missing space after section symbol (§)"),
-            (re.compile(r'(?<!\s)Part(\d+)', re.IGNORECASE), "Missing space between 'Part' and number"),
-            (re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE), "Missing space before paragraph indication"),
-            (re.compile(r'\s{2,}'), "Double spaces between words")
         ]
         for paragraph in doc:
@@ -784,79 +743,67 @@ class FAADocumentChecker(DocumentChecker):
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
-                # Find definitions like "Federal Aviation Administration (FAA)"
                 defined_matches = re.findall(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)', sentence)
                 for full_term, acronym in defined_matches:
                     if acronym not in abbreviations:
                         abbreviations[acronym] = {"full_term": full_term.strip(), "defined": True}
-                # Check for full term usage after definition
                 for acronym, data in abbreviations.items():
                     full_term = data["full_term"]
                     if full_term in sentence:
-                        # Ignore first usage where it's defined
                         if data["defined"]:
-                            data["defined"] = False  # Mark it as now defined
                         else:
-                            # Only flag subsequent occurrences
                             issues.append({
                                 'full_term': full_term,
                                 'acronym': acronym,
                                 'sentence': sentence.strip()
                             })
-        success = len(issues) == 0
-        return DocumentCheckResult(success=success, issues=issues)
     @profile_performance
     def check_date_formats(self, doc: List[str]) -> DocumentCheckResult:
-        """Check for inconsistent date formats while ignoring aviation reference numbers."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         date_issues = []
-        # Patterns to ignore (aviation references)
         ignore_patterns = [
-            r'\bAD \d{4}-\d{2}-\d{2}\b',  # Airworthiness Directive references
-            r'\bSWPM \d{2}-\d{2}-\d{2}\b',  # Standard Wiring Practices Manual references
-            r'\bAMM \d{2}-\d{2}-\d{2}\b',   # Aircraft Maintenance Manual references
-            r'\bSOPM \d{2}-\d{2}-\d{2}\b',  # Standard Operating Procedure references
-            r'\b[A-Z]{2,4} \d{2}-\d{2}-\d{2}\b'  # Generic manual reference pattern
         ]
-        # Combine ignore patterns into one
         ignore_regex = '|'.join(ignore_patterns)
         ignore_pattern = re.compile(ignore_regex)
-        # Correct date pattern: 'Month Day, Year' e.g., 'January 1, 2020'
         correct_date_pattern = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
-        # Incorrect date patterns
         date_patterns = [
-            (re.compile(r'(?<![\w/-])\d{1,2}/\d{1,2}/\d{2,4}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'MM/DD/YYYY'"),
-            (re.compile(r'(?<![\w/-])\d{1,2}-\d{1,2}-\d{2,4}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'MM-DD-YYYY'"),
-            (re.compile(r'(?<![\w/-])\d{4}-\d{1,2}-\d{1,2}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'YYYY-MM-DD'")
         ]
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
-                # First, identify and temporarily remove text that should be ignored
                 ignored_matches = list(ignore_pattern.finditer(sentence))
                 working_sentence = sentence
-                # Replace ignored patterns with placeholders
                 for match in reversed(ignored_matches):
                     start, end = match.span()
                     working_sentence = working_sentence[:start] + 'X' * (end - start) + working_sentence[end:]
-                # Now check for date patterns in the modified sentence
                 for pattern, issue in date_patterns:
                     matches = pattern.finditer(working_sentence)
                     for match in matches:
-                        # Get the original text from the match position
                         original_date = sentence[match.start():match.end()]
                         date_issues.append({
                             'date': original_date,
@@ -864,8 +811,7 @@ class FAADocumentChecker(DocumentChecker):
                             'sentence': sentence.strip()
                         })
-        success = len(date_issues) == 0
-        return DocumentCheckResult(success=success, issues=date_issues)
     @profile_performance
     def check_placeholders(self, doc: List[str]) -> DocumentCheckResult:
@@ -890,30 +836,18 @@ class FAADocumentChecker(DocumentChecker):
                             'sentence': sentence.strip()
                         })
-        success = len(issues) == 0
-        return DocumentCheckResult(success=success, issues=issues)
     def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
-        """
-        Run all checks on the document.
-        Args:
-            doc_path (str): Path to the document.
-            doc_type (str): Type of the document.
-            template_type (str, optional): Template type, if applicable.
-        Returns:
-            Dict[str, DocumentCheckResult]: Dictionary of check names to results.
-        """
         # Read the document
         doc = self.extract_paragraphs(doc_path)
-        # Retrieve any specific flags
         checks_config = self.config_manager.config['document_types'].get(doc_type, {})
         skip_title_check = checks_config.get('skip_title_check', False)
-        # Run checks
         results = {}
         results['heading_title_check'] = self.heading_title_check(doc, doc_type)
         results['heading_title_period_check'] = self.heading_title_period_check(doc, doc_type)
@@ -935,18 +869,10 @@ class FAADocumentChecker(DocumentChecker):
         return results
-@dataclass
-class DocumentCheckResult:
-    """Structured result for document checks."""
-    success: bool
-    issues: List[Dict[str, Any]]
-    details: Optional[Dict[str, Any]] = None
-def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
-    """Format check results into a Markdown string for display."""
     output = []
-    # Add header with timestamp
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     output.extend([
         f"# Document Check Results - {current_time}",
@@ -954,7 +880,6 @@ def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str)
         "---\n"
     ])
-    # Count issues
     total_issues = sum(1 for r in results.values() if not r.success)
     if total_issues == 0:
@@ -963,73 +888,28 @@ def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str)
     output.append(f"❗ Found issues in {total_issues} check categories\n")
-    # Define check categories and their display names
     check_categories = {
-        'heading_title_check': {
-            'title': '📋 Required Headings',
-            'priority': 1
-        },
-        'heading_title_period_check': {
-            'title': '🔍 Heading Period Usage',
-            'priority': 1
-        },
-        'acronym_check': {
-            'title': '📝 Acronym Definitions',
-            'priority': 2
-        },
-        'terminology_check': {
-            'title': '📖 Terminology Usage',
-            'priority': 2
-        },
-        'section_symbol_usage_check': {
-            'title': '§ Section Symbol Usage',
-            'priority': 2
-        },
-        'caption_check_table': {
-            'title': '📊 Table Captions',
-            'priority': 3
-        },
-        'caption_check_figure': {
-            'title': '🖼️ Figure Captions',
-            'priority': 3
-        },
-        'table_figure_reference_check': {
-            'title': '🔗 Table/Figure References',
-            'priority': 3
-        },
-        'document_title_check': {
-            'title': '📑 Document Title Format',
-            'priority': 1
-        },
-        'double_period_check': {
-            'title': '⚡ Double Periods',
-            'priority': 4
-        },
-        'spacing_check': {
-            'title': '⌨️ Spacing Issues',
-            'priority': 4
-        },
-        'abbreviation_usage_check': {
-            'title': '📎 Abbreviation Usage',
-            'priority': 3
-        },
-        'date_formats_check': {
-            'title': '📅 Date Formats',
-            'priority': 3
-        },
-        'placeholders_check': {
-            'title': '🚩 Placeholder Content',
-            'priority': 1
-        }
     }
-    # Sort checks by priority
     sorted_checks = sorted(
         [(name, result) for name, result in results.items()],
         key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
     )
-    # Process each check result
     for check_name, result in sorted_checks:
         if not result.success:
             category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
@@ -1037,9 +917,8 @@ def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str)
             output.append(f"### {category['title']}")
             if isinstance(result.issues, list):
-                for issue in result.issues[:5]:  # Show first 5 issues
                     if isinstance(issue, dict):
-                        # Format dictionary issues
                         for key, value in issue.items():
                             if isinstance(value, list):
                                 output.extend([f"- {item}" for item in value])
@@ -1048,13 +927,11 @@ def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str)
                     else:
                         output.append(f"- {issue}")
-                # Show count of remaining issues
                 if len(result.issues) > 5:
                     output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
-            output.append("")  # Add spacing between sections
-    # Add summary and recommendations
     output.extend([
         "## 📋 Summary and Recommendations",
         "",
@@ -1073,206 +950,299 @@ def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str)
     return "\n".join(output)
-def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
-    """Process document and run all checks."""
     # Old-revision (removed) handler that the "Check Document" button calls.
     # file_obj: the uploaded document; raw bytes are wrapped in BytesIO below.
     # doc_type / template_type: passed through unchanged to run_all_checks.
     # Returns the Markdown text from format_check_results, or an error
     # message string if anything raises; exceptions are not propagated.
-    try:
-        # Initialize checker
-        checker = FAADocumentChecker()
-        # Convert file object to BytesIO if needed
-        if isinstance(file_obj, bytes):
-            file_obj = io.BytesIO(file_obj)
-        # Extract paragraphs
-        doc = Document(file_obj)
         # NOTE(review): 'paragraphs' is not used again in this function; the
         # parse above presumably only validates the upload - confirm.
-        paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
-        # Rewind file object
         # Document(file_obj) has already read the stream, and run_all_checks
         # is handed the same object, so it must start from offset 0 again.
-        file_obj.seek(0)
-        # Run all checks
-        results = checker.run_all_checks(file_obj, doc_type, template_type)
-        # Format results for display
-        return format_check_results(results, doc_type)
-    except Exception as e:
         # Catch-all boundary: log, print the traceback, and hand the error
         # back as display text instead of raising.
-        logging.error(f"Error processing document: {str(e)}")
-        traceback.print_exc()
-        return f"❌ Error processing document: {str(e)}\n\nPlease ensure the file is a valid .docx document and try again."
-def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
-    """Format the results for display in Gradio."""
     # results: mapping of check name -> result object; this function reads
     # its .success, .issues and .details attributes.
     # doc_type: accepted but not referenced anywhere in this body.
     # Returns a single Markdown string with one "##" section per check, in
     # the iteration order of `results`.
-    output = ["# Document Check Results\n"]
-    # Map check names to display titles
-    check_titles = {
-        'heading_check': "Required Headings Check",
-        'heading_period_check': "Heading Period Check",
-        'acronym_check': "Acronym Check",
-        'terminology_check': "Terminology Check",
-        'section_symbol_check': "Section Symbol Usage",
-        'table_caption_check': "Table Caption Format",
-        'figure_caption_check': "Figure Caption Format",
-        'references_check': "Table and Figure References",
-        'title_check': "Document Title Style",
-        'double_period_check': "Double Period Check",
-        'spacing_check': "Spacing Check",
-        'abbreviation_check': "Abbreviation Usage",
-        'date_check': "Date Format Check",
-        'placeholder_check': "Placeholder Check"
-    }
-    for check_name, result in results.items():
         # Unknown check names fall back to a title-cased version of the key.
-        title = check_titles.get(check_name, check_name.replace('_', ' ').title())
-        output.append(f"## {title}")
-        if result.success:
-            output.append("✅ All checks passed.\n")
-        else:
-            output.append("❌ Issues found:")
-            for issue in result.issues:
                 # Dict issues: a list value becomes one bullet per item (the
                 # key is dropped); any other value is shown as "key: value".
-                if isinstance(issue, dict):
-                    for key, value in issue.items():
-                        if isinstance(value, list):
-                            for item in value:
-                                output.append(f"- {item}")
-                        else:
-                            output.append(f"- {key}: {value}")
-                else:
-                    output.append(f"- {issue}")
-            output.append("")
         # Details are emitted for passing and failing checks alike.
-        if result.details:
-            output.append("Additional Details:")
-            for key, value in result.details.items():
-                if isinstance(value, list):
-                    output.append(f"- {key}:")
-                    for item in value:
-                        output.append(f"  - {item}")
-                else:
-                    output.append(f"- {key}: {value}")
-            output.append("")
-    return "\n".join(output)
-def create_interface():
-    """Create and configure the Gradio interface."""
     # Old-revision (removed) UI builder. Assembles a gr.Blocks app with an
     # upload/options column and a results column, wires the two event
     # handlers, and returns the Blocks object without launching it.
-    document_types = [
-        "Advisory Circular",
-        "Airworthiness Criteria",
-        "Deviation Memo",
-        "Exemption",
-        "Federal Register Notice",
-        "Order",
-        "Policy Statement",
-        "Rule",
-        "Special Condition",
-        "Technical Standard Order",
-        "Other"
-    ]
-    template_types = ["Short AC template AC", "Long AC template AC"]
-    # Custom CSS for better styling
-    custom_css = """
-    .gradio-container {
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-    }
-    .container {
-        max-width: 900px;
-        margin: auto;
-    }
-    .alert {
-        padding: 1rem;
-        margin-bottom: 1rem;
-        border-radius: 0.5rem;
-        background-color: #f8f9fa;
-        border: 1px solid #dee2e6;
-    }
-    """
-    with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
-        gr.Markdown(
-            """
-            # 📑 Document Checker Tool
-            ### Purpose
-            This tool checks Word documents for compliance with U.S. federal documentation standards.
-            ### How to Use
-            1. Upload your Word document (.docx format)
-            2. Select the document type
-            3. Click "Check Document"
-            > **Note:** Please ensure your document is clean (no track changes or comments)
-            """
-        )
-        with gr.Row():
-            with gr.Column(scale=1):
-                file_input = gr.File(
-                    label="📎 Upload Word Document (.docx)",
-                    file_types=[".docx"],
-                    type="binary"
-                )
-                doc_type = gr.Dropdown(
-                    choices=document_types,
-                    label="📋 Document Type",
-                    value="Advisory Circular",
-                    info="Select the type of document you're checking"
-                )
                 # NOTE(review): created with visible=False although the
                 # dropdown defaults to "Advisory Circular"; the radio is only
                 # shown once doc_type fires a change event - confirm intended.
-                template_type = gr.Radio(
-                    choices=template_types,
-                    label="📑 Template Type",
-                    visible=False,
-                    info="Only applicable for Advisory Circulars"
-                )
-                submit_btn = gr.Button(
-                    "🔍 Check Document",
-                    variant="primary"
-                )
-            with gr.Column(scale=2):
                 # NOTE(review): custom_css above defines no ".results-panel"
                 # rule, so this class has no styling from this function.
-                results = gr.Markdown(
-                    label="Check Results",
-                    value="Results will appear here after processing...",
-                    elem_classes=["results-panel"]
-                )
-        # Update template type visibility based on document type
-        def update_template_visibility(doc_type):
-            return gr.update(visible=doc_type == "Advisory Circular")
-        doc_type.change(
-            fn=update_template_visibility,
-            inputs=[doc_type],
-            outputs=[template_type]
-        )
-        # Handle document processing
         # process_document returns a Markdown string (report or error text),
         # which is written into the results panel.
-        submit_btn.click(
-            fn=process_document,
-            inputs=[file_input, doc_type, template_type],
-            outputs=[results]
-        )
-        gr.Markdown(
-            """
-            ### 📌 Important Notes
-            - This tool is in development; you may encounter false positives
-            - For questions or feedback, contact Eric Putnam
-            - Results are not stored or saved
-            """
         )
-    return demo
-# Initialize and launch the interface
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()

 import os
 import traceback
 from datetime import datetime
+import gc
+# Import your document checker classes
+from main import FAADocumentChecker, DocumentCheckResult
+# Core data structures and utilities
 @dataclass
 class DocumentCheckResult:
     """Structured result for document checks."""
         start_time = time.time()
         result = func(*args, **kwargs)
         end_time = time.time()
         logger = args[0].logger if hasattr(args[0], 'logger') else logging.getLogger(__name__)
         logger.info(
             f"Performance: {func.__name__} took {end_time - start_time:.4f} seconds"
 class DocumentCheckerConfig:
     """Configuration management for document checks."""
     def __init__(self, config_path: Optional[str] = None):
         self.config = self._load_config(config_path)
         self.logger = self._setup_logger()
                     ],
                     "skip_title_check": False
                 },
+                "Airworthiness Criteria": {
+                    "required_headings": [],
+                    "skip_title_check": True
+                },
+                "Deviation Memo": {
+                    "required_headings": [],
+                    "skip_title_check": True
+                },
+                "Exemption": {
+                    "required_headings": [],
+                    "skip_title_check": True
+                },
+                "Rule": {
+                    "required_headings": [],
+                    "skip_title_check": True
+                },
+                "Special Condition": {
+                    "required_headings": [],
+                    "skip_title_check": True
+                },
                 "Other": {
                     "required_headings": [],
                     "skip_title_check": True
             return []
 class FAADocumentChecker(DocumentChecker):
+    """Main document checker implementation with all check methods."""
     def __init__(self, config_path: Optional[str] = None):
         super().__init__(config_path)
+    @profile_performance
     def heading_title_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
         """Check headings for a specific document type."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        checks = self.config_manager.config['document_types'].get(doc_type, {})
         required_headings = checks.get('required_headings', [])
         headings_found = []
         required_headings_set = set(required_headings)
         for para in doc:
             para_strip = para.strip()
             if para_strip in required_headings_set:
                 headings_found.append(para_strip)
         all_headings_present = set(headings_found) == required_headings_set
         issues = []
         if not all_headings_present:
             missing_headings = required_headings_set - set(headings_found)
                 'required_headings': required_headings
             }
         )
     @profile_performance
     def heading_title_period_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
+        """Check if headings end with periods according to document type requirements."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         period_required = {
             "Advisory Circular": True,
             "Airworthiness Criteria": False,
             "Other": False
         }
         should_have_period = period_required.get(doc_type, False)
         checks = self.config_manager.config['document_types'].get(doc_type, {})
         required_headings = checks.get('required_headings', [])
         required_headings_set = set(required_headings)
         for para in doc:
             para_strip = para.strip()
             if para_strip in required_headings_set:
                 ends_with_period = para_strip.endswith('.')
                         'needs_period': should_have_period
                     })
         return DocumentCheckResult(
+            success=len(issues) == 0,
+            issues=issues,
+            details={
+                'document_type': doc_type,
+                'periods_required': should_have_period,
+                'checked_headings': checked_headings
+            }
+        )
     @profile_performance
     def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
+        """Check if acronyms are defined at their first use."""
         # doc: list of strings, iterated below one paragraph at a time.
         # Returns success=True with an empty issue list when nothing was
         # flagged; otherwise issues holds one {'acronym', 'sentence'} dict
         # per flagged acronym.
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         defined_acronyms = set()
         # acronym -> record of the first sentence where it was used while
         # still undefined; whatever remains at the end is reported.
+        first_occurrences = {}
         # All-caps words that are never reported as acronyms and that mark
         # an upper-case paragraph or sentence as a heading to be skipped.
+        heading_words = {
+            'INFORMATION', 'GENERAL', 'SUMMARY', 'INTRODUCTION', 'BACKGROUND',
+            'DISCUSSION', 'CONCLUSION', 'APPENDIX', 'CHAPTER', 'SECTION',
+            'PURPOSE', 'APPLICABILITY', 'CANCELLATION', 'DEFINITION', 'REQUIREMENTS',
+            'AUTHORITY', 'POLICY', 'SCOPE', 'RELATED', 'MATERIAL', 'DISTRIBUTION',
+            'EXPLANATION', 'PROCEDURES', 'NOTE', 'WARNING', 'CAUTION', 'EXCEPTION',
+            'GROUPS', 'PARTS', 'TABLE', 'FIGURE', 'REFERENCES', 'DEFINITIONS'
+        }
         # Seeded into defined_acronyms, so these are never reported.
+        predefined_acronyms = {
+            'CFR', 'U.S.', 'USA', 'US', 'U.S.C', 'e.g.', 'i.e.', 'FAQ', 'No.', 'ZIP', 'PDF', 'SSN',
+            'DC', 'MA', 'WA', 'TX', 'MO'
+        }
+        defined_acronyms.update(predefined_acronyms)
         # Definition form "<text> (ABC)": group 1 is the preceding text,
         # group 2 the parenthesized run of two or more capitals.
+        defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
         # Usage form: a stand-alone run of two or more capitals, rejected when
         # followed (after optional whitespace) by ':' or '.'.
+        acronym_pattern = re.compile(r'\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
         for paragraph in doc:
             # Skip paragraphs whose every word passes isupper() and that
             # contain at least one heading word.
+            words = paragraph.strip().split()
+            if all(word.isupper() for word in words) and any(word in heading_words for word in words):
+                continue
             # Definitions are collected before usages, so a definition
             # anywhere in the paragraph covers every usage in it; it also
             # clears an undefined-use record left by an earlier paragraph.
             defined_matches = defined_pattern.findall(paragraph)
             for full_term, acronym in defined_matches:
                 defined_acronyms.add(acronym)
                 if acronym in first_occurrences:
                     del first_occurrences[acronym]
+            usage_matches = acronym_pattern.finditer(paragraph)
+            for match in usage_matches:
+                acronym = match.group()
                 # Candidate must be undefined, not a heading word, made of
                 # letters only, and at most 10 characters long.
+                if (acronym not in defined_acronyms and
+                    acronym not in heading_words and
+                    not any(not c.isalpha() for c in acronym) and
+                    len(acronym) <= 10):
                     if acronym not in first_occurrences:
                         sentences = re.split(r'(?<=[.!?])\s+', paragraph)
                         for sentence in sentences:
                             if acronym in sentence:
                                 # Upper-case sentences containing a heading
                                 # word are not recorded.
+                                if not (sentence.isupper() and any(word in heading_words for word in sentence.split())):
+                                    first_occurrences[acronym] = {
+                                        'acronym': acronym,
+                                        'sentence': sentence.strip()
+                                    }
                                 # Stop at the first sentence containing the
                                 # acronym, whether or not it was recorded.
                                 break
         undefined_acronyms = list(first_occurrences.values())
         success = len(undefined_acronyms) == 0
         issues = undefined_acronyms if not success else []
+        return DocumentCheckResult(success=success, issues=issues)
     @profile_performance
     def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
+        """Check document terminology for consistency and preferred terms."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         term_replacements = {
             r'\bUSC\b': 'U.S.C.',
             r'\bCFR Part\b': 'CFR part',
             r'\bflight crew\b': 'flightcrew'
         }
         prohibited_phrases = [
             r'\babove\b',
             r'\bbelow\b',
+            r'(?:^|(?<=[.!?]\s))There\s+(?:is|are)\b'
         ]
         issues = []
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
                 for incorrect_pattern, correct_term in term_replacements.items():
                     matches = re.finditer(incorrect_pattern, sentence)
                     for match in matches:
                             'sentence': sentence.strip()
                         })
                 for phrase_pattern in prohibited_phrases:
                     match = re.search(phrase_pattern, sentence, re.IGNORECASE)
                     if match:
                             'sentence': sentence.strip()
                         })
+        return DocumentCheckResult(success=len(issues) == 0, issues=issues)
     @profile_performance
     def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for correct usage of section symbols (§)."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         issues = []
         sentences_starting_with_section_symbol = []
         incorrect_14_CFR_section_symbol_usage = []
         single_section_symbol_multiple_sections = []
         missing_section_symbol_in_multiple_sections = []
         pattern_14_CFR_section = re.compile(r'\b14 CFR §\s*\d+\.\d+\b')
         pattern_single_section_symbol_and = re.compile(r'§\s*\d+\.\d+\s+and\s+\d+\.\d+')
         pattern_single_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+\d+\.\d+')
         pattern_single_section_symbol_through = re.compile(r'§\s*\d+\.\d+\s+through\s+\d+\.\d+')
         pattern_missing_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+§?\s*\d+\.\d+')
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
                 if sentence.strip().startswith('§'):
                     sentences_starting_with_section_symbol.append(sentence.strip())
             matches_14_CFR = pattern_14_CFR_section.findall(paragraph)
+            incorrect_14_CFR_section_symbol_usage.extend(matches_14_CFR)
             matches_and = pattern_single_section_symbol_and.findall(paragraph)
+            single_section_symbol_multiple_sections.extend(matches_and)
             matches_or = pattern_single_section_symbol_or.findall(paragraph)
+            single_section_symbol_multiple_sections.extend(matches_or)
             matches_through = pattern_single_section_symbol_through.findall(paragraph)
+            single_section_symbol_multiple_sections.extend(matches_through)
             matches_missing_or = pattern_missing_section_symbol_or.findall(paragraph)
+            missing_section_symbol_in_multiple_sections.extend(matches_missing_or)
         if sentences_starting_with_section_symbol:
             issues.append({
                 'matches': missing_section_symbol_in_multiple_sections
             })
+        return DocumentCheckResult(success=len(issues) == 0, issues=issues)
     @profile_performance
     def caption_check(self, doc: List[str], doc_type: str, caption_type: str) -> DocumentCheckResult:
+        """Check for correctly formatted captions."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         if doc_type in ["Advisory Circular", "Order"]:
             caption_pattern = re.compile(rf'^{caption_type}\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
             correct_format = f"{caption_type} X-Y"
         in_toc = False
         for paragraph in doc:
             if "Table of Contents" in paragraph or "Contents" in paragraph:
                 in_toc = True
                 continue
             elif in_toc and paragraph.strip() == "":
+                in_toc = False
             if in_toc:
                 continue
             paragraph_strip = paragraph.strip()
             if paragraph_strip.lower().startswith(caption_type.lower()):
                 if not caption_pattern.match(paragraph_strip):
                         'correct_format': correct_format
                     })
+        return DocumentCheckResult(success=len(incorrect_captions) == 0, issues=incorrect_captions)
     @profile_performance
     def table_figure_reference_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
+        """Check for correct references to tables and figures."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         incorrect_references = []
         if doc_type in ["Advisory Circular", "Order"]:
             table_pattern = r'\b[Tt]able\s+\d+-\d+\b'
             figure_pattern = r'\b[Ff]igure\s+\d+-\d+\b'
             correct_mid_table_format = "table X-Y"
         for paragraph in doc:
             paragraph_strip = paragraph.strip()
             starts_with_table_or_figure = paragraph_strip.lower().startswith('table') or paragraph_strip.lower().startswith('figure')
             if not starts_with_table_or_figure:
                 sentences = re.split(r'(?<=[.!?])\s+', paragraph)
                 for sentence in sentences:
                     matches = table_ref_pattern.finditer(sentence)
                     for match in matches:
                         ref = match.group()
                         text_before = sentence[:match.start()].strip()
                         is_sentence_start = text_before == ""
                         if is_sentence_start and not ref.startswith('Table'):
                             incorrect_references.append({
                                 'incorrect_ref': ref,
                     matches = figure_ref_pattern.finditer(sentence)
                     for match in matches:
                         ref = match.group()
                         text_before = sentence[:match.start()].strip()
                         is_sentence_start = text_before == ""
                         if is_sentence_start and not ref.startswith('Figure'):
                             incorrect_references.append({
                                 'incorrect_ref': ref,
                                 'issue': "Figure reference within sentence should be lowercase"
                             })
+        return DocumentCheckResult(success=len(incorrect_references) == 0, issues=incorrect_references)
     @profile_performance
+    def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
         """Check for correct formatting of document titles."""
         try:
             if isinstance(doc_path, (str, bytes, io.BytesIO)):
                 doc = Document(doc_path)
             else:
                     success=False,
                     issues=[{'error': 'Invalid document input type'}]
                 )
+            incorrect_titles = []
             formatting_rules = {
                 "Advisory Circular": {"italics": True, "quotes": False},
                 "Airworthiness Criteria": {"italics": False, "quotes": True},
                 "Technical Standard Order": {"italics": False, "quotes": True},
                 "Other": {"italics": False, "quotes": False}
             }
             if doc_type not in formatting_rules:
                 self.logger.warning(f"Unsupported document type: {doc_type}. Skipping title check.")
                 return DocumentCheckResult(success=True, issues=[])
             required_format = formatting_rules[doc_type]
             ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')
             for paragraph in doc.paragraphs:
                 text = paragraph.text
                 matches = ac_pattern.finditer(text)
                 for match in matches:
                     title_text = match.group(2).strip()
                     title_start = match.start(2)
                     title_in_quotes = any(q in title_text for q in ['"', "'", '"', '"', ''', '''])
                     title_is_italicized = False
                     current_pos = 0
                     for run in paragraph.runs:
                             title_is_italicized = run.italic
                             break
                         current_pos += run_length
                     formatting_incorrect = False
                     issue_message = []
                     if required_format["italics"] and not title_is_italicized:
                         formatting_incorrect = True
                         issue_message.append("should be italicized")
                     elif not required_format["italics"] and title_is_italicized:
                         formatting_incorrect = True
                         issue_message.append("should not be italicized")
                     if required_format["quotes"] and not title_in_quotes:
                         formatting_incorrect = True
                         issue_message.append("should be in quotes")
                     elif not required_format["quotes"] and title_in_quotes:
                         formatting_incorrect = True
                         issue_message.append("should not be in quotes")
                     if formatting_incorrect:
                         incorrect_titles.append({
                             'text': title_text,
                             'issue': ', '.join(issue_message),
                             'sentence': text.strip()
                         })
             return DocumentCheckResult(
                 success=len(incorrect_titles) == 0,
                 issues=incorrect_titles
             )
         except Exception as e:
             self.logger.error(f"Error in document_title_check: {e}")
             return DocumentCheckResult(
         incorrect_sentences = []
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?]) +', paragraph)
             for sentence in sentences:
                 if sentence.endswith('..'):
         incorrect_spacing = []
         patterns = [
+            (re.compile(r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE),
+             "Missing space between document type and number"),
+            (re.compile(r'(?<!\s)(§|§§)(\d+\.\d+)', re.IGNORECASE),
+             "Missing space after section symbol (§)"),
+            (re.compile(r'(?<!\s)Part(\d+)', re.IGNORECASE),
+             "Missing space between 'Part' and number"),
+            (re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE),
+             "Missing space before paragraph indication"),
+            (re.compile(r'\s{2,}'),
+             "Double spaces between words")
         ]
         for paragraph in doc:
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
                 defined_matches = re.findall(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)', sentence)
                 for full_term, acronym in defined_matches:
                     if acronym not in abbreviations:
                         abbreviations[acronym] = {"full_term": full_term.strip(), "defined": True}
                 for acronym, data in abbreviations.items():
                     full_term = data["full_term"]
                     if full_term in sentence:
                         if data["defined"]:
+                            data["defined"] = False
                         else:
                             issues.append({
                                 'full_term': full_term,
                                 'acronym': acronym,
                                 'sentence': sentence.strip()
                             })
+        return DocumentCheckResult(success=len(issues) == 0, issues=issues)
     @profile_performance
     def check_date_formats(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for inconsistent date formats."""
         # doc: list of strings, each split into sentences below.
         # Returns success=True when no numeric-style date was found;
         # otherwise issues holds one dict per match with the matched text
         # ('date') and the sentence it came from ('sentence').
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         date_issues = []
         # Date-shaped text that follows an upper-case prefix such as "AD" or
         # "AMM"; spans matching these are masked before date matching.
         ignore_patterns = [
+            r'\bAD \d{4}-\d{2}-\d{2}\b',
+            r'\bSWPM \d{2}-\d{2}-\d{2}\b',
+            r'\bAMM \d{2}-\d{2}-\d{2}\b',
+            r'\bSOPM \d{2}-\d{2}-\d{2}\b',
+            r'\b[A-Z]{2,4} \d{2}-\d{2}-\d{2}\b'
         ]
         ignore_regex = '|'.join(ignore_patterns)
         ignore_pattern = re.compile(ignore_regex)
         # NOTE(review): correct_date_pattern is not referenced again in the
         # lines of this method visible here - confirm whether it is needed.
         correct_date_pattern = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
         # Each entry pairs a numeric date pattern with its advice message.
         # The lookarounds reject a match that touches a word character,
         # '/' or '-' on either side.
         date_patterns = [
+            (re.compile(r'(?<![\w/-])\d{1,2}/\d{1,2}/\d{2,4}(?![\w/-])'),
+             "Use 'Month Day, Year' format instead of 'MM/DD/YYYY'"),
+            (re.compile(r'(?<![\w/-])\d{1,2}-\d{1,2}-\d{2,4}(?![\w/-])'),
+             "Use 'Month Day, Year' format instead of 'MM-DD-YYYY'"),
+            (re.compile(r'(?<![\w/-])\d{4}-\d{1,2}-\d{1,2}(?![\w/-])'),
+             "Use 'Month Day, Year' format instead of 'YYYY-MM-DD'")
         ]
         for paragraph in doc:
             sentences = re.split(r'(?<=[.!?])\s+', paragraph)
             for sentence in sentences:
                 # Overwrite every ignored span with 'X' characters of equal
                 # length, so offsets found in working_sentence still index
                 # the same text in the original sentence.
                 ignored_matches = list(ignore_pattern.finditer(sentence))
                 working_sentence = sentence
                 for match in reversed(ignored_matches):
                     start, end = match.span()
                     working_sentence = working_sentence[:start] + 'X' * (end - start) + working_sentence[end:]
                 for pattern, issue in date_patterns:
                     matches = pattern.finditer(working_sentence)
                     for match in matches:
                         original_date = sentence[match.start():match.end()]
                         # NOTE(review): the per-pattern message 'issue' is
                         # not added to the dict in the lines visible here -
                         # confirm against the full file.
                         date_issues.append({
                             'date': original_date,
                             'sentence': sentence.strip()
                         })
+        return DocumentCheckResult(success=len(date_issues) == 0, issues=date_issues)
     @profile_performance
     def check_placeholders(self, doc: List[str]) -> DocumentCheckResult:
                             'sentence': sentence.strip()
                         })
+        return DocumentCheckResult(success=len(issues) == 0, issues=issues)
     def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
+        """Run all document checks."""
         # Read the document
         doc = self.extract_paragraphs(doc_path)
+        # Get configuration flags
         checks_config = self.config_manager.config['document_types'].get(doc_type, {})
         skip_title_check = checks_config.get('skip_title_check', False)
+        # Run all checks
         results = {}
         results['heading_title_check'] = self.heading_title_check(doc, doc_type)
         results['heading_title_period_check'] = self.heading_title_period_check(doc, doc_type)
         return results
def format_markdown_results(results: Dict[str, "DocumentCheckResult"], doc_type: str) -> str:
    """Format check results into a Markdown string for Gradio display.

    Args:
        results: Mapping of check name to a result object exposing
            ``success`` (bool) and ``issues`` attributes.
        doc_type: Document type name shown in the report header.

    Returns:
        A Markdown report. When every check succeeded the report ends after
        the success banner; otherwise each failing check gets its own section
        (at most five issues listed) followed by a summary block.
    """
    output = []
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    output.extend([
        f"# Document Check Results - {current_time}",
        f"## Document Type: {doc_type}",
        "---\n"
    ])

    total_issues = sum(1 for r in results.values() if not r.success)
    if total_issues == 0:
        output.append("✅ **All checks passed successfully!**\n")
        return "\n".join(output)
    output.append(f"❗ Found issues in {total_issues} check categories\n")

    check_categories = {
        'heading_title_check': {'title': '📋 Required Headings', 'priority': 1},
        'heading_title_period_check': {'title': '🔍 Heading Period Usage', 'priority': 1},
        'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 2},
        'terminology_check': {'title': '📖 Terminology Usage', 'priority': 2},
        'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
        'caption_check_table': {'title': '📊 Table Captions', 'priority': 3},
        'caption_check_figure': {'title': '🖼️ Figure Captions', 'priority': 3},
        'table_figure_reference_check': {'title': '🔗 Table/Figure References', 'priority': 3},
        'document_title_check': {'title': '📑 Document Title Format', 'priority': 1},
        'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
        'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
        'abbreviation_usage_check': {'title': '📎 Abbreviation Usage', 'priority': 3},
        'date_formats_check': {'title': '📅 Date Formats', 'priority': 3},
        'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 1}
    }

    # Unknown check names sort last (priority 999); sorted() is stable, so
    # checks with equal priority keep their order from ``results``.
    sorted_checks = sorted(
        results.items(),
        key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
    )

    for check_name, result in sorted_checks:
        if result.success:
            continue
        category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
        output.append(f"### {category['title']}")
        if isinstance(result.issues, list):
            # Only the first five issues are listed to keep the report short.
            for issue in result.issues[:5]:
                if isinstance(issue, dict):
                    for key, value in issue.items():
                        if isinstance(value, list):
                            output.extend([f"- {item}" for item in value])
                        else:
                            output.append(f"- {key}: {value}")
                else:
                    output.append(f"- {issue}")
            if len(result.issues) > 5:
                output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
        output.append("")

    output.extend([
        "## 📋 Summary and Recommendations",
        "",
        "### Priority Order for Fixes:",
        "1. 🔴 Critical: Heading formats, required content, and document structure",
        "2. 🟡 Important: Terminology, acronyms, and references",
        "3. 🟢 Standard: Formatting, spacing, and style consistency",
        "",
        "### Next Steps:",
        "1. Address issues in priority order",
        "2. Use search/replace for consistent fixes",
        "3. Re-run checker after making changes",
        "4. Update your document template if needed",
        ""
    ])
    return "\n".join(output)
def format_markdown_results(results: Dict[str, "DocumentCheckResult"], doc_type: str) -> str:
    """Format check results into a Markdown string for Gradio display.

    Args:
        results: Mapping of check name to a result object exposing
            ``success`` (bool) and ``issues`` attributes.
        doc_type: Document type name shown in the report header.

    Returns:
        A Markdown report. When every check succeeded the report ends after
        the success banner; otherwise each failing check gets its own section
        (at most five issues listed) followed by a summary block.
    """
    output = []
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    output.extend([
        f"# Document Check Results - {current_time}",
        f"## Document Type: {doc_type}",
        "---\n"
    ])

    total_issues = sum(1 for r in results.values() if not r.success)
    if total_issues == 0:
        output.append("✅ **All checks passed successfully!**\n")
        return "\n".join(output)
    output.append(f"❗ Found issues in {total_issues} check categories\n")

    check_categories = {
        'heading_title_check': {'title': '📋 Required Headings', 'priority': 1},
        'heading_title_period_check': {'title': '🔍 Heading Period Usage', 'priority': 1},
        'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 2},
        'terminology_check': {'title': '📖 Terminology Usage', 'priority': 2},
        'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
        'caption_check_table': {'title': '📊 Table Captions', 'priority': 3},
        'caption_check_figure': {'title': '🖼️ Figure Captions', 'priority': 3},
        'table_figure_reference_check': {'title': '🔗 Table/Figure References', 'priority': 3},
        'document_title_check': {'title': '📑 Document Title Format', 'priority': 1},
        'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
        'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
        'abbreviation_usage_check': {'title': '📎 Abbreviation Usage', 'priority': 3},
        'date_formats_check': {'title': '📅 Date Formats', 'priority': 3},
        'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 1}
    }

    # Unknown check names sort last (priority 999); sorted() is stable, so
    # checks with equal priority keep their order from ``results``.
    sorted_checks = sorted(
        results.items(),
        key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
    )

    for check_name, result in sorted_checks:
        if result.success:
            continue
        category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
        output.append(f"### {category['title']}")
        if isinstance(result.issues, list):
            # Only the first five issues are listed to keep the report short.
            for issue in result.issues[:5]:
                if isinstance(issue, dict):
                    for key, value in issue.items():
                        if isinstance(value, list):
                            output.extend([f"- {item}" for item in value])
                        else:
                            output.append(f"- {key}: {value}")
                else:
                    output.append(f"- {issue}")
            if len(result.issues) > 5:
                output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
        output.append("")

    output.extend([
        "## 📋 Summary and Recommendations",
        "",
        "### Priority Order for Fixes:",
        "1. 🔴 Critical: Heading formats, required content, and document structure",
        "2. 🟡 Important: Terminology, acronyms, and references",
        "3. 🟢 Standard: Formatting, spacing, and style consistency",
        "",
        "### Next Steps:",
        "1. Address issues in priority order",
        "2. Use search/replace for consistent fixes",
        "3. Re-run checker after making changes",
        "4. Update your document template if needed",
        ""
    ])
    return "\n".join(output)


def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
    """Run all checks on an uploaded document and return a Markdown report.

    Args:
        file_obj: The uploaded document. Raw ``bytes`` are wrapped in an
            ``io.BytesIO``; any other value is handed to the checker as-is.
            ``None`` (nothing uploaded) yields a short prompt to upload a file.
        doc_type: Document type name selected in the UI.
        template_type: Optional template name, forwarded to the checker.

    Returns:
        The Markdown report on success, or a Markdown error message if
        anything raised while checking. This function never raises; it is the
        UI boundary, so every exception is logged and turned into text.
    """
    if file_obj is None:
        # Clicking the button without a file would otherwise surface as a
        # generic processing error from deep inside the checker.
        return "# ❌ No Document Uploaded\nPlease upload a .docx file before running the checks."
    try:
        checker = FAADocumentChecker()
        if isinstance(file_obj, bytes):
            file_obj = io.BytesIO(file_obj)
        results = checker.run_all_checks(file_obj, doc_type, template_type)
        return format_markdown_results(results, doc_type)
    except Exception as e:
        logging.error(f"Error processing document: {str(e)}")
        traceback.print_exc()
        return f"""
        # ❌ Error Processing Document
        **Error Details:** {str(e)}
        Please ensure:
        1. The file is a valid .docx document
        2. The file is not corrupted or password protected
        3. The file is properly formatted
        Try again after checking these issues. If the problem persists, contact support.
        """


def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        The ``gr.Blocks`` application, ready for ``launch()``.
    """
    document_types = [
        "Advisory Circular",
        "Airworthiness Criteria",
        "Deviation Memo",
        "Exemption",
        "Federal Register Notice",
        "Order",
        "Policy Statement",
        "Rule",
        "Special Condition",
        "Technical Standard Order",
        "Other"
    ]
    template_types = ["Short AC template AC", "Long AC template AC"]
    custom_css = """
        .gradio-container {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }
        .container {
            max-width: 900px;
            margin: auto;
        }
        .alert {
            padding: 1rem;
            margin-bottom: 1rem;
            border-radius: 0.5rem;
            background-color: #f8f9fa;
            border: 1px solid #dee2e6;
        }
        .results-panel {
            max-height: 800px;
            overflow-y: auto;
            padding: 1rem;
            background-color: #ffffff;
            border-radius: 0.5rem;
            border: 1px solid #e9ecef;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }
        .info-box {
            background-color: #e7f3ff;
            padding: 1rem;
            border-radius: 0.5rem;
            margin: 1rem 0;
        }
        .warning-box {
            background-color: #fff3cd;
            padding: 1rem;
            border-radius: 0.5rem;
            margin: 1rem 0;
        }
        """
    with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
        gr.Markdown(
            """
                # 📑 Document Checker Tool
                ### Purpose
                This tool checks Word documents for compliance with U.S. federal documentation standards.
                ### Features
                - ✓ Heading format and structure checks
                - ✓ Terminology and style verification
                - ✓ Figure and table caption validation
                - ✓ Section symbol and abbreviation usage
                - ✓ Date format standardization
                ### Supported Document Types
                - Advisory Circulars
                - Orders
                - Policy Statements
                - Federal Register Notices
                - Technical Standard Orders
                - And more...
                """
        )
        with gr.Row():
            # Left column: inputs and tips.
            with gr.Column(scale=1):
                with gr.Group():
                    file_input = gr.File(
                        label="📎 Upload Word Document (.docx)",
                        file_types=[".docx"],
                        type="binary",
                        elem_classes="file-input"
                    )
                    doc_type = gr.Dropdown(
                        choices=document_types,
                        label="📋 Document Type",
                        value="Advisory Circular",
                        info="Select the type of document you're checking",
                        elem_classes="doc-type-select"
                    )
                    template_type = gr.Radio(
                        choices=template_types,
                        label="📑 Template Type",
                        visible=False,
                        info="Only applicable for Advisory Circulars",
                        elem_classes="template-type-select"
                    )
                    submit_btn = gr.Button(
                        "🔍 Check Document",
                        variant="primary",
                        elem_classes="submit-button"
                    )
                with gr.Group():
                    gr.Markdown(
                        """
                            ### 📌 Quick Tips
                            1. Ensure document is clean (no track changes)
                            2. Save any pending changes before upload
                            3. Check document type selection
                            4. Review results in priority order
                            """,
                        elem_classes="tips-section"
                    )
            # Right column: the rendered report.
            with gr.Column(scale=2):
                results = gr.Markdown(
                    label="Check Results",
                    value="Results will appear here after processing...",
                    elem_classes=["results-panel"]
                )

        def update_template_visibility(doc_type):
            # The template selector is shown only for Advisory Circulars.
            return gr.update(visible=doc_type == "Advisory Circular")

        doc_type.change(
            fn=update_template_visibility,
            inputs=[doc_type],
            outputs=[template_type]
        )
        submit_btn.click(
            fn=process_document,
            inputs=[file_input, doc_type, template_type],
            outputs=[results]
        )
        gr.Markdown(
            """
                ### 📌 Important Notes
                - This tool is in development; you may encounter false positives
                - For questions or feedback, contact Eric Putnam
                - Results are not stored or saved
                ### 🔑 Key Benefits
                - Saves time on manual document review
                - Ensures consistency across documents
                - Helps maintain compliance with standards
                - Identifies common issues early
                ### 💡 Tips for Best Results
                1. Address high-priority issues first
                2. Use search/replace for consistent fixes
                3. Re-run checks after making changes
                4. Keep your document templates updated
                """
        )
    return demo


# Initialize and launch the interface.
# This guard must live at module level: nested inside a function it would only
# run if something called that function, and nothing did.
if __name__ == "__main__":
    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        share=False,  # Set to True if you want to generate a public link
        server_name="0.0.0.0",  # Allows external access
        server_port=7860,  # Default Gradio port
        debug=True
    )