Spaces:

Hoctar77
/

DocumentCheckerTool

Running

App Files Files Community

Hoctar77 committed on Nov 7, 2024

Commit

dc51583

verified ·

1 Parent(s): 15d6396

Update app.py

Browse files

Files changed (1) hide show

app.py +987 -579

app.py CHANGED Viewed

@@ -1,609 +1,1018 @@
 import gradio as gr
 import logging
 import re
 from docx import Document
 import io
 import traceback
-def heading_title_check(paragraphs, required_headings):
-    headings_found = []
-    required_headings_set = set(required_headings)
-    for para in paragraphs:
-        para_strip = para.strip()
-        if para_strip in required_headings_set:
-            headings_found.append(para_strip)
-    all_headings_present = set(headings_found) == required_headings_set
-    return all_headings_present, headings_found
-def acronym_check(paragraphs):
-    defined_acronyms = set()
-    undefined_acronyms = set()
-    acronym_pattern = re.compile(r'(\b[A-Z]{2,}\b)')
-    defined_pattern = re.compile(r'(\b\w+\b) \((\b[A-Z]{2,}\b)\)')
-    for paragraph in paragraphs:
-        defined_matches = defined_pattern.findall(paragraph)
-        for full_term, acronym in defined_matches:
-            defined_acronyms.add(acronym)
-        usage_matches = acronym_pattern.findall(paragraph)
-        for acronym in usage_matches:
-            if acronym not in defined_acronyms:
-                undefined_acronyms.add(acronym)
-    return len(undefined_acronyms) == 0, undefined_acronyms
-def legal_check(paragraphs):
-    incorrect_variations = {
-        r"\bUSC\b": "U.S.C.",
-        r"\bCFR Part\b": "CFR part",
-        r"\bC\.F\.R\.\b": "CFR",
-        r"\bWe\b": "The FAA",
-        r"\bwe\b": "the FAA",
-        r"\bcancelled\b": "canceled",
-        r"\bshall\b": "must or will",
-        r"\b&\b": "and"
-    }
-    incorrect_legal_references = []
-    for paragraph in paragraphs:
-        title_14_pattern = r"(?P<prefix>^|[.!?\s])\s*(?P<title>title 14|Title 14)\b"
-        matches = re.finditer(title_14_pattern, paragraph)
-        for match in matches:
-            prefix = match.group('prefix')
-            current_title = match.group('title')
-            if prefix in ('.', '!', '?', '') and current_title.lower() == "title 14":
-                if current_title != "Title 14":
-                    incorrect_legal_references.append((current_title, "Title 14"))
-            elif prefix.isspace() and current_title != "title 14":
-                incorrect_legal_references.append((current_title, "title 14"))
-        for incorrect_pattern, correct_term in incorrect_variations.items():
-            matches = re.finditer(incorrect_pattern, paragraph)
-            for match in matches:
-                incorrect_legal_references.append((match.group(), correct_term))
-    return len(incorrect_legal_references) == 0, incorrect_legal_references
-def table_caption_check(paragraphs, doc_type):
-    if doc_type in ["Advisory Circular", "Order"]:
-        table_caption_pattern = re.compile(r'^Table\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
-    else:
-        table_caption_pattern = re.compile(r'^Table\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
-    incorrect_captions = []
-    for paragraph in paragraphs:
-        paragraph_strip = paragraph.strip()
-        if paragraph_strip.lower().startswith("table"):
-            if not table_caption_pattern.match(paragraph_strip):
-                incorrect_captions.append(paragraph_strip)
-    return len(incorrect_captions) == 0, incorrect_captions
-def figure_caption_check(paragraphs, doc_type):
-    if doc_type in ["Advisory Circular", "Order"]:
-        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
-    else:
-        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
-    incorrect_fig_captions = []
-    for paragraph in paragraphs:
-        paragraph_strip = paragraph.strip()
-        if paragraph_strip.lower().startswith("figure"):
-            if not figure_caption_pattern.match(paragraph_strip):
-                incorrect_fig_captions.append(paragraph_strip)
-    return len(incorrect_fig_captions) == 0, incorrect_fig_captions
-def table_figure_reference_check(paragraphs, doc_type):
-    incorrect_table_figure_references = []
-    if doc_type in ["Advisory Circular", "Order"]:
-        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(?!-\d+)\b', re.IGNORECASE)
-        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(?!-\d+)\b', re.IGNORECASE)
-    else:
-        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(-\d+)?\b', re.IGNORECASE)
-        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(-\d+)?\b', re.IGNORECASE)
-    for paragraph in paragraphs:
-        paragraph_strip = paragraph.strip()
-        starts_with_table_or_figure = paragraph_strip.lower().startswith('table') or paragraph_strip.lower().startswith('figure')
-        if not starts_with_table_or_figure:
-            incorrect_tables = incorrect_table_ref_pattern.findall(paragraph)
-            if incorrect_tables:
-                incorrect_table_figure_references.extend(incorrect_tables)
-            incorrect_figures = incorrect_figure_ref_pattern.findall(paragraph)
-            if incorrect_figures:
-                incorrect_table_figure_references.extend(incorrect_figures)
-    return len(incorrect_table_figure_references) == 0, incorrect_table_figure_references
-def document_title_check(doc_path, doc_type):
-    incorrect_titles = []
-    doc = Document(doc_path)
-    # Updated pattern to capture titles correctly
-    ac_pattern = re.compile(r'AC\s+\d+(?:-\d+)?(?:,|\s)+(.+?)(?=\.|,|$)')
-    # Define formatting rules for different document types
-    formatting_rules = {
-        "Advisory Circular": {"italics": True, "quotes": False},
-        "Airworthiness Criteria": {"italics": False, "quotes": True},
-        "Deviation Memo": {"italics": False, "quotes": True},
-        "Exemption": {"italics": False, "quotes": True},
-        "Federal Register Notice": {"italics": False, "quotes": True},
-        "Handbook/Manual": {"italics": False, "quotes": False},
-        "Order": {"italics": False, "quotes": True},
-        "Policy Statement": {"italics": False, "quotes": False},
-        "Rule": {"italics": False, "quotes": True},
-        "Special Condition": {"italics": False, "quotes": True},
-        "Technical Standard Order": {"italics": False, "quotes": True},
-        "Other": {"italics": False, "quotes": False}
-    }
-    # Get the rules for the current document type
-    if doc_type not in formatting_rules:
-        raise ValueError(f"Unsupported document type: {doc_type}")
-    required_format = formatting_rules[doc_type]
-    for paragraph in doc.paragraphs:
-        text = paragraph.text
-        matches = ac_pattern.finditer(text)
-        for match in matches:
-            full_match = match.group(0)
-            title_text = match.group(1).strip()
-            # Get the position where the title starts
-            title_start = match.start(1)
-            # Check for any type of quotation marks, including smart quotes
-            title_in_quotes = any(q in title_text for q in ['"', "'", '"', '"', ''', '''])
-            # Check the formatting of the title
-            title_is_italicized = False
-            current_pos = 0
-            for run in paragraph.runs:
-                run_length = len(run.text)
-                if current_pos <= title_start < current_pos + run_length:
-                    relative_pos = title_start - current_pos
-                    title_is_italicized = run.italic
-                    break
-                current_pos += run_length
-            # Check if formatting matches the required format
-            formatting_incorrect = False
-            issue_message = []
-            # Check italics requirement
-            if required_format["italics"] and not title_is_italicized:
-                formatting_incorrect = True
-                issue_message.append("should be italicized")
-            elif not required_format["italics"] and title_is_italicized:
-                formatting_incorrect = True
-                issue_message.append("should not be italicized")
-            # Check quotes requirement
-            if required_format["quotes"] and not title_in_quotes:
-                formatting_incorrect = True
-                issue_message.append("should be in quotes")
-            elif not required_format["quotes"] and title_in_quotes:
-                formatting_incorrect = True
-                issue_message.append("should not be in quotes")
-            if formatting_incorrect:
-                incorrect_titles.append({
-                    'text': full_match,
-                    'issue': ', '.join(issue_message)
-                })
-    return len(incorrect_titles) == 0, incorrect_titles
-def get_document_checks(doc_type, template_type):
-    """Return the required headings and other checks based on document type."""
-    document_checks = {
-        "Advisory Circular": {
-            "Short AC template AC": {
-                "required_headings": [
-                    "PURPOSE.",
-                    "APPLICABILITY.",
-                    "CANCELLATION.",
-                    "RELATED MATERIAL.",
-                    "DEFINITION OF KEY TERMS."
-                ]
-            },
-            "Long AC template AC": {
-                "required_headings": [
-                    "Purpose.",
-                    "Applicability.",
-                    "Cancellation.",
-                    "Related Material.",
-                    "Definition of Key Terms."
-                ]
-            }
-        },
-        "Airworthiness Criteria": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Deviation Memo": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Exemption": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Federal Register Notice": {
-            "required_headings": [
-                "Purpose of This Notice",
-                "Audience",
-                "Where can I Find This Notice"
-            ]
-        },
-        "Handbook/Manual": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Order": {
-            "required_headings": [
-                "Purpose of This Order.",
-                "Audience.",
-                "Where to Find This Order."
-            ]
-        },
-        "Policy Statement": {
-            "required_headings": [
-                "SUMMARY",
-                "CURRENT REGULATORY AND ADVISORY MATERIAL",
-                "RELEVANT PAST PRACTICE",
-                "POLICY",
-                "EFFECT OF POLICY",
-                "CONCLUSION"
-            ]
-        },
-        "Rule": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Special Condition": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Technical Standard Order": {
-            "required_headings": [
-                "PURPOSE.",
-                "APPLICABILITY.",
-                "REQUIREMENTS.",
-                "MARKING.",
-                "APPLICATION DATA REQUIREMENTS.",
-                "MANUFACTURER DATA REQUIREMENTS.",
-                "FURNISHED DATA REQUIREMENTS.",
-                "HOW TO GET REFERENCED DOCUMENTS."
-            ]
-        },
-        "Other": {
-            "required_headings": [
-                "N/A"
-            ]
         }
-    }
-    # Add debugging logs
-    logger = logging.getLogger(__name__)
-    logger.info(f"Requested document type: {doc_type}")
-    logger.info(f"Requested template type: {template_type}")
-    if doc_type == "Advisory Circular":
-        checks = document_checks.get(doc_type, {}).get(template_type, {})
-    else:
-        checks = document_checks.get(doc_type, {})
-    logger.info(f"Retrieved checks: {checks}")
-    return checks
-def double_period_check(paragraphs):
-    incorrect_sentences = []
-    for paragraph in paragraphs:
-        sentences = re.split(r'(?<=[.!?]) +', paragraph)
-        for sentence in sentences:
-            if sentence.endswith('..'):
-                incorrect_sentences.append(sentence.strip())
-    return len(incorrect_sentences) == 0, incorrect_sentences
-def spacing_check(paragraphs):
-    incorrect_spacing = []
-    doc_type_pattern = re.compile(r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE)
-    section_symbol_pattern = re.compile(r'(?<!\s)(§|§§)(\d+\.\d+)', re.IGNORECASE)
-    part_number_pattern = re.compile(r'(?<!\s)Part(\d+)', re.IGNORECASE)
-    paragraph_pattern = re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE)
-    double_space_pattern = re.compile(r'\s{2,}')
-    for paragraph in paragraphs:
-        if doc_type_pattern.search(paragraph) or \
-           section_symbol_pattern.search(paragraph) or \
-           part_number_pattern.search(paragraph) or \
-           paragraph_pattern.search(paragraph) or \
-           double_space_pattern.search(paragraph):
-            incorrect_spacing.append(paragraph)
-    return len(incorrect_spacing) == 0, incorrect_spacing
-def check_prohibited_phrases(paragraphs):
-    prohibited_phrases = [
-        r'\babove\b',
-        r'\bbelow\b',
-        r'\bthere is\b',
-        r'\bthere are\b'
-    ]
-    issues = []
-    for paragraph in paragraphs:
-        for phrase in prohibited_phrases:
-            if re.search(phrase, paragraph, re.IGNORECASE):
-                issues.append((phrase.strip(r'\b'), paragraph.strip()))
-    return issues
-def check_abbreviation_usage(paragraphs):
-    """Check for abbreviation consistency after first definition."""
-    abbreviations = {}
-    issues = []
-    for paragraph in paragraphs:
-        # Find definitions like "Federal Aviation Administration (FAA)"
-        defined_matches = re.findall(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)', paragraph)
-        for full_term, acronym in defined_matches:
-            if acronym not in abbreviations:
-                abbreviations[acronym] = {"full_term": full_term.strip(), "defined": True}
-        # Check for full term usage after definition
-        for acronym, data in abbreviations.items():
-            full_term = data["full_term"]
-            if full_term in paragraph:
-                # Ignore first usage where it's defined
-                if data["defined"]:
-                    data["defined"] = False  # Mark it as now defined
-                else:
-                    # Only flag subsequent occurrences
-                    issues.append((full_term, acronym, paragraph.strip()))
-    return issues
-def check_date_formats(paragraphs):
-    """Check for inconsistent date formats."""
-    date_issues = []
-    correct_date_pattern = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
-    date_pattern = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')  # MM/DD/YYYY
-    for paragraph in paragraphs:
-        if date_pattern.search(paragraph):
-            dates = date_pattern.findall(paragraph)
-            for date in dates:
-                if not correct_date_pattern.match(date):
-                    date_issues.append((date, paragraph.strip()))
-    return date_issues
-def check_placeholders(paragraphs):
-    """Check for placeholders that should be removed."""
-    placeholder_phrases = [
-        r'\bTBD\b',
-        r'\bTo be determined\b',
-        r'\bTo be added\b'
-    ]
-    issues = []
-    for paragraph in paragraphs:
-        for phrase in placeholder_phrases:
-            if re.search(phrase, paragraph, re.IGNORECASE):
-                issues.append((phrase.strip(r'\b'), paragraph.strip()))
-    return issues
-def process_document(file_obj, doc_type, template_type):
-    try:
-        doc = Document(file_obj)
-        paragraphs = [para.text for para in doc.paragraphs]
-        required_headings = get_document_checks(doc_type, template_type).get("required_headings", [])
-        # Perform each check with `paragraphs` as input
-        heading_valid, headings_found = heading_title_check(paragraphs, required_headings)
-        acronyms_valid, undefined_acronyms = acronym_check(paragraphs)
-        legal_valid, incorrect_legal_references = legal_check(paragraphs)
-        table_valid, incorrect_captions = table_caption_check(paragraphs, doc_type)
-        figure_valid, incorrect_fig_captions = figure_caption_check(paragraphs, doc_type)
-        references_valid, incorrect_table_figure_references = table_figure_reference_check(paragraphs, doc_type)
-        title_style_valid, incorrect_titles = document_title_check(file_obj, doc_type) if doc_type in ["Advisory Circular", "Order"] else (True, [])
-        double_period_valid, incorrect_sentences = double_period_check(paragraphs)
-        spacing_valid, incorrect_spacing = spacing_check(paragraphs)
-        date_issues = check_date_formats(paragraphs)  # Pass paragraphs here
-        placeholder_issues = check_placeholders(paragraphs)  # Pass paragraphs here
-        # Format results
-        results = format_results_for_gradio(
-            heading_valid=heading_valid, headings_found=headings_found,
-            acronyms_valid=acronyms_valid, undefined_acronyms=undefined_acronyms,
-            legal_valid=legal_valid, incorrect_legal_references=incorrect_legal_references,
-            table_valid=table_valid, incorrect_captions=incorrect_captions,
-            figure_valid=figure_valid, incorrect_fig_captions=incorrect_fig_captions,
-            references_valid=references_valid, incorrect_table_figure_references=incorrect_table_figure_references,
-            title_style_valid=title_style_valid, incorrect_titles=incorrect_titles,
-            double_period_valid=double_period_valid, incorrect_sentences=incorrect_sentences,
-            spacing_valid=spacing_valid, incorrect_spacing=incorrect_spacing,
-            date_issues=date_issues,  # Added date_issues
-            placeholder_issues=placeholder_issues,  # Added placeholder_issues
-            required_headings=required_headings, doc_type=doc_type
-        )
         return results
     except Exception as e:
         print(f"Error in process_document: {str(e)}")
         return f"An error occurred while processing the document: {str(e)}"
-def format_results_for_gradio(**kwargs):
     """Format the results for display in Gradio."""
-    results = []
-    results.append("# Document Check Results\n")
-    # Required Headings Check
-    results.append("## Required Headings Check")
-    if kwargs['heading_valid']:
-        results.append("✅ All required headings are present.\n")
-    else:
-        missing_headings = set(kwargs['required_headings']) - set(kwargs['headings_found'])
-        results.append("❌ Missing Required Headings:")
-        for heading in missing_headings:
-            results.append(f"- {heading}")
-    results.append("")
-    # Acronym Check
-    results.append("## Acronym Check")
-    if kwargs['acronyms_valid']:
-        results.append("✅ All acronyms are properly defined.\n")
-    else:
-        results.append("❌ The following acronyms need to be defined at first use:")
-        for acronym in kwargs['undefined_acronyms']:
-            results.append(f"- {acronym}")
-    results.append("")
-    # Legal Check
-    results.append("## Legal Terminology Check")
-    if kwargs['legal_valid']:
-        results.append("✅ All legal references are properly formatted.\n")
-    else:
-        results.append("❌ Incorrect Legal Terminology:")
-        for incorrect_term, correct_term in kwargs['incorrect_legal_references']:
-            results.append(f"- Use '{correct_term}' instead of '{incorrect_term}'")
-    results.append("")
-    # Table Caption Check
-    results.append("## Table Caption Check")
-    if kwargs['table_valid']:
-        results.append("✅ All table captions are correctly formatted.\n")
-    else:
-        results.append("❌ Incorrect Table Captions:")
-        for caption in kwargs['incorrect_captions']:
-            results.append(f"- {caption}")
-    results.append("")
-    # Figure Caption Check
-    results.append("## Figure Caption Check")
-    if kwargs['figure_valid']:
-        results.append("✅ All figure captions are correctly formatted.\n")
-    else:
-        results.append("❌ Incorrect Figure Captions:")
-        for caption in kwargs['incorrect_fig_captions']:
-            results.append(f"- {caption}")
-    results.append("")
-    # Table and Figure References Check
-    results.append("## Table and Figure References Check")
-    if kwargs['references_valid']:
-        results.append("✅ All table and figure references are correctly formatted.\n")
-    else:
-        results.append("❌ Incorrect Table/Figure References:")
-        for ref in kwargs['incorrect_table_figure_references']:
-            results.append(f"- {ref}")
-    results.append("")
-    # Document Title Style Check
-    results.append("## Document Title Style Check")
-    if kwargs['title_style_valid']:
-        results.append("✅ All document title references are properly styled.\n")
-    else:
-        results.append("❌ Incorrect Document Title Styling:")
-        for title in kwargs['incorrect_titles']:
-            results.append(f"- {title['text']}")
-            results.append(f"  - Issue: {title['issue']}")
-        # Add formatting guidance
-        formatting_notes = {
-            "Advisory Circular": "Document titles should be italicized, not in quotation marks.",
-            "Order": "Document titles should be in quotation marks, not italicized.",
-            "Federal Register Notice": "Document titles should be in quotation marks, not italicized.",
-            "Policy Statement": "Document titles should not have any special formatting (no italics, no quotation marks)."
-        }
-        doc_type = kwargs.get('doc_type', 'Unknown')
-        if doc_type in formatting_notes:
-            results.append(f"\nNote: {formatting_notes[doc_type]}")
-        else:
-            results.append("\nNote: Please verify the correct formatting style for this document type.")
-    results.append("")
-    # Double Period Check
-    results.append("## Double Period Check")
-    if kwargs['double_period_valid']:
-        results.append("✅ No double periods found.\n")
-    else:
-        results.append("❌ Sentences found with double periods:")
-        for sentence in kwargs['incorrect_sentences']:
-            results.append(f"- {sentence}")
-    results.append("")
-    # Spacing Check
-    results.append("## Spacing Check")
-    if kwargs['spacing_valid']:
-        results.append("✅ All spacing is correct.\n")
-    else:
-        results.append("❌ Incorrect spacing found in:")
-        for spacing in kwargs['incorrect_spacing']:
-            results.append(f"- {spacing}")
-    results.append("")
-    # Date Format Consistency
-    results.append("## Date Format Consistency")
-    if not kwargs['date_issues']:
-        results.append("✅ All dates are in the correct format.\n")
-    else:
-        results.append("❌ Date Format Issues:")
-        for date, paragraph in kwargs['date_issues']:
-            results.append(f"- Incorrect date format '{date}' in: {paragraph}")
-    results.append("")
-    # Placeholder Check
-    results.append("## Placeholder Check")
-    if not kwargs['placeholder_issues']:
-        results.append("✅ No future references or placeholders found.\n")
-    else:
-        results.append("❌ Placeholders Found:")
-        for phrase, paragraph in kwargs['placeholder_issues']:
-            results.append(f"- Placeholder '{phrase}' in: {paragraph}")
-    return "\n".join(results)
-def process_file(file_obj, doc_type, template_type):
-    """Process the uploaded file and return results with error handling."""
-    if file_obj is None:
-        return "Please upload a document first."
-    try:
-        # Convert bytes to BytesIO object
-        doc_bytes = io.BytesIO(file_obj) if isinstance(file_obj, bytes) else io.BytesIO(file_obj.read())
-        # Process the document
-        results = process_document(doc_bytes, doc_type, template_type)
-        return results
-    except Exception as e:
-        error_message = f"""An error occurred while processing the document:
-Error: {str(e)}
-Please ensure:
-1. The file is a valid Word document (.docx)
-2. The file is not corrupted
-3. The file is not password protected
-Technical details: {str(e)}"""
-        print(f"Error processing file: {str(e)}")
-        return error_message
 # Create the Gradio interface
 demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')
@@ -619,7 +1028,7 @@ with demo:
     document_types = [
         "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
-        "Federal Register Notice", "Handbook/Manual", "Order", "Policy Statement",
         "Rule", "Special Condition", "Technical Standard Order", "Other"
     ]
@@ -651,7 +1060,6 @@ with demo:
                 value="Results will appear here after processing..."
             )
-    # Update template type visibility based on document type
     def update_template_visibility(doc_type):
         return gr.update(visible=doc_type == "Advisory Circular")
@@ -661,12 +1069,12 @@ with demo:
         outputs=[template_type]
     )
-    # Process file when submit button is clicked
     submit_btn.click(
-        fn=process_file,
         inputs=[file_input, doc_type, template_type],
         outputs=[output]
     )
 # Launch the demo
-demo.launch()

 import gradio as gr
 import logging
 import re
+import json
+import time
+from typing import Dict, List, Any, Tuple, Optional
+from dataclasses import dataclass
+from functools import wraps
 from docx import Document
 import io
+import os
 import traceback
+@dataclass
+class DocumentCheckResult:
+    """Structured result for document checks.
+
+    Gives check methods (e.g. ``heading_title_check``) one uniform return
+    shape: an overall pass/fail flag, the problems found, and optional
+    supporting data.
+    """
+    # True when the check passed; False when it failed or the input was invalid.
+    success: bool
+    # One dict per problem found; the keys depend on the individual check
+    # (e.g. 'error' or 'missing_headings').
+    issues: List[Dict[str, Any]]
+    # Optional supporting data for the check; None when not supplied.
+    details: Optional[Dict[str, Any]] = None
+def profile_performance(func):
+    """Decorator that logs how long each call to *func* takes.
+
+    After the wrapped callable returns, the elapsed time is logged at INFO
+    level. When the first positional argument has a ``logger`` attribute
+    (typically ``self`` on a checker instance) that logger is used;
+    otherwise the module-level logger is used. The wrapped callable's
+    return value is passed through unchanged; nothing is logged if it raises.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        # perf_counter is monotonic, so the measurement cannot go negative
+        # or jump when the system clock is adjusted.
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        elapsed = time.perf_counter() - start_time
+        # The decorated callable may be invoked without positional arguments
+        # (plain function, keyword-only call); indexing args[0] blindly
+        # would raise IndexError after the work has already been done.
+        owner = args[0] if args else None
+        logger = getattr(owner, 'logger', None)
+        if logger is None:
+            logger = logging.getLogger(__name__)
+        logger.info(
+            f"Performance: {func.__name__} took {elapsed:.4f} seconds"
+        )
+        return result
+    return wrapper
+class DocumentCheckerConfig:
+    """Configuration management for document checks.
+
+    ``self.config`` holds the built-in defaults overlaid with an optional
+    user-supplied JSON file; ``self.logger`` is the module logger configured
+    from the ``logging`` section of that configuration.
+    """
+    # Attribute set on the console handler this class installs, so repeated
+    # instantiation can find and reuse it instead of stacking duplicates.
+    _HANDLER_MARKER = "_document_checker_console"
+    def __init__(self, config_path: Optional[str] = None):
+        """Initialize configuration with optional config file.
+
+        Args:
+            config_path: Path to a JSON file whose contents are merged over
+                the defaults. ``None`` or a non-existent path means
+                defaults only.
+        """
+        self.config = self._load_config(config_path)
+        self.logger = self._setup_logger()
+    def _load_config(self, config_path: Optional[str] = None) -> Dict[str, Any]:
+        """Load configuration from JSON file or use default settings.
+
+        Returns the default configuration, deep-merged with the user file
+        when one exists and parses to a JSON object. Any problem reading or
+        parsing the file is logged as a warning and the defaults are used.
+        """
+        default_config = {
+            "logging": {
+                "level": "INFO",
+                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+            },
+            "checks": {
+                "acronyms": True,
+                "terminology_check": True,
+                "headings": True
+            },
+            "document_types": {
+                "Advisory Circular": {
+                    "required_headings": [
+                        "Purpose.",
+                        "Applicability.",
+                        "Cancellation.",
+                        "Related Material.",
+                        "Definition of Key Terms."
+                    ],
+                    "skip_title_check": False
+                },
+                "Federal Register Notice": {
+                    "required_headings": [
+                        "Purpose of This Notice",
+                        "Audience",
+                        "Where can I Find This Notice"
+                    ],
+                    "skip_title_check": False
+                },
+                "Order": {
+                    "required_headings": [
+                        "Purpose of This Order.",
+                        "Audience.",
+                        "Where to Find This Order."
+                    ],
+                    "skip_title_check": False
+                },
+                "Policy Statement": {
+                    "required_headings": [
+                        "SUMMARY",
+                        "CURRENT REGULATORY AND ADVISORY MATERIAL",
+                        "RELEVANT PAST PRACTICE",
+                        "POLICY",
+                        "EFFECT OF POLICY",
+                        "CONCLUSION"
+                    ],
+                    "skip_title_check": False
+                },
+                "Technical Standard Order": {
+                    "required_headings": [
+                        "PURPOSE.",
+                        "APPLICABILITY.",
+                        "REQUIREMENTS.",
+                        "MARKING.",
+                        "APPLICATION DATA REQUIREMENTS.",
+                        "MANUFACTURER DATA REQUIREMENTS.",
+                        "FURNISHED DATA REQUIREMENTS.",
+                        "HOW TO GET REFERENCED DOCUMENTS."
+                    ],
+                    "skip_title_check": False
+                },
+                "Other": {
+                    "required_headings": [],
+                    "skip_title_check": True
+                }
+            }
+        }
+        if config_path and os.path.exists(config_path):
+            try:
+                # JSON is UTF-8 by specification; do not depend on the
+                # platform's locale encoding.
+                with open(config_path, 'r', encoding='utf-8') as f:
+                    user_config = json.load(f)
+            except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
+                logging.warning(f"Error loading config: {e}. Using default config.")
+            else:
+                # Only a JSON object can be merged; a list or scalar at the
+                # top level would otherwise crash inside _deep_merge.
+                if isinstance(user_config, dict):
+                    self._deep_merge(default_config, user_config)
+                else:
+                    logging.warning(
+                        "Error loading config: top-level JSON value must be an object. "
+                        "Using default config."
+                    )
+        return default_config
+    def _deep_merge(self, base: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
+        """Recursively merge *update* into *base* in place and return *base*.
+
+        Nested dicts present on both sides are merged key by key; any other
+        value in *update* replaces the value in *base*.
+        """
+        for key, value in update.items():
+            if isinstance(value, dict) and key in base and isinstance(base[key], dict):
+                self._deep_merge(base[key], value)
+            else:
+                base[key] = value
+        return base
+    @staticmethod
+    def _resolve_log_level(level: Any) -> int:
+        """Translate a configured level (name or number) to a logging level.
+
+        Unknown values fall back to INFO with a warning instead of raising,
+        so a typo in the config file cannot prevent start-up.
+        """
+        if isinstance(level, int) and not isinstance(level, bool):
+            return level
+        resolved = getattr(logging, str(level).upper(), None)
+        if isinstance(resolved, int):
+            return resolved
+        logging.warning(f"Unknown logging level {level!r}. Using INFO.")
+        return logging.INFO
+    def _setup_logger(self) -> logging.Logger:
+        """Set up and configure logging based on configuration.
+
+        The module logger is shared by every instance, so the console
+        handler is installed once and reconfigured on later calls; adding a
+        new handler each time would emit every message multiple times.
+        """
+        logger = logging.getLogger(__name__)
+        log_level = self._resolve_log_level(self.config['logging']['level'])
+        formatter = logging.Formatter(self.config['logging']['format'])
+        console_handler = next(
+            (h for h in logger.handlers if getattr(h, self._HANDLER_MARKER, False)),
+            None
+        )
+        if console_handler is None:
+            console_handler = logging.StreamHandler()
+            setattr(console_handler, self._HANDLER_MARKER, True)
+            logger.addHandler(console_handler)
+        console_handler.setFormatter(formatter)
+        console_handler.setLevel(log_level)
+        logger.setLevel(log_level)
+        return logger
+class DocumentChecker:
+    """Common foundation for document checkers.
+
+    Builds a DocumentCheckerConfig on construction and exposes its logger,
+    plus helpers for validating input and loading paragraph text.
+    """
+    def __init__(self, config_path: Optional[str] = None):
+        manager = DocumentCheckerConfig(config_path)
+        self.config_manager = manager
+        self.logger = manager.logger
+    @staticmethod
+    def validate_input(doc: List[str]) -> bool:
+        """Return True only when *doc* is a non-empty list."""
+        return isinstance(doc, list) and bool(doc)
+    @classmethod
+    def extract_paragraphs(cls, doc_path: str) -> List[str]:
+        """Return the text of every non-blank paragraph in the document.
+
+        Any failure while opening or reading the document is logged and an
+        empty list is returned instead of raising.
+        """
+        collected: List[str] = []
+        try:
+            for paragraph in Document(doc_path).paragraphs:
+                content = paragraph.text
+                # Whitespace-only paragraphs are dropped.
+                if content.strip():
+                    collected.append(content)
+        except Exception as e:
+            logging.error(f"Error extracting paragraphs: {e}")
+            return []
+        return collected
+class FAADocumentChecker(DocumentChecker):
+    def __init__(self, config_path: Optional[str] = None):
+        """Initialise the checker by passing ``config_path`` on to the base class."""
+        super().__init__(config_path)
+    @profile_performance  # Use the decorator directly
+    def heading_title_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
+        """Check headings for a specific document type."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Use configuration for document-specific headings
+        checks = self.config_manager.config['document_types'].get(
+            doc_type, {}
+        )
+        required_headings = checks.get('required_headings', [])
+        headings_found = []
+        # Create a set for faster lookup
+        required_headings_set = set(required_headings)
+        for para in doc:
+            para_strip = para.strip()
+            # Check if the paragraph is in the required headings list
+            if para_strip in required_headings_set:
+                headings_found.append(para_strip)
+        # Check if all required headings are found
+        all_headings_present = set(headings_found) == required_headings_set
+        issues = []
+        if not all_headings_present:
+            missing_headings = required_headings_set - set(headings_found)
+            issues.append({'missing_headings': list(missing_headings)})
+        return DocumentCheckResult(
+            success=all_headings_present,
+            issues=issues,
+            details={
+                'found_headings': headings_found,
+                'required_headings': required_headings
+            }
+        )
+    @profile_performance
+    def heading_title_period_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
+        """
+        Check if headings end with periods according to document type requirements.
+        Args:
+            doc (List[str]): List of document paragraphs
+            doc_type (str): Type of document being checked
+        Returns:
+            DocumentCheckResult: Result of the heading period check
+        """
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Define document types requiring periods in headings
+        period_required = {
+            "Advisory Circular": True,
+            "Airworthiness Criteria": False,
+            "Deviation Memo": False,
+            "Exemption": False,
+            "Federal Register Notice": False,
+            "Order": True,
+            "Policy Statement": False,
+            "Rule": False,
+            "Special Condition": False,
+            "Technical Standard Order": True,
+            "Other": False
+        }
+        # Get whether periods are required for this document type
+        should_have_period = period_required.get(doc_type, False)
+        # Get the headings configuration for this document type
+        checks = self.config_manager.config['document_types'].get(doc_type, {})
+        required_headings = checks.get('required_headings', [])
+        required_headings_set = set(required_headings)
+        issues = []
+        checked_headings = []
+        for para in doc:
+            para_strip = para.strip()
+            # Check only if paragraph is a heading
+            if para_strip in required_headings_set:
+                ends_with_period = para_strip.endswith('.')
+                if should_have_period and not ends_with_period:
+                    issues.append({
+                        'heading': para_strip,
+                        'issue': 'missing_period',
+                        'message': f"Heading should end with a period: '{para_strip}'"
+                    })
+                    checked_headings.append({
+                        'heading': para_strip,
+                        'has_period': False,
+                        'needs_period': True
+                    })
+                elif not should_have_period and ends_with_period:
+                    issues.append({
+                        'heading': para_strip,
+                        'issue': 'unexpected_period',
+                        'message': f"Heading should not end with a period: '{para_strip}'"
+                    })
+                    checked_headings.append({
+                        'heading': para_strip,
+                        'has_period': True,
+                        'needs_period': False
+                    })
+                else:
+                    checked_headings.append({
+                        'heading': para_strip,
+                        'has_period': ends_with_period,
+                        'needs_period': should_have_period
+                    })
+        success = len(issues) == 0
+        return DocumentCheckResult(
+        success=success,
+        issues=issues,
+        details={
+            'document_type': doc_type,
+            'periods_required': should_have_period,
+            'checked_headings': checked_headings
+        }
+    )
+    @profile_performance
+    def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
+        """Check if acronyms are defined at their first use, only flagging the first instance of undefined acronyms."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        defined_acronyms = set()
+        first_occurrences = {}  # Track first occurrence of each acronym
+        undefined_acronyms = []
+        acronym_pattern = re.compile(r'\b[A-Z]{2,}\b')
+        defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
+        # Predefined acronyms
+        defined_acronyms.add("14 CFR")
+        for paragraph in doc:
+            # Check for definitions first
+            defined_matches = defined_pattern.findall(paragraph)
+            for full_term, acronym in defined_matches:
+                defined_acronyms.add(acronym)
+                # If this was previously marked as undefined, remove it since we found its definition
+                if acronym in first_occurrences:
+                    del first_occurrences[acronym]
+            # Check for acronyms in the paragraph
+            usage_matches = acronym_pattern.findall(paragraph)
+            for acronym in usage_matches:
+                if acronym not in defined_acronyms:
+                    # Only process if we haven't seen this acronym before
+                    if acronym not in first_occurrences:
+                        # Find the sentence containing the first undefined acronym
+                        sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+                        for sentence in sentences:
+                            if acronym in sentence:
+                                first_occurrences[acronym] = {
+                                    'acronym': acronym,
+                                    'sentence': sentence.strip()
+                                }
+                                break
+        # Convert first occurrences to list of issues
+        undefined_acronyms = list(first_occurrences.values())
+        success = len(undefined_acronyms) == 0
+        issues = undefined_acronyms if not success else []
+        return DocumentCheckResult(success=success, issues=issues)
+    @profile_performance
+    def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
+        """
+        Check document terminology for:
+        1. Legal reference formatting and preferred terms
+        2. Prohibited phrases and constructions
+        """
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Dictionary of terms that should be replaced with preferred alternatives
+        term_replacements = {
+            r'\bUSC\b': 'U.S.C.',
+            r'\bCFR Part\b': 'CFR part',
+            r'\bC\.F\.R\.\b': 'CFR',
+            r'\b14 CFR\s*§': '14 CFR',
+            r'\bWe\b': 'The FAA',
+            r'\bwe\b': 'the FAA',
+            r'\bcancelled\b': 'canceled',
+            r'\bshall\b': 'must',
+            r'\b\&\b': 'and',
+            r'\bflight crew\b': 'flightcrew'
         }
+        # Prohibited phrases that should be flagged
+        prohibited_phrases = [
+            r'\babove\b',
+            r'\bbelow\b',
+            r'(?:^|(?<=[.!?]\s))There\s+(?:is|are)\b'  # Matches 'There is/are' at start of sentences
+        ]
+        issues = []
+        for paragraph in doc:
+            sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+            for sentence in sentences:
+                # Check for incorrect terms that need replacement
+                for incorrect_pattern, correct_term in term_replacements.items():
+                    matches = re.finditer(incorrect_pattern, sentence)
+                    for match in matches:
+                        incorrect_term = match.group()
+                        issues.append({
+                            'type': 'incorrect_term',
+                            'incorrect_term': incorrect_term,
+                            'correct_term': correct_term,
+                            'sentence': sentence.strip()
+                        })
+                # Check for prohibited phrases
+                for phrase_pattern in prohibited_phrases:
+                    match = re.search(phrase_pattern, sentence, re.IGNORECASE)
+                    if match:
+                        issues.append({
+                            'type': 'prohibited_phrase',
+                            'phrase': match.group().strip(),
+                            'sentence': sentence.strip()
+                        })
+        success = len(issues) == 0
+        return DocumentCheckResult(success=success, issues=issues)
+    @profile_performance
+    def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for various section symbol (§) usage issues."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        issues = []
+        # Patterns to identify issues
+        sentences_starting_with_section_symbol = []
+        incorrect_14_CFR_section_symbol_usage = []
+        single_section_symbol_multiple_sections = []
+        missing_section_symbol_in_multiple_sections = []
+        # Pattern to find '14 CFR §25.25'
+        pattern_14_CFR_section = re.compile(r'\b14 CFR §\s*\d+\.\d+\b')
+        # Patterns for multiple sections with single '§'
+        pattern_single_section_symbol_and = re.compile(r'§\s*\d+\.\d+\s+and\s+\d+\.\d+')
+        pattern_single_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+\d+\.\d+')
+        pattern_single_section_symbol_through = re.compile(r'§\s*\d+\.\d+\s+through\s+\d+\.\d+')
+        # Pattern for missing '§' before subsequent sections with 'or'
+        pattern_missing_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+§?\s*\d+\.\d+')
+        for paragraph in doc:
+            # Check for sentences starting with '§'
+            sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+            for sentence in sentences:
+                if sentence.strip().startswith('§'):
+                    sentences_starting_with_section_symbol.append(sentence.strip())
+            # Check for '14 CFR §25.25' usage
+            matches_14_CFR = pattern_14_CFR_section.findall(paragraph)
+            for match in matches_14_CFR:
+                incorrect_14_CFR_section_symbol_usage.append(match)
+            # Check for single '§' with multiple sections using 'and'
+            matches_and = pattern_single_section_symbol_and.findall(paragraph)
+            for match in matches_and:
+                single_section_symbol_multiple_sections.append(match)
+            # Check for single '§' with multiple sections using 'or'
+            matches_or = pattern_single_section_symbol_or.findall(paragraph)
+            for match in matches_or:
+                single_section_symbol_multiple_sections.append(match)
+            # Check for single '§' with multiple sections using 'through'
+            matches_through = pattern_single_section_symbol_through.findall(paragraph)
+            for match in matches_through:
+                single_section_symbol_multiple_sections.append(match)
+            # Check for missing '§' before subsequent sections with 'or'
+            matches_missing_or = pattern_missing_section_symbol_or.findall(paragraph)
+            for match in matches_missing_or:
+                missing_section_symbol_in_multiple_sections.append(match)
+        if sentences_starting_with_section_symbol:
+            issues.append({
+                'issue': 'sentences_starting_with_section_symbol',
+                'sentences': sentences_starting_with_section_symbol
+            })
+        if incorrect_14_CFR_section_symbol_usage:
+            issues.append({
+                'issue': 'incorrect_14_CFR_section_symbol_usage',
+                'matches': incorrect_14_CFR_section_symbol_usage
+            })
+        if single_section_symbol_multiple_sections:
+            issues.append({
+                'issue': 'single_section_symbol_multiple_sections',
+                'matches': single_section_symbol_multiple_sections
+            })
+        if missing_section_symbol_in_multiple_sections:
+            issues.append({
+                'issue': 'missing_section_symbol_in_multiple_sections',
+                'matches': missing_section_symbol_in_multiple_sections
+            })
+        success = len(issues) == 0
+        return DocumentCheckResult(success=success, issues=issues)
+    @profile_performance
+    def caption_check(self, doc: List[str], doc_type: str, caption_type: str) -> DocumentCheckResult:
+        """Check for correctly formatted captions (Table or Figure)."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Determine the caption pattern based on document type
+        if doc_type in ["Advisory Circular", "Order"]:
+            caption_pattern = re.compile(rf'^{caption_type}\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
+            correct_format = f"{caption_type} X-Y"
+        else:
+            caption_pattern = re.compile(rf'^{caption_type}\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
+            correct_format = f"{caption_type} X"
+        incorrect_captions = []
+        in_toc = False
+        for paragraph in doc:
+            # Check for start or end of Table of Contents (TOC)
+            if "Table of Contents" in paragraph or "Contents" in paragraph:
+                in_toc = True
+                continue
+            elif in_toc and paragraph.strip() == "":
+                in_toc = False  # Assume blank line marks the end of TOC
+            # If within TOC, skip this paragraph
+            if in_toc:
+                continue
+            # Only check paragraphs that start with "Table" or "Figure" for proper caption format
+            paragraph_strip = paragraph.strip()
+            if paragraph_strip.lower().startswith(caption_type.lower()):
+                if not caption_pattern.match(paragraph_strip):
+                    incorrect_captions.append({
+                        'incorrect_caption': paragraph_strip,
+                        'correct_format': correct_format
+                    })
+        success = len(incorrect_captions) == 0
+        return DocumentCheckResult(success=success, issues=incorrect_captions)
+    @profile_performance
+    def table_figure_reference_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
+        """
+        Check for incorrect references to tables and figures in the document.
+        References should be lowercase within sentences and capitalized at sentence start.
+        """
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        incorrect_references = []
+        # Define patterns based on document type
+        if doc_type in ["Advisory Circular", "Order"]:
+            # Matches both capitalized and lowercase variations
+            table_pattern = r'\b[Tt]able\s+\d+-\d+\b'
+            figure_pattern = r'\b[Ff]igure\s+\d+-\d+\b'
+            correct_mid_table_format = "table X-Y"
+            correct_start_table_format = "Table X-Y"
+            correct_mid_figure_format = "figure X-Y"
+            correct_start_figure_format = "Figure X-Y"
+        else:
+            table_pattern = r'\b[Tt]able\s+\d+\b'
+            figure_pattern = r'\b[Ff]igure\s+\d+\b'
+            correct_mid_table_format = "table X"
+            correct_start_table_format = "Table X"
+            correct_mid_figure_format = "figure X"
+            correct_start_figure_format = "Figure X"
+        table_ref_pattern = re.compile(table_pattern)
+        figure_ref_pattern = re.compile(figure_pattern)
+        for paragraph in doc:
+            paragraph_strip = paragraph.strip()
+            # Exclude captions
+            starts_with_table_or_figure = paragraph_strip.lower().startswith('table') or paragraph_strip.lower().startswith('figure')
+            if not starts_with_table_or_figure:
+                # Split into sentences while preserving the original text
+                sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+                for sentence in sentences:
+                    sentence = sentence.strip()
+                    # Check table references
+                    matches = table_ref_pattern.finditer(sentence)
+                    for match in matches:
+                        ref = match.group()
+                        # Get the text before the reference
+                        text_before = sentence[:match.start()].strip()
+                        # Determine if reference is at start of sentence
+                        is_sentence_start = text_before == ""
+                        # Check if capitalization is correct
+                        if is_sentence_start and not ref.startswith('Table'):
+                            incorrect_references.append({
+                                'incorrect_ref': ref,
+                                'correct_format': correct_start_table_format,
+                                'sentence': sentence,
+                                'issue': "Table reference at sentence start should be capitalized"
+                            })
+                        elif not is_sentence_start and not ref.startswith('table'):
+                            incorrect_references.append({
+                                'incorrect_ref': ref,
+                                'correct_format': correct_mid_table_format,
+                                'sentence': sentence,
+                                'issue': "Table reference within sentence should be lowercase"
+                            })
+                    # Check figure references
+                    matches = figure_ref_pattern.finditer(sentence)
+                    for match in matches:
+                        ref = match.group()
+                        # Get the text before the reference
+                        text_before = sentence[:match.start()].strip()
+                        # Determine if reference is at start of sentence
+                        is_sentence_start = text_before == ""
+                        # Check if capitalization is correct
+                        if is_sentence_start and not ref.startswith('Figure'):
+                            incorrect_references.append({
+                                'incorrect_ref': ref,
+                                'correct_format': correct_start_figure_format,
+                                'sentence': sentence,
+                                'issue': "Figure reference at sentence start should be capitalized"
+                            })
+                        elif not is_sentence_start and not ref.startswith('figure'):
+                            incorrect_references.append({
+                                'incorrect_ref': ref,
+                                'correct_format': correct_mid_figure_format,
+                                'sentence': sentence,
+                                'issue': "Figure reference within sentence should be lowercase"
+                            })
+        success = len(incorrect_references) == 0
+        return DocumentCheckResult(success=success, issues=incorrect_references)
+    @profile_performance
+    def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
+        """Check for correct formatting of document titles."""
+        try:
+            doc = Document(doc_path)
+        except Exception as e:
+            self.logger.error(f"Error reading the document in title check: {e}")
+            return DocumentCheckResult(success=False, issues=[{'error': str(e)}])
+        incorrect_titles = []
+        # Define formatting rules for different document types
+        formatting_rules = {
+            "Advisory Circular": {"italics": True, "quotes": False},
+            "Airworthiness Criteria": {"italics": False, "quotes": True},
+            "Deviation Memo": {"italics": False, "quotes": True},
+            "Exemption": {"italics": False, "quotes": True},
+            "Federal Register Notice": {"italics": False, "quotes": True},
+            "Order": {"italics": False, "quotes": True},
+            "Policy Statement": {"italics": False, "quotes": False},
+            "Rule": {"italics": False, "quotes": True},
+            "Special Condition": {"italics": False, "quotes": True},
+            "Technical Standard Order": {"italics": False, "quotes": True},
+            "Other": {"italics": False, "quotes": False}
+        }
+        if doc_type not in formatting_rules:
+            self.logger.warning(f"Unsupported document type: {doc_type}. Skipping title check.")
+            return DocumentCheckResult(success=True, issues=[])
+        required_format = formatting_rules[doc_type]
+        ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')
+        for paragraph in doc.paragraphs:
+            text = paragraph.text
+            matches = ac_pattern.finditer(text)
+            for match in matches:
+                full_match = match.group(0)
+                title_text = match.group(2).strip()
+                # Get the position where the title starts
+                title_start = match.start(2)
+                title_end = match.end(2)
+                # Check for any type of quotation marks, including smart quotes
+                title_in_quotes = any(q in title_text for q in ['"', "'", '“', '”', '‘', '’'])
+                # Check the formatting of the title
+                title_is_italicized = False
+                current_pos = 0
+                for run in paragraph.runs:
+                    run_length = len(run.text)
+                    run_start = current_pos
+                    run_end = current_pos + run_length
+                    if run_start <= title_start < run_end:
+                        title_is_italicized = run.italic
+                        break
+                    current_pos += run_length
+                # Check if formatting matches the required format
+                formatting_incorrect = False
+                issue_message = []
+                # Check italics requirement
+                if required_format["italics"] and not title_is_italicized:
+                    formatting_incorrect = True
+                    issue_message.append("should be italicized")
+                elif not required_format["italics"] and title_is_italicized:
+                    formatting_incorrect = True
+                    issue_message.append("should not be italicized")
+                # Check quotes requirement
+                if required_format["quotes"] and not title_in_quotes:
+                    formatting_incorrect = True
+                    issue_message.append("should be in quotes")
+                elif not required_format["quotes"] and title_in_quotes:
+                    formatting_incorrect = True
+                    issue_message.append("should not be in quotes")
+                if formatting_incorrect:
+                    incorrect_titles.append({
+                        'text': title_text,
+                        'issue': ', '.join(issue_message),
+                        'sentence': text.strip()
+                    })
+        success = len(incorrect_titles) == 0
+        return DocumentCheckResult(success=success, issues=incorrect_titles)
+    @profile_performance
+    def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for sentences that end with two periods."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        incorrect_sentences = []
+        for paragraph in doc:
+            # Split the paragraph into sentences based on common sentence-ending punctuation
+            sentences = re.split(r'(?<=[.!?]) +', paragraph)
+            for sentence in sentences:
+                if sentence.endswith('..'):
+                    incorrect_sentences.append({'sentence': sentence.strip()})
+        success = len(incorrect_sentences) == 0
+        return DocumentCheckResult(success=success, issues=incorrect_sentences)
+    @profile_performance
+    def spacing_check(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for correct spacing in the document."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        incorrect_spacing = []
+        # Regex patterns to find incorrect spacing
+        patterns = [
+            (re.compile(r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE), "Missing space between document type and number"),
+            (re.compile(r'(?<!\s)(§|§§)(\d+\.\d+)', re.IGNORECASE), "Missing space after section symbol (§)"),
+            (re.compile(r'(?<!\s)Part(\d+)', re.IGNORECASE), "Missing space between 'Part' and number"),
+            (re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE), "Missing space before paragraph indication"),
+            (re.compile(r'\s{2,}'), "Double spaces between words")
+        ]
+        for paragraph in doc:
+            sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+            for sentence in sentences:
+                for pattern, issue in patterns:
+                    if pattern.search(sentence):
+                        incorrect_spacing.append({
+                            'issue_description': issue,
+                            'sentence': sentence.strip()
+                        })
+        success = len(incorrect_spacing) == 0
+        return DocumentCheckResult(success=success, issues=incorrect_spacing)
+    @profile_performance
+    def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for abbreviation consistency after first definition."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        abbreviations = {}
+        issues = []
+        for paragraph in doc:
+            sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+            for sentence in sentences:
+                # Find definitions like "Federal Aviation Administration (FAA)"
+                defined_matches = re.findall(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)', sentence)
+                for full_term, acronym in defined_matches:
+                    if acronym not in abbreviations:
+                        abbreviations[acronym] = {"full_term": full_term.strip(), "defined": True}
+                # Check for full term usage after definition
+                for acronym, data in abbreviations.items():
+                    full_term = data["full_term"]
+                    if full_term in sentence:
+                        # Ignore first usage where it's defined
+                        if data["defined"]:
+                            data["defined"] = False  # Mark it as now defined
+                        else:
+                            # Only flag subsequent occurrences
+                            issues.append({
+                                'full_term': full_term,
+                                'acronym': acronym,
+                                'sentence': sentence.strip()
+                            })
+        success = len(issues) == 0
+        return DocumentCheckResult(success=success, issues=issues)
+    @profile_performance
+    def check_date_formats(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for inconsistent date formats while ignoring aviation reference numbers."""
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        date_issues = []
+        # Patterns to ignore (aviation references)
+        ignore_patterns = [
+            r'\bAD \d{4}-\d{2}-\d{2}\b',  # Airworthiness Directive references
+            r'\bSWPM \d{2}-\d{2}-\d{2}\b',  # Standard Wiring Practices Manual references
+            r'\bAMM \d{2}-\d{2}-\d{2}\b',   # Aircraft Maintenance Manual references
+            r'\bSOPM \d{2}-\d{2}-\d{2}\b',  # Standard Operating Procedure references
+            r'\b[A-Z]{2,4} \d{2}-\d{2}-\d{2}\b'  # Generic manual reference pattern
+        ]
+        # Combine ignore patterns into one
+        ignore_regex = '|'.join(ignore_patterns)
+        ignore_pattern = re.compile(ignore_regex)
+        # Correct date pattern: 'Month Day, Year' e.g., 'January 1, 2020'
+        correct_date_pattern = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
+        # Incorrect date patterns
+        date_patterns = [
+            (re.compile(r'(?<![\w/-])\d{1,2}/\d{1,2}/\d{2,4}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'MM/DD/YYYY'"),
+            (re.compile(r'(?<![\w/-])\d{1,2}-\d{1,2}-\d{2,4}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'MM-DD-YYYY'"),
+            (re.compile(r'(?<![\w/-])\d{4}-\d{1,2}-\d{1,2}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'YYYY-MM-DD'")
+        ]
+        for paragraph in doc:
+            sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+            for sentence in sentences:
+                # First, identify and temporarily remove text that should be ignored
+                ignored_matches = list(ignore_pattern.finditer(sentence))
+                working_sentence = sentence
+                # Replace ignored patterns with placeholders
+                for match in reversed(ignored_matches):
+                    start, end = match.span()
+                    working_sentence = working_sentence[:start] + 'X' * (end - start) + working_sentence[end:]
+                # Now check for date patterns in the modified sentence
+                for pattern, issue in date_patterns:
+                    matches = pattern.finditer(working_sentence)
+                    for match in matches:
+                        # Get the original text from the match position
+                        original_date = sentence[match.start():match.end()]
+                        date_issues.append({
+                            'date': original_date,
+                            'issue': issue,
+                            'sentence': sentence.strip()
+                        })
+        success = len(date_issues) == 0
+        return DocumentCheckResult(success=success, issues=date_issues)
+    @profile_performance
+    def check_placeholders(self, doc: List[str]) -> DocumentCheckResult:
+        """Report placeholder text that was left in the document.
+        Each paragraph is split into sentences at whitespace that follows
+        '.', '!' or '?'. Every sentence is searched case-insensitively for
+        'TBD', 'To be determined' and 'To be added'; at most one hit per
+        phrase per sentence is recorded.
+        Args:
+            doc (List[str]): Paragraph texts to scan.
+        Returns:
+            DocumentCheckResult: success is True only when nothing was found.
+            Each issue is a dict with the keys 'placeholder' (matched text)
+            and 'sentence' (the stripped sentence containing it). When
+            validate_input rejects doc, a failed result holding a single
+            'error' issue is returned instead.
+        """
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Compile once up front instead of handing raw pattern strings to re.search per sentence
+        placeholder_regexes = [
+            re.compile(expression, re.IGNORECASE)
+            for expression in (r'\bTBD\b', r'\bTo be determined\b', r'\bTo be added\b')
+        ]
+        issues = []
+        for paragraph in doc:
+            for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
+                # One search per phrase, in declaration order; misses yield None and are dropped
+                hits = (regex.search(sentence) for regex in placeholder_regexes)
+                issues.extend(
+                    {'placeholder': hit.group().strip(), 'sentence': sentence.strip()}
+                    for hit in hits
+                    if hit
+                )
+        return DocumentCheckResult(success=not issues, issues=issues)
+    def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
+        """
+        Execute every document check and collect the results by check name.
+        Args:
+            doc_path (str): Path to the document; used to extract paragraphs
+                and passed as-is to the document title check.
+            doc_type (str): Type of the document; selects the per-type
+                configuration and is forwarded to the type-aware checks.
+            template_type (str, optional): Accepted but not used by this method.
+        Returns:
+            Dict[str, DocumentCheckResult]: Check name mapped to its result.
+            When the document type sets 'skip_title_check', the
+            'document_title_check' entry is a passing result with no issues.
+        """
+        paragraphs = self.extract_paragraphs(doc_path)
+        # Per-type settings; a type missing from the configuration yields no flags
+        type_settings = self.config_manager.config['document_types'].get(doc_type, {})
+        title_check_skipped = type_settings.get('skip_title_check', False)
+        # Entries of a dict display are evaluated top to bottom, so the checks
+        # run in exactly the order listed here
+        results = {
+            'heading_title_check': self.heading_title_check(paragraphs, doc_type),
+            'heading_title_period_check': self.heading_title_period_check(paragraphs, doc_type),
+            'acronym_check': self.acronym_check(paragraphs),
+            'terminology_check': self.check_terminology(paragraphs),
+            'section_symbol_usage_check': self.check_section_symbol_usage(paragraphs),
+            'caption_check_table': self.caption_check(paragraphs, doc_type, 'Table'),
+            'caption_check_figure': self.caption_check(paragraphs, doc_type, 'Figure'),
+            'table_figure_reference_check': self.table_figure_reference_check(paragraphs, doc_type),
+            'document_title_check': (
+                DocumentCheckResult(success=True, issues=[])
+                if title_check_skipped
+                else self.document_title_check(doc_path, doc_type)
+            ),
+            'double_period_check': self.double_period_check(paragraphs),
+            'spacing_check': self.spacing_check(paragraphs),
+            'abbreviation_usage_check': self.check_abbreviation_usage(paragraphs),
+            'date_formats_check': self.check_date_formats(paragraphs),
+            'placeholders_check': self.check_placeholders(paragraphs),
+        }
         return results
+def process_document(file_obj, doc_type, template_type):
+    """Run every check on an uploaded document and return a Markdown report.
+    Args:
+        file_obj: Value handed to ``Document(...)`` and, unchanged, to the
+            document title check.
+        doc_type: Document type forwarded to the type-aware checks and to
+            the formatter.
+        template_type: Supplied by the Gradio click handler; not used here.
+    Returns:
+        str: The text produced by ``format_results_for_gradio``, or an error
+        message if any step raises.
+    """
+    try:
+        checker = FAADocumentChecker()
+        # Keep only paragraphs that contain non-whitespace text
+        paragraphs = [p.text for p in Document(file_obj).paragraphs if p.text.strip()]
+        # Entries of a dict display are evaluated top to bottom, so the checks
+        # run in exactly the order listed here
+        results = {
+            'heading_check': checker.heading_title_check(paragraphs, doc_type),
+            'heading_period_check': checker.heading_title_period_check(paragraphs, doc_type),
+            'acronym_check': checker.acronym_check(paragraphs),
+            'terminology_check': checker.check_terminology(paragraphs),
+            'section_symbol_check': checker.check_section_symbol_usage(paragraphs),
+            'table_caption_check': checker.caption_check(paragraphs, doc_type, 'Table'),
+            'figure_caption_check': checker.caption_check(paragraphs, doc_type, 'Figure'),
+            'references_check': checker.table_figure_reference_check(paragraphs, doc_type),
+            'title_check': checker.document_title_check(file_obj, doc_type),
+            'double_period_check': checker.double_period_check(paragraphs),
+            'spacing_check': checker.spacing_check(paragraphs),
+            'abbreviation_check': checker.check_abbreviation_usage(paragraphs),
+            'date_check': checker.check_date_formats(paragraphs),
+            'placeholder_check': checker.check_placeholders(paragraphs),
+        }
+        return format_results_for_gradio(results, doc_type)
     except Exception as e:
         print(f"Error in process_document: {str(e)}")
         return f"An error occurred while processing the document: {str(e)}"
+def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
+    """Render check results as a Markdown report for the Gradio output.
+    Args:
+        results: Check name mapped to a result exposing ``success``,
+            ``issues`` and ``details``.
+        doc_type: Not used by this function.
+    Returns:
+        str: A top-level heading followed by one '##' section per check,
+        joined with newlines. Each section shows a pass line or a bullet
+        list of issues, then any additional details.
+    """
+    # Display titles for the known check names; other names are title-cased below
+    check_titles = {
+        'heading_check': "Required Headings Check",
+        'heading_period_check': "Heading Period Check",
+        'acronym_check': "Acronym Check",
+        'terminology_check': "Terminology Check",
+        'section_symbol_check': "Section Symbol Usage",
+        'table_caption_check': "Table Caption Format",
+        'figure_caption_check': "Figure Caption Format",
+        'references_check': "Table and Figure References",
+        'title_check': "Document Title Style",
+        'double_period_check': "Double Period Check",
+        'spacing_check': "Spacing Check",
+        'abbreviation_check': "Abbreviation Usage",
+        'date_check': "Date Format Check",
+        'placeholder_check': "Placeholder Check"
+    }
+    report_lines = ["# Document Check Results\n"]
+    for check_name, result in results.items():
+        if check_name in check_titles:
+            heading = check_titles[check_name]
+        else:
+            heading = check_name.replace('_', ' ').title()
+        report_lines.append(f"## {heading}")
+        if not result.success:
+            report_lines.append("❌ Issues found:")
+            for issue in result.issues:
+                # Non-dict issues are printed as a single bullet
+                if not isinstance(issue, dict):
+                    report_lines.append(f"- {issue}")
+                    continue
+                for field, content in issue.items():
+                    if isinstance(content, list):
+                        # List values are flattened to one bullet per item, without the key
+                        report_lines.extend(f"- {entry}" for entry in content)
+                    else:
+                        report_lines.append(f"- {field}: {content}")
+            report_lines.append("")
+        else:
+            report_lines.append("✅ All checks passed.\n")
+        if result.details:
+            report_lines.append("Additional Details:")
+            for field, content in result.details.items():
+                if not isinstance(content, list):
+                    report_lines.append(f"- {field}: {content}")
+                    continue
+                # List values become a nested bullet list under the key
+                report_lines.append(f"- {field}:")
+                report_lines.extend(f"  - {entry}" for entry in content)
+            report_lines.append("")
+    return "\n".join(report_lines)
 # Create the Gradio interface
 demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')
     document_types = [
         "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
+        "Federal Register Notice", "Order", "Policy Statement",
         "Rule", "Special Condition", "Technical Standard Order", "Other"
     ]
                 value="Results will appear here after processing..."
             )
     def update_template_visibility(doc_type):
+        """Return a Gradio update that is visible only for "Advisory Circular"."""
+        is_advisory_circular = doc_type == "Advisory Circular"
+        return gr.update(visible=is_advisory_circular)
         outputs=[template_type]
     )
     submit_btn.click(
+        fn=process_document,
         inputs=[file_input, doc_type, template_type],
         outputs=[output]
     )
 # Launch the demo
+if __name__ == "__main__":
+    demo.launch()