Spaces:

Hoctar77
/

DocumentCheckerTool

Sleeping

App Files Files Community

Hoctar77 committed on Oct 28, 2024

Commit

5e842ab

verified ·

1 Parent(s): 92e51bf

Create app.py

Browse files

Files changed (1) hide show

app.py +736 -0

app.py ADDED Viewed

	@@ -0,0 +1,736 @@

+import gradio as gr
+import logging
+import re
+from docx import Document
+import tempfile
+import os
def setup_logging():
    """Configure root logging: INFO level, timestamped format, one stream handler."""
    log_format = '%(asctime)s %(levelname)s %(name)s - %(message)s'
    stream_handler = logging.StreamHandler()
    logging.basicConfig(level=logging.INFO, format=log_format, handlers=[stream_handler])
def read_word_document(doc_path):
    """Open the Word file at doc_path and return the text of every non-blank paragraph."""
    kept = []
    for para in Document(doc_path).paragraphs:
        # Paragraphs that are empty or whitespace-only are dropped.
        if para.text.strip():
            kept.append(para.text)
    return kept
def heading_title_check(doc, required_headings):
    """
    Report which required headings appear as whole paragraphs.

    Args:
        doc (list): Paragraph texts from the document
        required_headings (list): Heading titles that must be present

    Returns:
        tuple: (bool, list) - True when every required heading was seen, plus
        the matching paragraphs in document order (repeats are kept).
    """
    wanted = set(required_headings)
    # A paragraph counts only if, once stripped, it equals a required heading exactly.
    matched = [text for text in (raw.strip() for raw in doc) if text in wanted]
    return set(matched) == wanted, matched
def acronym_check(doc):
    """Find acronyms that are used without a preceding "word (ACRONYM)" definition.

    Definitions in a paragraph are registered before that paragraph's usages
    are examined. Returns (True if nothing is undefined, set of undefined acronyms).
    """
    usage_re = re.compile(r'(\b[A-Z]{2,}\b)')  # any run of 2+ capital letters
    definition_re = re.compile(r'(\b\w+\b) \((\b[A-Z]{2,}\b)\)')  # e.g. "Administration (FAA)"
    known = set()
    missing = set()
    for text in doc:
        known.update(abbr for _, abbr in definition_re.findall(text))
        missing.update(abbr for abbr in usage_re.findall(text) if abbr not in known)
    return not missing, missing
def legal_check(doc):
    """Check for incorrect legal references and terminology and suggest corrections.

    Args:
        doc (list): List of paragraphs/strings to check

    Returns:
        tuple: (bool, list) - (True if no errors found, list of
        (incorrect, correct) term pairs). Within each paragraph the
        "Title 14" findings come first, then the other terms in the order
        they are listed below.
    """
    # Mapping of incorrect patterns to their correct versions.
    # NOTE: "&" and "C.F.R." must not be wrapped in \b on the punctuation side.
    # A word boundary next to a non-word character only exists when a word
    # character is adjacent, so r"\b&\b" missed "A & B" and r"C\.F\.R\.\b"
    # missed "C.F.R. part".
    incorrect_variations = {
        r"\bUSC\b": "U.S.C.",
        r"\bCFR Part\b": "CFR part",
        r"\bC\.F\.R\.": "CFR",
        r"\bWe\b": "The FAA",
        r"\bwe\b": "the FAA",
        r"\bcancelled\b": "canceled",
        r"\bshall\b": "must or will",
        r"&": "and",
    }
    # Compile once instead of once per paragraph.
    compiled_variations = [
        (re.compile(pattern), correct_term)
        for pattern, correct_term in incorrect_variations.items()
    ]
    # prefix is '' at the start of the paragraph, otherwise the single
    # punctuation or whitespace character that precedes the phrase.
    title_14_pattern = re.compile(
        r"(?P<prefix>^|[.!?\s])\s*(?P<title>title 14|Title 14)\b"
    )

    incorrect_legal_references = []
    for paragraph in doc:
        for match in title_14_pattern.finditer(paragraph):
            prefix = match.group('prefix')
            current_title = match.group('title')
            if prefix in ('.', '!', '?', ''):
                # Start of paragraph or sentence: must be capitalized.
                if current_title != "Title 14":
                    incorrect_legal_references.append((current_title, "Title 14"))
            elif prefix.isspace() and current_title != "title 14":
                # Mid-sentence: must be lower case.
                incorrect_legal_references.append((current_title, "title 14"))

        for pattern, correct_term in compiled_variations:
            for match in pattern.finditer(paragraph):
                incorrect_legal_references.append((match.group(), correct_term))

    return not incorrect_legal_references, incorrect_legal_references
def table_caption_check(doc, doc_type):
    """
    Check for correctly formatted table captions in the document.

    A paragraph is treated as a caption when it begins with the word "Table"
    (any case). For "Advisory Circular" and "Order" documents a caption must
    look like "Table X-Y" followed by a period or whitespace; for all other
    types it must look like "Table X". X and Y may be letters or digits.

    Returns:
        tuple: (bool, list) - (True if no malformed captions, list of the
        malformed caption paragraphs, stripped).
    """
    if doc_type in ["Advisory Circular", "Order"]:
        table_caption_pattern = re.compile(r'^Table\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
    else:
        table_caption_pattern = re.compile(r'^Table\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)

    # "Table" must not run straight into another letter, otherwise ordinary
    # sentences such as "Tables are listed ..." would be reported as broken
    # captions. "Table1" (missing space) is still examined and still fails.
    caption_start = re.compile(r'table(?![a-z])', re.IGNORECASE)

    incorrect_captions = []
    for paragraph in doc:
        paragraph_strip = paragraph.strip()
        if caption_start.match(paragraph_strip) and not table_caption_pattern.match(paragraph_strip):
            incorrect_captions.append(paragraph_strip)
    return not incorrect_captions, incorrect_captions
def figure_caption_check(doc, doc_type):
    """
    Check for correctly formatted figure captions in the document.

    A paragraph is treated as a caption when it begins with the word "Figure"
    (any case). For "Advisory Circular" and "Order" documents a caption must
    look like "Figure X-Y" followed by a period or whitespace; for all other
    types it must look like "Figure X". X and Y may be letters or digits.

    Returns:
        tuple: (bool, list) - (True if no malformed captions, list of the
        malformed caption paragraphs, stripped).
    """
    if doc_type in ["Advisory Circular", "Order"]:
        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
    else:
        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)

    # "Figure" must not run straight into another letter, otherwise sentences
    # such as "Figures 1 and 2 show ..." would be reported as broken captions.
    # "Figure1" (missing space) is still examined and still fails.
    caption_start = re.compile(r'figure(?![a-z])', re.IGNORECASE)

    incorrect_fig_captions = []
    for paragraph in doc:
        paragraph_strip = paragraph.strip()
        if caption_start.match(paragraph_strip) and not figure_caption_pattern.match(paragraph_strip):
            incorrect_fig_captions.append(paragraph_strip)
    return not incorrect_fig_captions, incorrect_fig_captions
def table_figure_reference_check(doc, doc_type):
    """Check for incorrectly numbered in-text references to tables and figures.

    For "Advisory Circular" and "Order" documents the expected form is
    "Table X-Y" / "Figure X-Y", so a bare "Table 3" is reported. For every
    other document type the expected form is "Table X" / "Figure X", so a
    hyphenated "Table 3-1" is reported. Paragraphs that themselves start with
    "table" or "figure" are treated as captions and skipped.

    Returns:
        tuple: (bool, list) - (True if no bad references, list of the
        offending reference strings exactly as written in the text).
    """
    if doc_type in ["Advisory Circular", "Order"]:
        # A number that is NOT followed by "-<digits>".
        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(?!-\d+)\b', re.IGNORECASE)
        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(?!-\d+)\b', re.IGNORECASE)
    else:
        # Only the hyphenated form is wrong here. The earlier pattern made the
        # hyphen part optional (so correct "Table 1" was flagged) and wrapped
        # it in a capture group (so findall returned '' or '-2' rather than
        # the reference text).
        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+-\d+\b', re.IGNORECASE)
        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+-\d+\b', re.IGNORECASE)

    incorrect_table_figure_references = []
    for paragraph in doc:
        # Exclude captions
        if paragraph.strip().lower().startswith(('table', 'figure')):
            continue
        incorrect_table_figure_references.extend(incorrect_table_ref_pattern.findall(paragraph))
        incorrect_table_figure_references.extend(incorrect_figure_ref_pattern.findall(paragraph))

    return not incorrect_table_figure_references, incorrect_table_figure_references
def document_title_check(doc_path, doc_type):
    """Check that titles following an "AC <number>" citation are styled per doc_type.

    Every paragraph is scanned for "AC <number>[-<number>]" followed by a
    title (text up to the next period, comma, or end of paragraph). The title
    is then compared with the italics/quotes rule for doc_type.

    Args:
        doc_path: Path of the .docx file; opened with ``Document``.
        doc_type (str): One of the keys of ``formatting_rules`` below.

    Returns:
        tuple: (bool, list) - (True if no styling problems, list of dicts
        with 'text' (the matched citation) and 'issue' (comma-joined advice)).

    Raises:
        ValueError: If doc_type has no formatting rule.
    """
    incorrect_titles = []
    doc = Document(doc_path)
    ac_pattern = re.compile(r'AC\s+\d+(?:-\d+)?(?:,|\s)+(.+?)(?=\.|,|$)')
    # Define formatting rules for different document types
    formatting_rules = {
        "Advisory Circular": {"italics": True, "quotes": False},
        "Airworthiness Criteria": {"italics": False, "quotes": True},
        "Deviation Memo": {"italics": False, "quotes": True},
        "Exemption": {"italics": False, "quotes": True},
        "Federal Register Notice": {"italics": False, "quotes": True},
        "Handbook/Manual": {"italics": False, "quotes": False},
        "Order": {"italics": False, "quotes": True},
        "Policy Statement": {"italics": False, "quotes": False},
        "Rule": {"italics": False, "quotes": True},
        "Special Condition": {"italics": False, "quotes": True},
        "Technical Standard Order": {"italics": False, "quotes": True},
        "Other": {"italics": False, "quotes": False}
    }
    if doc_type not in formatting_rules:
        raise ValueError(f"Unsupported document type: {doc_type}")
    required_format = formatting_rules[doc_type]

    # Straight quotes plus the curly ("smart") quotes Word inserts. The curly
    # ones are written as escapes so they cannot be silently flattened to
    # ASCII, which is what turned the previous literal list into a
    # triple-quoted ", " string that never matched.
    quote_marks = ('"', "'", '\u201c', '\u201d', '\u2018', '\u2019')

    for paragraph in doc.paragraphs:
        text = paragraph.text
        for match in ac_pattern.finditer(text):
            full_match = match.group(0)
            title_text = match.group(1).strip()
            title_start = match.start(1)

            title_in_quotes = any(mark in title_text for mark in quote_marks)

            # Italic state is taken from the run containing the title's first
            # character. NOTE(review): this assumes the run texts concatenate
            # to paragraph.text - confirm for paragraphs with hyperlinks/fields.
            title_is_italicized = False
            current_pos = 0
            for run in paragraph.runs:
                run_length = len(run.text)
                if current_pos <= title_start < current_pos + run_length:
                    title_is_italicized = run.italic
                    break
                current_pos += run_length

            issue_message = []
            if required_format["italics"] and not title_is_italicized:
                issue_message.append("should be italicized")
            elif not required_format["italics"] and title_is_italicized:
                issue_message.append("should not be italicized")
            if required_format["quotes"] and not title_in_quotes:
                issue_message.append("should be in quotes")
            elif not required_format["quotes"] and title_in_quotes:
                issue_message.append("should not be in quotes")

            if issue_message:
                incorrect_titles.append({
                    'text': full_match,
                    'issue': ', '.join(issue_message)
                })
    return not incorrect_titles, incorrect_titles
def get_document_checks(doc_type, template_type):
    """Look up the required-headings entry for a document type.

    For "Advisory Circular" the entry is selected by template_type; every
    other document type ignores template_type. An unknown document type or
    an unknown AC template yields an empty dict.
    """
    def headings(*titles):
        # Fresh dict and list per call so entries never share state.
        return {"required_headings": list(titles)}

    def pending():
        return headings("TBD - Need to research")

    catalog = {
        "Advisory Circular": {
            "Short AC template AC": headings(
                "PURPOSE.",
                "APPLICABILITY.",
                "CANCELLATION.",
                "RELATED MATERIAL.",
                "DEFINITION OF KEY TERMS.",
            ),
            "Long AC template AC": headings(
                "Purpose.",
                "Applicability.",
                "Cancellation.",
                "Related Material.",
                "Definition of Key Terms.",
            ),
        },
        "Airworthiness Criteria": pending(),
        "Deviation Memo": pending(),
        "Exemption": pending(),
        "Federal Register Notice": headings(
            "Purpose of This Notice",
            "Audience",
            "Where can I Find This Notice",
        ),
        "Handbook/Manual": pending(),
        "Order": headings(
            "Purpose of This Order.",
            "Audience.",
            "Where to Find This Order.",
        ),
        "Policy Statement": headings(
            "SUMMARY",
            "CURRENT REGULATORY AND ADVISORY MATERIAL",
            "RELEVANT PAST PRACTICE",
            "POLICY",
            "EFFECT OF POLICY",
            "CONCLUSION",
        ),
        "Rule": pending(),
        "Special Condition": pending(),
        "Technical Standard Order": headings(
            "PURPOSE.",
            "APPLICABILITY.",
            "REQUIREMENTS.",
            "MARKING.",
            "APPLICATION DATA REQUIREMENTS.",
            "MANUFACTURER DATA REQUIREMENTS.",
            "FURNISHED DATA REQUIREMENTS.",
            "HOW TO GET REFERENCED DOCUMENTS.",
        ),
        "Other": headings("N/A"),
    }

    log = logging.getLogger(__name__)
    log.info(f"Requested document type: {doc_type}")
    log.info(f"Requested template type: {template_type}")

    selected = catalog.get(doc_type, {})
    if doc_type == "Advisory Circular":
        # Advisory Circulars are nested one level deeper, keyed by template.
        selected = selected.get(template_type, {})

    log.info(f"Retrieved checks: {selected}")
    return selected
def double_period_check(doc):
    """Collect sentences that end in two periods.

    Each paragraph is split after '.', '!' or '?' when followed by spaces;
    any piece ending in '..' is reported (stripped). Returns
    (True if none were found, list of offending sentences).
    """
    sentence_break = re.compile(r'(?<=[.!?]) +')
    flagged = [
        piece.strip()
        for text in doc
        for piece in sentence_break.split(text)
        if piece.endswith('..')
    ]
    return not flagged, flagged
def spacing_check(doc):
    """
    Check for correct spacing in US federal regulatory documents.

    A paragraph is reported when any of these is found:
    - A document-type prefix fused to its number (e.g. "AC20-114" instead of "AC 20-114")
    - A section symbol fused to its number (e.g. "§25.301" instead of "§ 25.301")
    - "Part" fused to its number (e.g. "Part25" instead of "Part 25")
    - An opening parenthesis directly after a non-space character, when it
      does not open a one-character designator such as "(a)" or "(1)"
    - Two or more consecutive whitespace characters

    Returns:
        tuple: (bool, list) - (True if nothing was found, list of offending
        paragraphs, each listed once).
    """
    # The prefix patterns use a word boundary rather than "not preceded by
    # whitespace". The old lookbehind skipped the usual case where the
    # reference follows a space ("see AC20-114") and fired inside ordinary
    # words ("version2" matched N + digit).
    spacing_patterns = (
        re.compile(r'\b(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE),
        re.compile(r'(§{1,2})(\d+\.\d+)'),
        re.compile(r'\bPart(\d+)', re.IGNORECASE),
        re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE),
        re.compile(r'\s{2,}'),
    )

    # any() reports a paragraph once even when several rules match it;
    # previously it was appended once per matching rule.
    incorrect_spacing = [
        paragraph
        for paragraph in doc
        if any(pattern.search(paragraph) for pattern in spacing_patterns)
    ]
    return not incorrect_spacing, incorrect_spacing
def check_prohibited_phrases(doc):
    """Check for prohibited words or phrases.

    Matching is case-insensitive and on whole words.

    Returns:
        list: (phrase, stripped paragraph) tuples, one per prohibited phrase
        found in each paragraph, in the order the phrases are listed below.
    """
    prohibited_phrases = ("above", "below", "there is", "there are")
    # Keep the readable phrase next to its compiled pattern. Deriving the
    # label with str.strip(r'\b') strips the *characters* "\" and "b", which
    # turned "below" into "elow".
    phrase_patterns = [
        (phrase, re.compile(rf'\b{re.escape(phrase)}\b', re.IGNORECASE))
        for phrase in prohibited_phrases
    ]

    issues = []
    for paragraph in doc:
        for phrase, pattern in phrase_patterns:
            if pattern.search(paragraph):
                issues.append((phrase, paragraph.strip()))
    return issues
def check_abbreviation_usage(doc):
    """Check that the long form is not reused after its abbreviation is defined.

    A definition is text of letters, spaces and '&' followed by an all-caps
    acronym in parentheses, e.g. "Federal Aviation Administration (FAA)".
    The first paragraph containing the long form (normally the defining one)
    is accepted; every later paragraph containing it is reported.

    Returns:
        list: (full_term, acronym, stripped paragraph) tuples.
    """
    definition_pattern = re.compile(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)')
    abbreviations = {}
    issues = []
    for paragraph in doc:
        for full_term, acronym in definition_pattern.findall(paragraph):
            full_term = full_term.strip()
            # The pattern can capture a whitespace-only "term" (e.g. after a
            # digit). Once stripped it is "", which is a substring of every
            # paragraph and would flag the rest of the document.
            if full_term and acronym not in abbreviations:
                # "defined" stays True until the first sighting is consumed.
                abbreviations[acronym] = {"full_term": full_term, "defined": True}

        for acronym, data in abbreviations.items():
            full_term = data["full_term"]
            if full_term not in paragraph:
                continue
            if data["defined"]:
                # First paragraph containing the long form: allowed.
                data["defined"] = False
            else:
                issues.append((full_term, acronym, paragraph.strip()))
    return issues
def check_date_formats(doc):
    """Report numeric slash-separated dates (e.g. 10/28/2024).

    The accepted style is the long form "October 28, 2024", so every
    N/N/NN-NNNN date is an issue.

    Returns:
        list: (date string, stripped paragraph) tuples in document order.
    """
    # A string matched by this pattern can never also be a long-form date, so
    # the former per-date long-form re-check (and the extra search() before
    # findall()) had no effect and is omitted.
    date_pattern = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')
    return [
        (date, paragraph.strip())
        for paragraph in doc
        for date in date_pattern.findall(paragraph)
    ]
def check_placeholders(doc):
    """Check for placeholders that should be removed.

    Matching is case-insensitive and on whole words.

    Returns:
        list: (placeholder, stripped paragraph) tuples, one per placeholder
        found in each paragraph, in the order the placeholders are listed below.
    """
    placeholder_phrases = ("TBD", "To be determined", "To be added")
    # The label is stored directly rather than derived with
    # str.strip(r'\b'), which strips the characters "\" and "b" and would
    # corrupt any phrase that begins or ends with "b".
    phrase_patterns = [
        (phrase, re.compile(rf'\b{re.escape(phrase)}\b', re.IGNORECASE))
        for phrase in placeholder_phrases
    ]

    issues = []
    for paragraph in doc:
        for phrase, pattern in phrase_patterns:
            if pattern.search(paragraph):
                issues.append((phrase, paragraph.strip()))
    return issues
def _append_section(results, heading, passed, ok_message, fail_header, items, render):
    """Append one report section: heading, then the pass line or the failure header plus one rendered entry per item."""
    results.append(heading)
    if passed:
        results.append(ok_message + "\n")
        return
    results.append(fail_header)
    # items is only touched on failure, so callers may pass anything when passed is True.
    for item in items:
        results.extend(render(item))


def format_results_for_gradio(heading_valid, headings_found, acronyms_valid, undefined_acronyms,
                            legal_valid, incorrect_legal_references, table_valid, incorrect_captions,
                            figure_valid, incorrect_fig_captions, references_valid, incorrect_table_figure_references,
                            title_style_valid, incorrect_titles, required_headings, doc_type, double_period_valid,
                            incorrect_sentences, spacing_valid, incorrect_spacing, abbreviation_issues, date_issues, placeholder_issues):
    """Format the results of all checks as one Markdown string for Gradio.

    Each ``*_valid`` flag selects between a pass line and a failure list built
    from the paired collection. The three ``*_issues`` lists carry no flag; an
    empty list means the check passed. doc_type selects the styling note shown
    under a failed title check.

    Returns:
        str: The Markdown report, sections joined by newlines.
    """
    def bullet(value):
        return [f"- {value}"]

    results = []
    results.append("# Document Check Results\n")

    # Required Headings Check. Missing headings are listed in the order they
    # were required (set difference gave an arbitrary order).
    found = set(headings_found)
    missing_headings = [h for h in dict.fromkeys(required_headings) if h not in found]
    _append_section(results, "## Required Headings Check", heading_valid,
                    "✅ All required headings are present.",
                    "❌ Missing Required Headings:", missing_headings, bullet)
    results.append("")

    # Acronym Check. Sorted so the report is stable between runs
    # (undefined_acronyms is a set).
    results.append("## Acronym Check")
    if acronyms_valid:
        results.append("✅ All acronyms are properly defined.\n")
    else:
        results.append(f"❌ The following acronyms need to be defined at first use: {', '.join(sorted(undefined_acronyms))}\n")

    # Legal Check
    _append_section(results, "## Legal Terminology Check", legal_valid,
                    "✅ All legal references are properly formatted.",
                    "❌ Incorrect Legal Terminology:", incorrect_legal_references,
                    lambda pair: [f"- Use '{pair[1]}' instead of '{pair[0]}'"])
    results.append("")

    # Table Caption Check
    _append_section(results, "## Table Caption Check", table_valid,
                    "✅ All table captions are correctly formatted.",
                    "❌ Incorrect Table Captions:", incorrect_captions, bullet)
    results.append("")

    # Figure Caption Check
    _append_section(results, "## Figure Caption Check", figure_valid,
                    "✅ All figure captions are correctly formatted.",
                    "❌ Incorrect Figure Captions:", incorrect_fig_captions, bullet)
    results.append("")

    # Table and Figure References Check
    _append_section(results, "## Table and Figure References Check", references_valid,
                    "✅ All table and figure references are correctly formatted.",
                    "❌ Incorrect Table/Figure References:", incorrect_table_figure_references, bullet)
    results.append("")

    # Document Title Style Check
    _append_section(results, "## Document Title Style Check", title_style_valid,
                    "✅ All document title references are properly styled.",
                    "❌ Incorrect Document Title Styling:", incorrect_titles,
                    lambda title: [f"- {title['text']}", f"  - Issue: {title['issue']}"])
    if not title_style_valid:
        # The key must match the UI's document type name exactly; it used to
        # be "Federal Notice", which no document type ever equals.
        formatting_notes = {
            "Advisory Circular": "Document titles should be italicized, not in quotation marks.",
            "Order": "Document titles should be in quotation marks, not italicized.",
            "Federal Register Notice": "Document titles should be in quotation marks, not italicized.",
            "Policy Statement": "Document titles should not have any special formatting (no italics, no quotation marks)."
        }
        if doc_type in formatting_notes:
            results.append(f"\nNote: {formatting_notes[doc_type]}")
        else:
            results.append("\nNote: Please verify the correct formatting style for this document type.")
    results.append("")

    # Double Period Check
    _append_section(results, "## Double Period Check", double_period_valid,
                    "✅ No double periods found.",
                    "❌ Sentences found with double periods:", incorrect_sentences, bullet)
    results.append("")

    # Spacing Check
    _append_section(results, "## Spacing Check", spacing_valid,
                    "✅ All spacing is correct.",
                    "❌ Incorrect spacing found in:", incorrect_spacing, bullet)
    results.append("")

    # Abbreviation Consistency
    _append_section(results, "## Abbreviation Consistency", not abbreviation_issues,
                    "✅ All abbreviations are used consistently after definition.",
                    "❌ Abbreviation Issues:", abbreviation_issues,
                    lambda issue: [f"- Use '{issue[1]}' instead of '{issue[0]}' in: {issue[2]}"])
    results.append("")

    # Date Format Consistency
    _append_section(results, "## Date Format Consistency", not date_issues,
                    "✅ All dates are in the correct format.",
                    "❌ Date Format Issues:", date_issues,
                    lambda issue: [f"- Incorrect date format '{issue[0]}' in: {issue[1]}"])
    results.append("")

    # Placeholder Check (last section: no trailing blank entry)
    _append_section(results, "## Placeholder Check", not placeholder_issues,
                    "✅ No placeholders found.",
                    "❌ Placeholders Found:", placeholder_issues,
                    lambda issue: [f"- Placeholder '{issue[0]}' in: {issue[1]}"])

    return "\n".join(results)
+# Modify the process_document function to return formatted results instead of writing to file
def process_document(file_obj, doc_type, template_type):
    """Run every document check on an uploaded .docx and return the Markdown report.

    Args:
        file_obj: The upload, as raw ``bytes`` (what the UI's
            ``gr.File(type="binary")`` supplies), a filesystem path, or a
            binary file-like object with ``read()``.
        doc_type (str): Document type selected in the UI.
        template_type (str): Template name; only used for Advisory Circulars.

    Returns:
        str: Report produced by ``format_results_for_gradio``.

    Raises:
        ValueError: If no file was supplied, or (from ``document_title_check``)
            if doc_type is not supported.
    """
    logger = logging.getLogger(__name__)

    if file_obj is None:
        raise ValueError("No document was uploaded.")
    # Calling .read() unconditionally fails for bytes, which is what the
    # interface actually passes.
    if isinstance(file_obj, (bytes, bytearray)):
        payload = bytes(file_obj)
    elif isinstance(file_obj, (str, os.PathLike)):
        with open(file_obj, "rb") as source:
            payload = source.read()
    else:
        payload = file_obj.read()

    # The checks need a real path, so the upload is spooled to a temp file.
    # delete=False because the file is reopened by name after this block.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as tmp_file:
        tmp_file.write(payload)
        tmp_path = tmp_file.name
    try:
        # Step 1: Read the Word document
        doc = read_word_document(tmp_path)
        logger.info("Document read successfully.")
        # Get required headings for document type and template type
        checks = get_document_checks(doc_type, template_type)
        required_headings = checks.get("required_headings", [])
        # Step 2: Perform all checks
        heading_valid, headings_found = heading_title_check(doc, required_headings)
        acronyms_valid, undefined_acronyms = acronym_check(doc)
        legal_valid, incorrect_legal_references = legal_check(doc)
        table_valid, incorrect_captions = table_caption_check(doc, doc_type)
        figure_valid, incorrect_fig_captions = figure_caption_check(doc, doc_type)
        references_valid, incorrect_table_figure_references = table_figure_reference_check(doc, doc_type)
        # The title check inspects run formatting, so it reads the file itself.
        title_style_valid, incorrect_titles = document_title_check(tmp_path, doc_type)
        double_period_valid, incorrect_sentences = double_period_check(doc)
        spacing_valid, incorrect_spacing = spacing_check(doc)
        abbreviation_issues = check_abbreviation_usage(doc)
        date_issues = check_date_formats(doc)
        placeholder_issues = check_placeholders(doc)
        # Format results for Gradio
        return format_results_for_gradio(
            heading_valid, headings_found,
            acronyms_valid, undefined_acronyms,
            legal_valid, incorrect_legal_references,
            table_valid, incorrect_captions,
            figure_valid, incorrect_fig_captions,
            references_valid, incorrect_table_figure_references,
            title_style_valid, incorrect_titles,
            required_headings, doc_type,
            double_period_valid, incorrect_sentences,
            spacing_valid, incorrect_spacing,
            abbreviation_issues, date_issues,
            placeholder_issues
        )
    finally:
        # Clean up the temporary file
        os.unlink(tmp_path)
+# Create the Gradio interface
def create_gradio_interface():
    """Build and return the Gradio Interface for the document checker.

    Inputs are a binary file upload, a document-type dropdown and a
    template-type radio; the output is a Markdown panel.
    """
    template_types = ["Short AC template AC", "Long AC template AC"]
    document_types = [
        "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
        "Federal Register Notice", "Handbook/Manual", "Order", "Policy Statement",
        "Rule", "Special Condition", "Technical Standard Order", "Other"
    ]

    def process_file(file_obj, doc_type, template_type):
        # The template choice only applies to Advisory Circulars.
        effective_template = template_type if doc_type == "Advisory Circular" else "N/A"
        return process_document(file_obj, doc_type, effective_template)

    upload_input = gr.File(label="Upload Word Document (.docx)", type="binary")
    type_input = gr.Dropdown(choices=document_types, label="Document Type")
    template_input = gr.Radio(
        choices=template_types,
        label="Template Type (Only for Advisory Circular)",
        visible=True,
    )
    report_output = gr.Markdown(label="Check Results")

    return gr.Interface(
        fn=process_file,
        inputs=[upload_input, type_input, template_input],
        outputs=report_output,
        title="FAA Document Checker",
        description="Upload a Word document to check for compliance with FAA documentation standards.",
        article="This tool checks document formatting, headings, acronyms, legal references, and more.",
        theme="default",
    )
+# Launch the Gradio interface
if __name__ == "__main__":
    # Configure logging first so the checks' log output is visible, then serve the UI.
    setup_logging()
    create_gradio_interface().launch()