Spaces:

Hoctar77
/

DocumentCheckerTool

Running

App Files Files Community

Hoctar77 committed on Oct 29, 2024

Commit

8c95735

verified ·

1 Parent(s): f4450e4

Update app.py

Browse files

Files changed (1) hide show

app.py +197 -397

app.py CHANGED Viewed

@@ -3,19 +3,7 @@ import logging
 import re
 from docx import Document
 import io
-def setup_logging():
-    """Initialize logging configuration."""
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(levelname)s %(name)s - %(message)s',
-        handlers=[logging.StreamHandler()]
-    )
-def read_word_document(doc_path):
-    """Read a Word document and return its content as a list of paragraphs."""
-    doc = Document(doc_path)
-    return [para.text for para in doc.paragraphs if para.text.strip() != ""]
 def heading_title_check(doc, required_headings):
     """Check if all required headings are present."""
@@ -64,63 +52,47 @@ def acronym_check(doc):
     return len(undefined_acronyms) == 0, list(undefined_acronyms)
 def legal_check(doc):
-    """Check for correct legal references in the document and suggest corrections.
-    Args:
-        doc (list): List of paragraphs/strings to check
-    Returns:
-        tuple: (bool, list) - (True if no errors found, list of (incorrect, correct) terms)
-    """
-    # Mapping of incorrect terms to their correct versions
-    incorrect_variations = {
-        r"\bUSC\b": "U.S.C.",
-        r"\bCFR Part\b": "CFR part",
-        r"\bC\.F\.R\.\b": "CFR",
-        r"\bWe\b": "The FAA",
-        r"\bwe\b": "the FAA",
-        r"\bcancelled\b": "canceled",
-        r"\bshall\b": "must or will",
-        r"\b&\b": "and"
-    }
-    # List to store tuples of incorrect terms and their correct versions
     incorrect_legal_references = []
-    for paragraph in doc:
-        # Special handling for "Title 14" / "title 14"
-        title_14_pattern = r"(?P<prefix>^|[.!?\s])\s*(?P<title>title 14|Title 14)\b"
-        matches = re.finditer(title_14_pattern, paragraph)
-        for match in matches:
-            prefix = match.group('prefix')
-            current_title = match.group('title')
-            # If it follows a sentence-ending punctuation or is at start, it should be "Title 14"
-            if prefix in ('.', '!', '?', '') and current_title.lower() == "title 14":
-                if current_title != "Title 14":
-                    incorrect_legal_references.append((current_title, "Title 14"))
-            # If it's within a sentence, it should be "title 14"
-            elif prefix.isspace() and current_title != "title 14":
-                incorrect_legal_references.append((current_title, "title 14"))
-        # Check other variations
-        for incorrect_pattern, correct_term in incorrect_variations.items():
-            matches = re.finditer(incorrect_pattern, paragraph)
-            for match in matches:
-                incorrect_legal_references.append((match.group(), correct_term))
     return len(incorrect_legal_references) == 0, incorrect_legal_references
 def table_caption_check(doc, doc_type):
     """Check if table captions are formatted correctly."""
     incorrect_captions = []
     try:
         # Check table captions
         for table in doc.tables:
             # Get the paragraph before the table
-            table._element.getprevious()
-            # Add your caption checking logic here
     except Exception as e:
         print(f"Error in table caption check: {str(e)}")
         return False, []
@@ -128,373 +100,198 @@ def table_caption_check(doc, doc_type):
     return len(incorrect_captions) == 0, incorrect_captions
 def figure_caption_check(doc, doc_type):
-    """
-    Check for correctly formatted figure captions in the document.
-    Supports both numeric (Figure 1-2) and alphanumeric (Figure C-1) formats.
-    """
-    if doc_type in ["Advisory Circular", "Order"]:
-        # Pattern for "Figure X-Y" where X and Y can be either letters or numbers
-        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
-    else:
-        # Pattern for "Figure X" where X can be either letters or numbers
-        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
     incorrect_fig_captions = []
-    for paragraph in doc:
-        paragraph_strip = paragraph.strip()
-        if paragraph_strip.lower().startswith("figure"):
-            if not figure_caption_pattern.match(paragraph_strip):
-                incorrect_fig_captions.append(paragraph_strip)
     return len(incorrect_fig_captions) == 0, incorrect_fig_captions
 def table_figure_reference_check(doc, doc_type):
-    """Check for incorrect references to tables and figures in the document."""
     incorrect_table_figure_references = []
-    if doc_type in ["Advisory Circular", "Order"]:
-        # For Advisory Circulars and Orders, correct references are "Table X-Y" or "Figure X-Y"
-        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(?!-\d+)\b', re.IGNORECASE)
-        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(?!-\d+)\b', re.IGNORECASE)
-    else:
-        # For other document types, correct references are "Table X" or "Figure X"
-        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(-\d+)?\b', re.IGNORECASE)
-        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(-\d+)?\b', re.IGNORECASE)
-    for paragraph in doc:
-        paragraph_strip = paragraph.strip()
-        # Exclude captions
-        starts_with_table_or_figure = paragraph_strip.lower().startswith('table') or paragraph_strip.lower().startswith('figure')
-        if not starts_with_table_or_figure:
-            # Find incorrect table references
-            incorrect_tables = incorrect_table_ref_pattern.findall(paragraph)
-            if incorrect_tables:
-                incorrect_table_figure_references.extend(incorrect_tables)
-            # Find incorrect figure references
-            incorrect_figures = incorrect_figure_ref_pattern.findall(paragraph)
-            if incorrect_figures:
-                incorrect_table_figure_references.extend(incorrect_figures)
-    # Return False if any incorrect references are found
     return len(incorrect_table_figure_references) == 0, incorrect_table_figure_references
-def document_title_check(doc_path, doc_type):
     incorrect_titles = []
-    doc = Document(doc_path)
-    # Updated pattern to capture titles correctly
-    ac_pattern = re.compile(r'AC\s+\d+(?:-\d+)?(?:,|\s)+(.+?)(?=\.|,|$)')
-    # Define formatting rules for different document types
-    formatting_rules = {
-        "Advisory Circular": {"italics": True, "quotes": False},
-        "Airworthiness Criteria": {"italics": False, "quotes": True},
-        "Deviation Memo": {"italics": False, "quotes": True},
-        "Exemption": {"italics": False, "quotes": True},
-        "Federal Register Notice": {"italics": False, "quotes": True},
-        "Handbook/Manual": {"italics": False, "quotes": False},
-        "Order": {"italics": False, "quotes": True},
-        "Policy Statement": {"italics": False, "quotes": False},
-        "Rule": {"italics": False, "quotes": True},
-        "Special Condition": {"italics": False, "quotes": True},
-        "Technical Standard Order": {"italics": False, "quotes": True},
-        "Other": {"italics": False, "quotes": False}
-    }
-    # Get the rules for the current document type
-    if doc_type not in formatting_rules:
-        raise ValueError(f"Unsupported document type: {doc_type}")
-    required_format = formatting_rules[doc_type]
-    for paragraph in doc.paragraphs:
-        text = paragraph.text
-        matches = ac_pattern.finditer(text)
-        for match in matches:
-            full_match = match.group(0)
-            title_text = match.group(1).strip()
-            # Get the position where the title starts
-            title_start = match.start(1)
-            # Check for any type of quotation marks, including smart quotes
-            title_in_quotes = any(q in title_text for q in ['"', "'", '"', '"', ''', '''])
-            # Check the formatting of the title
-            title_is_italicized = False
-            current_pos = 0
-            for run in paragraph.runs:
-                run_length = len(run.text)
-                if current_pos <= title_start < current_pos + run_length:
-                    relative_pos = title_start - current_pos
-                    title_is_italicized = run.italic
-                    break
-                current_pos += run_length
-            # Check if formatting matches the required format
-            formatting_incorrect = False
-            issue_message = []
-            # Check italics requirement
-            if required_format["italics"] and not title_is_italicized:
-                formatting_incorrect = True
-                issue_message.append("should be italicized")
-            elif not required_format["italics"] and title_is_italicized:
-                formatting_incorrect = True
-                issue_message.append("should not be italicized")
-            # Check quotes requirement
-            if required_format["quotes"] and not title_in_quotes:
-                formatting_incorrect = True
-                issue_message.append("should be in quotes")
-            elif not required_format["quotes"] and title_in_quotes:
-                formatting_incorrect = True
-                issue_message.append("should not be in quotes")
-            if formatting_incorrect:
-                incorrect_titles.append({
-                    'text': full_match,
-                    'issue': ', '.join(issue_message)
-                })
     return len(incorrect_titles) == 0, incorrect_titles
-def get_document_checks(doc_type, template_type):
-    """Return expected outline and required headings based on document type and template type."""
-    document_checks = {
-        "Advisory Circular": {
-            "Short AC template AC": {
-                "required_headings": [
-                    "PURPOSE.",
-                    "APPLICABILITY.",
-                    "CANCELLATION.",
-                    "RELATED MATERIAL.",
-                    "DEFINITION OF KEY TERMS."
-                ]
-            },
-            "Long AC template AC": {
-                "required_headings": [
-                    "Purpose.",
-                    "Applicability.",
-                    "Cancellation.",
-                    "Related Material.",
-                    "Definition of Key Terms."
-                ]
-            }
-        },
-        "Airworthiness Criteria": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Deviation Memo": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Exemption": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Federal Register Notice": {
-            "required_headings": [
-                "Purpose of This Notice",
-                "Audience",
-                "Where can I Find This Notice"
-            ]
-        },
-        "Handbook/Manual": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Order": {
-            "required_headings": [
-                "Purpose of This Order.",
-                "Audience.",
-                "Where to Find This Order."
-            ]
-        },
-        "Policy Statement": {
-            "required_headings": [
-                "SUMMARY",
-                "CURRENT REGULATORY AND ADVISORY MATERIAL",
-                "RELEVANT PAST PRACTICE",
-                "POLICY",
-                "EFFECT OF POLICY",
-                "CONCLUSION"
-            ]
-        },
-        "Rule": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Special Condition": {
-            "required_headings": [
-                "TBD - Need to research"
-            ]
-        },
-        "Technical Standard Order": {
-            "required_headings": [
-                "PURPOSE.",
-                "APPLICABILITY.",
-                "REQUIREMENTS.",
-                "MARKING.",
-                "APPLICATION DATA REQUIREMENTS.",
-                "MANUFACTURER DATA REQUIREMENTS.",
-                "FURNISHED DATA REQUIREMENTS.",
-                "HOW TO GET REFERENCED DOCUMENTS."
-            ]
-        },
-        "Other": {
-            "required_headings": [
-                "N/A"
-            ]
-        }
-    }
-    # Add debugging logs
-    logger = logging.getLogger(__name__)
-    logger.info(f"Requested document type: {doc_type}")
-    logger.info(f"Requested template type: {template_type}")
-    if doc_type == "Advisory Circular":
-        checks = document_checks.get(doc_type, {}).get(template_type, {})
-    else:
-        checks = document_checks.get(doc_type, {})
-    logger.info(f"Retrieved checks: {checks}")
-    return checks
 def double_period_check(doc):
-    """Check for sentences that end with two periods."""
     incorrect_sentences = []
-    for paragraph in doc:
-        # Split the paragraph into sentences based on common sentence-ending punctuation
-        sentences = re.split(r'(?<=[.!?]) +', paragraph)
-        for sentence in sentences:
-            if sentence.endswith('..'):
-                incorrect_sentences.append(sentence.strip())  # Log the incorrectly formatted sentence
-    return len(incorrect_sentences) == 0, incorrect_sentences  # Return True if no double periods are found, along with any incorrect sentences
 def spacing_check(doc):
-    """
-    Check for correct spacing in US federal regulatory documents.
-    Checks for:
-    - Spacing between document type and number (e.g., "AC 20-114")
-    - Spacing around section symbols (e.g., "§ 25.301")
-    - Spacing around part numbers (e.g., "Part 25")
-    - Spacing around paragraph indications (e.g., "(a)", "(1)")
-    - Double spaces between words
-    """
     incorrect_spacing = []
-    # Regex patterns to find incorrect spacing
-    doc_type_pattern = re.compile(r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE)
-    section_symbol_pattern = re.compile(r'(?<!\s)(§|§§)(\d+\.\d+)', re.IGNORECASE)
-    part_number_pattern = re.compile(r'(?<!\s)Part(\d+)', re.IGNORECASE)
-    paragraph_pattern = re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE)
-    double_space_pattern = re.compile(r'\s{2,}')
-    for paragraph in doc:
-        # Check for incorrect document type spacing
-        if doc_type_pattern.search(paragraph):
-            incorrect_spacing.append(paragraph)
-        # Check for incorrect section symbol spacing
-        if section_symbol_pattern.search(paragraph):
-            incorrect_spacing.append(paragraph)
-        # Check for incorrect part number spacing
-        if part_number_pattern.search(paragraph):
-            incorrect_spacing.append(paragraph)
-        # Check for incorrect paragraph indication spacing
-        if paragraph_pattern.search(paragraph):
-            incorrect_spacing.append(paragraph)
-        # Check for double spaces
-        if double_space_pattern.search(paragraph):
-            incorrect_spacing.append(paragraph)
     return len(incorrect_spacing) == 0, incorrect_spacing
-def check_prohibited_phrases(doc):
-    """Check for prohibited words or phrases."""
-    prohibited_phrases = [
-        r'\babove\b',
-        r'\bbelow\b',
-        r'\bthere is\b',
-        r'\bthere are\b'
-    ]
-    issues = []
-    for paragraph in doc:
-        for phrase in prohibited_phrases:
-            if re.search(phrase, paragraph, re.IGNORECASE):
-                issues.append((phrase.strip(r'\b'), paragraph.strip()))
-    return issues
 def check_abbreviation_usage(doc):
-    """Check for abbreviation consistency after first definition."""
-    abbreviations = {}
-    issues = []
-    for paragraph in doc:
-        # Find definitions like "Federal Aviation Administration (FAA)"
-        defined_matches = re.findall(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)', paragraph)
-        for full_term, acronym in defined_matches:
-            if acronym not in abbreviations:
-                abbreviations[acronym] = {"full_term": full_term.strip(), "defined": True}
-        # Check for full term usage after definition
-        for acronym, data in abbreviations.items():
-            full_term = data["full_term"]
-            if full_term in paragraph:
-                # Ignore first usage where it's defined
-                if data["defined"]:
-                    data["defined"] = False  # Mark it as now defined
-                else:
-                    # Only flag subsequent occurrences
-                    issues.append((full_term, acronym, paragraph.strip()))
-    return issues
 def check_date_formats(doc):
-    """Check for inconsistent date formats."""
     date_issues = []
-    correct_date_pattern = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
-    date_pattern = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')  # MM/DD/YYYY
-    for paragraph in doc:
-        if date_pattern.search(paragraph):
-            dates = date_pattern.findall(paragraph)
-            for date in dates:
-                if not correct_date_pattern.match(date):
-                    date_issues.append((date, paragraph.strip()))
     return date_issues
 def check_placeholders(doc):
-    """Check for placeholders that should be removed."""
-    placeholder_phrases = [
-        r'\bTBD\b',
-        r'\bTo be determined\b',
-        r'\bTo be added\b'
-    ]
-    issues = []
-    for paragraph in doc:
-        for phrase in placeholder_phrases:
-            if re.search(phrase, paragraph, re.IGNORECASE):
-                issues.append((phrase.strip(r'\b'), paragraph.strip()))
-    return issues
-def format_results_for_gradio(heading_valid, headings_found, acronyms_valid, undefined_acronyms,
-                            legal_valid, incorrect_legal_references, table_valid, incorrect_captions,
-                            figure_valid, incorrect_fig_captions, references_valid, incorrect_table_figure_references,
-                            title_style_valid, incorrect_titles, required_headings, doc_type, double_period_valid,
-                            incorrect_sentences, spacing_valid, incorrect_spacing, abbreviation_issues, date_issues, placeholder_issues):
     """Format the results for Gradio display."""
     results = []
     results.append("# Document Check Results\n")
@@ -515,7 +312,10 @@ def format_results_for_gradio(heading_valid, headings_found, acronyms_valid, und
     if acronyms_valid:
         results.append("✅ All acronyms are properly defined.\n")
     else:
-        results.append(f"❌ The following acronyms need to be defined at first use: {', '.join(undefined_acronyms)}\n")
     # Legal Check
     results.append("## Legal Terminology Check")
@@ -571,7 +371,7 @@ def format_results_for_gradio(heading_valid, headings_found, acronyms_valid, und
         formatting_notes = {
             "Advisory Circular": "Document titles should be italicized, not in quotation marks.",
             "Order": "Document titles should be in quotation marks, not italicized.",
-            "Federal Notice": "Document titles should be in quotation marks, not italicized.",
             "Policy Statement": "Document titles should not have any special formatting (no italics, no quotation marks)."
         }

 import re
 from docx import Document
 import io
+import traceback
 def heading_title_check(doc, required_headings):
     """Check if all required headings are present."""
     return len(undefined_acronyms) == 0, list(undefined_acronyms)
 def legal_check(doc):
+    """Flag abbreviated legal citations used without their spelled-out name.
+
+    Args:
+        doc: Object exposing ``paragraphs``; every paragraph must provide a
+            ``text`` string (presumably a python-docx ``Document`` - confirm
+            against the caller).
+
+    Returns:
+        tuple: ``(is_valid, findings)``. ``findings`` holds one
+        ``(abbreviation, full_name)`` pair for each paragraph that contains
+        the abbreviation but not the full name. If an unexpected error
+        occurs it is printed and ``(False, [])`` is returned.
+    """
     incorrect_legal_references = []
+    try:
+        # Abbreviation -> spelled-out name expected in the same paragraph.
+        legal_terms = {
+            "C.F.R.": "Code of Federal Regulations",
+            "F.R.": "Federal Register",
+            "U.S.C.": "United States Code"
+        }
+        # The lookbehind rejects a hit that is only the tail of a longer
+        # dotted abbreviation, so "F.R." is not reported inside "C.F.R.".
+        term_patterns = [
+            (re.compile(r'(?<![\w.])' + re.escape(abbreviation)), abbreviation, full_name)
+            for abbreviation, full_name in legal_terms.items()
+        ]
+        for paragraph in doc.paragraphs:
+            text = paragraph.text
+            for pattern, abbreviation, full_name in term_patterns:
+                if pattern.search(text) and full_name not in text:
+                    incorrect_legal_references.append((abbreviation, full_name))
+    except Exception as e:
+        print(f"Error in legal check: {str(e)}")
+        return False, []
     return len(incorrect_legal_references) == 0, incorrect_legal_references
 def table_caption_check(doc, doc_type):
+    """Check the caption paragraph that directly precedes each table.
+
+    Only siblings whose text starts with "Table" are treated as captions.
+    For "Advisory Circular" documents a caption must start with "Table "
+    and contain ". " (as in "Table X. Caption text"); other document types
+    are not checked.
+
+    Args:
+        doc: Object exposing ``tables``; each table must provide
+            ``_element.getprevious()`` returning the preceding XML sibling
+            or ``None`` (presumably a python-docx ``Document`` - confirm).
+        doc_type: Document type name selected by the caller.
+
+    Returns:
+        tuple: ``(is_valid, incorrect_captions)``; ``(False, [])`` after an
+        unexpected error, which is printed.
+    """
     incorrect_captions = []
     try:
+        for table in doc.tables:
+            previous = table._element.getprevious()
+            # getprevious() yields a raw XML element, not a Paragraph, so
+            # only a w:p sibling can be a caption and its text has to be
+            # collected from the nested text nodes.
+            if previous is None or not str(previous.tag).endswith("}p"):
+                continue
+            caption = "".join(previous.itertext())
+            if not caption.startswith("Table"):
+                continue
+            if doc_type == "Advisory Circular":
+                # AC captions should be "Table X. Caption text"
+                if not caption.startswith("Table ") or ". " not in caption:
+                    incorrect_captions.append(caption)
+            # Other doc types may have different caption formats; no rule yet.
     except Exception as e:
         print(f"Error in table caption check: {str(e)}")
         return False, []
     return len(incorrect_captions) == 0, incorrect_captions
 def figure_caption_check(doc, doc_type):
+    """Collect figure captions that break the Advisory Circular format.
+
+    A paragraph counts as a figure caption when its text starts with
+    "Figure". For "Advisory Circular" documents a caption is reported when
+    it lacks ". " (expected shape: "Figure X. Caption text"). Other
+    document types are scanned but never reported.
+
+    Returns:
+        tuple: ``(is_valid, incorrect_fig_captions)``; ``(False, [])`` after
+        an unexpected error, which is printed.
+    """
     incorrect_fig_captions = []
+    try:
+        is_advisory_circular = doc_type == "Advisory Circular"
+        for para in doc.paragraphs:
+            caption = para.text
+            if not caption.startswith("Figure"):
+                continue
+            # Only AC documents have a caption rule at the moment.
+            if is_advisory_circular and ". " not in caption:
+                incorrect_fig_captions.append(caption)
+    except Exception as e:
+        print(f"Error in figure caption check: {str(e)}")
+        return False, []
     return len(incorrect_fig_captions) == 0, incorrect_fig_captions
 def table_figure_reference_check(doc, doc_type):
+    """Collect paragraphs whose table/figure mentions break the AC rule.
+
+    For "Advisory Circular" documents, every paragraph that contains
+    "Table" or "Figure" but does not begin with "Table " or "Figure " is
+    reported in full. Other document types are scanned but never reported.
+
+    NOTE(review): as written this reports ordinary in-sentence references
+    such as "See Table 1" - confirm that this is the intended rule.
+
+    Returns:
+        tuple: ``(is_valid, incorrect_table_figure_references)``;
+        ``(False, [])`` after an unexpected error, which is printed.
+    """
     incorrect_table_figure_references = []
+    try:
+        is_advisory_circular = doc_type == "Advisory Circular"
+        for para in doc.paragraphs:
+            body = para.text
+            if "Table" not in body and "Figure" not in body:
+                continue
+            # Only AC documents have a reference rule at the moment.
+            if is_advisory_circular and not body.startswith(("Table ", "Figure ")):
+                incorrect_table_figure_references.append(body)
+    except Exception as e:
+        print(f"Error in table/figure reference check: {str(e)}")
+        return False, []
     return len(incorrect_table_figure_references) == 0, incorrect_table_figure_references
+def document_title_check(doc, doc_type):
+    """Check the formatting of the document's leading Title paragraph.
+
+    The check only applies when the first paragraph uses the 'Title' style.
+    Rules by document type:
+      * Advisory Circular - must start with "ADVISORY CIRCULAR " and end
+        with " AC".
+      * Order / Federal Register Notice - must be enclosed in double quotes.
+      * Policy Statement - must not start or end with a double quote.
+    Any other document type passes unchecked.
+
+    Args:
+        doc: Object exposing ``paragraphs``; the first paragraph must provide
+            ``text`` and ``style.name`` (presumably a python-docx
+            ``Document`` - confirm against the caller).
+        doc_type: Document type name selected by the caller.
+
+    Returns:
+        tuple: ``(is_valid, incorrect_titles)`` where each entry is a dict
+        with ``text`` and ``issue`` keys; ``(False, [])`` after an
+        unexpected error, which is printed.
+    """
     incorrect_titles = []
+    try:
+        paragraphs = doc.paragraphs
+        if len(paragraphs) > 0 and paragraphs[0].style.name == 'Title':
+            title_text = paragraphs[0].text
+            starts_quoted = title_text.startswith('"')
+            ends_quoted = title_text.endswith('"')
+            if doc_type == "Advisory Circular":
+                # Both conditions are required, matching the reported message:
+                # a title is wrong when it lacks the prefix OR lacks the suffix.
+                if not title_text.startswith("ADVISORY CIRCULAR ") or not title_text.endswith(" AC"):
+                    incorrect_titles.append({"text": title_text, "issue": "Advisory Circular titles should start with 'ADVISORY CIRCULAR ' and end with ' AC'"})
+            elif doc_type == "Order":
+                if not starts_quoted or not ends_quoted:
+                    incorrect_titles.append({"text": title_text, "issue": "Order titles should be enclosed in quotation marks"})
+            elif doc_type == "Federal Register Notice":
+                if not starts_quoted or not ends_quoted:
+                    incorrect_titles.append({"text": title_text, "issue": "Federal Register Notice titles should be enclosed in quotation marks"})
+            elif doc_type == "Policy Statement":
+                if starts_quoted or ends_quoted:
+                    incorrect_titles.append({"text": title_text, "issue": "Policy Statement titles should not have quotation marks"})
+    except Exception as e:
+        print(f"Error in document title check: {str(e)}")
+        return False, []
     return len(incorrect_titles) == 0, incorrect_titles
 def double_period_check(doc):
+    """Collect the text of every paragraph that contains "..".
+
+    Returns:
+        tuple: ``(is_valid, incorrect_sentences)``; ``(False, [])`` after an
+        unexpected error, which is printed.
+    """
     incorrect_sentences = []
+    try:
+        for para in doc.paragraphs:
+            body = para.text
+            if ".." not in body:
+                continue
+            incorrect_sentences.append(body)
+    except Exception as e:
+        print(f"Error in double period check: {str(e)}")
+        return False, []
+    return len(incorrect_sentences) == 0, incorrect_sentences
 def spacing_check(doc):
+    """Collect the text of every paragraph that contains two spaces in a row.
+
+    Returns:
+        tuple: ``(is_valid, incorrect_spacing)``; ``(False, [])`` after an
+        unexpected error, which is printed.
+    """
     incorrect_spacing = []
+    try:
+        for para in doc.paragraphs:
+            body = para.text
+            if "  " not in body:
+                continue
+            incorrect_spacing.append(body)
+    except Exception as e:
+        print(f"Error in spacing check: {str(e)}")
+        return False, []
     return len(incorrect_spacing) == 0, incorrect_spacing
 def check_abbreviation_usage(doc):
+    """Flag spelled-out terms reused after their abbreviation was defined.
+
+    A definition is recognised as ``Full Term (ABBR)`` where ABBR is 2-5
+    capital letters and the words directly before the parenthesis have
+    initials that spell ABBR. Definitions whose full term cannot be derived
+    this way (for example terms containing lowercase linking words) are
+    ignored rather than guessed.
+
+    Args:
+        doc: Object exposing ``paragraphs``; every paragraph must provide a
+            ``text`` string (presumably a python-docx ``Document`` - confirm
+            against the caller).
+
+    Returns:
+        list: ``(full_term, abbreviation, paragraph_text)`` tuples, one for
+        each later paragraph that still spells the term out. An empty list
+        is returned after an unexpected error, which is printed.
+    """
+    abbreviation_issues = []
+    try:
+        definition_pattern = re.compile(r'\(([A-Z]{2,5})\)')
+        defined_terms = {}  # abbreviation -> spelled-out term
+        for paragraph in doc.paragraphs:
+            text = paragraph.text
+            # Compare against terms defined in earlier paragraphs only, so
+            # the defining paragraph itself is never reported.
+            for abbr, full_term in defined_terms.items():
+                if full_term in text:
+                    abbreviation_issues.append((full_term, abbr, text))
+            for match in definition_pattern.finditer(text):
+                abbr = match.group(1)
+                if abbr in defined_terms:
+                    continue
+                # Candidate full term: one preceding word per abbreviation letter.
+                words = text[:match.start()].split()[-len(abbr):]
+                initials = "".join(word[0].upper() for word in words)
+                if initials == abbr:
+                    defined_terms[abbr] = " ".join(words)
+    except Exception as e:
+        print(f"Error in abbreviation check: {str(e)}")
+        return []
+    return abbreviation_issues
 def check_date_formats(doc):
+    """Find numeric slash dates (such as 10/29/2024) in the document.
+
+    Args:
+        doc: Object exposing ``paragraphs``; every paragraph must provide a
+            ``text`` string (presumably a python-docx ``Document`` - confirm
+            against the caller).
+
+    Returns:
+        list: ``(date, paragraph_text)`` tuples, one per date found, in
+        document order. An empty list is returned after an unexpected
+        error, which is printed.
+    """
     date_issues = []
+    try:
+        slash_date_pattern = re.compile(r'\b\d{1,2}/\d{1,2}/\d{4}\b')
+        for paragraph in doc.paragraphs:
+            text = paragraph.text
+            # Report the offending date itself, paired with its paragraph
+            # for context, once per occurrence.
+            for date in slash_date_pattern.findall(text):
+                date_issues.append((date, text))
+    except Exception as e:
+        print(f"Error in date format check: {str(e)}")
+        return []
     return date_issues
 def check_placeholders(doc):
+    """Find template placeholders that were left in the document.
+
+    Looks for the literal markers "[ENTER TEXT]" and "[ENTER DATE]".
+
+    Args:
+        doc: Object exposing ``paragraphs``; every paragraph must provide a
+            ``text`` string (presumably a python-docx ``Document`` - confirm
+            against the caller).
+
+    Returns:
+        list: ``(placeholder, paragraph_text)`` tuples, one per placeholder
+        kind found in each paragraph. An empty list is returned after an
+        unexpected error, which is printed.
+    """
+    placeholder_issues = []
+    try:
+        placeholders = ('[ENTER TEXT]', '[ENTER DATE]')
+        for paragraph in doc.paragraphs:
+            text = paragraph.text
+            for placeholder in placeholders:
+                if placeholder in text:
+                    placeholder_issues.append((placeholder, text))
+    except Exception as e:
+        print(f"Error in placeholder check: {str(e)}")
+        return []
+    return placeholder_issues
+def get_document_checks(doc_type, template_type):
+    """Look up the headings a document of the given type must contain.
+
+    Only "Advisory Circular" has headings configured. The template
+    "Short AC template AC" selects the short list; any other template value
+    selects the long list. Every other document type gets an empty list.
+
+    Returns:
+        dict: ``{"required_headings": [...]}`` built fresh on each call.
+    """
+    if doc_type != "Advisory Circular":
+        # Add other document types as needed
+        return {"required_headings": []}
+    if template_type == "Short AC template AC":
+        headings = ["Purpose", "Applicability", "Related Reading Material",
+                    "Background", "Discussion"]
+    else:
+        # Long AC template (also the fallback for unrecognised templates)
+        headings = ["Purpose", "Applicability", "Audience", "Related Reading Material",
+                    "Background", "Discussion", "Conclusion"]
+    return {"required_headings": headings}
+def format_results_for_gradio(heading_valid, headings_found,
+                              acronyms_valid, undefined_acronyms,
+                              legal_valid, incorrect_legal_references,
+                              table_valid, incorrect_captions,
+                              figure_valid, incorrect_fig_captions,
+                              references_valid, incorrect_table_figure_references,
+                              title_style_valid, incorrect_titles,
+                              required_headings, doc_type,
+                              double_period_valid, incorrect_sentences,
+                              spacing_valid, incorrect_spacing,
+                              abbreviation_issues, date_issues,
+                              placeholder_issues):
     """Format the results for Gradio display."""
     results = []
     results.append("# Document Check Results\n")
     if acronyms_valid:
         results.append("✅ All acronyms are properly defined.\n")
     else:
+        results.append("❌ The following acronyms need to be defined at first use:")
+        for acronym in undefined_acronyms:
+            results.append(f"- {acronym}")
+    results.append("")
     # Legal Check
     results.append("## Legal Terminology Check")
         formatting_notes = {
             "Advisory Circular": "Document titles should be italicized, not in quotation marks.",
             "Order": "Document titles should be in quotation marks, not italicized.",
+            "Federal Register Notice": "Document titles should be in quotation marks, not italicized.",
             "Policy Statement": "Document titles should not have any special formatting (no italics, no quotation marks)."
         }