# Hoctar77's picture
# Update app.py
# c6ba992 verified
# raw
# history blame
# 32.6 kB
import gradio as gr
import logging
import re
from docx import Document
import io
import traceback
def heading_title_check(doc, required_headings):
    """
    Verify that every required heading occurs as a paragraph of its own.

    A paragraph counts as a heading only when its whitespace-stripped text is
    exactly equal (case-sensitive) to one of the required titles.

    Args:
        doc (list): Paragraph texts from the document.
        required_headings (list): Heading titles that must be present.

    Returns:
        tuple: (bool, list) - True when each required heading was seen at
        least once, plus the matching paragraphs in document order (a heading
        that appears twice is listed twice).
    """
    # Set membership keeps the per-paragraph lookup O(1).
    expected = set(required_headings)
    cleaned = (para.strip() for para in doc)
    headings_found = [text for text in cleaned if text in expected]
    return set(headings_found) == expected, headings_found
def acronym_check(doc):
    """
    Report acronyms that are used before any definition has been seen.

    An acronym is a run of two or more capital letters. It counts as defined
    once it has appeared in parentheses directly after a word, as in
    "Federal Aviation Administration (FAA)". Definitions in a paragraph are
    collected before that paragraph's usages are examined, so a definition
    anywhere in a paragraph covers the whole paragraph.

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        tuple: (bool, set) - True when nothing is undefined, plus the set of
        acronyms that were used while still undefined.
    """
    definition_re = re.compile(r'(\b\w+\b) \((\b[A-Z]{2,}\b)\)')
    usage_re = re.compile(r'(\b[A-Z]{2,}\b)')

    known = set()
    missing = set()
    for paragraph in doc:
        known.update(abbr for _, abbr in definition_re.findall(paragraph))
        missing.update(
            abbr for abbr in usage_re.findall(paragraph) if abbr not in known
        )
    return len(missing) == 0, missing
def legal_check(doc):
    """Check for incorrect legal terminology and suggest corrections.

    Two kinds of findings are produced for each paragraph, in this order:

    1. Capitalization of "Title 14": it must be "Title 14" at the start of the
       paragraph or right after sentence-ending punctuation, and "title 14"
       when it follows whitespace inside a sentence.
    2. Fixed term replacements (USC, CFR Part, C.F.R., We/we, cancelled,
       shall, &).

    Args:
        doc (list): List of paragraphs/strings to check.

    Returns:
        tuple: (bool, list) - (True if no errors found, list of
        (incorrect, correct) term pairs in the order they were detected).
    """
    # (compiled pattern, replacement) pairs, compiled once rather than once
    # per paragraph.
    term_patterns = (
        (re.compile(r"\bUSC\b"), "U.S.C."),
        (re.compile(r"\bCFR Part\b"), "CFR part"),
        # No trailing \b: after the final "." a word boundary only exists when
        # a letter or digit follows, so "C.F.R. part" would never match.
        (re.compile(r"\bC\.F\.R\."), "CFR"),
        (re.compile(r"\bWe\b"), "The FAA"),
        (re.compile(r"\bwe\b"), "the FAA"),
        (re.compile(r"\bcancelled\b"), "canceled"),
        (re.compile(r"\bshall\b"), "must or will"),
        # "&" is not a word character, so \b&\b only matched "A&B" and missed
        # the common "A & B" spelling.
        (re.compile(r"&"), "and"),
    )
    title_14_pattern = re.compile(
        r"(?P<prefix>^|[.!?\s])\s*(?P<title>title 14|Title 14)\b"
    )

    incorrect_legal_references = []
    for paragraph in doc:
        for match in title_14_pattern.finditer(paragraph):
            prefix = match.group('prefix')
            current_title = match.group('title')
            if prefix in ('.', '!', '?', ''):
                # Start of paragraph or start of a sentence: capitalized form.
                if current_title != "Title 14":
                    incorrect_legal_references.append((current_title, "Title 14"))
            elif prefix.isspace() and current_title != "title 14":
                # Mid-sentence: lower-case form.
                incorrect_legal_references.append((current_title, "title 14"))

        for pattern, correct_term in term_patterns:
            incorrect_legal_references.extend(
                (match.group(), correct_term)
                for match in pattern.finditer(paragraph)
            )

    return len(incorrect_legal_references) == 0, incorrect_legal_references
def table_caption_check(doc, doc_type):
    """
    Find paragraphs that start with "table" but are not well-formed captions.

    Advisory Circulars and Orders require the two-part form "Table X-Y"; all
    other document types require "Table X". X and Y may be letters or digits
    (Table 1-2, Table C-1), matching is case-insensitive, and the identifier
    must be followed by a period or whitespace.

    Args:
        doc (list): Paragraph texts from the document.
        doc_type (str): Document type name selecting the caption format.

    Returns:
        tuple: (bool, list) - True when nothing was flagged, plus the stripped
        text of each offending paragraph.
    """
    if doc_type in ("Advisory Circular", "Order"):
        caption_regex = r'^Table\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]'
    else:
        caption_regex = r'^Table\s+([A-Z0-9]+)[\.\s]'
    caption_re = re.compile(caption_regex, re.IGNORECASE)

    candidates = (paragraph.strip() for paragraph in doc)
    incorrect_captions = [
        text
        for text in candidates
        if text.lower().startswith("table") and not caption_re.match(text)
    ]
    return len(incorrect_captions) == 0, incorrect_captions
def figure_caption_check(doc, doc_type):
    """
    Find paragraphs that start with "figure" but are not well-formed captions.

    Advisory Circulars and Orders require "Figure X-Y"; every other document
    type requires "Figure X". X and Y may be letters or digits (Figure 1-2,
    Figure C-1), matching ignores case, and the identifier must be followed
    by a period or whitespace.

    Args:
        doc (list): Paragraph texts from the document.
        doc_type (str): Document type name selecting the caption format.

    Returns:
        tuple: (bool, list) - True when nothing was flagged, plus the stripped
        text of each offending paragraph.
    """
    two_part = doc_type in ("Advisory Circular", "Order")
    figure_caption_pattern = re.compile(
        r'^Figure\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]' if two_part
        else r'^Figure\s+([A-Z0-9]+)[\.\s]',
        re.IGNORECASE,
    )

    incorrect_fig_captions = []
    for raw_text in doc:
        text = raw_text.strip()
        if not text.lower().startswith("figure"):
            continue
        if figure_caption_pattern.match(text) is None:
            incorrect_fig_captions.append(text)
    return len(incorrect_fig_captions) == 0, incorrect_fig_captions
def table_figure_reference_check(doc, doc_type):
    """Check for incorrectly numbered in-text references to tables and figures.

    For Advisory Circulars and Orders the correct form is "Table X-Y" /
    "Figure X-Y", so a bare "Table 5" is reported. For every other document
    type the correct form is "Table X" / "Figure X", so a hyphenated
    "Table 5-1" is reported. Paragraphs that themselves start with "table" or
    "figure" are treated as captions and skipped.

    Args:
        doc (list): Paragraph texts from the document.
        doc_type (str): Document type name selecting the numbering scheme.

    Returns:
        tuple: (bool, list) - True when no incorrect reference was found, plus
        the offending reference strings (for each paragraph, tables first and
        then figures).
    """
    if doc_type in ("Advisory Circular", "Order"):
        # A number NOT followed by "-digits" is the wrong (single-part) form.
        wrong_number = r'\d+(?!-\d+)\b'
    else:
        # Only the hyphenated form is wrong here. The old pattern made the
        # hyphen part optional, so it also flagged correct "Table 5"
        # references, and its capturing group made findall() return '' or
        # '-1' instead of the reference text.
        wrong_number = r'\d+-\d+\b'
    incorrect_table_ref_pattern = re.compile(r'\bTable\s+' + wrong_number, re.IGNORECASE)
    incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+' + wrong_number, re.IGNORECASE)

    incorrect_table_figure_references = []
    for paragraph in doc:
        # Captions are validated by the caption checks, not here.
        if paragraph.strip().lower().startswith(('table', 'figure')):
            continue
        # The patterns contain no capturing groups, so findall() yields the
        # full matched reference text.
        incorrect_table_figure_references.extend(
            incorrect_table_ref_pattern.findall(paragraph)
        )
        incorrect_table_figure_references.extend(
            incorrect_figure_ref_pattern.findall(paragraph)
        )

    return len(incorrect_table_figure_references) == 0, incorrect_table_figure_references
def document_title_check(doc_path, doc_type):
    """Check that referenced Advisory Circular titles use the required styling.

    Every "AC <number>, <title>" reference found in a paragraph is compared
    with the italics/quotation-mark rule for ``doc_type``. Italics are read
    from the run that contains the first character of the title.

    Args:
        doc_path: A path or file-like object accepted by ``Document``, or an
            already-loaded document (any object exposing ``paragraphs``).
        doc_type (str): Document type name; selects the formatting rule.

    Returns:
        tuple: (bool, list) - True when no issue was found, plus a list of
        ``{'text': <matched reference>, 'issue': <description>}`` dicts.

    Raises:
        ValueError: If ``doc_type`` has no formatting rule.
    """
    # Accept a loaded document too, so callers that already parsed the file
    # do not have to hand over the raw stream a second time.
    doc = doc_path if hasattr(doc_path, "paragraphs") else Document(doc_path)

    # Captures the title text after "AC 20-114, " up to the next "." or ",".
    ac_pattern = re.compile(r'AC\s+\d+(?:-\d+)?(?:,|\s)+(.+?)(?=\.|,|$)')

    # Required title styling per document type.
    formatting_rules = {
        "Advisory Circular": {"italics": True, "quotes": False},
        "Airworthiness Criteria": {"italics": False, "quotes": True},
        "Deviation Memo": {"italics": False, "quotes": True},
        "Exemption": {"italics": False, "quotes": True},
        "Federal Register Notice": {"italics": False, "quotes": True},
        "Handbook/Manual": {"italics": False, "quotes": False},
        "Order": {"italics": False, "quotes": True},
        "Policy Statement": {"italics": False, "quotes": False},
        "Rule": {"italics": False, "quotes": True},
        "Special Condition": {"italics": False, "quotes": True},
        "Technical Standard Order": {"italics": False, "quotes": True},
        "Other": {"italics": False, "quotes": False},
    }
    if doc_type not in formatting_rules:
        raise ValueError(f"Unsupported document type: {doc_type}")
    required_format = formatting_rules[doc_type]

    # Straight and curly quotation marks. The curly ones are written as
    # escapes so they cannot be flattened into ASCII quotes, which previously
    # collapsed them into a single ", " string literal.
    quote_marks = ('"', "'", "\u201c", "\u201d", "\u2018", "\u2019")

    incorrect_titles = []
    for paragraph in doc.paragraphs:
        for match in ac_pattern.finditer(paragraph.text):
            title_text = match.group(1).strip()
            title_start = match.start(1)

            title_in_quotes = any(mark in title_text for mark in quote_marks)

            # Walk the runs to find the one holding the title's first
            # character; its italic flag decides the verdict.
            title_is_italicized = False
            current_pos = 0
            for run in paragraph.runs:
                run_end = current_pos + len(run.text)
                if current_pos <= title_start < run_end:
                    # run.italic may be None; that is treated as not italic.
                    title_is_italicized = bool(run.italic)
                    break
                current_pos = run_end

            issue_message = []
            if required_format["italics"] and not title_is_italicized:
                issue_message.append("should be italicized")
            elif not required_format["italics"] and title_is_italicized:
                issue_message.append("should not be italicized")

            if required_format["quotes"] and not title_in_quotes:
                issue_message.append("should be in quotes")
            elif not required_format["quotes"] and title_in_quotes:
                issue_message.append("should not be in quotes")

            if issue_message:
                incorrect_titles.append({
                    'text': match.group(0),
                    'issue': ', '.join(issue_message),
                })

    return len(incorrect_titles) == 0, incorrect_titles
def get_document_checks(doc_type, template_type):
    """Look up the required headings for a document type (and AC template).

    Advisory Circular entries are keyed one level deeper by template type;
    every other document type maps straight to its checks. Unknown document
    or template types yield an empty dict. The request and the result are
    logged at INFO level.

    NOTE(review): a second ``get_document_checks`` is defined later in this
    file and rebinds the name, so at runtime that later definition is the one
    callers get - confirm which table is meant to be authoritative.

    Args:
        doc_type (str): Document type name.
        template_type (str): Template name; only consulted for
            "Advisory Circular".

    Returns:
        dict: ``{"required_headings": [...]}`` or ``{}`` when nothing matches.
    """
    def headings(*titles):
        # Each entry gets its own fresh list.
        return {"required_headings": list(titles)}

    not_researched = "TBD - Need to research"

    document_checks = {
        "Advisory Circular": {
            "Short AC template AC": headings(
                "PURPOSE.",
                "APPLICABILITY.",
                "CANCELLATION.",
                "RELATED MATERIAL.",
                "DEFINITION OF KEY TERMS.",
            ),
            "Long AC template AC": headings(
                "Purpose.",
                "Applicability.",
                "Cancellation.",
                "Related Material.",
                "Definition of Key Terms.",
            ),
        },
        "Airworthiness Criteria": headings(not_researched),
        "Deviation Memo": headings(not_researched),
        "Exemption": headings(not_researched),
        "Federal Register Notice": headings(
            "Purpose of This Notice",
            "Audience",
            "Where can I Find This Notice",
        ),
        "Handbook/Manual": headings(not_researched),
        "Order": headings(
            "Purpose of This Order.",
            "Audience.",
            "Where to Find This Order.",
        ),
        "Policy Statement": headings(
            "SUMMARY",
            "CURRENT REGULATORY AND ADVISORY MATERIAL",
            "RELEVANT PAST PRACTICE",
            "POLICY",
            "EFFECT OF POLICY",
            "CONCLUSION",
        ),
        "Rule": headings(not_researched),
        "Special Condition": headings(not_researched),
        "Technical Standard Order": headings(
            "PURPOSE.",
            "APPLICABILITY.",
            "REQUIREMENTS.",
            "MARKING.",
            "APPLICATION DATA REQUIREMENTS.",
            "MANUFACTURER DATA REQUIREMENTS.",
            "FURNISHED DATA REQUIREMENTS.",
            "HOW TO GET REFERENCED DOCUMENTS.",
        ),
        "Other": headings("N/A"),
    }

    logger = logging.getLogger(__name__)
    logger.info(f"Requested document type: {doc_type}")
    logger.info(f"Requested template type: {template_type}")

    entry = document_checks.get(doc_type, {})
    # Only Advisory Circulars have a per-template layer to descend into.
    checks = entry.get(template_type, {}) if doc_type == "Advisory Circular" else entry

    logger.info(f"Retrieved checks: {checks}")
    return checks
def double_period_check(doc):
    """
    Find sentences whose text ends in two consecutive periods.

    Each paragraph is split into sentences at runs of spaces that follow
    ".", "!" or "?"; a sentence is reported when it ends with "..".

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        tuple: (bool, list) - True when nothing was found, plus the offending
        sentences with surrounding whitespace removed.
    """
    sentence_break = re.compile(r'(?<=[.!?]) +')
    incorrect_sentences = [
        sentence.strip()
        for paragraph in doc
        for sentence in sentence_break.split(paragraph)
        if sentence.endswith('..')
    ]
    return len(incorrect_sentences) == 0, incorrect_sentences
def spacing_check(doc):
    """
    Check for incorrect spacing in US federal regulatory documents.

    A paragraph is reported (once, however many rules it breaks) when it has:
    - no space between a document type and its number ("AC20-114" instead of
      "AC 20-114")
    - no space between a section symbol and its number ("§25.301" instead of
      "§ 25.301")
    - no space between "Part" and its number ("Part25" instead of "Part 25")
    - an opening parenthesis glued to the preceding text and followed by a
      letter or digit that is not immediately closed
    - two or more consecutive whitespace characters

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        tuple: (bool, list) - True when nothing was flagged, plus the
        offending paragraphs in document order.
    """
    spacing_rules = (
        # \b instead of the former (?<!\s) lookbehind: the lookbehind rejected
        # any identifier preceded by a space, so "See AC20-114" went
        # unreported while the "n2" in "section25" was flagged.
        re.compile(r'\b(?:AC|AD|CFR|FAA|N|SFAR)\d+[-]?\d*', re.IGNORECASE),
        # Section sign (U+00A7) directly followed by a digit. Written as an
        # escape so an encoding round-trip cannot corrupt it again.
        re.compile(r'\u00a7\d'),
        re.compile(r'\bPart\d+', re.IGNORECASE),
        # NOTE(review): kept exactly as before. The rule fires on "(x" or
        # "(1" when the "(" is not preceded by whitespace and the character
        # is not immediately closed by ")"; the intended style rule is
        # unclear, so confirm before tightening it.
        re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE),
        re.compile(r'\s{2,}'),
    )

    # any() reports each paragraph once instead of once per matching rule.
    incorrect_spacing = [
        paragraph
        for paragraph in doc
        if any(rule.search(paragraph) for rule in spacing_rules)
    ]
    return len(incorrect_spacing) == 0, incorrect_spacing
def check_prohibited_phrases(doc):
    """Check for prohibited words or phrases.

    Matching is case-insensitive and restricted to whole words.

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        list: ``(phrase, paragraph)`` tuples, one per prohibited phrase found
        in a paragraph, where ``paragraph`` is the whitespace-stripped text.
    """
    prohibited_phrases = ('above', 'below', 'there is', 'there are')
    # Keep the readable phrase next to its compiled pattern. Recovering the
    # label with str.strip(r'\b') removes *characters*, not a prefix, which
    # turned "below" into "elow".
    matchers = [
        (phrase, re.compile(rf'\b{re.escape(phrase)}\b', re.IGNORECASE))
        for phrase in prohibited_phrases
    ]

    issues = []
    for paragraph in doc:
        for phrase, pattern in matchers:
            if pattern.search(paragraph):
                issues.append((phrase, paragraph.strip()))
    return issues
def check_abbreviation_usage(doc):
    """
    Flag paragraphs that spell a term out again after its acronym was defined.

    A definition is the run of letters, spaces and "&" that directly precedes
    a parenthesized acronym, e.g. "Federal Aviation Administration (FAA)".
    Only the first definition of each acronym is remembered. The first
    paragraph containing the remembered text is accepted as the definition
    itself; every later paragraph containing it is reported.

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        list: ``(full_term, acronym, paragraph)`` tuples, where ``paragraph``
        is the whitespace-stripped text of the offending paragraph.
    """
    definition_re = re.compile(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)')

    spelled_out = {}             # acronym -> text captured at its definition
    awaiting_first_use = set()   # acronyms whose defining paragraph is not yet consumed
    issues = []

    for paragraph in doc:
        for phrase, acronym in definition_re.findall(paragraph):
            if acronym not in spelled_out:
                spelled_out[acronym] = phrase.strip()
                awaiting_first_use.add(acronym)

        for acronym, phrase in spelled_out.items():
            if phrase not in paragraph:
                continue
            if acronym in awaiting_first_use:
                # This occurrence is the definition itself; do not report it.
                awaiting_first_use.discard(acronym)
            else:
                issues.append((phrase, acronym, paragraph.strip()))

    return issues
def check_date_formats(doc):
    """
    Report numeric slash dates (e.g. 1/15/2024) found in the document.

    Each slash-style date is reported unless it matches the long form
    "Month D, YYYY".

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        list: ``(date, paragraph)`` tuples, where ``paragraph`` is the
        whitespace-stripped text containing the date.
    """
    long_form_re = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
    slash_date_re = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')  # MM/DD/YYYY

    return [
        (found, paragraph.strip())
        for paragraph in doc
        for found in slash_date_re.findall(paragraph)
        if not long_form_re.match(found)
    ]
def check_placeholders(doc):
    """
    Find leftover placeholder text that should be removed before publishing.

    Looks for "TBD", "To be determined" and "To be added" as whole words,
    ignoring case.

    Args:
        doc (list): Paragraph texts from the document.

    Returns:
        list: ``(phrase, paragraph)`` tuples, one per placeholder found in a
        paragraph, where ``paragraph`` is the whitespace-stripped text.
    """
    labelled_patterns = (
        ('TBD', re.compile(r'\bTBD\b', re.IGNORECASE)),
        ('To be determined', re.compile(r'\bTo be determined\b', re.IGNORECASE)),
        ('To be added', re.compile(r'\bTo be added\b', re.IGNORECASE)),
    )
    return [
        (label, paragraph.strip())
        for paragraph in doc
        for label, pattern in labelled_patterns
        if pattern.search(paragraph)
    ]
def process_file(file_obj, doc_type, template_type):
    """
    Run the document checks on an uploaded file and report failures as text.

    NOTE(review): another ``process_file`` is defined later in this file and
    rebinds the name, so this version (the one that also prints the
    traceback) is not the one in effect at runtime.

    Args:
        file_obj: Uploaded file as raw ``bytes`` or an object with ``read()``;
            ``None`` when nothing was uploaded.
        doc_type (str): Document type name passed on to the checks.
        template_type (str): Template name passed on to the checks.

    Returns:
        str: The formatted check results, a prompt to upload a file, or an
        error description when processing raised an exception.
    """
    if file_obj is None:
        return "Please upload a document first."

    try:
        # Convert bytes to a BytesIO object that Document can read.
        payload = file_obj if isinstance(file_obj, bytes) else file_obj.read()
        return process_document(io.BytesIO(payload), doc_type, template_type)
    except Exception as e:
        reason = str(e)
        error_trace = traceback.format_exc()
        print(f"Error processing file: {reason}")
        print(f"Full traceback: {error_trace}")
        return "\n".join([
            "An error occurred while processing the document:",
            f"Error: {reason}",
            "Please ensure:",
            "1. The file is a valid Word document (.docx)",
            "2. The file is not corrupted",
            "3. The file is not password protected",
            f"Technical details: {reason}",
        ])
def process_document(file_obj, doc_type, template_type):
    """Load a Word document, run every check and return the formatted report.

    The text-based checks iterate over paragraph strings, so the paragraph
    texts are extracted once and shared between them. Passing the loaded
    document object itself, as was done before, fails because those checks
    iterate their argument and call ``str`` methods on each item.

    Args:
        file_obj: Path or file-like object readable by ``Document``.
        doc_type (str): Document type name selecting headings and style rules.
        template_type (str): Template name (used for Advisory Circulars).

    Returns:
        str: Report produced by ``format_results_for_gradio``.

    Raises:
        Exception: Any error raised while loading or checking the document is
            printed and then re-raised unchanged.
    """
    try:
        # Read the Word document
        doc = Document(file_obj)
        print("Document read successfully.")
        paragraphs = [paragraph.text for paragraph in doc.paragraphs]

        # Get required headings based on document type
        required_headings = get_document_checks(doc_type, template_type).get("required_headings", [])

        # Perform checks
        heading_valid, headings_found = heading_title_check(paragraphs, required_headings)
        acronyms_valid, undefined_acronyms = acronym_check(paragraphs)
        legal_valid, incorrect_legal_references = legal_check(paragraphs)
        table_valid, incorrect_captions = table_caption_check(paragraphs, doc_type)
        figure_valid, incorrect_fig_captions = figure_caption_check(paragraphs, doc_type)
        references_valid, incorrect_table_figure_references = table_figure_reference_check(paragraphs, doc_type)

        # document_title_check opens the document itself (it needs run-level
        # formatting), so rewind the stream that Document() just consumed.
        if hasattr(file_obj, "seek"):
            file_obj.seek(0)
        title_style_valid, incorrect_titles = document_title_check(file_obj, doc_type)

        double_period_valid, incorrect_sentences = double_period_check(paragraphs)
        spacing_valid, incorrect_spacing = spacing_check(paragraphs)
        abbreviation_issues = check_abbreviation_usage(paragraphs)
        date_issues = check_date_formats(paragraphs)
        placeholder_issues = check_placeholders(paragraphs)

        # Format results
        return format_results_for_gradio(
            heading_valid=heading_valid,
            headings_found=headings_found,
            acronyms_valid=acronyms_valid,
            undefined_acronyms=undefined_acronyms,
            legal_valid=legal_valid,
            incorrect_legal_references=incorrect_legal_references,
            table_valid=table_valid,
            incorrect_captions=incorrect_captions,
            figure_valid=figure_valid,
            incorrect_fig_captions=incorrect_fig_captions,
            references_valid=references_valid,
            incorrect_table_figure_references=incorrect_table_figure_references,
            title_style_valid=title_style_valid,
            incorrect_titles=incorrect_titles,
            required_headings=required_headings,
            doc_type=doc_type,
            double_period_valid=double_period_valid,
            incorrect_sentences=incorrect_sentences,
            spacing_valid=spacing_valid,
            incorrect_spacing=incorrect_spacing,
            abbreviation_issues=abbreviation_issues,
            date_issues=date_issues,
            placeholder_issues=placeholder_issues,
        )
    except Exception as e:
        print(f"Error in process_document: {str(e)}")
        raise
def get_document_checks(doc_type, template_type):
    """Return the required headings for the given document type.

    Only Advisory Circulars have headings here: the short template gets five,
    any other template value gets the seven long-template headings. Every
    other document type gets an empty heading list.

    NOTE(review): this definition rebinds ``get_document_checks`` and so
    overrides the more detailed per-type table defined earlier in this file -
    confirm which of the two is meant to be in effect.

    Args:
        doc_type (str): Document type name.
        template_type (str): Template name; only consulted for
            "Advisory Circular".

    Returns:
        dict: ``{"required_headings": [...]}``.
    """
    if doc_type != "Advisory Circular":
        # Add other document types as needed
        return {"required_headings": []}

    if template_type == "Short AC template AC":
        required = ["Purpose", "Applicability", "Related Reading Material",
                    "Background", "Discussion"]
    else:  # Long AC template
        required = ["Purpose", "Applicability", "Audience", "Related Reading Material",
                    "Background", "Discussion", "Conclusion"]
    return {"required_headings": required}
def format_results_for_gradio(**kwargs):
    """Format the check results as Markdown for display in Gradio.

    Each check becomes a "## <title>" section holding either a single success
    line or a failure header followed by one bullet per finding. Missing
    headings are listed in the order of ``required_headings`` and undefined
    acronyms alphabetically, so the report is identical from run to run.

    Args:
        **kwargs: Results keyed as produced by ``process_document``:
            ``heading_valid``, ``headings_found``, ``required_headings``,
            ``acronyms_valid``, ``undefined_acronyms``, ``legal_valid``,
            ``incorrect_legal_references``, ``table_valid``,
            ``incorrect_captions``, ``figure_valid``,
            ``incorrect_fig_captions``, ``references_valid``,
            ``incorrect_table_figure_references``, ``title_style_valid``,
            ``incorrect_titles``, ``doc_type`` (optional),
            ``double_period_valid``, ``incorrect_sentences``,
            ``spacing_valid``, ``incorrect_spacing``, ``date_issues`` and
            ``placeholder_issues``. Detail keys are only read when the
            matching check failed.

    Returns:
        str: The report, with lines joined by newlines.

    Raises:
        KeyError: If a required key is missing from ``kwargs``.
    """
    # Status icons written as escapes so an encoding round-trip cannot turn
    # them into mojibake again.
    passed_icon = "\u2705"
    failed_icon = "\u274c"

    results = ["# Document Check Results\n"]

    def add_section(title, passed, ok_text, fail_text, build_details, trailing_blank=True):
        # build_details is a callable so the detail keys are only touched when
        # the check actually failed.
        results.append(title)
        if passed:
            results.append(f"{passed_icon} {ok_text}\n")
            return
        results.append(f"{failed_icon} {fail_text}")
        results.extend(build_details())
        if trailing_blank:
            results.append("")

    def missing_heading_lines():
        found = set(kwargs['headings_found'])
        # dict.fromkeys removes duplicates while keeping the required order.
        return [
            f"- {heading}"
            for heading in dict.fromkeys(kwargs['required_headings'])
            if heading not in found
        ]

    def title_style_lines():
        lines = []
        for title in kwargs['incorrect_titles']:
            lines.append(f"- {title['text']}")
            lines.append(f" - Issue: {title['issue']}")
        # Add formatting guidance
        formatting_notes = {
            "Advisory Circular": "Document titles should be italicized, not in quotation marks.",
            "Order": "Document titles should be in quotation marks, not italicized.",
            "Federal Register Notice": "Document titles should be in quotation marks, not italicized.",
            "Policy Statement": "Document titles should not have any special formatting (no italics, no quotation marks).",
        }
        doc_type = kwargs.get('doc_type', 'Unknown')
        if doc_type in formatting_notes:
            lines.append(f"\nNote: {formatting_notes[doc_type]}")
        else:
            lines.append("\nNote: Please verify the correct formatting style for this document type.")
        return lines

    add_section(
        "## Required Headings Check",
        kwargs['heading_valid'],
        "All required headings are present.",
        "Missing Required Headings:",
        missing_heading_lines,
    )
    add_section(
        "## Acronym Check",
        kwargs['acronyms_valid'],
        "All acronyms are properly defined.",
        "The following acronyms need to be defined at first use:",
        lambda: [f"- {acronym}" for acronym in sorted(kwargs['undefined_acronyms'])],
    )
    add_section(
        "## Legal Terminology Check",
        kwargs['legal_valid'],
        "All legal references are properly formatted.",
        "Incorrect Legal Terminology:",
        lambda: [
            f"- Use '{correct_term}' instead of '{incorrect_term}'"
            for incorrect_term, correct_term in kwargs['incorrect_legal_references']
        ],
    )
    add_section(
        "## Table Caption Check",
        kwargs['table_valid'],
        "All table captions are correctly formatted.",
        "Incorrect Table Captions:",
        lambda: [f"- {caption}" for caption in kwargs['incorrect_captions']],
    )
    add_section(
        "## Figure Caption Check",
        kwargs['figure_valid'],
        "All figure captions are correctly formatted.",
        "Incorrect Figure Captions:",
        lambda: [f"- {caption}" for caption in kwargs['incorrect_fig_captions']],
    )
    add_section(
        "## Table and Figure References Check",
        kwargs['references_valid'],
        "All table and figure references are correctly formatted.",
        "Incorrect Table/Figure References:",
        lambda: [f"- {ref}" for ref in kwargs['incorrect_table_figure_references']],
    )
    add_section(
        "## Document Title Style Check",
        kwargs['title_style_valid'],
        "All document title references are properly styled.",
        "Incorrect Document Title Styling:",
        title_style_lines,
    )
    add_section(
        "## Double Period Check",
        kwargs['double_period_valid'],
        "No double periods found.",
        "Sentences found with double periods:",
        lambda: [f"- {sentence}" for sentence in kwargs['incorrect_sentences']],
    )
    add_section(
        "## Spacing Check",
        kwargs['spacing_valid'],
        "All spacing is correct.",
        "Incorrect spacing found in:",
        lambda: [f"- {spacing}" for spacing in kwargs['incorrect_spacing']],
    )
    add_section(
        "## Date Format Consistency",
        not kwargs['date_issues'],
        "All dates are in the correct format.",
        "Date Format Issues:",
        lambda: [
            f"- Incorrect date format '{date}' in: {paragraph}"
            for date, paragraph in kwargs['date_issues']
        ],
    )
    # Last section: no blank separator line after its findings.
    add_section(
        "## Placeholder Check",
        not kwargs['placeholder_issues'],
        "No future references or placeholders found.",
        "Placeholders Found:",
        lambda: [
            f"- Placeholder '{phrase}' in: {paragraph}"
            for phrase, paragraph in kwargs['placeholder_issues']
        ],
        trailing_blank=False,
    )

    return "\n".join(results)
def process_file(file_obj, doc_type, template_type):
    """Process the uploaded file and turn any failure into a readable message.

    This definition rebinds ``process_file``, replacing the earlier one in
    this file; it is the version passed to the submit button's click handler.

    Args:
        file_obj: Uploaded file as raw ``bytes`` or an object with ``read()``;
            ``None`` when nothing was uploaded.
        doc_type (str): Document type name passed on to the checks.
        template_type (str): Template name passed on to the checks.

    Returns:
        str: The formatted check results, a prompt to upload a file, or an
        error description when processing raised an exception.
    """
    if file_obj is None:
        return "Please upload a document first."

    try:
        # Convert bytes to BytesIO object
        if isinstance(file_obj, bytes):
            doc_bytes = io.BytesIO(file_obj)
        else:
            doc_bytes = io.BytesIO(file_obj.read())
        # Process the document
        return process_document(doc_bytes, doc_type, template_type)
    except Exception as e:
        reason = str(e)
        message_lines = (
            "An error occurred while processing the document:",
            f"Error: {reason}",
            "Please ensure:",
            "1. The file is a valid Word document (.docx)",
            "2. The file is not corrupted",
            "3. The file is not password protected",
            f"Technical details: {reason}",
        )
        error_message = "\n".join(message_lines)
        print(f"Error processing file: {reason}")
        return error_message
# Build the Gradio UI: upload + options on the left, Markdown report on the right.
document_types = [
    "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
    "Federal Register Notice", "Handbook/Manual", "Order", "Policy Statement",
    "Rule", "Special Condition", "Technical Standard Order", "Other"
]
template_types = ["Short AC template AC", "Long AC template AC"]


def update_template_visibility(doc_type):
    """Show the template selector only while "Advisory Circular" is chosen."""
    is_advisory_circular = doc_type == "Advisory Circular"
    return gr.update(visible=is_advisory_circular)


demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')

with demo:
    # Page header; components are created in the order they should appear.
    for header_text in (
        "# Document Checker Tool",
        "Upload a Word (docx) document to check for compliance with U.S. federal documentation standards.",
        "### This tool is still in development",
        "Contact Eric Putnam if you have questions and comments.",
    ):
        gr.Markdown(header_text)

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload Word Document (.docx)",
                file_types=[".docx"],
                type="binary"
            )
            doc_type = gr.Dropdown(
                choices=document_types,
                label="Document Type",
                value="Advisory Circular"
            )
            template_type = gr.Radio(
                choices=template_types,
                label="Template Type (Only for Advisory Circular)",
                visible=True,
                value="Short AC template AC"
            )
            submit_btn = gr.Button("Check Document", variant="primary")
        with gr.Column(scale=2):
            output = gr.Markdown(
                label="Check Results",
                value="Results will appear here after processing..."
            )

    # Toggle the template selector whenever the document type changes.
    doc_type.change(
        fn=update_template_visibility,
        inputs=[doc_type],
        outputs=[template_type]
    )

    # Run the checks when the submit button is clicked.
    submit_btn.click(
        fn=process_file,
        inputs=[file_input, doc_type, template_type],
        outputs=[output]
    )

# Start the app.
demo.launch()