Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ from docx import Document
|
|
10 |
import io
|
11 |
import os
|
12 |
import traceback
|
|
|
13 |
|
14 |
@dataclass
|
15 |
class DocumentCheckResult:
|
@@ -934,42 +935,171 @@ class FAADocumentChecker(DocumentChecker):
|
|
934 |
|
935 |
return results
|
936 |
|
937 |
-
|
938 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
939 |
try:
|
940 |
-
#
|
|
|
|
|
|
|
941 |
if isinstance(file_obj, bytes):
|
942 |
file_obj = io.BytesIO(file_obj)
|
943 |
-
|
944 |
-
|
945 |
doc = Document(file_obj)
|
946 |
paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
|
947 |
-
|
948 |
-
# Rewind
|
949 |
file_obj.seek(0)
|
950 |
-
|
951 |
# Run all checks
|
952 |
-
results =
|
953 |
-
|
954 |
-
results
|
955 |
-
results
|
956 |
-
|
957 |
-
results['section_symbol_check'] = checker.check_section_symbol_usage(paragraphs)
|
958 |
-
results['table_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Table')
|
959 |
-
results['figure_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Figure')
|
960 |
-
results['references_check'] = checker.table_figure_reference_check(paragraphs, doc_type)
|
961 |
-
results['title_check'] = checker.document_title_check(file_obj, doc_type)
|
962 |
-
results['double_period_check'] = checker.double_period_check(paragraphs)
|
963 |
-
results['spacing_check'] = checker.spacing_check(paragraphs)
|
964 |
-
results['abbreviation_check'] = checker.check_abbreviation_usage(paragraphs)
|
965 |
-
results['date_check'] = checker.check_date_formats(paragraphs)
|
966 |
-
results['placeholder_check'] = checker.check_placeholders(paragraphs)
|
967 |
-
|
968 |
-
return format_results_for_gradio(results, doc_type)
|
969 |
except Exception as e:
|
970 |
-
|
971 |
-
traceback.print_exc()
|
972 |
-
return f"
|
973 |
|
974 |
def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
975 |
"""Format the results for display in Gradio."""
|
@@ -1026,67 +1156,123 @@ def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type:
|
|
1026 |
|
1027 |
return "\n".join(output)
|
1028 |
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
with demo:
|
1033 |
-
gr.Markdown("# Document Checker Tool")
|
1034 |
-
gr.Markdown("Upload a Word (docx) document to check for compliance with U.S. federal documentation standards.")
|
1035 |
-
gr.Markdown("*This tool is still in development and you might get false positives in your results*")
|
1036 |
-
gr.Markdown("Contact Eric Putnam if you have questions and comments.")
|
1037 |
-
gr.Markdown("""
|
1038 |
-
1. Upload a clean (no track changes or comments) Word file.
|
1039 |
-
2. Choose **Check Document**.""")
|
1040 |
|
1041 |
document_types = [
|
1042 |
-
"Advisory Circular",
|
1043 |
-
"
|
1044 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1045 |
]
|
1046 |
|
1047 |
template_types = ["Short AC template AC", "Long AC template AC"]
|
1048 |
|
1049 |
-
|
1050 |
-
|
1051 |
-
|
1052 |
-
|
1053 |
-
|
1054 |
-
|
1055 |
-
|
1056 |
-
|
1057 |
-
|
1058 |
-
|
1059 |
-
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
-
|
1066 |
-
)
|
1067 |
-
submit_btn = gr.Button("Check Document", variant="primary")
|
1068 |
-
|
1069 |
-
with gr.Column(scale=2):
|
1070 |
-
output = gr.Markdown(
|
1071 |
-
label="Check Results",
|
1072 |
-
value="Results will appear here after processing..."
|
1073 |
-
)
|
1074 |
-
|
1075 |
-
def update_template_visibility(doc_type):
|
1076 |
-
return gr.update(visible=doc_type == "Advisory Circular")
|
1077 |
|
1078 |
-
|
1079 |
-
|
1080 |
-
|
1081 |
-
|
1082 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1083 |
|
1084 |
-
|
1085 |
-
fn=process_document,
|
1086 |
-
inputs=[file_input, doc_type, template_type],
|
1087 |
-
outputs=[output]
|
1088 |
-
)
|
1089 |
|
1090 |
-
#
|
1091 |
if __name__ == "__main__":
|
|
|
1092 |
demo.launch()
|
|
|
10 |
import io
|
11 |
import os
|
12 |
import traceback
|
13 |
+
from datetime import datetime
|
14 |
|
15 |
@dataclass
|
16 |
class DocumentCheckResult:
|
|
|
935 |
|
936 |
return results
|
937 |
|
938 |
+
@dataclass
|
939 |
+
class DocumentCheckResult:
|
940 |
+
"""Structured result for document checks."""
|
941 |
+
success: bool
|
942 |
+
issues: List[Dict[str, Any]]
|
943 |
+
details: Optional[Dict[str, Any]] = None
|
944 |
+
|
945 |
+
def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
946 |
+
"""Format check results into a Markdown string for display."""
|
947 |
+
output = []
|
948 |
+
|
949 |
+
# Add header with timestamp
|
950 |
+
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
951 |
+
output.extend([
|
952 |
+
f"# Document Check Results - {current_time}",
|
953 |
+
f"## Document Type: {doc_type}",
|
954 |
+
"---\n"
|
955 |
+
])
|
956 |
+
|
957 |
+
# Count issues
|
958 |
+
total_issues = sum(1 for r in results.values() if not r.success)
|
959 |
+
|
960 |
+
if total_issues == 0:
|
961 |
+
output.append("✅ **All checks passed successfully!**\n")
|
962 |
+
return "\n".join(output)
|
963 |
+
|
964 |
+
output.append(f"❌ Found issues in {total_issues} check categories\n")
|
965 |
+
|
966 |
+
# Define check categories and their display names
|
967 |
+
check_categories = {
|
968 |
+
'heading_title_check': {
|
969 |
+
'title': 'π Required Headings',
|
970 |
+
'priority': 1
|
971 |
+
},
|
972 |
+
'heading_title_period_check': {
|
973 |
+
'title': 'π Heading Period Usage',
|
974 |
+
'priority': 1
|
975 |
+
},
|
976 |
+
'acronym_check': {
|
977 |
+
'title': 'π Acronym Definitions',
|
978 |
+
'priority': 2
|
979 |
+
},
|
980 |
+
'terminology_check': {
|
981 |
+
'title': 'π Terminology Usage',
|
982 |
+
'priority': 2
|
983 |
+
},
|
984 |
+
'section_symbol_usage_check': {
|
985 |
+
'title': '§ Section Symbol Usage',
|
986 |
+
'priority': 2
|
987 |
+
},
|
988 |
+
'caption_check_table': {
|
989 |
+
'title': 'π Table Captions',
|
990 |
+
'priority': 3
|
991 |
+
},
|
992 |
+
'caption_check_figure': {
|
993 |
+
'title': '🖼️ Figure Captions',
|
994 |
+
'priority': 3
|
995 |
+
},
|
996 |
+
'table_figure_reference_check': {
|
997 |
+
'title': 'π Table/Figure References',
|
998 |
+
'priority': 3
|
999 |
+
},
|
1000 |
+
'document_title_check': {
|
1001 |
+
'title': 'π Document Title Format',
|
1002 |
+
'priority': 1
|
1003 |
+
},
|
1004 |
+
'double_period_check': {
|
1005 |
+
'title': '⚡ Double Periods',
|
1006 |
+
'priority': 4
|
1007 |
+
},
|
1008 |
+
'spacing_check': {
|
1009 |
+
'title': '⌨️ Spacing Issues',
|
1010 |
+
'priority': 4
|
1011 |
+
},
|
1012 |
+
'abbreviation_usage_check': {
|
1013 |
+
'title': 'π Abbreviation Usage',
|
1014 |
+
'priority': 3
|
1015 |
+
},
|
1016 |
+
'date_formats_check': {
|
1017 |
+
'title': '📅 Date Formats',
|
1018 |
+
'priority': 3
|
1019 |
+
},
|
1020 |
+
'placeholders_check': {
|
1021 |
+
'title': '🚩 Placeholder Content',
|
1022 |
+
'priority': 1
|
1023 |
+
}
|
1024 |
+
}
|
1025 |
+
|
1026 |
+
# Sort checks by priority
|
1027 |
+
sorted_checks = sorted(
|
1028 |
+
[(name, result) for name, result in results.items()],
|
1029 |
+
key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
|
1030 |
+
)
|
1031 |
+
|
1032 |
+
# Process each check result
|
1033 |
+
for check_name, result in sorted_checks:
|
1034 |
+
if not result.success:
|
1035 |
+
category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
|
1036 |
+
|
1037 |
+
output.append(f"### {category['title']}")
|
1038 |
+
|
1039 |
+
if isinstance(result.issues, list):
|
1040 |
+
for issue in result.issues[:5]: # Show first 5 issues
|
1041 |
+
if isinstance(issue, dict):
|
1042 |
+
# Format dictionary issues
|
1043 |
+
for key, value in issue.items():
|
1044 |
+
if isinstance(value, list):
|
1045 |
+
output.extend([f"- {item}" for item in value])
|
1046 |
+
else:
|
1047 |
+
output.append(f"- {key}: {value}")
|
1048 |
+
else:
|
1049 |
+
output.append(f"- {issue}")
|
1050 |
+
|
1051 |
+
# Show count of remaining issues
|
1052 |
+
if len(result.issues) > 5:
|
1053 |
+
output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
|
1054 |
+
|
1055 |
+
output.append("") # Add spacing between sections
|
1056 |
+
|
1057 |
+
# Add summary and recommendations
|
1058 |
+
output.extend([
|
1059 |
+
"## π Summary and Recommendations",
|
1060 |
+
"",
|
1061 |
+
"### Priority Order for Fixes:",
|
1062 |
+
"1. 🔴 Critical: Heading formats, required content, and document structure",
|
1063 |
+
"2. 🟡 Important: Terminology, acronyms, and references",
|
1064 |
+
"3. 🟢 Standard: Formatting, spacing, and style consistency",
|
1065 |
+
"",
|
1066 |
+
"### Next Steps:",
|
1067 |
+
"1. Address issues in priority order",
|
1068 |
+
"2. Use search/replace for consistent fixes",
|
1069 |
+
"3. Re-run checker after making changes",
|
1070 |
+
"4. Update your document template if needed",
|
1071 |
+
""
|
1072 |
+
])
|
1073 |
+
|
1074 |
+
return "\n".join(output)
|
1075 |
+
|
1076 |
+
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
|
1077 |
+
"""Process document and run all checks."""
|
1078 |
try:
|
1079 |
+
# Initialize checker
|
1080 |
+
checker = FAADocumentChecker()
|
1081 |
+
|
1082 |
+
# Convert file object to BytesIO if needed
|
1083 |
if isinstance(file_obj, bytes):
|
1084 |
file_obj = io.BytesIO(file_obj)
|
1085 |
+
|
1086 |
+
# Extract paragraphs
|
1087 |
doc = Document(file_obj)
|
1088 |
paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
|
1089 |
+
|
1090 |
+
# Rewind file object
|
1091 |
file_obj.seek(0)
|
1092 |
+
|
1093 |
# Run all checks
|
1094 |
+
results = checker.run_all_checks(file_obj, doc_type, template_type)
|
1095 |
+
|
1096 |
+
# Format results for display
|
1097 |
+
return format_check_results(results, doc_type)
|
1098 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1099 |
except Exception as e:
|
1100 |
+
logging.error(f"Error processing document: {str(e)}")
|
1101 |
+
traceback.print_exc()
|
1102 |
+
return f"❌ Error processing document: {str(e)}\n\nPlease ensure the file is a valid .docx document and try again."
|
1103 |
|
1104 |
def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
1105 |
"""Format the results for display in Gradio."""
|
|
|
1156 |
|
1157 |
return "\n".join(output)
|
1158 |
|
1159 |
+
def create_interface():
|
1160 |
+
"""Create and configure the Gradio interface."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1161 |
|
1162 |
document_types = [
|
1163 |
+
"Advisory Circular",
|
1164 |
+
"Airworthiness Criteria",
|
1165 |
+
"Deviation Memo",
|
1166 |
+
"Exemption",
|
1167 |
+
"Federal Register Notice",
|
1168 |
+
"Order",
|
1169 |
+
"Policy Statement",
|
1170 |
+
"Rule",
|
1171 |
+
"Special Condition",
|
1172 |
+
"Technical Standard Order",
|
1173 |
+
"Other"
|
1174 |
]
|
1175 |
|
1176 |
template_types = ["Short AC template AC", "Long AC template AC"]
|
1177 |
|
1178 |
+
# Custom CSS for better styling
|
1179 |
+
custom_css = """
|
1180 |
+
.gradio-container {
|
1181 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
1182 |
+
}
|
1183 |
+
.container {
|
1184 |
+
max-width: 900px;
|
1185 |
+
margin: auto;
|
1186 |
+
}
|
1187 |
+
.alert {
|
1188 |
+
padding: 1rem;
|
1189 |
+
margin-bottom: 1rem;
|
1190 |
+
border-radius: 0.5rem;
|
1191 |
+
background-color: #f8f9fa;
|
1192 |
+
border: 1px solid #dee2e6;
|
1193 |
+
}
|
1194 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1195 |
|
1196 |
+
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
|
1197 |
+
gr.Markdown(
|
1198 |
+
"""
|
1199 |
+
# π Document Checker Tool
|
1200 |
+
|
1201 |
+
### Purpose
|
1202 |
+
This tool checks Word documents for compliance with U.S. federal documentation standards.
|
1203 |
+
|
1204 |
+
### How to Use
|
1205 |
+
1. Upload your Word document (.docx format)
|
1206 |
+
2. Select the document type
|
1207 |
+
3. Click "Check Document"
|
1208 |
+
|
1209 |
+
> **Note:** Please ensure your document is clean (no track changes or comments)
|
1210 |
+
"""
|
1211 |
+
)
|
1212 |
+
|
1213 |
+
with gr.Row():
|
1214 |
+
with gr.Column(scale=1):
|
1215 |
+
file_input = gr.File(
|
1216 |
+
label="π Upload Word Document (.docx)",
|
1217 |
+
file_types=[".docx"],
|
1218 |
+
type="binary"
|
1219 |
+
)
|
1220 |
+
|
1221 |
+
doc_type = gr.Dropdown(
|
1222 |
+
choices=document_types,
|
1223 |
+
label="π Document Type",
|
1224 |
+
value="Advisory Circular",
|
1225 |
+
info="Select the type of document you're checking"
|
1226 |
+
)
|
1227 |
+
|
1228 |
+
template_type = gr.Radio(
|
1229 |
+
choices=template_types,
|
1230 |
+
label="π Template Type",
|
1231 |
+
visible=False,
|
1232 |
+
info="Only applicable for Advisory Circulars"
|
1233 |
+
)
|
1234 |
+
|
1235 |
+
submit_btn = gr.Button(
|
1236 |
+
"π Check Document",
|
1237 |
+
variant="primary"
|
1238 |
+
)
|
1239 |
+
|
1240 |
+
with gr.Column(scale=2):
|
1241 |
+
results = gr.Markdown(
|
1242 |
+
label="Check Results",
|
1243 |
+
value="Results will appear here after processing...",
|
1244 |
+
elem_classes=["results-panel"]
|
1245 |
+
)
|
1246 |
+
|
1247 |
+
# Update template type visibility based on document type
|
1248 |
+
def update_template_visibility(doc_type):
|
1249 |
+
return gr.update(visible=doc_type == "Advisory Circular")
|
1250 |
+
|
1251 |
+
doc_type.change(
|
1252 |
+
fn=update_template_visibility,
|
1253 |
+
inputs=[doc_type],
|
1254 |
+
outputs=[template_type]
|
1255 |
+
)
|
1256 |
+
|
1257 |
+
# Handle document processing
|
1258 |
+
submit_btn.click(
|
1259 |
+
fn=process_document,
|
1260 |
+
inputs=[file_input, doc_type, template_type],
|
1261 |
+
outputs=[results]
|
1262 |
+
)
|
1263 |
+
|
1264 |
+
gr.Markdown(
|
1265 |
+
"""
|
1266 |
+
### π Important Notes
|
1267 |
+
- This tool is in development; you may encounter false positives
|
1268 |
+
- For questions or feedback, contact Eric Putnam
|
1269 |
+
- Results are not stored or saved
|
1270 |
+
"""
|
1271 |
+
)
|
1272 |
|
1273 |
+
return demo
|
|
|
|
|
|
|
|
|
1274 |
|
1275 |
+
# Initialize and launch the interface
|
1276 |
if __name__ == "__main__":
|
1277 |
+
demo = create_interface()
|
1278 |
demo.launch()
|