Hoctar77 committed on
Commit
bb6584a
·
verified ·
1 Parent(s): 736fba7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +267 -81
app.py CHANGED
@@ -10,6 +10,7 @@ from docx import Document
10
  import io
11
  import os
12
  import traceback
 
13
 
14
  @dataclass
15
  class DocumentCheckResult:
@@ -934,42 +935,171 @@ class FAADocumentChecker(DocumentChecker):
934
 
935
  return results
936
 
937
- def process_document(file_obj, doc_type, template_type):
938
- """Process the document and run all checks."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
939
  try:
940
- # Convert file object to BytesIO
 
 
 
941
  if isinstance(file_obj, bytes):
942
  file_obj = io.BytesIO(file_obj)
943
-
944
- checker = FAADocumentChecker()
945
  doc = Document(file_obj)
946
  paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
947
-
948
- # Rewind the file object for additional processing
949
  file_obj.seek(0)
950
-
951
  # Run all checks
952
- results = {}
953
- results['heading_check'] = checker.heading_title_check(paragraphs, doc_type)
954
- results['heading_period_check'] = checker.heading_title_period_check(paragraphs, doc_type)
955
- results['acronym_check'] = checker.acronym_check(paragraphs)
956
- results['terminology_check'] = checker.check_terminology(paragraphs)
957
- results['section_symbol_check'] = checker.check_section_symbol_usage(paragraphs)
958
- results['table_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Table')
959
- results['figure_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Figure')
960
- results['references_check'] = checker.table_figure_reference_check(paragraphs, doc_type)
961
- results['title_check'] = checker.document_title_check(file_obj, doc_type)
962
- results['double_period_check'] = checker.double_period_check(paragraphs)
963
- results['spacing_check'] = checker.spacing_check(paragraphs)
964
- results['abbreviation_check'] = checker.check_abbreviation_usage(paragraphs)
965
- results['date_check'] = checker.check_date_formats(paragraphs)
966
- results['placeholder_check'] = checker.check_placeholders(paragraphs)
967
-
968
- return format_results_for_gradio(results, doc_type)
969
  except Exception as e:
970
- print(f"Error in process_document: {str(e)}")
971
- traceback.print_exc() # This will print the full traceback
972
- return f"An error occurred while processing the document: {str(e)}"
973
 
974
  def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
975
  """Format the results for display in Gradio."""
@@ -1026,67 +1156,123 @@ def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type:
1026
 
1027
  return "\n".join(output)
1028
 
1029
- # Create the Gradio interface
1030
- demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')
1031
-
1032
- with demo:
1033
- gr.Markdown("# Document Checker Tool")
1034
- gr.Markdown("Upload a Word (docx) document to check for compliance with U.S. federal documentation standards.")
1035
- gr.Markdown("*This tool is still in development and you might get false positives in your results*")
1036
- gr.Markdown("Contact Eric Putnam if you have questions and comments.")
1037
- gr.Markdown("""
1038
- 1. Upload a clean (no track changes or comments) Word file.
1039
- 2. Choose **Check Document**.""")
1040
 
1041
  document_types = [
1042
- "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
1043
- "Federal Register Notice", "Order", "Policy Statement",
1044
- "Rule", "Special Condition", "Technical Standard Order", "Other"
 
 
 
 
 
 
 
 
1045
  ]
1046
 
1047
  template_types = ["Short AC template AC", "Long AC template AC"]
1048
 
1049
- with gr.Row():
1050
- with gr.Column(scale=1):
1051
- file_input = gr.File(
1052
- label="Upload Word Document (.docx)",
1053
- file_types=[".docx"],
1054
- type="binary"
1055
- )
1056
- doc_type = gr.Dropdown(
1057
- choices=document_types,
1058
- label="Document Type",
1059
- value="Advisory Circular"
1060
- )
1061
- template_type = gr.Radio(
1062
- choices=template_types,
1063
- label="Template Type (Only for Advisory Circular)",
1064
- visible=True,
1065
- value="Short AC template AC"
1066
- )
1067
- submit_btn = gr.Button("Check Document", variant="primary")
1068
-
1069
- with gr.Column(scale=2):
1070
- output = gr.Markdown(
1071
- label="Check Results",
1072
- value="Results will appear here after processing..."
1073
- )
1074
-
1075
- def update_template_visibility(doc_type):
1076
- return gr.update(visible=doc_type == "Advisory Circular")
1077
 
1078
- doc_type.change(
1079
- fn=update_template_visibility,
1080
- inputs=[doc_type],
1081
- outputs=[template_type]
1082
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1083
 
1084
- submit_btn.click(
1085
- fn=process_document,
1086
- inputs=[file_input, doc_type, template_type],
1087
- outputs=[output]
1088
- )
1089
 
1090
- # Launch the demo
1091
  if __name__ == "__main__":
 
1092
  demo.launch()
 
10
  import io
11
  import os
12
  import traceback
13
+ from datetime import datetime
14
 
15
  @dataclass
16
  class DocumentCheckResult:
 
935
 
936
  return results
937
 
938
+ @dataclass
939
+ class DocumentCheckResult:
940
+ """Structured result for document checks."""
941
+ success: bool
942
+ issues: List[Dict[str, Any]]
943
+ details: Optional[Dict[str, Any]] = None
944
+
945
+ def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
946
+ """Format check results into a Markdown string for display."""
947
+ output = []
948
+
949
+ # Add header with timestamp
950
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
951
+ output.extend([
952
+ f"# Document Check Results - {current_time}",
953
+ f"## Document Type: {doc_type}",
954
+ "---\n"
955
+ ])
956
+
957
+ # Count issues
958
+ total_issues = sum(1 for r in results.values() if not r.success)
959
+
960
+ if total_issues == 0:
961
+ output.append("✅ **All checks passed successfully!**\n")
962
+ return "\n".join(output)
963
+
964
+ output.append(f"❗ Found issues in {total_issues} check categories\n")
965
+
966
+ # Define check categories and their display names
967
+ check_categories = {
968
+ 'heading_title_check': {
969
+ 'title': '📋 Required Headings',
970
+ 'priority': 1
971
+ },
972
+ 'heading_title_period_check': {
973
+ 'title': '🔍 Heading Period Usage',
974
+ 'priority': 1
975
+ },
976
+ 'acronym_check': {
977
+ 'title': '📝 Acronym Definitions',
978
+ 'priority': 2
979
+ },
980
+ 'terminology_check': {
981
+ 'title': '📖 Terminology Usage',
982
+ 'priority': 2
983
+ },
984
+ 'section_symbol_usage_check': {
985
+ 'title': '§ Section Symbol Usage',
986
+ 'priority': 2
987
+ },
988
+ 'caption_check_table': {
989
+ 'title': '📊 Table Captions',
990
+ 'priority': 3
991
+ },
992
+ 'caption_check_figure': {
993
+ 'title': '🖼️ Figure Captions',
994
+ 'priority': 3
995
+ },
996
+ 'table_figure_reference_check': {
997
+ 'title': '🔗 Table/Figure References',
998
+ 'priority': 3
999
+ },
1000
+ 'document_title_check': {
1001
+ 'title': '📑 Document Title Format',
1002
+ 'priority': 1
1003
+ },
1004
+ 'double_period_check': {
1005
+ 'title': '⚡ Double Periods',
1006
+ 'priority': 4
1007
+ },
1008
+ 'spacing_check': {
1009
+ 'title': '⌨️ Spacing Issues',
1010
+ 'priority': 4
1011
+ },
1012
+ 'abbreviation_usage_check': {
1013
+ 'title': '📎 Abbreviation Usage',
1014
+ 'priority': 3
1015
+ },
1016
+ 'date_formats_check': {
1017
+ 'title': '📅 Date Formats',
1018
+ 'priority': 3
1019
+ },
1020
+ 'placeholders_check': {
1021
+ 'title': '🚩 Placeholder Content',
1022
+ 'priority': 1
1023
+ }
1024
+ }
1025
+
1026
+ # Sort checks by priority
1027
+ sorted_checks = sorted(
1028
+ [(name, result) for name, result in results.items()],
1029
+ key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
1030
+ )
1031
+
1032
+ # Process each check result
1033
+ for check_name, result in sorted_checks:
1034
+ if not result.success:
1035
+ category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
1036
+
1037
+ output.append(f"### {category['title']}")
1038
+
1039
+ if isinstance(result.issues, list):
1040
+ for issue in result.issues[:5]: # Show first 5 issues
1041
+ if isinstance(issue, dict):
1042
+ # Format dictionary issues
1043
+ for key, value in issue.items():
1044
+ if isinstance(value, list):
1045
+ output.extend([f"- {item}" for item in value])
1046
+ else:
1047
+ output.append(f"- {key}: {value}")
1048
+ else:
1049
+ output.append(f"- {issue}")
1050
+
1051
+ # Show count of remaining issues
1052
+ if len(result.issues) > 5:
1053
+ output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
1054
+
1055
+ output.append("") # Add spacing between sections
1056
+
1057
+ # Add summary and recommendations
1058
+ output.extend([
1059
+ "## 📋 Summary and Recommendations",
1060
+ "",
1061
+ "### Priority Order for Fixes:",
1062
+ "1. 🔴 Critical: Heading formats, required content, and document structure",
1063
+ "2. 🟡 Important: Terminology, acronyms, and references",
1064
+ "3. 🟢 Standard: Formatting, spacing, and style consistency",
1065
+ "",
1066
+ "### Next Steps:",
1067
+ "1. Address issues in priority order",
1068
+ "2. Use search/replace for consistent fixes",
1069
+ "3. Re-run checker after making changes",
1070
+ "4. Update your document template if needed",
1071
+ ""
1072
+ ])
1073
+
1074
+ return "\n".join(output)
1075
+
1076
+ def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
1077
+ """Process document and run all checks."""
1078
  try:
1079
+ # Initialize checker
1080
+ checker = FAADocumentChecker()
1081
+
1082
+ # Convert file object to BytesIO if needed
1083
  if isinstance(file_obj, bytes):
1084
  file_obj = io.BytesIO(file_obj)
1085
+
1086
+ # Extract paragraphs
1087
  doc = Document(file_obj)
1088
  paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
1089
+
1090
+ # Rewind file object
1091
  file_obj.seek(0)
1092
+
1093
  # Run all checks
1094
+ results = checker.run_all_checks(file_obj, doc_type, template_type)
1095
+
1096
+ # Format results for display
1097
+ return format_check_results(results, doc_type)
1098
+
 
 
 
 
 
 
 
 
 
 
 
 
1099
  except Exception as e:
1100
+ logging.error(f"Error processing document: {str(e)}")
1101
+ traceback.print_exc()
1102
+ return f"❌ Error processing document: {str(e)}\n\nPlease ensure the file is a valid .docx document and try again."
1103
 
1104
  def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
1105
  """Format the results for display in Gradio."""
 
1156
 
1157
  return "\n".join(output)
1158
 
1159
+ def create_interface():
1160
+ """Create and configure the Gradio interface."""
 
 
 
 
 
 
 
 
 
1161
 
1162
  document_types = [
1163
+ "Advisory Circular",
1164
+ "Airworthiness Criteria",
1165
+ "Deviation Memo",
1166
+ "Exemption",
1167
+ "Federal Register Notice",
1168
+ "Order",
1169
+ "Policy Statement",
1170
+ "Rule",
1171
+ "Special Condition",
1172
+ "Technical Standard Order",
1173
+ "Other"
1174
  ]
1175
 
1176
  template_types = ["Short AC template AC", "Long AC template AC"]
1177
 
1178
+ # Custom CSS for better styling
1179
+ custom_css = """
1180
+ .gradio-container {
1181
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
1182
+ }
1183
+ .container {
1184
+ max-width: 900px;
1185
+ margin: auto;
1186
+ }
1187
+ .alert {
1188
+ padding: 1rem;
1189
+ margin-bottom: 1rem;
1190
+ border-radius: 0.5rem;
1191
+ background-color: #f8f9fa;
1192
+ border: 1px solid #dee2e6;
1193
+ }
1194
+ """
 
 
 
 
 
 
 
 
 
 
 
1195
 
1196
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
1197
+ gr.Markdown(
1198
+ """
1199
+ # 📑 Document Checker Tool
1200
+
1201
+ ### Purpose
1202
+ This tool checks Word documents for compliance with U.S. federal documentation standards.
1203
+
1204
+ ### How to Use
1205
+ 1. Upload your Word document (.docx format)
1206
+ 2. Select the document type
1207
+ 3. Click "Check Document"
1208
+
1209
+ > **Note:** Please ensure your document is clean (no track changes or comments)
1210
+ """
1211
+ )
1212
+
1213
+ with gr.Row():
1214
+ with gr.Column(scale=1):
1215
+ file_input = gr.File(
1216
+ label="📎 Upload Word Document (.docx)",
1217
+ file_types=[".docx"],
1218
+ type="binary"
1219
+ )
1220
+
1221
+ doc_type = gr.Dropdown(
1222
+ choices=document_types,
1223
+ label="📋 Document Type",
1224
+ value="Advisory Circular",
1225
+ info="Select the type of document you're checking"
1226
+ )
1227
+
1228
+ template_type = gr.Radio(
1229
+ choices=template_types,
1230
+ label="📑 Template Type",
1231
+ visible=False,
1232
+ info="Only applicable for Advisory Circulars"
1233
+ )
1234
+
1235
+ submit_btn = gr.Button(
1236
+ "🔍 Check Document",
1237
+ variant="primary"
1238
+ )
1239
+
1240
+ with gr.Column(scale=2):
1241
+ results = gr.Markdown(
1242
+ label="Check Results",
1243
+ value="Results will appear here after processing...",
1244
+ elem_classes=["results-panel"]
1245
+ )
1246
+
1247
+ # Update template type visibility based on document type
1248
+ def update_template_visibility(doc_type):
1249
+ return gr.update(visible=doc_type == "Advisory Circular")
1250
+
1251
+ doc_type.change(
1252
+ fn=update_template_visibility,
1253
+ inputs=[doc_type],
1254
+ outputs=[template_type]
1255
+ )
1256
+
1257
+ # Handle document processing
1258
+ submit_btn.click(
1259
+ fn=process_document,
1260
+ inputs=[file_input, doc_type, template_type],
1261
+ outputs=[results]
1262
+ )
1263
+
1264
+ gr.Markdown(
1265
+ """
1266
+ ### 📌 Important Notes
1267
+ - This tool is in development; you may encounter false positives
1268
+ - For questions or feedback, contact Eric Putnam
1269
+ - Results are not stored or saved
1270
+ """
1271
+ )
1272
 
1273
+ return demo
 
 
 
 
1274
 
1275
+ # Initialize and launch the interface
1276
  if __name__ == "__main__":
1277
+ demo = create_interface()
1278
  demo.launch()