Hoctar77 committed on
Commit
d178ff4
·
verified ·
1 Parent(s): 12c8dbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +375 -34
app.py CHANGED
@@ -11,6 +11,8 @@ import io
11
  import os
12
  import traceback
13
  from datetime import datetime
 
 
14
 
15
  # Core data structures
16
  @dataclass
@@ -868,44 +870,29 @@ class FAADocumentChecker(DocumentChecker):
868
  def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
869
  """Process document and run all checks."""
870
  try:
871
- # Initialize checker
872
  checker = FAADocumentChecker()
873
 
874
- # Convert file object to BytesIO if needed
875
  if isinstance(file_obj, bytes):
876
  file_obj = io.BytesIO(file_obj)
877
 
878
- # Run all checks
879
  results = checker.run_all_checks(file_obj, doc_type, template_type)
880
-
881
- # Format results using DocumentCheckResultsFormatter
882
- formatter = DocumentCheckResultsFormatter()
883
- formatted_results = formatter.format_results(results, doc_type)
884
-
885
- # Convert the formatted results to HTML
886
- html_content = f"""
887
- <div id="document-checker-results"></div>
888
- <script type="module">
889
- import DocumentCheckerResults from './components/DocumentCheckerResults.jsx';
890
- const results = {json.dumps(formatted_results)};
891
- const root = document.getElementById('document-checker-results');
892
- ReactDOM.render(React.createElement(DocumentCheckerResults, {{ results }}), root);
893
- </script>
894
- """
895
-
896
- return html_content
897
 
898
  except Exception as e:
899
  logging.error(f"Error processing document: {str(e)}")
900
  traceback.print_exc()
901
- error_html = f"""
902
- <div class="error-message" style="color: red; padding: 1rem;">
903
- ❌ Error processing document: {str(e)}
904
- <br><br>
905
- Please ensure the file is a valid .docx document and try again.
906
- </div>
907
- """
908
- return error_html
 
 
 
 
909
 
910
  def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
911
  """Format check results into a Markdown string for Gradio display."""
@@ -988,6 +975,190 @@ def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: s
988
 
989
  return "\n".join(output)
990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
  def create_interface():
992
  """Create and configure the Gradio interface."""
993
 
@@ -1006,7 +1177,153 @@ def create_interface():
1006
  ]
1007
 
1008
  template_types = ["Short AC template AC", "Long AC template AC"]
1009
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1010
  with gr.Blocks() as demo:
1011
  gr.Markdown(
1012
  """
@@ -1052,10 +1369,34 @@ def create_interface():
1052
  )
1053
 
1054
  with gr.Column(scale=2):
1055
- results = gr.Component("DocumentCheckerResults",
1056
- value=None,
1057
- render=False
1058
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1059
 
1060
  # Update template type visibility based on document type
1061
  def update_template_visibility(doc_type):
@@ -1069,7 +1410,7 @@ def create_interface():
1069
 
1070
  # Handle document processing
1071
  submit_btn.click(
1072
- fn=process_document,
1073
  inputs=[file_input, doc_type, template_type],
1074
  outputs=[results]
1075
  )
 
11
  import os
12
  import traceback
13
  from datetime import datetime
14
+ import textwrap
15
+ from colorama import init, Fore, Style
16
 
17
  # Core data structures
18
  @dataclass
 
870
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
    """Run every document check on an uploaded file and return a Markdown report.

    Args:
        file_obj: The uploaded document. Raw ``bytes`` are wrapped in a
            ``BytesIO`` before being handed to the checker; any other object is
            passed through unchanged.
        doc_type: Document type forwarded to the checker and the formatter.
        template_type: Optional template variant forwarded to the checker.

    Returns:
        The Markdown produced by ``format_markdown_results`` on success, or a
        Markdown error message if anything raised while checking.
    """
    try:
        checker = FAADocumentChecker()

        if isinstance(file_obj, bytes):
            file_obj = io.BytesIO(file_obj)

        results = checker.run_all_checks(file_obj, doc_type, template_type)
        return format_markdown_results(results, doc_type)

    except Exception as e:
        # This is the UI boundary: never propagate, always return something
        # displayable. logging.exception records the message at ERROR level
        # together with the traceback, so the stack trace goes through the
        # configured log handlers instead of being printed straight to stderr.
        logging.exception(f"Error processing document: {str(e)}")

        # Built line by line so every line starts at column 0; Markdown would
        # otherwise treat indented lines as a code block instead of a heading
        # and a numbered list.
        return "\n".join([
            "# ❌ Error Processing Document",
            "",
            f"**Error Details:** {str(e)}",
            "",
            "Please ensure:",
            "1. The file is a valid .docx document",
            "2. The file is not corrupted or password protected",
            "3. The file is properly formatted",
            "",
            "Try again after checking these issues. If the problem persists, contact support.",
        ])
896
 
897
  def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
898
  """Format check results into a Markdown string for Gradio display."""
 
975
 
976
  return "\n".join(output)
977
 
978
class DocumentCheckResultsFormatter:
    """Formats document check results in a user-friendly way with detailed examples and fixes.

    Each known check name maps to a catalogue entry (title, description,
    solution, before/after example). ``format_results`` turns a mapping of
    check results into a plain-text report built from those entries, and
    ``save_report`` writes that report to a file.
    """

    # How many individual findings are listed per category before the
    # remainder is summarised in a single "... and N more" line.
    MAX_ISSUES_SHOWN = 3

    def __init__(self):
        """Initialize colorama and build the catalogue of issue categories."""
        init()  # Initialize colorama

        # Enhanced issue categories with examples and specific fixes
        self.issue_categories = {
            'acronym_check': {
                'title': 'Acronym Definition Issues',
                'description': 'Acronyms must be defined at their first use in the document.',
                'solution': 'Define each acronym at its first use, e.g., "Federal Aviation Administration (FAA)"',
                'example_fix': {
                    'before': 'The FAA published new guidelines.',
                    'after': 'The Federal Aviation Administration (FAA) published new guidelines.'
                }
            },
            'terminology_check': {
                'title': 'Incorrect Terminology',
                'description': 'Non-standard or prohibited terms and phrases detected. Avoid relative position references.',
                'solution': 'Use explicit references to paragraphs, sections, tables, and figures',
                'example_fix': {
                    'before': 'See above section for details | Refer to below table | shall comply with',
                    'after': 'See paragraph 3.2 for details | Refer to table 2-1 | must comply with'
                }
            },
            'section_symbol_usage_check': {
                'title': 'Section Symbol (Β§) Format Issues',
                'description': 'Incorrect formatting of section symbols in references.',
                'solution': 'Format section symbols correctly and never start sentences with them',
                'example_fix': {
                    'before': 'Β§ 25.25 states | 14 CFR Β§ 21.21',
                    'after': 'Section 25.25 states | 14 CFR 21.21'
                }
            },
            'caption_check_table': {
                'title': 'Table Caption Format Issues',
                'description': 'Table captions do not follow the required format.',
                'solution': 'Use consistent table numbering format',
                'example_fix': {
                    'before': 'Table 5. | Table A | Tables',
                    'after': 'Table 5-1. | Table 1-1 | Table 2-1'
                }
            },
            'caption_check_figure': {
                'title': 'Figure Caption Format Issues',
                'description': 'Figure captions do not follow the required format.',
                'solution': 'Use consistent figure numbering format',
                'example_fix': {
                    'before': 'Figure 5. | Figure A | Figures',
                    'after': 'Figure 5-1. | Figure 1-1 | Figure 2-1'
                }
            },
            'document_title_check': {
                'title': 'Document Title Format Issues',
                'description': 'Document titles are not properly formatted.',
                'solution': 'Format titles according to document type requirements',
                'example_fix': {
                    'before': '"AC 20-114" | "Advisory Circular"',
                    'after': 'AC 20-114 | Advisory Circular'
                }
            },
            'double_period_check': {
                'title': 'Multiple Period Issues',
                'description': 'Sentences ending with multiple periods.',
                'solution': 'Use single period to end sentences',
                'example_fix': {
                    'before': 'The following ACs are related to the guidance in this document..',
                    'after': 'The following ACs are related to the guidance in this document.'
                }
            },
            'spacing_check': {
                'title': 'Spacing Issues',
                'description': 'Incorrect spacing in text.',
                'solution': 'Maintain consistent spacing throughout the document',
                'example_fix': {
                    'before': 'AC25.25 | The following ACs (double spaces)',
                    'after': 'AC 25.25 | The following ACs (single space)'
                }
            },
            'date_formats_check': {
                'title': 'Date Format Issues',
                'description': 'Dates not in the required format.',
                'solution': 'Use the format "Month Day, Year"',
                'example_fix': {
                    'before': '01/15/2024 | 2024-01-15',
                    'after': 'January 15, 2024'
                }
            },
            'placeholders_check': {
                'title': 'Placeholder Content',
                'description': 'Placeholder text remains in the document.',
                'solution': 'Replace all placeholder content with actual content',
                'example_fix': {
                    'before': 'TBD | To be determined | [Insert text]',
                    'after': 'Actual content specific to the context'
                }
            }
        }

    @staticmethod
    def _format_issue(issue) -> str:
        """Return the bullet text for one finding.

        Dict findings are rendered by their ``sentence`` or
        ``missing_headings`` key when present, otherwise as ``key: value``
        pairs. Any other value (for example a plain string) is rendered via
        ``str()`` rather than being left out of the report.
        """
        if not isinstance(issue, dict):
            return f" β€’ {issue}"

        if 'sentence' in issue:
            # Long sentences are wrapped; continuation lines are indented
            # rather than bulleted.
            return textwrap.fill(
                issue['sentence'],
                width=76,
                initial_indent=' β€’ ',
                subsequent_indent=' '
            )

        if 'missing_headings' in issue:
            missing = ', '.join(issue['missing_headings'])
            return f" β€’ Missing required headings: {missing}"

        issue_str = ', '.join(f"{k}: {v}" for k, v in issue.items())
        return f" β€’ {issue_str}"

    def format_results(self, results: Dict[str, Any], doc_type: str) -> str:
        """Format check results into a plain-text report for display.

        Args:
            results: Mapping of check name to a result object exposing
                ``success`` and ``issues``.
            doc_type: Accepted for interface compatibility; not used when
                building the report.

        Returns:
            The report as a single newline-joined string. When every result
            succeeded, only the banner and a success line are returned.
        """
        output = [
            "================================================================================",
            "Document Check Results Summary",
            "================================================================================\n"
        ]

        # Count issues
        total_issues = sum(1 for r in results.values() if not r.success)

        if total_issues == 0:
            output.append("βœ… All checks passed successfully!\n")
            return "\n".join(output)

        output.append(f"Found {total_issues} categories of issues that need attention:\n")

        for check_name, result in results.items():
            # NOTE: a failing check without a catalogue entry is included in
            # the count above but produces no section of its own.
            if result.success or check_name not in self.issue_categories:
                continue

            category = self.issue_categories[check_name]

            # Section header
            output.append(f"β–  {category['title']}")
            output.append(f" {category['description']}")
            output.append(f" How to fix: {category['solution']}\n")

            # Before/after example
            output.append(" Example Fix:")
            output.append(f" ❌ Incorrect: {category['example_fix']['before']}")
            output.append(f" βœ“ Correct : {category['example_fix']['after']}\n")

            # Actual findings, capped so one noisy check cannot flood the report
            output.append(" Issues found in your document:")
            issue_count = len(result.issues)
            for issue in result.issues[:self.MAX_ISSUES_SHOWN]:
                output.append(self._format_issue(issue))

            if issue_count > self.MAX_ISSUES_SHOWN:
                hidden = issue_count - self.MAX_ISSUES_SHOWN
                output.append(f"\n ... and {hidden} more similar issues.")

            output.append("")  # Add spacing between categories

        return "\n".join(output)

    def save_report(self, results: Dict[str, Any], filepath: str, doc_type: str):
        """Write the formatted report to ``filepath`` as UTF-8 text.

        Saving is best-effort: any failure is reported on stdout and not
        raised to the caller.
        """
        try:
            # Build the text before opening the file, so a formatting failure
            # does not leave a truncated, empty report on disk.
            report = self.format_results(results, doc_type)

            # Strip color codes
            for color in [Fore.CYAN, Fore.GREEN, Fore.YELLOW, Fore.RED, Style.RESET_ALL]:
                report = report.replace(str(color), '')

            # Convert markdown-style italics to alternative formatting for plain text
            report = report.replace('*', '_')

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(report)
        except Exception as e:
            print(f"Error saving report: {e}")
+
1162
  def create_interface():
1163
  """Create and configure the Gradio interface."""
1164
 
 
1177
  ]
1178
 
1179
  template_types = ["Short AC template AC", "Long AC template AC"]
1180
+
1181
def format_results_as_html(text_results):
    """Convert the text results into styled HTML.

    The input is split on the section marker; the text before the first
    marker becomes the summary card and every following section becomes an
    issue card (title, description, "How to fix" box and, when the section
    contains an ``Example Fix:`` part, the example and finding lines).

    All text taken from ``text_results`` is HTML-escaped before it is
    inserted, because it may contain sentences copied from the checked
    document.

    Args:
        text_results: Plain-text report, or an empty/None value.

    Returns:
        An HTML fragment; a placeholder message when there is no input.
    """
    import html  # function-scope import: only this converter needs it

    if not text_results:
        return """
        <div class="p-4 text-gray-600">
            Results will appear here after processing...
        </div>
        """

    esc = html.escape
    tick = 'βœ“'
    bullet = 'β€’'

    # Split into sections
    sections = text_results.split('β– ')
    header = sections[0]
    issues = sections[1:]

    # Format header
    header_class = 'text-green-600' if 'All checks passed' in header else 'text-amber-600'
    header_html = f"""
    <div class="max-w-4xl mx-auto p-4 bg-white rounded-lg shadow-sm mb-6">
        <h1 class="text-2xl font-bold text-gray-800 mb-4">Document Check Results Summary</h1>
        <div class="text-lg {header_class}">
            {esc(header.strip())}
        </div>
    </div>
    """

    # Collected in a list and joined once instead of growing a string.
    chunks = []
    for section in issues:
        if not section.strip():
            continue

        lines = section.strip().split('\n')
        title = lines[0]
        content = '\n'.join(lines[1:])

        # Everything before 'Example Fix:' is description + fix advice.
        parts = content.split('Example Fix:')
        summary = parts[0]
        examples = parts[1] if len(parts) > 1 else ""

        # Separate the advice from the description so it is shown once, in
        # the highlighted box, rather than in both places.
        description, _, how_to_fix = summary.partition('How to fix:')

        chunks.append(f"""
        <div class="bg-white rounded-lg shadow-sm mb-6 overflow-hidden">
            <div class="bg-gray-50 px-6 py-4 border-b">
                <h2 class="text-lg font-semibold text-gray-800">{esc(title.strip())}</h2>
            </div>

            <div class="px-6 py-4">
                <div class="text-gray-600 mb-4">
                    {esc(description.strip())}
                </div>

                <div class="bg-green-50 rounded p-4 mb-4">
                    <div class="text-green-800">
                        <span class="font-medium">How to fix:</span>
                        {esc(how_to_fix.strip())}
                    </div>
                </div>
        """)

        if examples:
            # First pass: classify lines. A non-empty line that directly
            # follows a bullet and matches no marker is the wrapped
            # continuation of that bullet, so it is appended to it.
            entries = []
            bullet_open = False
            for raw_line in examples.strip().split('\n'):
                line = raw_line.strip()
                if line.startswith('❌'):
                    entries.append(['bad', line.replace('❌ Incorrect:', '').strip()])
                    bullet_open = False
                elif line.startswith(tick):
                    entries.append(['good', line.replace('βœ“ Correct :', '').strip()])
                    bullet_open = False
                elif line.startswith(bullet):
                    # Drop only the leading marker; bullets inside the text stay.
                    entries.append(['bullet', line[len(bullet):].strip()])
                    bullet_open = True
                elif 'more similar issues' in line:
                    entries.append(['more', line])
                    bullet_open = False
                elif line and bullet_open:
                    entries[-1][1] += ' ' + line
                else:
                    # Blank lines and unrecognised labels are not rendered.
                    bullet_open = False

            chunks.append("""
                <div class="mb-4">
                    <h3 class="font-medium text-gray-800 mb-2">Examples:</h3>
                    <div class="space-y-2 ml-4">
            """)

            # Second pass: render each classified entry.
            for kind, text in entries:
                safe = esc(text)
                if kind == 'bad':
                    chunks.append(f"""
                        <div class="text-red-600">
                            <span class="inline-block w-4">❌</span>
                            {safe}
                        </div>
                    """)
                elif kind == 'good':
                    chunks.append(f"""
                        <div class="text-green-600">
                            <span class="inline-block w-4">βœ“</span>
                            {safe}
                        </div>
                    """)
                elif kind == 'bullet':
                    chunks.append(f"""
                        <div class="text-gray-600 ml-4">
                            β€’ {safe}
                        </div>
                    """)
                else:
                    chunks.append(f"""
                        <div class="text-gray-500 italic mt-2">
                            {safe}
                        </div>
                    """)

            chunks.append("""
                    </div>
                </div>
            """)

        chunks.append("""
            </div>
        </div>
        """)

    issues_html = "".join(chunks)

    # Utility classes used by the markup above, defined inline.
    styles = """
        <style>
            .text-2xl { font-size: 1.5rem; }
            .text-lg { font-size: 1.125rem; }
            .font-bold { font-weight: 700; }
            .font-semibold { font-weight: 600; }
            .font-medium { font-weight: 500; }
            .text-gray-800 { color: #1f2937; }
            .text-gray-600 { color: #4b5563; }
            .text-gray-500 { color: #6b7280; }
            .text-green-600 { color: #059669; }
            .text-green-800 { color: #065f46; }
            .text-red-600 { color: #dc2626; }
            .text-amber-600 { color: #d97706; }
            .bg-white { background-color: #ffffff; }
            .bg-gray-50 { background-color: #f9fafb; }
            .bg-green-50 { background-color: #ecfdf5; }
            .rounded-lg { border-radius: 0.5rem; }
            .shadow-sm { box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05); }
            .mb-6 { margin-bottom: 1.5rem; }
            .mb-4 { margin-bottom: 1rem; }
            .mb-2 { margin-bottom: 0.5rem; }
            .ml-4 { margin-left: 1rem; }
            .mt-2 { margin-top: 0.5rem; }
            .p-4 { padding: 1rem; }
            .px-6 { padding-left: 1.5rem; padding-right: 1.5rem; }
            .py-4 { padding-top: 1rem; padding-bottom: 1rem; }
            .space-y-2 > * + * { margin-top: 0.5rem; }
            .italic { font-style: italic; }
            .border-b { border-bottom: 1px solid #e5e7eb; }
            .overflow-hidden { overflow: hidden; }
        </style>
    """

    # Combine all HTML
    full_html = f"""
    <div class="mx-auto p-4" style="font-family: system-ui, -apple-system, sans-serif;">
        {styles}
        {header_html}
        {issues_html}
    </div>
    """

    return full_html
1326
+
1327
  with gr.Blocks() as demo:
1328
  gr.Markdown(
1329
  """
 
1369
  )
1370
 
1371
  with gr.Column(scale=2):
1372
+ results = gr.HTML()
1373
+
1374
def process_and_format(file_obj, doc_type, template_type):
    """Process document and format results as HTML.

    Runs every check on the uploaded file, renders the findings with
    ``DocumentCheckResultsFormatter`` and converts that text to HTML with
    ``format_results_as_html``.

    Args:
        file_obj: The uploaded document; raw ``bytes`` are wrapped in a
            ``BytesIO`` first.
        doc_type: Document type forwarded to the checker and the formatter.
        template_type: Template variant forwarded to the checker.

    Returns:
        The results as an HTML fragment, or an HTML error box if anything
        raised along the way.
    """
    import html  # function-scope import: only the error path needs it

    try:
        checker = FAADocumentChecker()
        if isinstance(file_obj, bytes):
            file_obj = io.BytesIO(file_obj)

        # Deliberately not named `results`, which is the output component in
        # the enclosing scope.
        check_results = checker.run_all_checks(file_obj, doc_type, template_type)

        # Format results using DocumentCheckResultsFormatter
        formatter = DocumentCheckResultsFormatter()
        text_results = formatter.format_results(check_results, doc_type)

        # Convert to HTML
        return format_results_as_html(text_results)

    except Exception as e:
        # UI boundary: log with traceback through the logging system and
        # return something displayable instead of raising.
        logging.exception(f"Error processing document: {str(e)}")

        # The exception text can contain file names or document content, so
        # it is escaped before being placed in markup.
        return f"""
        <div style="color: red; padding: 1rem;">
            ❌ Error processing document: {html.escape(str(e))}
            <br><br>
            Please ensure the file is a valid .docx document and try again.
        </div>
        """
1400
 
1401
  # Update template type visibility based on document type
1402
  def update_template_visibility(doc_type):
 
1410
 
1411
  # Handle document processing
1412
  submit_btn.click(
1413
+ fn=process_and_format,
1414
  inputs=[file_input, doc_type, template_type],
1415
  outputs=[results]
1416
  )