Hoctar77 committed on
Commit
dc51583
·
verified ·
1 Parent(s): 15d6396

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +987 -579
app.py CHANGED
@@ -1,609 +1,1018 @@
1
  import gradio as gr
2
  import logging
3
  import re
 
 
 
 
 
4
  from docx import Document
5
  import io
 
6
  import traceback
7
 
8
def heading_title_check(paragraphs, required_headings):
    """Report whether every required heading appears as its own paragraph.

    A paragraph counts as a heading only when, after stripping surrounding
    whitespace, it is exactly equal to one of ``required_headings``.

    Args:
        paragraphs: Iterable of paragraph strings.
        required_headings: Iterable of heading strings that must be present.

    Returns:
        A ``(all_present, found)`` tuple. ``all_present`` is True when each
        required heading was seen at least once; ``found`` lists the matched
        headings in document order (repeats included).
    """
    wanted = set(required_headings)
    stripped = (text.strip() for text in paragraphs)
    found = [candidate for candidate in stripped if candidate in wanted]
    return set(found) == wanted, found
19
-
20
def acronym_check(paragraphs):
    """Find all-caps acronyms that are used before being defined.

    An acronym is treated as defined once the text ``word (ACRONYM)`` has
    appeared in the current or an earlier paragraph. Within one paragraph
    the definitions are collected before its usages are examined.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A ``(all_defined, undefined)`` tuple where ``undefined`` is the set
        of acronyms (two or more capital letters) seen without a definition.
    """
    definition_re = re.compile(r'(\b\w+\b) \((\b[A-Z]{2,}\b)\)')
    usage_re = re.compile(r'(\b[A-Z]{2,}\b)')

    known = set()
    unknown = set()
    for text in paragraphs:
        known.update(abbr for _, abbr in definition_re.findall(text))
        unknown.update(
            abbr for abbr in usage_re.findall(text) if abbr not in known
        )

    return not unknown, unknown
37
-
38
def legal_check(paragraphs):
    """Flag legal terminology that does not follow the preferred wording.

    Two kinds of findings are produced for each paragraph, in this order:

    1. Capitalization of "title 14": it must be "Title 14" at the start of
       the paragraph or directly after ``.``, ``!`` or ``?``, and
       "title 14" when it follows other whitespace.
    2. Fixed substitutions (e.g. "USC" -> "U.S.C.", "shall" ->
       "must or will", "&" -> "and").

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A ``(is_valid, findings)`` tuple where ``findings`` is a list of
        ``(text_found, preferred_text)`` pairs.
    """
    substitutions = (
        (re.compile(r"\bUSC\b"), "U.S.C."),
        (re.compile(r"\bCFR Part\b"), "CFR part"),
        # No trailing \b: after the final "." a word boundary only exists
        # when a word character follows, so "C.F.R. part" never matched.
        (re.compile(r"\bC\.F\.R\."), "CFR"),
        (re.compile(r"\bWe\b"), "The FAA"),
        (re.compile(r"\bwe\b"), "the FAA"),
        (re.compile(r"\bcancelled\b"), "canceled"),
        (re.compile(r"\bshall\b"), "must or will"),
        # "&" is not a word character, so \b&\b could only match inside
        # "A&B" and missed the usual "A & B" form.
        (re.compile(r"&"), "and"),
    )
    title_14_re = re.compile(
        r"(?P<prefix>^|[.!?\s])\s*(?P<title>title 14|Title 14)\b"
    )

    findings = []
    for paragraph in paragraphs:
        for match in title_14_re.finditer(paragraph):
            prefix = match.group("prefix")
            title = match.group("title")
            if prefix in (".", "!", "?", ""):
                # Sentence start: must be capitalized.
                if title != "Title 14":
                    findings.append((title, "Title 14"))
            elif prefix.isspace() and title != "title 14":
                # Mid-sentence: must be lower case.
                findings.append((title, "title 14"))

        for pattern, preferred in substitutions:
            findings.extend(
                (hit.group(), preferred) for hit in pattern.finditer(paragraph)
            )

    return not findings, findings
 
 
 
70
 
71
def table_caption_check(paragraphs, doc_type):
    """Collect table captions whose numbering does not fit the document type.

    Any paragraph that (after stripping) starts with "table", ignoring case,
    is treated as a caption. Advisory Circulars and Orders require
    "Table X-Y" numbering; every other document type requires "Table X".
    In both cases the number must be followed by a period or whitespace.

    Args:
        paragraphs: Iterable of paragraph strings.
        doc_type: Document type name.

    Returns:
        A ``(is_valid, bad_captions)`` tuple; ``bad_captions`` holds the
        stripped text of each non-conforming caption.
    """
    if doc_type in ["Advisory Circular", "Order"]:
        regex = r'^Table\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]'
    else:
        regex = r'^Table\s+([A-Z0-9]+)[\.\s]'
    caption_re = re.compile(regex, re.IGNORECASE)

    candidates = (text.strip() for text in paragraphs)
    bad_captions = [
        caption
        for caption in candidates
        if caption.lower().startswith("table") and not caption_re.match(caption)
    ]
    return not bad_captions, bad_captions
 
 
 
86
 
87
def figure_caption_check(paragraphs, doc_type):
    """Collect figure captions whose numbering does not fit the document type.

    Any paragraph that (after stripping) starts with "figure", ignoring
    case, is treated as a caption. Advisory Circulars and Orders require
    "Figure X-Y" numbering; every other document type requires "Figure X".
    In both cases the number must be followed by a period or whitespace.

    Args:
        paragraphs: Iterable of paragraph strings.
        doc_type: Document type name.

    Returns:
        A ``(is_valid, bad_captions)`` tuple; ``bad_captions`` holds the
        stripped text of each non-conforming caption.
    """
    if doc_type in ["Advisory Circular", "Order"]:
        regex = r'^Figure\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]'
    else:
        regex = r'^Figure\s+([A-Z0-9]+)[\.\s]'
    caption_re = re.compile(regex, re.IGNORECASE)

    candidates = (text.strip() for text in paragraphs)
    bad_captions = [
        caption
        for caption in candidates
        if caption.lower().startswith("figure") and not caption_re.match(caption)
    ]
    return not bad_captions, bad_captions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
def table_figure_reference_check(paragraphs, doc_type):
    """Find in-text table/figure references that use the wrong numbering.

    Advisory Circulars and Orders number tables and figures as "X-Y", so a
    bare "Table 3" is flagged there. Every other document type numbers them
    as "X", so a hyphenated "Table 3-1" is flagged. Paragraphs that start
    with "table" or "figure" are captions, not references, and are skipped.

    Args:
        paragraphs: Iterable of paragraph strings.
        doc_type: Document type name.

    Returns:
        A ``(is_valid, bad_references)`` tuple; ``bad_references`` lists the
        full matched reference text (e.g. ``"Table 3"``), tables before
        figures within each paragraph.
    """
    if doc_type in ["Advisory Circular", "Order"]:
        # A plain number that is not the first half of an "X-Y" pair.
        number = r'\d+(?!-\d+)'
    else:
        # Only the hyphenated form is wrong here. The previous optional
        # group also matched the correct "Table 3" form.
        number = r'\d+-\d+'

    # No capturing groups: the whole reference is reported, not a fragment.
    patterns = [
        re.compile(rf'\b{label}\s+{number}\b', re.IGNORECASE)
        for label in ("Table", "Figure")
    ]

    bad_references = []
    for paragraph in paragraphs:
        if paragraph.strip().lower().startswith(("table", "figure")):
            continue
        for pattern in patterns:
            bad_references.extend(
                hit.group(0) for hit in pattern.finditer(paragraph)
            )

    return not bad_references, bad_references
124
 
125
def document_title_check(doc_path, doc_type):
    """Check how referenced AC titles are styled (italics / quotation marks).

    Each paragraph is scanned for text of the form ``AC <number>, <title>``.
    The title's styling is compared with the rule for ``doc_type``: whether
    it must be italicized and whether it must contain quotation marks.

    Args:
        doc_path: Path or file-like object accepted by ``Document``.
        doc_type: Document type name; must be a key of the rule table below.

    Returns:
        A ``(is_valid, incorrect_titles)`` tuple. Each entry of
        ``incorrect_titles`` is a dict with ``'text'`` (the matched
        reference) and ``'issue'`` (comma-separated description).

    Raises:
        ValueError: If ``doc_type`` has no formatting rule. This is checked
            before the document is opened.
    """
    formatting_rules = {
        "Advisory Circular": {"italics": True, "quotes": False},
        "Airworthiness Criteria": {"italics": False, "quotes": True},
        "Deviation Memo": {"italics": False, "quotes": True},
        "Exemption": {"italics": False, "quotes": True},
        "Federal Register Notice": {"italics": False, "quotes": True},
        "Handbook/Manual": {"italics": False, "quotes": False},
        "Order": {"italics": False, "quotes": True},
        "Policy Statement": {"italics": False, "quotes": False},
        "Rule": {"italics": False, "quotes": True},
        "Special Condition": {"italics": False, "quotes": True},
        "Technical Standard Order": {"italics": False, "quotes": True},
        "Other": {"italics": False, "quotes": False}
    }

    # Validate before doing any I/O so a bad type fails fast.
    if doc_type not in formatting_rules:
        raise ValueError(f"Unsupported document type: {doc_type}")
    required_format = formatting_rules[doc_type]

    ac_pattern = re.compile(r'AC\s+\d+(?:-\d+)?(?:,|\s)+(.+?)(?=\.|,|$)')
    # Straight quotes plus the curly double/single quotes, written as
    # escapes so they survive copy/paste and re-encoding.
    quote_chars = ('"', "'", '\u201c', '\u201d', '\u2018', '\u2019')

    incorrect_titles = []
    doc = Document(doc_path)

    for paragraph in doc.paragraphs:
        for match in ac_pattern.finditer(paragraph.text):
            title_text = match.group(1).strip()
            title_start = match.start(1)

            title_in_quotes = any(q in title_text for q in quote_chars)

            # The italic state is taken from the run that contains the
            # first character of the title.
            title_is_italicized = False
            current_pos = 0
            for run in paragraph.runs:
                run_length = len(run.text)
                if current_pos <= title_start < current_pos + run_length:
                    title_is_italicized = run.italic
                    break
                current_pos += run_length

            issue_message = []
            if required_format["italics"] and not title_is_italicized:
                issue_message.append("should be italicized")
            elif not required_format["italics"] and title_is_italicized:
                issue_message.append("should not be italicized")

            if required_format["quotes"] and not title_in_quotes:
                issue_message.append("should be in quotes")
            elif not required_format["quotes"] and title_in_quotes:
                issue_message.append("should not be in quotes")

            if issue_message:
                incorrect_titles.append({
                    'text': match.group(0),
                    'issue': ', '.join(issue_message)
                })

    return not incorrect_titles, incorrect_titles
206
-
207
def get_document_checks(doc_type, template_type):
    """Return the required headings and other checks based on document type.

    For "Advisory Circular" the result is selected by ``template_type``;
    for every other document type ``template_type`` is ignored. Unknown
    document or template types yield an empty dict. The request and the
    result are logged at INFO level.
    """
    def headings(*titles):
        # Every entry has the same one-key shape.
        return {"required_headings": list(titles)}

    pending = "TBD - Need to research"
    catalog = {
        "Advisory Circular": {
            "Short AC template AC": headings(
                "PURPOSE.",
                "APPLICABILITY.",
                "CANCELLATION.",
                "RELATED MATERIAL.",
                "DEFINITION OF KEY TERMS.",
            ),
            "Long AC template AC": headings(
                "Purpose.",
                "Applicability.",
                "Cancellation.",
                "Related Material.",
                "Definition of Key Terms.",
            ),
        },
        "Airworthiness Criteria": headings(pending),
        "Deviation Memo": headings(pending),
        "Exemption": headings(pending),
        "Federal Register Notice": headings(
            "Purpose of This Notice",
            "Audience",
            "Where can I Find This Notice",
        ),
        "Handbook/Manual": headings(pending),
        "Order": headings(
            "Purpose of This Order.",
            "Audience.",
            "Where to Find This Order.",
        ),
        "Policy Statement": headings(
            "SUMMARY",
            "CURRENT REGULATORY AND ADVISORY MATERIAL",
            "RELEVANT PAST PRACTICE",
            "POLICY",
            "EFFECT OF POLICY",
            "CONCLUSION",
        ),
        "Rule": headings(pending),
        "Special Condition": headings(pending),
        "Technical Standard Order": headings(
            "PURPOSE.",
            "APPLICABILITY.",
            "REQUIREMENTS.",
            "MARKING.",
            "APPLICATION DATA REQUIREMENTS.",
            "MANUFACTURER DATA REQUIREMENTS.",
            "FURNISHED DATA REQUIREMENTS.",
            "HOW TO GET REFERENCED DOCUMENTS.",
        ),
        "Other": headings("N/A"),
    }

    log = logging.getLogger(__name__)
    log.info(f"Requested document type: {doc_type}")
    log.info(f"Requested template type: {template_type}")

    selected = catalog.get(doc_type, {})
    if doc_type == "Advisory Circular":
        # Advisory Circulars are nested one level deeper, by template.
        selected = selected.get(template_type, {})

    log.info(f"Retrieved checks: {selected}")
    return selected
315
-
316
def double_period_check(paragraphs):
    """Find sentences that end with two periods.

    Each paragraph is split into sentences at runs of spaces that follow
    ``.``, ``!`` or ``?``; a sentence is flagged when it ends with ``..``.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A ``(is_valid, flagged)`` tuple; ``flagged`` holds the stripped text
        of each offending sentence.
    """
    sentence_break = re.compile(r'(?<=[.!?]) +')
    flagged = []
    for text in paragraphs:
        flagged.extend(
            piece.strip()
            for piece in sentence_break.split(text)
            if piece.endswith('..')
        )
    return not flagged, flagged
326
-
327
def spacing_check(paragraphs):
    """Flag paragraphs with missing or doubled spaces.

    A paragraph is flagged when any of these is found:

    * a document designator run straight into its number ("AC25-1",
      "CFR25", "FAA123", "N8900", "SFAR88");
    * a section symbol run straight into its number ("§25.1309");
    * "Part" run straight into its number ("Part25");
    * an opening parenthesis glued to the preceding text and followed by a
      letter or non-zero digit that is not immediately closed;
    * two or more consecutive whitespace characters.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A ``(is_valid, flagged)`` tuple; ``flagged`` holds the offending
        paragraphs unchanged.
    """
    checks = (
        # \b instead of (?<!\s): the lookbehind rejected the normal case of
        # a designator that follows a space, and let the one-letter "N"
        # alternative match inside ordinary words such as "plan9".
        re.compile(r'\b(?:AC|AD|CFR|FAA|N|SFAR)\d+-?\d*', re.IGNORECASE),
        re.compile(r'§+\d+\.\d+'),
        re.compile(r'Part\d+', re.IGNORECASE),
        re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE),
        re.compile(r'\s{2,}'),
    )

    flagged = [
        paragraph
        for paragraph in paragraphs
        if any(check.search(paragraph) for check in checks)
    ]
    return not flagged, flagged
344
-
345
def check_prohibited_phrases(paragraphs):
    """Find paragraphs that use discouraged phrases.

    The phrases "above", "below", "there is" and "there are" are matched as
    whole words, ignoring case.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A list of ``(phrase, paragraph)`` tuples, one per phrase found in
        each paragraph, with the paragraph text stripped.
    """
    phrases = ("above", "below", "there is", "there are")
    # Keep the readable label next to its pattern. Deriving the label with
    # str.strip(r'\b') strips the *characters* '\' and 'b', which turned
    # "below" into "elow".
    matchers = [
        (phrase, re.compile(rf'\b{phrase}\b', re.IGNORECASE))
        for phrase in phrases
    ]

    issues = []
    for paragraph in paragraphs:
        for phrase, pattern in matchers:
            if pattern.search(paragraph):
                issues.append((phrase, paragraph.strip()))
    return issues
358
-
359
def check_abbreviation_usage(paragraphs):
    """Check for abbreviation consistency after first definition.

    A definition looks like ``Full Term (ABBR)``. The first paragraph that
    contains a registered full term is accepted; every later paragraph that
    still contains it is reported, because the abbreviation should be used
    instead.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A list of ``(full_term, abbreviation, paragraph)`` tuples with the
        paragraph text stripped.
    """
    definition_re = re.compile(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)')
    registry = {}
    findings = []

    for text in paragraphs:
        # Register new definitions; the first definition of an abbreviation wins.
        for long_form, short_form in definition_re.findall(text):
            registry.setdefault(
                short_form,
                {"full_term": long_form.strip(), "defined": True},
            )

        for short_form, entry in registry.items():
            if entry["full_term"] not in text:
                continue
            if entry["defined"]:
                # First sighting of the full term: allow it once.
                entry["defined"] = False
                continue
            findings.append((entry["full_term"], short_form, text.strip()))

    return findings
382
-
383
def check_date_formats(paragraphs):
    """Check for inconsistent date formats.

    Numeric dates written with slashes (e.g. ``1/2/2024``) are reported;
    the preferred form is ``Month D, YYYY``.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A list of ``(date_text, paragraph)`` tuples with the paragraph text
        stripped.
    """
    long_form_re = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
    numeric_re = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')  # MM/DD/YYYY

    date_issues = []
    for text in paragraphs:
        for found in numeric_re.findall(text):
            # A slash date starts with a digit, so it never matches the
            # month-name form; the test is kept as written.
            if not long_form_re.match(found):
                date_issues.append((found, text.strip()))
    return date_issues
395
-
396
def check_placeholders(paragraphs):
    """Check for placeholders that should be removed.

    Looks for "TBD", "To be determined" and "To be added" as whole words,
    ignoring case.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A list of ``(placeholder, paragraph)`` tuples with the paragraph
        text stripped.
    """
    labelled_patterns = (
        ("TBD", r'\bTBD\b'),
        ("To be determined", r'\bTo be determined\b'),
        ("To be added", r'\bTo be added\b'),
    )

    hits = []
    for text in paragraphs:
        trimmed = text.strip()
        for label, regex in labelled_patterns:
            if re.search(regex, text, re.IGNORECASE):
                hits.append((label, trimmed))
    return hits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
def process_document(file_obj, doc_type, template_type):
    """Run every document check and return the formatted report.

    The document is opened with ``Document``, its paragraph texts are
    extracted, and each check is run in turn. The title-style check is run
    only for "Advisory Circular" and "Order" documents and is otherwise
    treated as passing.

    Args:
        file_obj: Path or file-like object accepted by ``Document``.
        doc_type: Document type name.
        template_type: Template name passed to ``get_document_checks``.

    Returns:
        The report text produced by ``format_results_for_gradio``, or an
        error message string if any step raises.
    """
    try:
        document = Document(file_obj)
        paragraphs = [para.text for para in document.paragraphs]
        checks = get_document_checks(doc_type, template_type)
        required_headings = checks.get("required_headings", [])

        report = {"required_headings": required_headings, "doc_type": doc_type}

        report["heading_valid"], report["headings_found"] = heading_title_check(paragraphs, required_headings)
        report["acronyms_valid"], report["undefined_acronyms"] = acronym_check(paragraphs)
        report["legal_valid"], report["incorrect_legal_references"] = legal_check(paragraphs)
        report["table_valid"], report["incorrect_captions"] = table_caption_check(paragraphs, doc_type)
        report["figure_valid"], report["incorrect_fig_captions"] = figure_caption_check(paragraphs, doc_type)
        report["references_valid"], report["incorrect_table_figure_references"] = table_figure_reference_check(paragraphs, doc_type)

        if doc_type in ["Advisory Circular", "Order"]:
            title_outcome = document_title_check(file_obj, doc_type)
        else:
            title_outcome = (True, [])
        report["title_style_valid"], report["incorrect_titles"] = title_outcome

        report["double_period_valid"], report["incorrect_sentences"] = double_period_check(paragraphs)
        report["spacing_valid"], report["incorrect_spacing"] = spacing_check(paragraphs)
        report["date_issues"] = check_date_formats(paragraphs)
        report["placeholder_issues"] = check_placeholders(paragraphs)

        results = format_results_for_gradio(**report)
        return results
    except Exception as e:
        print(f"Error in process_document: {str(e)}")
        return f"An error occurred while processing the document: {str(e)}"
448
 
449
def format_results_for_gradio(**kwargs):
    """Format the results for display in Gradio.

    Builds a Markdown report with one section per check. Each section
    shows a pass line, or a failure header followed by one bullet per
    finding. The keyword arguments are the validity flags and finding
    lists produced by the individual checks; finding lists are read only
    for sections that failed.

    Returns:
        The report as a single newline-joined string.
    """
    lines = ["# Document Check Results\n"]

    def section(title, passed, ok_line, fail_line, bullets, trailing_blank=True):
        # ``bullets`` is a callable so findings are read only on failure.
        lines.append(title)
        if passed:
            lines.append(ok_line)
            return
        lines.append(fail_line)
        lines.extend(bullets())
        if trailing_blank:
            lines.append("")

    def dashed(key):
        return lambda: [f"- {entry}" for entry in kwargs[key]]

    def missing_headings():
        absent = set(kwargs['required_headings']) - set(kwargs['headings_found'])
        return [f"- {heading}" for heading in absent]

    def legal_bullets():
        return [
            f"- Use '{correct_term}' instead of '{incorrect_term}'"
            for incorrect_term, correct_term in kwargs['incorrect_legal_references']
        ]

    def title_bullets():
        out = []
        for title in kwargs['incorrect_titles']:
            out.append(f"- {title['text']}")
            out.append(f" - Issue: {title['issue']}")

        # Formatting guidance for the selected document type.
        formatting_notes = {
            "Advisory Circular": "Document titles should be italicized, not in quotation marks.",
            "Order": "Document titles should be in quotation marks, not italicized.",
            "Federal Register Notice": "Document titles should be in quotation marks, not italicized.",
            "Policy Statement": "Document titles should not have any special formatting (no italics, no quotation marks)."
        }
        doc_type = kwargs.get('doc_type', 'Unknown')
        if doc_type in formatting_notes:
            out.append(f"\nNote: {formatting_notes[doc_type]}")
        else:
            out.append("\nNote: Please verify the correct formatting style for this document type.")
        return out

    def date_bullets():
        return [
            f"- Incorrect date format '{date}' in: {paragraph}"
            for date, paragraph in kwargs['date_issues']
        ]

    def placeholder_bullets():
        return [
            f"- Placeholder '{phrase}' in: {paragraph}"
            for phrase, paragraph in kwargs['placeholder_issues']
        ]

    section("## Required Headings Check", kwargs['heading_valid'],
            "✅ All required headings are present.\n",
            "❌ Missing Required Headings:", missing_headings)
    section("## Acronym Check", kwargs['acronyms_valid'],
            "✅ All acronyms are properly defined.\n",
            "❌ The following acronyms need to be defined at first use:",
            dashed('undefined_acronyms'))
    section("## Legal Terminology Check", kwargs['legal_valid'],
            "✅ All legal references are properly formatted.\n",
            "❌ Incorrect Legal Terminology:", legal_bullets)
    section("## Table Caption Check", kwargs['table_valid'],
            "✅ All table captions are correctly formatted.\n",
            "❌ Incorrect Table Captions:", dashed('incorrect_captions'))
    section("## Figure Caption Check", kwargs['figure_valid'],
            "✅ All figure captions are correctly formatted.\n",
            "❌ Incorrect Figure Captions:", dashed('incorrect_fig_captions'))
    section("## Table and Figure References Check", kwargs['references_valid'],
            "✅ All table and figure references are correctly formatted.\n",
            "❌ Incorrect Table/Figure References:",
            dashed('incorrect_table_figure_references'))
    section("## Document Title Style Check", kwargs['title_style_valid'],
            "✅ All document title references are properly styled.\n",
            "❌ Incorrect Document Title Styling:", title_bullets)
    section("## Double Period Check", kwargs['double_period_valid'],
            "✅ No double periods found.\n",
            "❌ Sentences found with double periods:",
            dashed('incorrect_sentences'))
    section("## Spacing Check", kwargs['spacing_valid'],
            "✅ All spacing is correct.\n",
            "❌ Incorrect spacing found in:", dashed('incorrect_spacing'))
    section("## Date Format Consistency", not kwargs['date_issues'],
            "✅ All dates are in the correct format.\n",
            "❌ Date Format Issues:", date_bullets)
    # The last section has no trailing blank line.
    section("## Placeholder Check", not kwargs['placeholder_issues'],
            "✅ No future references or placeholders found.\n",
            "❌ Placeholders Found:", placeholder_bullets,
            trailing_blank=False)

    return "\n".join(lines)
580
 
581
def process_file(file_obj, doc_type, template_type):
    """Process the uploaded file and return results with error handling.

    Args:
        file_obj: ``None``, raw ``bytes``, or an object with a ``read()``
            method that returns the document bytes.
        doc_type: Document type name passed to ``process_document``.
        template_type: Template name passed to ``process_document``.

    Returns:
        The report from ``process_document``, a prompt to upload a file
        when ``file_obj`` is ``None``, or a multi-line error message if
        reading or processing raises.
    """
    if file_obj is None:
        return "Please upload a document first."

    try:
        # Normalize the upload to an in-memory stream.
        if isinstance(file_obj, bytes):
            raw = file_obj
        else:
            raw = file_obj.read()
        doc_bytes = io.BytesIO(raw)

        return process_document(doc_bytes, doc_type, template_type)

    except Exception as e:
        error_message = f"""An error occurred while processing the document:

Error: {str(e)}

Please ensure:
1. The file is a valid Word document (.docx)
2. The file is not corrupted
3. The file is not password protected

Technical details: {str(e)}"""
        print(f"Error processing file: {str(e)}")
        return error_message
607
 
608
  # Create the Gradio interface
609
  demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')
@@ -619,7 +1028,7 @@ with demo:
619
 
620
  document_types = [
621
  "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
622
- "Federal Register Notice", "Handbook/Manual", "Order", "Policy Statement",
623
  "Rule", "Special Condition", "Technical Standard Order", "Other"
624
  ]
625
 
@@ -651,7 +1060,6 @@ with demo:
651
  value="Results will appear here after processing..."
652
  )
653
 
654
- # Update template type visibility based on document type
655
  def update_template_visibility(doc_type):
656
  return gr.update(visible=doc_type == "Advisory Circular")
657
 
@@ -661,12 +1069,12 @@ with demo:
661
  outputs=[template_type]
662
  )
663
 
664
- # Process file when submit button is clicked
665
  submit_btn.click(
666
- fn=process_file,
667
  inputs=[file_input, doc_type, template_type],
668
  outputs=[output]
669
  )
670
 
671
  # Launch the demo
672
- demo.launch()
 
 
1
  import gradio as gr
2
  import logging
3
  import re
4
+ import json
5
+ import time
6
+ from typing import Dict, List, Any, Tuple, Optional
7
+ from dataclasses import dataclass
8
+ from functools import wraps
9
  from docx import Document
10
  import io
11
+ import os
12
  import traceback
13
 
14
@dataclass
class DocumentCheckResult:
    """Outcome of one document check.

    Instances compare by value because the class is a dataclass.
    """

    # True when the check passed, False otherwise.
    success: bool
    # One dictionary per finding; the keys depend on the check that made it.
    issues: List[Dict[str, Any]]
    # Optional extra context about the check; omitted by most checks.
    details: Optional[Dict[str, Any]] = None
20
+
21
def profile_performance(func):
    """Decorator that logs how long each call to ``func`` takes.

    The message is emitted at INFO level on the ``logger`` attribute of the
    first positional argument (normally ``self``) when that attribute exists
    and is not None; otherwise the module logger is used. This also makes the
    decorator safe on callables invoked without positional arguments.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same signature and return value as ``func``.
        Exceptions raised by ``func`` propagate unchanged (no timing is
        logged for a failed call).
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot go negative
        # when the system clock is adjusted mid-call.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time

        # Prefer the instance logger; fall back when there is no first
        # argument or it carries no usable logger.
        instance_logger = getattr(args[0], 'logger', None) if args else None
        logger = instance_logger if instance_logger is not None else logging.getLogger(__name__)
        logger.info("Performance: %s took %.4f seconds", func.__name__, elapsed)
        return result
    return wrapper
35
+
36
class DocumentCheckerConfig:
    """Configuration management for document checks.

    Holds the merged configuration dictionary in ``config`` and a configured
    logger in ``logger``.
    """

    # Marker attribute set on the console handler this class installs, so a
    # later instance can find and reuse it instead of adding a duplicate.
    _HANDLER_MARKER = '_document_checker_handler'

    def __init__(self, config_path: Optional[str] = None):
        """Initialize configuration with optional config file.

        Args:
            config_path: Path to a JSON file whose values override the
                built-in defaults. Ignored when None or when the path does
                not exist.
        """
        self.config = self._load_config(config_path)
        self.logger = self._setup_logger()

    def _load_config(self, config_path: Optional[str] = None) -> Dict[str, Any]:
        """Load configuration from JSON file or use default settings.

        Args:
            config_path: Optional path to a JSON configuration file.

        Returns:
            The default configuration, with the file's values merged in when
            the file exists and parses to a JSON object. Read or parse
            failures are logged as warnings and the defaults are returned.
        """
        default_config = {
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            },
            "checks": {
                "acronyms": True,
                "terminology_check": True,
                "headings": True
            },
            "document_types": {
                "Advisory Circular": {
                    "required_headings": [
                        "Purpose.",
                        "Applicability.",
                        "Cancellation.",
                        "Related Material.",
                        "Definition of Key Terms."
                    ],
                    "skip_title_check": False
                },
                "Federal Register Notice": {
                    "required_headings": [
                        "Purpose of This Notice",
                        "Audience",
                        "Where can I Find This Notice"
                    ],
                    "skip_title_check": False
                },
                "Order": {
                    "required_headings": [
                        "Purpose of This Order.",
                        "Audience.",
                        "Where to Find This Order."
                    ],
                    "skip_title_check": False
                },
                "Policy Statement": {
                    "required_headings": [
                        "SUMMARY",
                        "CURRENT REGULATORY AND ADVISORY MATERIAL",
                        "RELEVANT PAST PRACTICE",
                        "POLICY",
                        "EFFECT OF POLICY",
                        "CONCLUSION"
                    ],
                    "skip_title_check": False
                },
                "Technical Standard Order": {
                    "required_headings": [
                        "PURPOSE.",
                        "APPLICABILITY.",
                        "REQUIREMENTS.",
                        "MARKING.",
                        "APPLICATION DATA REQUIREMENTS.",
                        "MANUFACTURER DATA REQUIREMENTS.",
                        "FURNISHED DATA REQUIREMENTS.",
                        "HOW TO GET REFERENCED DOCUMENTS."
                    ],
                    "skip_title_check": False
                },
                "Other": {
                    "required_headings": [],
                    "skip_title_check": True
                }
            }
        }

        if config_path and os.path.exists(config_path):
            try:
                # JSON text is UTF-8; do not depend on the platform default.
                with open(config_path, 'r', encoding='utf-8') as f:
                    user_config = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
                logging.warning(f"Error loading config: {e}. Using default config.")
            else:
                if isinstance(user_config, dict):
                    self._deep_merge(default_config, user_config)
                else:
                    # A list or scalar at the root cannot be merged.
                    logging.warning(
                        "Error loading config: top-level JSON value is not an object. "
                        "Using default config."
                    )

        return default_config

    def _deep_merge(self, base: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge two dictionaries.

        Nested dictionaries present on both sides are merged key by key; any
        other value in ``update`` replaces the one in ``base``.

        Args:
            base: Dictionary that is modified in place.
            update: Dictionary whose values take precedence.

        Returns:
            ``base``, after merging.
        """
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                self._deep_merge(base[key], value)
            else:
                base[key] = value
        return base

    def _setup_logger(self) -> logging.Logger:
        """Set up and configure logging based on configuration.

        The module logger is shared by every instance, so the console handler
        is installed once and reconfigured on later calls; adding a new
        handler per instance would print each record several times.

        Returns:
            The configured module logger.
        """
        logger = logging.getLogger(__name__)

        level_name = str(self.config['logging']['level']).upper()
        log_level = getattr(logging, level_name, None)
        if not isinstance(log_level, int):
            # Unknown level name in the configuration: keep running at INFO.
            logging.warning(f"Unknown logging level {level_name!r}. Using INFO.")
            log_level = logging.INFO

        formatter = logging.Formatter(self.config['logging']['format'])

        console_handler = next(
            (h for h in logger.handlers if getattr(h, self._HANDLER_MARKER, False)),
            None
        )
        if console_handler is None:
            console_handler = logging.StreamHandler()
            setattr(console_handler, self._HANDLER_MARKER, True)
            logger.addHandler(console_handler)

        console_handler.setFormatter(formatter)
        console_handler.setLevel(log_level)
        logger.setLevel(log_level)
        return logger
143
+
144
class DocumentChecker:
    """Base class for document checking.

    Owns a ``DocumentCheckerConfig`` (``config_manager``) and exposes its
    logger as ``logger``.
    """

    def __init__(self, config_path: Optional[str] = None):
        """Build the configuration manager and borrow its logger.

        Args:
            config_path: Optional path handed to ``DocumentCheckerConfig``.
        """
        manager = DocumentCheckerConfig(config_path)
        self.config_manager = manager
        self.logger = manager.logger

    @staticmethod
    def validate_input(doc: List[str]) -> bool:
        """Return True only when ``doc`` is a non-empty list."""
        if not isinstance(doc, list):
            # Covers None as well as any other non-list value.
            return False
        return len(doc) > 0

    @classmethod
    def extract_paragraphs(cls, doc_path: str) -> List[str]:
        """Return the text of every non-blank paragraph in a document.

        Args:
            doc_path: Value passed straight to ``Document``.

        Returns:
            Paragraph texts in document order, skipping paragraphs that are
            empty or whitespace-only. Any failure is logged and yields an
            empty list.
        """
        try:
            document = Document(doc_path)
            texts = []
            for paragraph in document.paragraphs:
                if paragraph.text.strip():
                    texts.append(paragraph.text)
            return texts
        except Exception as e:
            logging.error(f"Error extracting paragraphs: {e}")
            return []
164
+
165
+ class FAADocumentChecker(DocumentChecker):
166
def __init__(self, config_path: Optional[str] = None):
    """Initialize the checker by delegating to the base-class constructor.

    Args:
        config_path: Optional configuration path forwarded unchanged.
    """
    super().__init__(config_path)
168
+
169
@profile_performance  # Use the decorator directly
def heading_title_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
    """Verify that every heading required for ``doc_type`` appears in ``doc``.

    A paragraph counts as a heading when, after stripping surrounding
    whitespace, it equals one of the configured ``required_headings``.

    Args:
        doc: Document paragraphs.
        doc_type: Key into the ``document_types`` configuration section.

    Returns:
        A result that succeeds when the set of found headings equals the set
        of required ones. On failure ``issues`` holds a single
        ``missing_headings`` entry; ``details`` always lists the found and
        required headings.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Unknown document types fall back to an empty requirement list.
    type_settings = self.config_manager.config['document_types'].get(doc_type, {})
    required_headings = type_settings.get('required_headings', [])
    required = set(required_headings)

    # Keep every occurrence, in document order, of a required heading.
    headings_found = [
        text for text in (para.strip() for para in doc) if text in required
    ]

    found = set(headings_found)
    all_headings_present = found == required

    issues = []
    if not all_headings_present:
        issues.append({'missing_headings': list(required - found)})

    return DocumentCheckResult(
        success=all_headings_present,
        issues=issues,
        details={
            'found_headings': headings_found,
            'required_headings': required_headings
        }
    )
208
 
209
@profile_performance
def heading_title_period_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
    """
    Check if headings end with periods according to document type requirements.

    A paragraph is treated as a heading when it equals one of the configured
    ``required_headings`` for ``doc_type`` once trailing periods are ignored
    on both sides. Matching on the exact configured text would only ever see
    headings that already carry the expected punctuation, so a missing or
    stray period could never be reported.

    Args:
        doc (List[str]): List of document paragraphs
        doc_type (str): Type of document being checked

    Returns:
        DocumentCheckResult: Result of the heading period check. ``issues``
        holds one entry per heading with the wrong punctuation; ``details``
        records the document type, whether periods are required, and every
        heading that was examined.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Define document types requiring periods in headings
    period_required = {
        "Advisory Circular": True,
        "Airworthiness Criteria": False,
        "Deviation Memo": False,
        "Exemption": False,
        "Federal Register Notice": False,
        "Order": True,
        "Policy Statement": False,
        "Rule": False,
        "Special Condition": False,
        "Technical Standard Order": True,
        "Other": False
    }

    # Unknown document types are treated as not requiring periods.
    should_have_period = period_required.get(doc_type, False)

    # Get the headings configuration for this document type
    checks = self.config_manager.config['document_types'].get(doc_type, {})
    required_headings = checks.get('required_headings', [])

    # Compare headings with their trailing periods removed so that the
    # punctuation being checked does not decide whether a heading is found.
    required_bases = {heading.strip().rstrip('.') for heading in required_headings}
    required_bases.discard('')  # a paragraph of only periods is not a heading

    issues = []
    checked_headings = []

    for para in doc:
        para_strip = para.strip()
        if para_strip.rstrip('.') not in required_bases:
            continue

        ends_with_period = para_strip.endswith('.')
        checked_headings.append({
            'heading': para_strip,
            'has_period': ends_with_period,
            'needs_period': should_have_period
        })

        if should_have_period and not ends_with_period:
            issues.append({
                'heading': para_strip,
                'issue': 'missing_period',
                'message': f"Heading should end with a period: '{para_strip}'"
            })
        elif not should_have_period and ends_with_period:
            issues.append({
                'heading': para_strip,
                'issue': 'unexpected_period',
                'message': f"Heading should not end with a period: '{para_strip}'"
            })

    return DocumentCheckResult(
        success=len(issues) == 0,
        issues=issues,
        details={
            'document_type': doc_type,
            'periods_required': should_have_period,
            'checked_headings': checked_headings
        }
    )
296
+
297
@profile_performance
def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
    """Check if acronyms are defined at their first use, only flagging the first instance of undefined acronyms.

    An acronym is any run of two or more capital letters bounded by word
    boundaries. It counts as defined once it appears in parentheses after
    some text, e.g. ``Federal Aviation Administration (FAA)``. Definitions
    are collected for a whole paragraph before its usages are examined, and
    a definition found later removes an earlier finding for that acronym.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` contain one ``{'acronym', 'sentence'}``
        entry per acronym that is never defined, using the sentence of its
        first occurrence.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    acronym_pattern = re.compile(r'\b[A-Z]{2,}\b')
    defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
    sentence_boundary = re.compile(r'(?<=[.!?])\s+')

    # Predefined acronyms. The usage pattern only ever yields the letters
    # ("CFR"), never the digits in front of them, so the bare token must be
    # listed for "14 CFR" to be accepted without a definition.
    defined_acronyms = {"14 CFR", "CFR"}

    # First sentence in which each still-undefined acronym was seen.
    first_occurrences: Dict[str, Dict[str, str]] = {}

    for paragraph in doc:
        # Check for definitions first
        for _full_term, acronym in defined_pattern.findall(paragraph):
            defined_acronyms.add(acronym)
            # A definition clears any earlier "undefined" finding.
            first_occurrences.pop(acronym, None)

        sentences = None  # split lazily: most paragraphs need no lookup
        for acronym in acronym_pattern.findall(paragraph):
            if acronym in defined_acronyms or acronym in first_occurrences:
                continue
            if sentences is None:
                sentences = sentence_boundary.split(paragraph)
            # Record the sentence containing the first undefined use.
            for sentence in sentences:
                if acronym in sentence:
                    first_occurrences[acronym] = {
                        'acronym': acronym,
                        'sentence': sentence.strip()
                    }
                    break

    issues = list(first_occurrences.values())
    return DocumentCheckResult(success=len(issues) == 0, issues=issues)
345
+
346
@profile_performance
def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
    """
    Check document terminology for:
    1. Legal reference formatting and preferred terms
    2. Prohibited phrases and constructions

    Each paragraph is split into sentences. Every occurrence of a term that
    has a preferred replacement is reported; for each prohibited phrase only
    the first occurrence per sentence is reported.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` contain ``incorrect_term`` entries (with
        the offending text, its replacement and the sentence) and
        ``prohibited_phrase`` entries (with the phrase and the sentence).
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Dictionary of terms that should be replaced with preferred alternatives
    term_replacements = {
        r'\bUSC\b': 'U.S.C.',
        r'\bCFR Part\b': 'CFR part',
        r'\bC\.F\.R\.\b': 'CFR',
        r'\b14 CFR\s*§': '14 CFR',
        r'\bWe\b': 'The FAA',
        r'\bwe\b': 'the FAA',
        r'\bcancelled\b': 'canceled',
        r'\bshall\b': 'must',
        # '&' is not a word character, so wrapping it in \b would only match
        # an ampersand glued between two word characters ("R&D") and miss
        # the usual spaced form ("A & B"). Match the character itself.
        r'&': 'and',
        r'\bflight crew\b': 'flightcrew'
    }

    # Prohibited phrases that should be flagged
    prohibited_phrases = [
        r'\babove\b',
        r'\bbelow\b',
        r'(?:^|(?<=[.!?]\s))There\s+(?:is|are)\b'  # Matches 'There is/are' at start of sentences
    ]

    # Compile once instead of on every sentence.
    replacement_rules = [
        (re.compile(pattern), preferred)
        for pattern, preferred in term_replacements.items()
    ]
    prohibited_rules = [re.compile(pattern, re.IGNORECASE) for pattern in prohibited_phrases]
    sentence_boundary = re.compile(r'(?<=[.!?])\s+')

    issues = []

    for paragraph in doc:
        for sentence in sentence_boundary.split(paragraph):
            stripped = sentence.strip()

            # Check for incorrect terms that need replacement
            for pattern, correct_term in replacement_rules:
                for match in pattern.finditer(sentence):
                    issues.append({
                        'type': 'incorrect_term',
                        'incorrect_term': match.group(),
                        'correct_term': correct_term,
                        'sentence': stripped
                    })

            # Check for prohibited phrases
            for pattern in prohibited_rules:
                match = pattern.search(sentence)
                if match:
                    issues.append({
                        'type': 'prohibited_phrase',
                        'phrase': match.group().strip(),
                        'sentence': stripped
                    })

    return DocumentCheckResult(success=len(issues) == 0, issues=issues)
406
+
407
@profile_performance
def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
    """Check for various section symbol (§) usage issues.

    Four kinds of problem are collected over the whole document:

    * sentences that begin with '§';
    * '14 CFR §<n>.<n>' references;
    * a single '§' followed by several sections joined by 'and', 'or' or
      'through' (a doubled '§§' is not reported here);
    * 'or' lists whose second section lacks its own '§'.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` hold at most one entry per problem kind,
        each listing the matching sentences or text fragments.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    sentence_boundary = re.compile(r'(?<=[.!?])\s+')

    # Pattern to find '14 CFR §25.25'
    pattern_14_CFR_section = re.compile(r'\b14 CFR §\s*\d+\.\d+\b')

    # Patterns for multiple sections with single '§'. The (?<!§) guard keeps
    # the second character of a correct '§§ 25.1 and 25.2' from being read
    # as a lone '§'.
    single_symbol_patterns = [
        re.compile(r'(?<!§)§\s*\d+\.\d+\s+and\s+\d+\.\d+'),
        re.compile(r'(?<!§)§\s*\d+\.\d+\s+or\s+\d+\.\d+'),
        re.compile(r'(?<!§)§\s*\d+\.\d+\s+through\s+\d+\.\d+'),
    ]

    # Pattern for missing '§' before the second section of an 'or' list.
    # The second '§' must be absent; allowing it optionally would also flag
    # the correct form '§ 25.1 or § 25.2'.
    pattern_missing_section_symbol_or = re.compile(r'§\s*\d+\.\d+\s+or\s+\d+\.\d+')

    sentences_starting_with_section_symbol = []
    incorrect_14_CFR_section_symbol_usage = []
    single_section_symbol_multiple_sections = []
    missing_section_symbol_in_multiple_sections = []

    for paragraph in doc:
        # Check for sentences starting with '§'
        for sentence in sentence_boundary.split(paragraph):
            stripped = sentence.strip()
            if stripped.startswith('§'):
                sentences_starting_with_section_symbol.append(stripped)

        # Check for '14 CFR §25.25' usage
        incorrect_14_CFR_section_symbol_usage.extend(
            pattern_14_CFR_section.findall(paragraph)
        )

        # Check for single '§' with multiple sections ('and', 'or', 'through')
        for pattern in single_symbol_patterns:
            single_section_symbol_multiple_sections.extend(pattern.findall(paragraph))

        # Check for missing '§' before subsequent sections with 'or'
        missing_section_symbol_in_multiple_sections.extend(
            pattern_missing_section_symbol_or.findall(paragraph)
        )

    issues = []
    if sentences_starting_with_section_symbol:
        issues.append({
            'issue': 'sentences_starting_with_section_symbol',
            'sentences': sentences_starting_with_section_symbol
        })
    if incorrect_14_CFR_section_symbol_usage:
        issues.append({
            'issue': 'incorrect_14_CFR_section_symbol_usage',
            'matches': incorrect_14_CFR_section_symbol_usage
        })
    if single_section_symbol_multiple_sections:
        issues.append({
            'issue': 'single_section_symbol_multiple_sections',
            'matches': single_section_symbol_multiple_sections
        })
    if missing_section_symbol_in_multiple_sections:
        issues.append({
            'issue': 'missing_section_symbol_in_multiple_sections',
            'matches': missing_section_symbol_in_multiple_sections
        })

    return DocumentCheckResult(success=len(issues) == 0, issues=issues)
488
+
489
@profile_performance
def caption_check(self, doc: List[str], doc_type: str, caption_type: str) -> DocumentCheckResult:
    """Check for correctly formatted captions (Table or Figure).

    Paragraphs that start with ``caption_type`` (case-insensitively) must
    follow the numbering scheme of the document type: 'X-Y' for Advisory
    Circulars and Orders, 'X' for everything else. Paragraphs inside a
    table of contents are skipped.

    Args:
        doc: Document paragraphs.
        doc_type: Type of document being checked.
        caption_type: Caption keyword, e.g. "Table" or "Figure". It is
            matched literally.

    Returns:
        A result whose ``issues`` hold one ``{'incorrect_caption',
        'correct_format'}`` entry per malformed caption.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Escape the keyword so that any regex metacharacter in it is matched
    # literally instead of changing the meaning of the pattern.
    keyword = re.escape(caption_type)

    # Determine the caption pattern based on document type
    if doc_type in ["Advisory Circular", "Order"]:
        caption_pattern = re.compile(rf'^{keyword}\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
        correct_format = f"{caption_type} X-Y"
    else:
        caption_pattern = re.compile(rf'^{keyword}\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
        correct_format = f"{caption_type} X"

    caption_prefix = caption_type.lower()
    incorrect_captions = []
    in_toc = False

    for paragraph in doc:
        # Any paragraph mentioning "Contents" (which includes "Table of
        # Contents") starts the table-of-contents region.
        if "Contents" in paragraph:
            in_toc = True
            continue
        elif in_toc and paragraph.strip() == "":
            in_toc = False  # Assume blank line marks the end of TOC
            # NOTE(review): extract_paragraphs drops blank paragraphs; if
            # callers pass its output, this branch never runs and all
            # captions after the TOC are skipped — confirm.

        # If within TOC, skip this paragraph
        if in_toc:
            continue

        # Only check paragraphs that start with the caption keyword
        paragraph_strip = paragraph.strip()
        if paragraph_strip.lower().startswith(caption_prefix):
            if not caption_pattern.match(paragraph_strip):
                incorrect_captions.append({
                    'incorrect_caption': paragraph_strip,
                    'correct_format': correct_format
                })

    return DocumentCheckResult(success=len(incorrect_captions) == 0, issues=incorrect_captions)
530
+
531
@profile_performance
def table_figure_reference_check(self, doc: List[str], doc_type: str) -> DocumentCheckResult:
    """
    Check the capitalization of in-text references to tables and figures.

    A reference that opens a sentence must be capitalized ("Table 1"); a
    reference anywhere else must be lowercase ("table 1"). Paragraphs that
    themselves start with "table" or "figure" are treated as captions and
    skipped. Advisory Circulars and Orders use 'X-Y' numbering, all other
    document types plain 'X' numbering.

    Args:
        doc: Document paragraphs.
        doc_type: Type of document being checked.

    Returns:
        A result whose ``issues`` hold one entry per miscapitalized
        reference, with the reference, the expected format, the sentence
        and a description.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # (label, pattern, format at sentence start, format inside a sentence)
    if doc_type in ["Advisory Circular", "Order"]:
        reference_kinds = (
            ("Table", re.compile(r'\b[Tt]able\s+\d+-\d+\b'), "Table X-Y", "table X-Y"),
            ("Figure", re.compile(r'\b[Ff]igure\s+\d+-\d+\b'), "Figure X-Y", "figure X-Y"),
        )
    else:
        reference_kinds = (
            ("Table", re.compile(r'\b[Tt]able\s+\d+\b'), "Table X", "table X"),
            ("Figure", re.compile(r'\b[Ff]igure\s+\d+\b'), "Figure X", "figure X"),
        )

    sentence_boundary = re.compile(r'(?<=[.!?])\s+')
    incorrect_references = []

    for paragraph in doc:
        # Exclude captions
        if paragraph.strip().lower().startswith(('table', 'figure')):
            continue

        for raw_sentence in sentence_boundary.split(paragraph):
            sentence = raw_sentence.strip()

            # Tables are examined before figures within each sentence.
            for label, pattern, start_format, mid_format in reference_kinds:
                for match in pattern.finditer(sentence):
                    ref = match.group()
                    # Nothing but whitespace before the reference means
                    # it opens the sentence.
                    at_start = sentence[:match.start()].strip() == ""

                    if at_start:
                        if not ref.startswith(label):
                            incorrect_references.append({
                                'incorrect_ref': ref,
                                'correct_format': start_format,
                                'sentence': sentence,
                                'issue': f"{label} reference at sentence start should be capitalized"
                            })
                    elif not ref.startswith(label.lower()):
                        incorrect_references.append({
                            'incorrect_ref': ref,
                            'correct_format': mid_format,
                            'sentence': sentence,
                            'issue': f"{label} reference within sentence should be lowercase"
                        })

    return DocumentCheckResult(success=len(incorrect_references) == 0, issues=incorrect_references)
628
 
629
@profile_performance
def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
    """Check the formatting of Advisory Circular titles cited in the document.

    Every 'AC <number> <title>' citation is located and its title compared
    with the italics/quotes rule for ``doc_type``. Italics are judged from
    the run that contains the first character of the title; quotes are
    detected by the presence of any straight or curly quote mark.

    Args:
        doc_path: Value passed to ``Document`` to open the file.
        doc_type: Type of document being checked.

    Returns:
        A result whose ``issues`` hold one ``{'text', 'issue', 'sentence'}``
        entry per incorrectly formatted title. If the document cannot be
        opened the result fails with an ``error`` entry; an unknown
        ``doc_type`` is logged and passes.
    """
    try:
        doc = Document(doc_path)
    except Exception as e:
        self.logger.error(f"Error reading the document in title check: {e}")
        return DocumentCheckResult(success=False, issues=[{'error': str(e)}])

    # Define formatting rules for different document types
    formatting_rules = {
        "Advisory Circular": {"italics": True, "quotes": False},
        "Airworthiness Criteria": {"italics": False, "quotes": True},
        "Deviation Memo": {"italics": False, "quotes": True},
        "Exemption": {"italics": False, "quotes": True},
        "Federal Register Notice": {"italics": False, "quotes": True},
        "Order": {"italics": False, "quotes": True},
        "Policy Statement": {"italics": False, "quotes": False},
        "Rule": {"italics": False, "quotes": True},
        "Special Condition": {"italics": False, "quotes": True},
        "Technical Standard Order": {"italics": False, "quotes": True},
        "Other": {"italics": False, "quotes": False}
    }

    required_format = formatting_rules.get(doc_type)
    if required_format is None:
        self.logger.warning(f"Unsupported document type: {doc_type}. Skipping title check.")
        return DocumentCheckResult(success=True, issues=[])

    ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')
    # Any type of quotation mark, including smart quotes
    quote_marks = ('"', "'", '“', '”', '‘', '’')

    def italic_at(paragraph, offset):
        """Return the italic setting of the run covering ``offset``."""
        position = 0
        for run in paragraph.runs:
            next_position = position + len(run.text)
            if position <= offset < next_position:
                # presumably may be None when italics are inherited from the
                # style — TODO confirm against python-docx
                return run.italic
            position = next_position
        return False

    incorrect_titles = []

    for paragraph in doc.paragraphs:
        text = paragraph.text

        for match in ac_pattern.finditer(text):
            title_text = match.group(2).strip()
            title_in_quotes = any(mark in title_text for mark in quote_marks)
            title_is_italicized = italic_at(paragraph, match.start(2))

            problems = []

            # Check italics requirement
            if required_format["italics"]:
                if not title_is_italicized:
                    problems.append("should be italicized")
            elif title_is_italicized:
                problems.append("should not be italicized")

            # Check quotes requirement
            if required_format["quotes"]:
                if not title_in_quotes:
                    problems.append("should be in quotes")
            elif title_in_quotes:
                problems.append("should not be in quotes")

            if problems:
                incorrect_titles.append({
                    'text': title_text,
                    'issue': ', '.join(problems),
                    'sentence': text.strip()
                })

    return DocumentCheckResult(success=len(incorrect_titles) == 0, issues=incorrect_titles)
720
+
721
@profile_performance
def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
    """Report sentences whose text ends in two consecutive periods.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` hold one ``{'sentence'}`` entry per
        offending sentence.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Sentences are separated by one or more spaces that follow '.', '!' or '?'.
    sentence_boundary = re.compile(r'(?<=[.!?]) +')

    incorrect_sentences = [
        {'sentence': sentence.strip()}
        for paragraph in doc
        for sentence in sentence_boundary.split(paragraph)
        if sentence.endswith('..')
    ]

    return DocumentCheckResult(
        success=len(incorrect_sentences) == 0,
        issues=incorrect_sentences
    )
739
+
740
@profile_performance
def spacing_check(self, doc: List[str]) -> DocumentCheckResult:
    """Check for correct spacing in the document.

    Each sentence is tested against a list of spacing rules; a rule that
    matches anywhere in the sentence yields one issue for that sentence.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` hold one ``{'issue_description',
        'sentence'}`` entry per rule violated in a sentence.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Regex patterns to find incorrect spacing.
    # The first three rules used to be guarded by (?<!\s), which rejects a
    # match whenever the token is preceded by whitespace: the normal case
    # "see AC25-1" was never reported, while "N" followed by digits matched
    # inside ordinary words. A word boundary anchors the designator
    # instead, and '§' needs no guard at all.
    patterns = [
        (re.compile(r'\b(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE), "Missing space between document type and number"),
        (re.compile(r'§(\d+\.\d+)'), "Missing space after section symbol (§)"),
        (re.compile(r'\bPart(\d+)', re.IGNORECASE), "Missing space between 'Part' and number"),
        # NOTE(review): this rule matches an opening parenthesis plus one
        # letter/digit NOT followed by ')', which does not obviously
        # correspond to its description — left unchanged, confirm intent.
        (re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE), "Missing space before paragraph indication"),
        (re.compile(r'\s{2,}'), "Double spaces between words")
    ]

    sentence_boundary = re.compile(r'(?<=[.!?])\s+')
    incorrect_spacing = []

    for paragraph in doc:
        for sentence in sentence_boundary.split(paragraph):
            for pattern, issue in patterns:
                if pattern.search(sentence):
                    incorrect_spacing.append({
                        'issue_description': issue,
                        'sentence': sentence.strip()
                    })

    return DocumentCheckResult(success=len(incorrect_spacing) == 0, issues=incorrect_spacing)
770
+
771
@profile_performance
def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
    """Flag spelled-out terms that reappear after their abbreviation was introduced.

    A definition looks like "Federal Aviation Administration (FAA)". Only the
    first definition of an abbreviation is remembered. The first sentence in
    which the remembered full term appears is accepted; every later sentence
    containing it is reported.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` hold one ``{'full_term', 'acronym',
        'sentence'}`` entry per repeated use of a full term.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    sentence_boundary = re.compile(r'(?<=[.!?])\s+')
    # Find definitions like "Federal Aviation Administration (FAA)"
    definition_pattern = re.compile(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)')

    # acronym -> {"full_term": text, "defined": True until the first
    # sentence containing the full term has been seen}
    known = {}
    findings = []

    for paragraph in doc:
        for sentence in sentence_boundary.split(paragraph):
            for term, short in definition_pattern.findall(sentence):
                if short not in known:
                    known[short] = {"full_term": term.strip(), "defined": True}

            # Check for full term usage after definition
            for short, entry in known.items():
                term = entry["full_term"]
                if term not in sentence:
                    continue
                if entry["defined"]:
                    # First sighting (normally the defining sentence): allow it.
                    entry["defined"] = False
                    continue
                # Only flag subsequent occurrences
                findings.append({
                    'full_term': term,
                    'acronym': short,
                    'sentence': sentence.strip()
                })

    return DocumentCheckResult(success=len(findings) == 0, issues=findings)
806
+
807
@profile_performance
def check_date_formats(self, doc: List[str]) -> DocumentCheckResult:
    """Report numeric dates while leaving aviation reference numbers alone.

    Text that looks like a manual or directive reference (for example
    "AD 2020-01-15" or "AMM 12-34-56") is masked before the date patterns
    run, so it cannot be mistaken for a date. Slash-separated, dash-separated
    and ISO-style numeric dates are then reported.

    Args:
        doc: Document paragraphs.

    Returns:
        A result whose ``issues`` hold one ``{'date', 'issue', 'sentence'}``
        entry per numeric date found.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    # Patterns to ignore (aviation references), combined into one regex
    ignore_pattern = re.compile('|'.join([
        r'\bAD \d{4}-\d{2}-\d{2}\b',          # Airworthiness Directive references
        r'\bSWPM \d{2}-\d{2}-\d{2}\b',        # Standard Wiring Practices Manual references
        r'\bAMM \d{2}-\d{2}-\d{2}\b',         # Aircraft Maintenance Manual references
        r'\bSOPM \d{2}-\d{2}-\d{2}\b',        # Standard Operating Procedure references
        r'\b[A-Z]{2,4} \d{2}-\d{2}-\d{2}\b'   # Generic manual reference pattern
    ]))

    # Incorrect date patterns
    date_patterns = [
        (re.compile(r'(?<![\w/-])\d{1,2}/\d{1,2}/\d{2,4}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'MM/DD/YYYY'"),
        (re.compile(r'(?<![\w/-])\d{1,2}-\d{1,2}-\d{2,4}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'MM-DD-YYYY'"),
        (re.compile(r'(?<![\w/-])\d{4}-\d{1,2}-\d{1,2}(?![\w/-])'), "Use 'Month Day, Year' format instead of 'YYYY-MM-DD'")
    ]

    sentence_boundary = re.compile(r'(?<=[.!?])\s+')

    def mask(match):
        # Same-length filler keeps every offset aligned with the original.
        return 'X' * len(match.group())

    date_issues = []

    for paragraph in doc:
        for sentence in sentence_boundary.split(paragraph):
            working_sentence = ignore_pattern.sub(mask, sentence)

            for pattern, issue in date_patterns:
                for match in pattern.finditer(working_sentence):
                    date_issues.append({
                        # Read the date back from the unmasked sentence.
                        'date': sentence[match.start():match.end()],
                        'issue': issue,
                        'sentence': sentence.strip()
                    })

    return DocumentCheckResult(success=len(date_issues) == 0, issues=date_issues)
864
+
865
@profile_performance
def check_placeholders(self, doc: List[str]) -> DocumentCheckResult:
    """Report leftover placeholder text such as 'TBD' or 'To be determined'.

    Paragraphs are split into sentences and each sentence is searched,
    case-insensitively, for every placeholder phrase. At most one hit per
    phrase per sentence is recorded (the first occurrence).

    Args:
        doc: Paragraph strings of the document.

    Returns:
        A DocumentCheckResult. ``issues`` holds one dict per hit with the keys
        'placeholder' and 'sentence'; ``success`` is True when nothing was
        found. When ``self.validate_input(doc)`` is falsy, a failed result
        with a single 'error' entry is returned instead.
    """
    if not self.validate_input(doc):
        return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])

    placeholder_patterns = [
        re.compile(expression, re.IGNORECASE)
        for expression in (
            r'\bTBD\b',
            r'\bTo be determined\b',
            r'\bTo be added\b',
        )
    ]
    sentence_boundary = re.compile(r'(?<=[.!?])\s+')

    found = []
    for paragraph in doc:
        for sentence in sentence_boundary.split(paragraph):
            for pattern in placeholder_patterns:
                hit = pattern.search(sentence)
                if hit is None:
                    continue
                found.append({
                    'placeholder': hit.group().strip(),
                    'sentence': sentence.strip()
                })

    return DocumentCheckResult(success=not found, issues=found)
891
+
892
def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
    """
    Execute every document check and collect the results by check name.

    The title check is replaced by an automatic pass when the configuration
    entry for ``doc_type`` sets ``skip_title_check``.

    Args:
        doc_path (str): Path to the document.
        doc_type (str): Type of the document.
        template_type (str, optional): Template type, if applicable.
            Accepted but not read by this method.

    Returns:
        Dict[str, DocumentCheckResult]: Dictionary of check names to results,
        in the order the checks were run.
    """
    paragraphs = self.extract_paragraphs(doc_path)

    # Per-document-type switches; unknown types fall back to no overrides.
    type_settings = self.config_manager.config['document_types'].get(doc_type, {})
    skip_title_check = type_settings.get('skip_title_check', False)

    # A dict literal evaluates its values top to bottom, so the checks run in
    # the listed order and the result keys keep that order.
    results = {
        'heading_title_check': self.heading_title_check(paragraphs, doc_type),
        'heading_title_period_check': self.heading_title_period_check(paragraphs, doc_type),
        'acronym_check': self.acronym_check(paragraphs),
        'terminology_check': self.check_terminology(paragraphs),
        'section_symbol_usage_check': self.check_section_symbol_usage(paragraphs),
        'caption_check_table': self.caption_check(paragraphs, doc_type, 'Table'),
        'caption_check_figure': self.caption_check(paragraphs, doc_type, 'Figure'),
        'table_figure_reference_check': self.table_figure_reference_check(paragraphs, doc_type),
        'document_title_check': (
            DocumentCheckResult(success=True, issues=[])
            if skip_title_check
            else self.document_title_check(doc_path, doc_type)
        ),
        'double_period_check': self.double_period_check(paragraphs),
        'spacing_check': self.spacing_check(paragraphs),
        'abbreviation_usage_check': self.check_abbreviation_usage(paragraphs),
        'date_formats_check': self.check_date_formats(paragraphs),
        'placeholders_check': self.check_placeholders(paragraphs),
    }

    return results
932
+
933
def process_document(file_obj, doc_type, template_type):
    """Run every check on an uploaded document and return a Markdown report.

    Args:
        file_obj: The uploaded document, handed to ``Document`` and to the
            title check. ``None`` means nothing was uploaded.
        doc_type: Document type name forwarded to the type-dependent checks.
        template_type: Accepted for interface compatibility; not read here.

    Returns:
        The report produced by ``format_results_for_gradio``, or a plain
        message string when no file was supplied or processing failed.
    """
    # python-docx loads its built-in blank template when given None, which
    # would silently "check" an empty document; report the missing upload.
    if file_obj is None:
        return "Please upload a document before running the checks."

    try:
        checker = FAADocumentChecker()
        doc = Document(file_obj)
        # Skip paragraphs that are empty or whitespace-only.
        paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]

        # Run all checks; the keys must match the titles looked up by
        # format_results_for_gradio.
        results = {}
        results['heading_check'] = checker.heading_title_check(paragraphs, doc_type)
        results['heading_period_check'] = checker.heading_title_period_check(paragraphs, doc_type)
        results['acronym_check'] = checker.acronym_check(paragraphs)
        results['terminology_check'] = checker.check_terminology(paragraphs)
        results['section_symbol_check'] = checker.check_section_symbol_usage(paragraphs)
        results['table_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Table')
        results['figure_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Figure')
        results['references_check'] = checker.table_figure_reference_check(paragraphs, doc_type)
        results['title_check'] = checker.document_title_check(file_obj, doc_type)
        results['double_period_check'] = checker.double_period_check(paragraphs)
        results['spacing_check'] = checker.spacing_check(paragraphs)
        results['abbreviation_check'] = checker.check_abbreviation_usage(paragraphs)
        results['date_check'] = checker.check_date_formats(paragraphs)
        results['placeholder_check'] = checker.check_placeholders(paragraphs)

        return format_results_for_gradio(results, doc_type)
    except Exception as e:
        # UI boundary: turn any failure into a message for the user, but keep
        # the full traceback in the console so the cause can be diagnosed.
        print(f"Error in process_document: {str(e)}")
        traceback.print_exc()
        return f"An error occurred while processing the document: {str(e)}"
961
 
962
def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
    """Render check results as a Markdown report for the Gradio output.

    Each check becomes a second-level heading followed by either a pass line
    or a bullet list of its issues, plus an optional 'Additional Details'
    list when the result's ``details`` attribute is truthy.

    Args:
        results: Mapping of check name to a result object exposing
            ``success``, ``issues`` and ``details``.
        doc_type: Document type name. Not read by this function.

    Returns:
        The report as a single newline-joined Markdown string.
    """
    # Display titles for known check names; unknown names are title-cased.
    display_titles = {
        'heading_check': "Required Headings Check",
        'heading_period_check': "Heading Period Check",
        'acronym_check': "Acronym Check",
        'terminology_check': "Terminology Check",
        'section_symbol_check': "Section Symbol Usage",
        'table_caption_check': "Table Caption Format",
        'figure_caption_check': "Figure Caption Format",
        'references_check': "Table and Figure References",
        'title_check': "Document Title Style",
        'double_period_check': "Double Period Check",
        'spacing_check': "Spacing Check",
        'abbreviation_check': "Abbreviation Usage",
        'date_check': "Date Format Check",
        'placeholder_check': "Placeholder Check"
    }

    report = ["# Document Check Results\n"]

    for check_name, result in results.items():
        fallback_title = check_name.replace('_', ' ').title()
        report.append(f"## {display_titles.get(check_name, fallback_title)}")

        if not result.success:
            report.append("❌ Issues found:")
            for issue in result.issues:
                if not isinstance(issue, dict):
                    report.append(f"- {issue}")
                    continue
                for key, value in issue.items():
                    if isinstance(value, list):
                        # List values are flattened to one bullet per item, without the key.
                        report.extend(f"- {item}" for item in value)
                    else:
                        report.append(f"- {key}: {value}")
            report.append("")
        else:
            report.append("✅ All checks passed.\n")

        if result.details:
            report.append("Additional Details:")
            for key, value in result.details.items():
                if isinstance(value, list):
                    report.append(f"- {key}:")
                    report.extend(f" - {item}" for item in value)
                else:
                    report.append(f"- {key}: {value}")
            report.append("")

    return "\n".join(report)
 
 
1016
 
1017
  # Create the Gradio interface
1018
  demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')
 
1028
 
1029
  document_types = [
1030
  "Advisory Circular", "Airworthiness Criteria", "Deviation Memo", "Exemption",
1031
+ "Federal Register Notice", "Order", "Policy Statement",
1032
  "Rule", "Special Condition", "Technical Standard Order", "Other"
1033
  ]
1034
 
 
1060
  value="Results will appear here after processing..."
1061
  )
1062
 
 
1063
  def update_template_visibility(doc_type):
1064
  return gr.update(visible=doc_type == "Advisory Circular")
1065
 
 
1069
  outputs=[template_type]
1070
  )
1071
 
 
1072
  submit_btn.click(
1073
+ fn=process_document,
1074
  inputs=[file_input, doc_type, template_type],
1075
  outputs=[output]
1076
  )
1077
 
1078
  # Launch the demo
1079
+ if __name__ == "__main__":
1080
+ demo.launch()