Hoctar77 committed on
Commit
48d0918
·
verified ·
1 Parent(s): 8b708c8

Updated document title check

Browse files
Files changed (1) hide show
  1. app.py +99 -82
app.py CHANGED
@@ -1231,7 +1231,19 @@ class FAADocumentChecker(DocumentChecker):
1231
 
1232
  @profile_performance
1233
  def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
1234
- """Check for correct formatting of document titles."""
 
 
 
 
 
 
 
 
 
 
 
 
1235
  try:
1236
  doc = Document(doc_path)
1237
  except Exception as e:
@@ -1240,86 +1252,82 @@ class FAADocumentChecker(DocumentChecker):
1240
 
1241
  incorrect_titles = []
1242
 
1243
- # Define formatting rules for different document types
1244
- formatting_rules = {
1245
- "Advisory Circular": {"italics": True, "quotes": False},
1246
- "Airworthiness Criteria": {"italics": False, "quotes": True},
1247
- "Deviation Memo": {"italics": False, "quotes": True},
1248
- "Exemption": {"italics": False, "quotes": True},
1249
- "Federal Register Notice": {"italics": False, "quotes": True},
1250
- "Order": {"italics": False, "quotes": True},
1251
- "Policy Statement": {"italics": False, "quotes": False},
1252
- "Rule": {"italics": False, "quotes": True},
1253
- "Special Condition": {"italics": False, "quotes": True},
1254
- "Technical Standard Order": {"italics": False, "quotes": True},
1255
- "Other": {"italics": False, "quotes": False}
1256
- }
1257
 
1258
- if doc_type not in formatting_rules:
1259
- self.logger.warning(f"Unsupported document type: {doc_type}. Skipping title check.")
1260
- return DocumentCheckResult(success=True, issues=[])
1261
-
1262
- required_format = formatting_rules[doc_type]
1263
-
1264
- ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')
1265
 
1266
  for paragraph in doc.paragraphs:
1267
- text = paragraph.text
1268
- matches = ac_pattern.finditer(text)
1269
-
1270
  for match in matches:
1271
- full_match = match.group(0)
1272
- title_text = match.group(2).strip()
 
1273
 
1274
- # Get the position where the title starts
1275
- title_start = match.start(2)
1276
- title_end = match.end(2)
1277
-
1278
- # Check for any type of quotation marks, including smart quotes
1279
- title_in_quotes = any(q in title_text for q in ['"', "'", '"', '"', '"', '"'])
1280
-
1281
- # Check the formatting of the title
1282
  title_is_italicized = False
 
1283
  current_pos = 0
 
1284
  for run in paragraph.runs:
1285
  run_length = len(run.text)
1286
  run_start = current_pos
1287
  run_end = current_pos + run_length
1288
- if run_start <= title_start < run_end:
1289
- title_is_italicized = run.italic
1290
- break
 
 
 
 
 
 
 
 
1291
  current_pos += run_length
1292
 
1293
- # Check if formatting matches the required format
1294
  formatting_incorrect = False
1295
  issue_message = []
1296
 
1297
- # Check italics requirement
1298
- if required_format["italics"] and not title_is_italicized:
1299
- formatting_incorrect = True
1300
- issue_message.append("should be italicized")
1301
- elif not required_format["italics"] and title_is_italicized:
1302
- formatting_incorrect = True
1303
- issue_message.append("should not be italicized")
1304
-
1305
- # Check quotes requirement
1306
- if required_format["quotes"] and not title_in_quotes:
1307
- formatting_incorrect = True
1308
- issue_message.append("should be in quotes")
1309
- elif not required_format["quotes"] and title_in_quotes:
1310
- formatting_incorrect = True
1311
- issue_message.append("should not be in quotes")
1312
 
1313
  if formatting_incorrect:
1314
  incorrect_titles.append({
1315
  'text': title_text,
1316
- 'issue': ', '.join(issue_message),
1317
- 'sentence': text.strip()
 
1318
  })
1319
 
1320
  success = len(incorrect_titles) == 0
1321
 
1322
- return DocumentCheckResult(success=success, issues=incorrect_titles)
 
 
 
 
 
 
 
1323
 
1324
  @profile_performance
1325
  def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
@@ -2368,6 +2376,15 @@ class DocumentCheckResultsFormatter:
2368
  'before': 'The operator must ensure that all required maintenance procedures are performed in accordance with the manufacturer\'s specifications and that proper documentation is maintained throughout the entire process to demonstrate compliance with regulatory requirements.',
2369
  'after': 'The operator must ensure all required maintenance procedures are performed according to manufacturer specifications. Additionally, proper documentation must be maintained to demonstrate regulatory compliance.'
2370
  }
 
 
 
 
 
 
 
 
 
2371
  }
2372
  }
2373
 
@@ -2631,20 +2648,13 @@ class DocumentCheckResultsFormatter:
2631
  "quotes_only": {
2632
  "types": [
2633
  "Airworthiness Criteria", "Deviation Memo", "Exemption",
2634
- "Federal Register Notice", "Order", "Rule", "Special Condition",
2635
- "Technical Standard Order"
2636
  ],
2637
  "italics": False,
2638
  "quotes": True,
2639
  "description": "For this document type, referenced document titles should be in quotes without italics.",
2640
  "example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
2641
- },
2642
- "no_formatting": {
2643
- "types": ["Policy Statement", "Other"],
2644
- "italics": False,
2645
- "quotes": False,
2646
- "description": "For this document type, referenced document titles should not use italics or quotes.",
2647
- "example": "See AC 25.1309-1B, System Design and Analysis, for information on X."
2648
  }
2649
  }
2650
 
@@ -2655,24 +2665,31 @@ class DocumentCheckResultsFormatter:
2655
  format_group = rules
2656
  break
2657
 
2658
- # Use default if document type not found
2659
  if not format_group:
2660
- format_group = formatting_rules["no_formatting"]
2661
-
2662
- # Update the document title check category
2663
- self.issue_categories['document_title_check'] = {
2664
- 'title': 'Referenced Document Title Format Issues',
2665
- 'description': format_group['description'],
2666
- 'solution': "Format referenced document titles as follows: " + (
2667
- "Italicize the title" if format_group['italics'] else
2668
- "Put the title in quotes" if format_group['quotes'] else
2669
- "No special formatting required"
2670
- ),
2671
- 'example_fix': {
2672
- 'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
2673
- 'after': format_group['example']
 
 
 
 
 
 
 
 
2674
  }
2675
- }
2676
 
2677
  output = []
2678
 
 
1231
 
1232
  @profile_performance
1233
  def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
1234
+ """
1235
+ Check for correct formatting of document titles.
1236
+
1237
+ For Advisory Circulars: Use italics without quotes
1238
+ For all other document types: Use quotes without italics
1239
+
1240
+ Args:
1241
+ doc_path: Path to the document
1242
+ doc_type: Type of document being checked
1243
+
1244
+ Returns:
1245
+ DocumentCheckResult: Results of document title check
1246
+ """
1247
  try:
1248
  doc = Document(doc_path)
1249
  except Exception as e:
 
1252
 
1253
  incorrect_titles = []
1254
 
1255
+ # Define formatting rules based on document type
1256
+ use_italics = doc_type == "Advisory Circular"
1257
+ use_quotes = doc_type != "Advisory Circular"
 
 
 
 
 
 
 
 
 
 
 
1258
 
1259
+ # Pattern to match document references (e.g., "AC 25.1309-1B, System Design and Analysis")
1260
+ doc_ref_pattern = re.compile(
1261
+ r'(?:AC|Order|Policy|Notice)\s+[\d.-]+[A-Z]?,\s+([^,.]+)(?:[,.]|$)'
1262
+ )
 
 
 
1263
 
1264
  for paragraph in doc.paragraphs:
1265
+ matches = doc_ref_pattern.finditer(paragraph.text)
1266
+
 
1267
  for match in matches:
1268
+ title_text = match.group(1).strip()
1269
+ title_start = match.start(1)
1270
+ title_end = match.end(1)
1271
 
1272
+ # Check formatting within the matched range
 
 
 
 
 
 
 
1273
  title_is_italicized = False
1274
+ title_in_quotes = False
1275
  current_pos = 0
1276
+
1277
  for run in paragraph.runs:
1278
  run_length = len(run.text)
1279
  run_start = current_pos
1280
  run_end = current_pos + run_length
1281
+
1282
+ # Check if this run overlaps with the title
1283
+ if (run_start <= title_start < run_end or
1284
+ run_start < title_end <= run_end or
1285
+ title_start <= run_start < title_end):
1286
+ title_is_italicized = title_is_italicized or run.italic
1287
+ # Check for any type of quotation marks
1288
+ title_in_quotes = title_in_quotes or any(
1289
+ q in run.text for q in ['"', "'", '"', '"', '"', '"']
1290
+ )
1291
+
1292
  current_pos += run_length
1293
 
1294
+ # Determine if formatting is incorrect
1295
  formatting_incorrect = False
1296
  issue_message = []
1297
 
1298
+ if use_italics:
1299
+ if not title_is_italicized:
1300
+ formatting_incorrect = True
1301
+ issue_message.append("should be italicized")
1302
+ if title_in_quotes:
1303
+ formatting_incorrect = True
1304
+ issue_message.append("should not be in quotes")
1305
+ else: # use quotes
1306
+ if title_is_italicized:
1307
+ formatting_incorrect = True
1308
+ issue_message.append("should not be italicized")
1309
+ if not title_in_quotes:
1310
+ formatting_incorrect = True
1311
+ issue_message.append("should be in quotes")
 
1312
 
1313
  if formatting_incorrect:
1314
  incorrect_titles.append({
1315
  'text': title_text,
1316
+ 'issue': ' and '.join(issue_message),
1317
+ 'sentence': paragraph.text.strip(),
1318
+ 'correct_format': 'italics' if use_italics else 'quotes'
1319
  })
1320
 
1321
  success = len(incorrect_titles) == 0
1322
 
1323
+ return DocumentCheckResult(
1324
+ success=success,
1325
+ issues=incorrect_titles,
1326
+ details={
1327
+ 'document_type': doc_type,
1328
+ 'formatting_rule': 'italics' if use_italics else 'quotes'
1329
+ }
1330
+ )
1331
 
1332
  @profile_performance
1333
  def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
 
2376
  'before': 'The operator must ensure that all required maintenance procedures are performed in accordance with the manufacturer\'s specifications and that proper documentation is maintained throughout the entire process to demonstrate compliance with regulatory requirements.',
2377
  'after': 'The operator must ensure all required maintenance procedures are performed according to manufacturer specifications. Additionally, proper documentation must be maintained to demonstrate regulatory compliance.'
2378
  }
2379
+ },
2380
+ 'document_title_check': {
2381
+ 'title': 'Referenced Document Title Format Issues',
2382
+ 'description': 'Checks document title formatting based on document type. Advisory Circulars require italics without quotes, while all other document types require quotes without italics.',
2383
+ 'solution': 'Format document titles according to document type: use italics for Advisory Circulars, quotes for all other document types.',
2384
+ 'example_fix': {
2385
+ 'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
2386
+ 'after': 'See AC 25.1309-1B, <i>System Design and Analysis</i>, for information on X.'
2387
+ }
2388
  }
2389
  }
2390
 
 
2648
  "quotes_only": {
2649
  "types": [
2650
  "Airworthiness Criteria", "Deviation Memo", "Exemption",
2651
+ "Federal Register Notice", "Order", "Policy Statement", "Rule",
2652
+ "Special Condition", "Technical Standard Order", "Other"
2653
  ],
2654
  "italics": False,
2655
  "quotes": True,
2656
  "description": "For this document type, referenced document titles should be in quotes without italics.",
2657
  "example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
 
 
 
 
 
 
 
2658
  }
2659
  }
2660
 
 
2665
  format_group = rules
2666
  break
2667
 
2668
+ # Use quotes_only as default if document type not found
2669
  if not format_group:
2670
+ format_group = formatting_rules["quotes_only"]
2671
+
2672
+ # Update document title check category based on document type
2673
+ if doc_type == "Advisory Circular":
2674
+ self.issue_categories['document_title_check'] = {
2675
+ 'title': 'Referenced Document Title Format Issues',
2676
+ 'description': 'For Advisory Circulars, all referenced document titles must be italicized.',
2677
+ 'solution': 'Format document titles using italics for Advisory Circulars.',
2678
+ 'example_fix': {
2679
+ 'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
2680
+ 'after': 'See AC 25.1309-1B, <i>System Design and Analysis</i>, for information on X.'
2681
+ }
2682
+ }
2683
+ else:
2684
+ self.issue_categories['document_title_check'] = {
2685
+ 'title': 'Referenced Document Title Format Issues',
2686
+ 'description': f'For {doc_type}s, all referenced document titles must be enclosed in quotation marks.',
2687
+ 'solution': 'Format document titles using quotation marks.',
2688
+ 'example_fix': {
2689
+ 'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
2690
+ 'after': 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
2691
+ }
2692
  }
 
2693
 
2694
  output = []
2695