Spaces:
Sleeping
Sleeping
Updated document title check
Browse files
app.py
CHANGED
@@ -1231,7 +1231,19 @@ class FAADocumentChecker(DocumentChecker):
|
|
1231 |
|
1232 |
@profile_performance
|
1233 |
def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
|
1234 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1235 |
try:
|
1236 |
doc = Document(doc_path)
|
1237 |
except Exception as e:
|
@@ -1240,86 +1252,82 @@ class FAADocumentChecker(DocumentChecker):
|
|
1240 |
|
1241 |
incorrect_titles = []
|
1242 |
|
1243 |
-
# Define formatting rules
|
1244 |
-
|
1245 |
-
|
1246 |
-
"Airworthiness Criteria": {"italics": False, "quotes": True},
|
1247 |
-
"Deviation Memo": {"italics": False, "quotes": True},
|
1248 |
-
"Exemption": {"italics": False, "quotes": True},
|
1249 |
-
"Federal Register Notice": {"italics": False, "quotes": True},
|
1250 |
-
"Order": {"italics": False, "quotes": True},
|
1251 |
-
"Policy Statement": {"italics": False, "quotes": False},
|
1252 |
-
"Rule": {"italics": False, "quotes": True},
|
1253 |
-
"Special Condition": {"italics": False, "quotes": True},
|
1254 |
-
"Technical Standard Order": {"italics": False, "quotes": True},
|
1255 |
-
"Other": {"italics": False, "quotes": False}
|
1256 |
-
}
|
1257 |
|
1258 |
-
|
1259 |
-
|
1260 |
-
|
1261 |
-
|
1262 |
-
required_format = formatting_rules[doc_type]
|
1263 |
-
|
1264 |
-
ac_pattern = re.compile(r'(AC\s+\d+(?:-\d+)?(?:,|\s)+)(.+?)(?=\.|,|$)')
|
1265 |
|
1266 |
for paragraph in doc.paragraphs:
|
1267 |
-
|
1268 |
-
|
1269 |
-
|
1270 |
for match in matches:
|
1271 |
-
|
1272 |
-
|
|
|
1273 |
|
1274 |
-
#
|
1275 |
-
title_start = match.start(2)
|
1276 |
-
title_end = match.end(2)
|
1277 |
-
|
1278 |
-
# Check for any type of quotation marks, including smart quotes
|
1279 |
-
title_in_quotes = any(q in title_text for q in ['"', "'", '"', '"', '"', '"'])
|
1280 |
-
|
1281 |
-
# Check the formatting of the title
|
1282 |
title_is_italicized = False
|
|
|
1283 |
current_pos = 0
|
|
|
1284 |
for run in paragraph.runs:
|
1285 |
run_length = len(run.text)
|
1286 |
run_start = current_pos
|
1287 |
run_end = current_pos + run_length
|
1288 |
-
|
1289 |
-
|
1290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1291 |
current_pos += run_length
|
1292 |
|
1293 |
-
#
|
1294 |
formatting_incorrect = False
|
1295 |
issue_message = []
|
1296 |
|
1297 |
-
|
1298 |
-
|
1299 |
-
|
1300 |
-
|
1301 |
-
|
1302 |
-
|
1303 |
-
|
1304 |
-
|
1305 |
-
|
1306 |
-
|
1307 |
-
|
1308 |
-
|
1309 |
-
|
1310 |
-
|
1311 |
-
issue_message.append("should not be in quotes")
|
1312 |
|
1313 |
if formatting_incorrect:
|
1314 |
incorrect_titles.append({
|
1315 |
'text': title_text,
|
1316 |
-
'issue': '
|
1317 |
-
'sentence': text.strip()
|
|
|
1318 |
})
|
1319 |
|
1320 |
success = len(incorrect_titles) == 0
|
1321 |
|
1322 |
-
return DocumentCheckResult(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1323 |
|
1324 |
@profile_performance
|
1325 |
def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
|
@@ -2368,6 +2376,15 @@ class DocumentCheckResultsFormatter:
|
|
2368 |
'before': 'The operator must ensure that all required maintenance procedures are performed in accordance with the manufacturer\'s specifications and that proper documentation is maintained throughout the entire process to demonstrate compliance with regulatory requirements.',
|
2369 |
'after': 'The operator must ensure all required maintenance procedures are performed according to manufacturer specifications. Additionally, proper documentation must be maintained to demonstrate regulatory compliance.'
|
2370 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2371 |
}
|
2372 |
}
|
2373 |
|
@@ -2631,20 +2648,13 @@ class DocumentCheckResultsFormatter:
|
|
2631 |
"quotes_only": {
|
2632 |
"types": [
|
2633 |
"Airworthiness Criteria", "Deviation Memo", "Exemption",
|
2634 |
-
"Federal Register Notice", "Order", "
|
2635 |
-
"Technical Standard Order"
|
2636 |
],
|
2637 |
"italics": False,
|
2638 |
"quotes": True,
|
2639 |
"description": "For this document type, referenced document titles should be in quotes without italics.",
|
2640 |
"example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
|
2641 |
-
},
|
2642 |
-
"no_formatting": {
|
2643 |
-
"types": ["Policy Statement", "Other"],
|
2644 |
-
"italics": False,
|
2645 |
-
"quotes": False,
|
2646 |
-
"description": "For this document type, referenced document titles should not use italics or quotes.",
|
2647 |
-
"example": "See AC 25.1309-1B, System Design and Analysis, for information on X."
|
2648 |
}
|
2649 |
}
|
2650 |
|
@@ -2655,24 +2665,31 @@ class DocumentCheckResultsFormatter:
|
|
2655 |
format_group = rules
|
2656 |
break
|
2657 |
|
2658 |
-
# Use default if document type not found
|
2659 |
if not format_group:
|
2660 |
-
format_group = formatting_rules["
|
2661 |
-
|
2662 |
-
# Update
|
2663 |
-
|
2664 |
-
'
|
2665 |
-
|
2666 |
-
|
2667 |
-
|
2668 |
-
|
2669 |
-
|
2670 |
-
|
2671 |
-
|
2672 |
-
|
2673 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2674 |
}
|
2675 |
-
}
|
2676 |
|
2677 |
output = []
|
2678 |
|
|
|
1231 |
|
1232 |
@profile_performance
|
1233 |
def document_title_check(self, doc_path: str, doc_type: str) -> DocumentCheckResult:
|
1234 |
+
"""
|
1235 |
+
Check for correct formatting of document titles.
|
1236 |
+
|
1237 |
+
For Advisory Circulars: Use italics without quotes
|
1238 |
+
For all other document types: Use quotes without italics
|
1239 |
+
|
1240 |
+
Args:
|
1241 |
+
doc_path: Path to the document
|
1242 |
+
doc_type: Type of document being checked
|
1243 |
+
|
1244 |
+
Returns:
|
1245 |
+
DocumentCheckResult: Results of document title check
|
1246 |
+
"""
|
1247 |
try:
|
1248 |
doc = Document(doc_path)
|
1249 |
except Exception as e:
|
|
|
1252 |
|
1253 |
incorrect_titles = []
|
1254 |
|
1255 |
+
# Define formatting rules based on document type
|
1256 |
+
use_italics = doc_type == "Advisory Circular"
|
1257 |
+
use_quotes = doc_type != "Advisory Circular"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1258 |
|
1259 |
+
# Pattern to match document references (e.g., "AC 25.1309-1B, System Design and Analysis")
|
1260 |
+
doc_ref_pattern = re.compile(
|
1261 |
+
r'(?:AC|Order|Policy|Notice)\s+[\d.-]+[A-Z]?,\s+([^,.]+)(?:[,.]|$)'
|
1262 |
+
)
|
|
|
|
|
|
|
1263 |
|
1264 |
for paragraph in doc.paragraphs:
|
1265 |
+
matches = doc_ref_pattern.finditer(paragraph.text)
|
1266 |
+
|
|
|
1267 |
for match in matches:
|
1268 |
+
title_text = match.group(1).strip()
|
1269 |
+
title_start = match.start(1)
|
1270 |
+
title_end = match.end(1)
|
1271 |
|
1272 |
+
# Check formatting within the matched range
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1273 |
title_is_italicized = False
|
1274 |
+
title_in_quotes = False
|
1275 |
current_pos = 0
|
1276 |
+
|
1277 |
for run in paragraph.runs:
|
1278 |
run_length = len(run.text)
|
1279 |
run_start = current_pos
|
1280 |
run_end = current_pos + run_length
|
1281 |
+
|
1282 |
+
# Check if this run overlaps with the title
|
1283 |
+
if (run_start <= title_start < run_end or
|
1284 |
+
run_start < title_end <= run_end or
|
1285 |
+
title_start <= run_start < title_end):
|
1286 |
+
title_is_italicized = title_is_italicized or run.italic
|
1287 |
+
# Check for any type of quotation marks
|
1288 |
+
title_in_quotes = title_in_quotes or any(
|
1289 |
+
q in run.text for q in ['"', "'", '"', '"', '"', '"']
|
1290 |
+
)
|
1291 |
+
|
1292 |
current_pos += run_length
|
1293 |
|
1294 |
+
# Determine if formatting is incorrect
|
1295 |
formatting_incorrect = False
|
1296 |
issue_message = []
|
1297 |
|
1298 |
+
if use_italics:
|
1299 |
+
if not title_is_italicized:
|
1300 |
+
formatting_incorrect = True
|
1301 |
+
issue_message.append("should be italicized")
|
1302 |
+
if title_in_quotes:
|
1303 |
+
formatting_incorrect = True
|
1304 |
+
issue_message.append("should not be in quotes")
|
1305 |
+
else: # use quotes
|
1306 |
+
if title_is_italicized:
|
1307 |
+
formatting_incorrect = True
|
1308 |
+
issue_message.append("should not be italicized")
|
1309 |
+
if not title_in_quotes:
|
1310 |
+
formatting_incorrect = True
|
1311 |
+
issue_message.append("should be in quotes")
|
|
|
1312 |
|
1313 |
if formatting_incorrect:
|
1314 |
incorrect_titles.append({
|
1315 |
'text': title_text,
|
1316 |
+
'issue': ' and '.join(issue_message),
|
1317 |
+
'sentence': paragraph.text.strip(),
|
1318 |
+
'correct_format': 'italics' if use_italics else 'quotes'
|
1319 |
})
|
1320 |
|
1321 |
success = len(incorrect_titles) == 0
|
1322 |
|
1323 |
+
return DocumentCheckResult(
|
1324 |
+
success=success,
|
1325 |
+
issues=incorrect_titles,
|
1326 |
+
details={
|
1327 |
+
'document_type': doc_type,
|
1328 |
+
'formatting_rule': 'italics' if use_italics else 'quotes'
|
1329 |
+
}
|
1330 |
+
)
|
1331 |
|
1332 |
@profile_performance
|
1333 |
def double_period_check(self, doc: List[str]) -> DocumentCheckResult:
|
|
|
2376 |
'before': 'The operator must ensure that all required maintenance procedures are performed in accordance with the manufacturer\'s specifications and that proper documentation is maintained throughout the entire process to demonstrate compliance with regulatory requirements.',
|
2377 |
'after': 'The operator must ensure all required maintenance procedures are performed according to manufacturer specifications. Additionally, proper documentation must be maintained to demonstrate regulatory compliance.'
|
2378 |
}
|
2379 |
+
},
|
2380 |
+
'document_title_check': {
|
2381 |
+
'title': 'Referenced Document Title Format Issues',
|
2382 |
+
'description': 'Checks document title formatting based on document type. Advisory Circulars require italics without quotes, while all other document types require quotes without italics.',
|
2383 |
+
'solution': 'Format document titles according to document type: use italics for Advisory Circulars, quotes for all other document types.',
|
2384 |
+
'example_fix': {
|
2385 |
+
'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
|
2386 |
+
'after': 'See AC 25.1309-1B, <i>System Design and Analysis</i>, for information on X.'
|
2387 |
+
}
|
2388 |
}
|
2389 |
}
|
2390 |
|
|
|
2648 |
"quotes_only": {
|
2649 |
"types": [
|
2650 |
"Airworthiness Criteria", "Deviation Memo", "Exemption",
|
2651 |
+
"Federal Register Notice", "Order", "Policy Statement", "Rule",
|
2652 |
+
"Special Condition", "Technical Standard Order", "Other"
|
2653 |
],
|
2654 |
"italics": False,
|
2655 |
"quotes": True,
|
2656 |
"description": "For this document type, referenced document titles should be in quotes without italics.",
|
2657 |
"example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2658 |
}
|
2659 |
}
|
2660 |
|
|
|
2665 |
format_group = rules
|
2666 |
break
|
2667 |
|
2668 |
+
# Use quotes_only as default if document type not found
|
2669 |
if not format_group:
|
2670 |
+
format_group = formatting_rules["quotes_only"]
|
2671 |
+
|
2672 |
+
# Update document title check category based on document type
|
2673 |
+
if doc_type == "Advisory Circular":
|
2674 |
+
self.issue_categories['document_title_check'] = {
|
2675 |
+
'title': 'Referenced Document Title Format Issues',
|
2676 |
+
'description': 'For Advisory Circulars, all referenced document titles must be italicized.',
|
2677 |
+
'solution': 'Format document titles using italics for Advisory Circulars.',
|
2678 |
+
'example_fix': {
|
2679 |
+
'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
|
2680 |
+
'after': 'See AC 25.1309-1B, <i>System Design and Analysis</i>, for information on X.'
|
2681 |
+
}
|
2682 |
+
}
|
2683 |
+
else:
|
2684 |
+
self.issue_categories['document_title_check'] = {
|
2685 |
+
'title': 'Referenced Document Title Format Issues',
|
2686 |
+
'description': f'For {doc_type}s, all referenced document titles must be enclosed in quotation marks.',
|
2687 |
+
'solution': 'Format document titles using quotation marks.',
|
2688 |
+
'example_fix': {
|
2689 |
+
'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
|
2690 |
+
'after': 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
|
2691 |
+
}
|
2692 |
}
|
|
|
2693 |
|
2694 |
output = []
|
2695 |
|