Hoctar77 committed on
Commit
e546bb7
·
verified ·
1 Parent(s): be09779

Cleaned up code

Browse files
Files changed (1) hide show
  1. app.py +205 -556
app.py CHANGED
@@ -42,20 +42,19 @@ DOCUMENT_TYPES = [
42
 
43
  TEMPLATE_TYPES = ["Short AC template AC", "Long AC template AC"]
44
 
45
- # Heading Word Constants
46
  HEADING_WORDS = {
47
- 'INFORMATION', 'GENERAL', 'SUMMARY', 'INTRODUCTION', 'BACKGROUND',
48
- 'DISCUSSION', 'CONCLUSION', 'APPENDIX', 'CHAPTER', 'SECTION',
49
- 'PURPOSE', 'APPLICABILITY', 'CANCELLATION', 'DEFINITION', 'REQUIREMENTS',
50
- 'AUTHORITY', 'POLICY', 'SCOPE', 'RELATED', 'MATERIAL', 'DISTRIBUTION',
51
- 'EXPLANATION', 'PROCEDURES', 'NOTE', 'WARNING', 'CAUTION', 'EXCEPTION',
52
- 'GROUPS', 'PARTS', 'TABLE', 'FIGURE', 'REFERENCES', 'DEFINITIONS', 'DEPARTMENT',
53
- 'INSERT'
54
  }
55
 
56
  PREDEFINED_ACRONYMS = {
57
- 'CFR', 'U.S.', 'USA', 'US', 'U.S.C.', 'e.g.', 'i.e.', 'FAQ', 'No.', 'ZIP', 'PDF', 'SSN',
58
- 'DC', 'MD', 'MA', 'WA', 'TX', 'MO', 'FAA IR-M', 'DOT', 'AGC', 'AIR'
 
59
  }
60
 
61
  # Configuration Constants
@@ -463,19 +462,19 @@ class DocumentCheckerConfig:
463
  ),
464
  PatternConfig(
465
  pattern=r'\bUSC\b',
466
- description="USC should be U.S.C.",
467
  is_error=True,
468
  replacement="U.S.C."
469
  ),
470
  PatternConfig(
471
  pattern=r'\bCFR Part\b',
472
- description="CFR Part should be CFR part",
473
  is_error=True,
474
  replacement="CFR part"
475
  ),
476
  PatternConfig(
477
  pattern=r'\bC\.F\.R\.\b',
478
- description="C.F.R. should be CFR",
479
  is_error=True,
480
  replacement="CFR"
481
  ),
@@ -493,79 +492,79 @@ class DocumentCheckerConfig:
493
  ),
494
  PatternConfig(
495
  pattern=r'\bcancelled\b',
496
- description="'cancelled' should be 'canceled'",
497
  is_error=True,
498
  replacement="canceled"
499
  ),
500
  PatternConfig(
501
  pattern=r'\bshall\b',
502
- description="'shall' should be 'must'",
503
  is_error=True,
504
  replacement="must"
505
  ),
506
  PatternConfig(
507
  pattern=r'\b\&\b',
508
- description="'&' should be 'and'",
509
  is_error=True,
510
  replacement="and"
511
  ),
512
  PatternConfig(
513
  pattern=r'\bflight crew\b',
514
- description="'flight crew' should be 'flightcrew'",
515
  is_error=True,
516
  replacement="flightcrew"
517
  ),
518
  PatternConfig(
519
  pattern=r'\bchairman\b',
520
- description="'chairman' should be 'chair'",
521
  is_error=True,
522
  replacement="chair"
523
  ),
524
  PatternConfig(
525
  pattern=r'\bflagman\b',
526
- description="'flagman' should be 'flagger' or 'flagperson'",
527
  is_error=True,
528
  replacement="flagperson"
529
  ),
530
  PatternConfig(
531
  pattern=r'\bman\b',
532
- description="'man' should be 'individual' or 'person'",
533
  is_error=True,
534
  replacement="person"
535
  ),
536
  PatternConfig(
537
  pattern=r'\bmanmade\b',
538
- description="'manmade' should be 'personmade'",
539
  is_error=True,
540
  replacement="personmade"
541
  ),
542
  PatternConfig(
543
  pattern=r'\bmanpower\b',
544
- description="'manpower' should be 'labor force'",
545
  is_error=True,
546
  replacement="labor force"
547
  ),
548
  PatternConfig(
549
  pattern=r'\bnotice to airman\b',
550
- description="'notice to airman' should be 'notice to air missions'",
551
  is_error=True,
552
  replacement="notice to air missions"
553
  ),
554
  PatternConfig(
555
  pattern=r'\bnotice to airmen\b',
556
- description="'notice to airmen' should be 'notice to air missions'",
557
  is_error=True,
558
  replacement="notice to air missions"
559
  ),
560
  PatternConfig(
561
  pattern=r'\bcockpit\b',
562
- description="'cockpit' should be 'flight deck'",
563
  is_error=True,
564
  replacement="flight deck"
565
  ),
566
  PatternConfig(
567
  pattern=r'\bA321 neo\b',
568
- description="'A321 neo' should be 'A321neo'",
569
  is_error=True,
570
  replacement="A321neo"
571
  )
@@ -573,7 +572,7 @@ class DocumentCheckerConfig:
573
  'section_symbol': [
574
  PatternConfig(
575
  pattern=r'^§',
576
- description="Sentence should not start with section symbol",
577
  is_error=True
578
  ),
579
  PatternConfig(
@@ -599,28 +598,23 @@ class DocumentCheckerConfig:
599
  ],
600
  'spacing': [
601
  PatternConfig(
602
- pattern=r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)',
603
- description="Missing space between document type and number",
604
  is_error=True
605
  ),
606
  PatternConfig(
607
- pattern=r'(?<!\s)(§|§§)(\d+\.\d+)',
608
- description="Missing space after section symbol (§)",
609
  is_error=True
610
  ),
611
  PatternConfig(
612
- pattern=r'(?<!\s)Part(\d+)',
613
- description="Missing space between 'Part' and number",
614
  is_error=True
615
  ),
616
  PatternConfig(
617
- pattern=r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))',
618
- description="Missing space before paragraph indication",
619
- is_error=True
620
- ),
621
- PatternConfig(
622
- pattern=r'\s{2,}',
623
- description="Double spaces between words",
624
  is_error=True
625
  )
626
  ],
@@ -1078,16 +1072,14 @@ class FAADocumentChecker(DocumentChecker):
1078
 
1079
  @profile_performance
1080
  def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
1081
- """
1082
- Check document terminology and output only unique sentences needing correction.
1083
- """
1084
  if not self.validate_input(doc):
1085
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1086
 
1087
  terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
1088
  prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
1089
 
1090
- sentence_issues = {}
1091
 
1092
  # Process each sentence
1093
  for paragraph in doc:
@@ -1097,46 +1089,27 @@ class FAADocumentChecker(DocumentChecker):
1097
  if not sentence:
1098
  continue
1099
 
1100
- current_sentence_issues = []
1101
-
1102
  for pattern_config in terminology_patterns:
1103
  matches = list(re.finditer(pattern_config.pattern, sentence))
1104
  for match in matches:
1105
- current_sentence_issues.append({
1106
- 'incorrect_term': match.group(),
1107
- 'correct_term': pattern_config.replacement,
1108
- 'description': pattern_config.description,
1109
- 'sentence': sentence
1110
- })
1111
 
 
1112
  for pattern_config in prohibited_patterns:
1113
  if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
1114
- current_sentence_issues.append({
1115
- 'description': pattern_config.description,
1116
- 'sentence': sentence
1117
- })
1118
-
1119
- if current_sentence_issues:
1120
- if sentence not in sentence_issues:
1121
- sentence_issues[sentence] = current_sentence_issues
1122
- else:
1123
- sentence_issues[sentence].extend(current_sentence_issues)
1124
 
1125
- # Compile unique issues
1126
- unique_issues = []
1127
- for sentence, sentence_issue_list in sentence_issues.items():
1128
- replacements = []
1129
- for issue in sentence_issue_list:
1130
- if 'incorrect_term' in issue and issue.get('correct_term'):
1131
- replacements.append(f"'{issue['incorrect_term']}' with '{issue['correct_term']}'")
1132
-
1133
- replacement_text = "; ".join(replacements)
1134
- formatted_issue = {
1135
- 'sentence': f"{sentence} (Replace {replacement_text})" if replacements else sentence
1136
- }
1137
- unique_issues.append(formatted_issue)
1138
 
1139
- return DocumentCheckResult(success=not unique_issues, issues=unique_issues)
1140
 
1141
  @profile_performance
1142
  def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
@@ -1168,7 +1141,8 @@ class FAADocumentChecker(DocumentChecker):
1168
  matches = compiled_pattern.finditer(sentence)
1169
  for match in matches:
1170
  incorrect = match.group()
1171
- correct = incorrect.replace('§', 'Section')
 
1172
  issues.append({
1173
  'incorrect': incorrect,
1174
  'correct': correct
@@ -1402,57 +1376,65 @@ class FAADocumentChecker(DocumentChecker):
1402
  if not self.validate_input(doc):
1403
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1404
 
1405
- # Get patterns from registry
1406
- spacing_patterns = self.config_manager.pattern_registry.get('spacing', [])
1407
-
1408
- # Initialize issue groups
1409
- issue_groups = {
1410
- 'document_type_spacing': [], # AC25.25, FAA123, etc.
1411
- 'section_symbol_spacing': [], # §25.25
1412
- 'part_number_spacing': [], # Part25
1413
- 'paragraph_spacing': [], # text(a) or text(1)
1414
- 'double_space': [] # Multiple spaces between words
1415
- }
1416
-
1417
- # Define descriptions for each issue type
1418
- category_descriptions = {
1419
- 'document_type_spacing': 'Missing space between document type and number',
1420
- 'section_symbol_spacing': 'Missing space after section symbol',
1421
- 'part_number_spacing': 'Missing space between Part and number',
1422
- 'paragraph_spacing': 'Missing space before paragraph indication',
1423
- 'double_space': 'Multiple spaces between words'
1424
- }
1425
 
1426
- # Pattern mapping for categorization
1427
- pattern_categories = {
1428
- r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)': ('document_type_spacing', issue_groups['document_type_spacing']),
1429
- r'(?<!\s)(§|§§)(\d+\.\d+)': ('section_symbol_spacing', issue_groups['section_symbol_spacing']),
1430
- r'(?<!\s)Part(\d+)': ('part_number_spacing', issue_groups['part_number_spacing']),
1431
- r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))': ('paragraph_spacing', issue_groups['paragraph_spacing']),
1432
- r'\s{2,}': ('double_space', issue_groups['double_space'])
1433
- }
1434
-
1435
- # Use _process_sentences helper instead of manual sentence processing
1436
- for sentence, paragraph in self._process_sentences(doc, skip_empty=True, skip_headings=False):
1437
- for pattern_config in spacing_patterns:
1438
- compiled_pattern = re.compile(pattern_config.pattern)
1439
 
1440
- # Find the corresponding category for this pattern
1441
- for pattern_key, (category_name, category_list) in pattern_categories.items():
1442
- if pattern_config.pattern == pattern_key:
1443
- matches = compiled_pattern.finditer(sentence)
1444
- for match in matches:
1445
- category_list.append({
1446
- 'text': match.group(),
1447
- 'sentence': sentence.strip(),
1448
- 'description': pattern_config.description
1449
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
 
1451
- # Use the helper to compile issues
1452
- issues = self._compile_issues(issue_groups, category_descriptions)
1453
-
1454
  return DocumentCheckResult(success=len(issues) == 0, issues=issues)
1455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1456
  @profile_performance
1457
  def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
1458
  """Check for abbreviation consistency after first definition."""
@@ -1651,12 +1633,13 @@ class FAADocumentChecker(DocumentChecker):
1651
  details={'message': f'No patterns defined for {pattern_category}'}
1652
  )
1653
 
1654
- # Use custom processing function if provided, otherwise use default
1655
  if process_func:
1656
  return process_func(doc, patterns)
1657
 
1658
- # Default processing
1659
- issues = []
 
1660
  for paragraph in doc:
1661
  sentences = re.split(r'(?<=[.!?])\s+', paragraph)
1662
  for sentence in sentences:
@@ -1667,14 +1650,25 @@ class FAADocumentChecker(DocumentChecker):
1667
  for pattern_config in patterns:
1668
  matches = list(re.finditer(pattern_config.pattern, sentence))
1669
  if matches:
1670
- issues.append({
1671
- 'pattern': pattern_config.pattern,
1672
- 'description': pattern_config.description,
1673
- 'sentence': sentence,
1674
- 'matches': [m.group() for m in matches]
1675
- })
 
1676
 
1677
- return DocumentCheckResult(success=len(issues) == 0, issues=issues)
 
 
 
 
 
 
 
 
 
 
1678
 
1679
  def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
1680
  """
@@ -1803,55 +1797,14 @@ class FAADocumentChecker(DocumentChecker):
1803
 
1804
  return sentences
1805
 
1806
- @profile_performance
1807
- def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
1808
- """Check document terminology and output only unique term replacements needed."""
1809
- if not self.validate_input(doc):
1810
- return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1811
-
1812
- terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
1813
- prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
1814
-
1815
- unique_issues = set() # Using a set to avoid duplicate replacements
1816
-
1817
- # Process each sentence
1818
- for paragraph in doc:
1819
- sentences = re.split(r'(?<=[.!?])\s+', paragraph)
1820
- for sentence in sentences:
1821
- sentence = sentence.strip()
1822
- if not sentence:
1823
- continue
1824
-
1825
- # Check terminology patterns
1826
- for pattern_config in terminology_patterns:
1827
- matches = list(re.finditer(pattern_config.pattern, sentence))
1828
- for match in matches:
1829
- if pattern_config.replacement: # Only if there's a replacement term
1830
- unique_issues.add((match.group(), pattern_config.replacement))
1831
-
1832
- # Check prohibited patterns
1833
- for pattern_config in prohibited_patterns:
1834
- if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
1835
- if pattern_config.replacement: # Only if there's a replacement term
1836
- match_text = re.search(pattern_config.pattern, sentence, re.IGNORECASE).group()
1837
- unique_issues.add((match_text, pattern_config.replacement))
1838
-
1839
- # Format issues as simple replacement instructions
1840
- formatted_issues = [
1841
- {'incorrect_term': incorrect, 'correct_term': correct}
1842
- for incorrect, correct in sorted(unique_issues) # Sort for consistent output
1843
- ]
1844
-
1845
- return DocumentCheckResult(success=not formatted_issues, issues=formatted_issues)
1846
-
1847
  @profile_performance
1848
  def check_parentheses(self, doc: List[str]) -> DocumentCheckResult:
1849
  """
1850
  Check for matching parentheses in the document.
1851
-
1852
  Args:
1853
  doc (List[str]): List of document paragraphs
1854
-
1855
  Returns:
1856
  DocumentCheckResult: Result containing any mismatched parentheses issues
1857
  """
@@ -1861,35 +1814,37 @@ class FAADocumentChecker(DocumentChecker):
1861
  issues = []
1862
 
1863
  for i, paragraph in enumerate(doc, 1):
1864
- # Skip empty paragraphs
1865
- if not paragraph.strip():
1866
  continue
1867
-
1868
- stack = []
1869
- for j, char in enumerate(paragraph):
1870
- if char == '(':
1871
- stack.append((i, j)) # Store paragraph and character position
1872
- elif char == ')':
1873
- if not stack: # No matching opening parenthesis
1874
- issues.append({
1875
- 'type': 'missing_opening',
1876
- 'paragraph': i,
1877
- 'position': j,
1878
- 'text': paragraph,
1879
- 'message': f"Add an opening parenthesis before '{paragraph[max(0, j-20):min(len(paragraph), j+20)]}'"
1880
- })
1881
- else:
1882
- stack.pop() # Remove matching pair
1883
 
1884
- # Check for remaining opening parentheses
1885
- while stack:
1886
- para_num, pos = stack.pop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1887
  issues.append({
1888
  'type': 'missing_closing',
1889
- 'paragraph': para_num,
1890
  'position': pos,
1891
- 'text': paragraph,
1892
- 'message': f"Add a closing parenthesis after '{paragraph[max(0, pos-20):min(len(paragraph), pos+20)]}'"
1893
  })
1894
 
1895
  return DocumentCheckResult(success=len(issues) == 0, issues=issues)
@@ -1900,57 +1855,52 @@ class FAADocumentChecker(DocumentChecker):
1900
  if not self.validate_input(doc):
1901
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1902
 
1903
- # Get patterns from registry
1904
  spacing_patterns = self.config_manager.pattern_registry.get('spacing', [])
 
1905
 
1906
- # Initialize issue groups
1907
- issue_groups = {
1908
- 'document_type_spacing': [], # AC25.25, FAA123, etc.
1909
- 'section_symbol_spacing': [], # §25.25
1910
- 'part_number_spacing': [], # Part25
1911
- 'paragraph_spacing': [], # text(a) or text(1)
1912
- 'double_space': [] # Multiple spaces between words
1913
- }
1914
-
1915
- # Define descriptions for each issue type
1916
- category_descriptions = {
1917
- 'document_type_spacing': 'Missing space between document type and number',
1918
- 'section_symbol_spacing': 'Missing space after section symbol',
1919
- 'part_number_spacing': 'Missing space between Part and number',
1920
- 'paragraph_spacing': 'Missing space before paragraph indication',
1921
- 'double_space': 'Multiple spaces between words'
1922
- }
1923
-
1924
- # Pattern mapping for categorization
1925
- pattern_categories = {
1926
- r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)': ('document_type_spacing', issue_groups['document_type_spacing']),
1927
- r'(?<!\s)(§|§§)(\d+\.\d+)': ('section_symbol_spacing', issue_groups['section_symbol_spacing']),
1928
- r'(?<!\s)Part(\d+)': ('part_number_spacing', issue_groups['part_number_spacing']),
1929
- r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))': ('paragraph_spacing', issue_groups['paragraph_spacing']),
1930
- r'\s{2,}': ('double_space', issue_groups['double_space'])
1931
- }
1932
 
1933
- # Use _process_sentences helper instead of manual sentence processing
1934
- for sentence, paragraph in self._process_sentences(doc, skip_empty=True, skip_headings=False):
1935
- for pattern_config in spacing_patterns:
1936
- compiled_pattern = re.compile(pattern_config.pattern)
1937
-
1938
- # Find the corresponding category for this pattern
1939
- for pattern_key, (category_name, category_list) in pattern_categories.items():
1940
- if pattern_config.pattern == pattern_key:
1941
- matches = compiled_pattern.finditer(sentence)
1942
- for match in matches:
1943
- category_list.append({
1944
- 'text': match.group(),
1945
- 'sentence': sentence.strip(),
1946
- 'description': pattern_config.description
1947
- })
 
 
 
 
 
1948
 
1949
- # Use the helper to compile issues
1950
- issues = self._compile_issues(issue_groups, category_descriptions)
1951
-
1952
  return DocumentCheckResult(success=len(issues) == 0, issues=issues)
1953
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1954
  @profile_performance
1955
  def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
1956
  """Check for abbreviation consistency after first definition."""
@@ -1992,303 +1942,6 @@ class FAADocumentChecker(DocumentChecker):
1992
  success = len(inconsistent_uses) == 0
1993
  return DocumentCheckResult(success=success, issues=inconsistent_uses)
1994
 
1995
- @profile_performance
1996
- def check_placeholders(self, doc: List[str]) -> DocumentCheckResult:
1997
- """Check for placeholders that should be removed."""
1998
- def process_placeholders(doc: List[str], patterns: List[PatternConfig]) -> DocumentCheckResult:
1999
- tbd_placeholders = []
2000
- to_be_determined_placeholders = []
2001
- to_be_added_placeholders = []
2002
-
2003
- pattern_categories = {
2004
- r'\bTBD\b': ('tbd', tbd_placeholders),
2005
- r'\bTo be determined\b': ('to_be_determined', to_be_determined_placeholders),
2006
- r'\bTo be added\b': ('to_be_added', to_be_added_placeholders)
2007
- }
2008
-
2009
- # Use _process_sentences helper
2010
- for sentence, paragraph in self._process_sentences(doc, skip_empty=True, skip_headings=True):
2011
- for pattern_config in patterns:
2012
- compiled_pattern = re.compile(pattern_config.pattern, re.IGNORECASE)
2013
-
2014
- for pattern_key, (category_name, category_list) in pattern_categories.items():
2015
- if pattern_config.pattern == pattern_key:
2016
- matches = compiled_pattern.finditer(sentence)
2017
- for match in matches:
2018
- category_list.append({
2019
- 'placeholder': match.group().strip(),
2020
- 'sentence': sentence.strip(),
2021
- 'description': pattern_config.description
2022
- })
2023
-
2024
- # Compile issues
2025
- issues = []
2026
- if tbd_placeholders:
2027
- issues.append({
2028
- 'issue_type': 'tbd_placeholder',
2029
- 'description': 'Remove TBD placeholder',
2030
- 'occurrences': tbd_placeholders
2031
- })
2032
-
2033
- if to_be_determined_placeholders:
2034
- issues.append({
2035
- 'issue_type': 'to_be_determined_placeholder',
2036
- 'description': "Remove 'To be determined' placeholder",
2037
- 'occurrences': to_be_determined_placeholders
2038
- })
2039
-
2040
- if to_be_added_placeholders:
2041
- issues.append({
2042
- 'issue_type': 'to_be_added_placeholder',
2043
- 'description': "Remove 'To be added' placeholder",
2044
- 'occurrences': to_be_added_placeholders
2045
- })
2046
-
2047
- details = {
2048
- 'total_placeholders': len(tbd_placeholders) +
2049
- len(to_be_determined_placeholders) +
2050
- len(to_be_added_placeholders),
2051
- 'placeholder_types': {
2052
- 'TBD': len(tbd_placeholders),
2053
- 'To be determined': len(to_be_determined_placeholders),
2054
- 'To be added': len(to_be_added_placeholders)
2055
- }
2056
- }
2057
-
2058
- return DocumentCheckResult(success=len(issues) == 0, issues=issues, details=details)
2059
-
2060
- return self._process_patterns(doc, 'placeholders', process_placeholders)
2061
-
2062
- @profile_performance
2063
- def _process_patterns(
2064
- self,
2065
- doc: List[str],
2066
- pattern_category: str,
2067
- process_func: Optional[Callable] = None
2068
- ) -> DocumentCheckResult:
2069
- """
2070
- Process document text against patterns from a specific category.
2071
-
2072
- Args:
2073
- doc: List of document paragraphs
2074
- pattern_category: Category of patterns to check against
2075
- process_func: Optional custom processing function
2076
-
2077
- Returns:
2078
- DocumentCheckResult with processed issues
2079
- """
2080
- if not self.validate_input(doc):
2081
- self.logger.error("Invalid document input for pattern check")
2082
- return DocumentCheckResult(
2083
- success=False,
2084
- issues=[{'error': 'Invalid document input'}]
2085
- )
2086
-
2087
- # Get patterns from registry
2088
- patterns = self.config_manager.pattern_registry.get(pattern_category, [])
2089
- if not patterns:
2090
- self.logger.warning(f"No patterns found for category: {pattern_category}")
2091
- return DocumentCheckResult(
2092
- success=True,
2093
- issues=[],
2094
- details={'message': f'No patterns defined for {pattern_category}'}
2095
- )
2096
-
2097
- # Use custom processing function if provided, otherwise use default
2098
- if process_func:
2099
- return process_func(doc, patterns)
2100
-
2101
- # Default processing
2102
- issues = []
2103
- for paragraph in doc:
2104
- sentences = re.split(r'(?<=[.!?])\s+', paragraph)
2105
- for sentence in sentences:
2106
- sentence = sentence.strip()
2107
- if not sentence:
2108
- continue
2109
-
2110
- for pattern_config in patterns:
2111
- matches = list(re.finditer(pattern_config.pattern, sentence))
2112
- if matches:
2113
- issues.append({
2114
- 'pattern': pattern_config.pattern,
2115
- 'description': pattern_config.description,
2116
- 'sentence': sentence,
2117
- 'matches': [m.group() for m in matches]
2118
- })
2119
-
2120
- return DocumentCheckResult(success=len(issues) == 0, issues=issues)
2121
-
2122
- def _format_colored_text(self, text: str, color: str) -> str:
2123
- """Helper method to format colored text with reset.
2124
-
2125
- Args:
2126
- text: The text to be colored
2127
- color: The color to apply (from colorama.Fore)
2128
-
2129
- Returns:
2130
- str: The colored text with reset styling
2131
- """
2132
- return f"{color}{text}{Style.RESET_ALL}"
2133
-
2134
- def _format_example(self, example_fix: Dict[str, str]) -> List[str]:
2135
- """Format example fixes consistently.
2136
-
2137
- Args:
2138
- example_fix: Dictionary containing 'before' and 'after' examples
2139
-
2140
- Returns:
2141
- List[str]: Formatted example lines
2142
- """
2143
- return [
2144
- f" ❌ Incorrect: {example_fix['before']}",
2145
- f" ✓ Correct: {example_fix['after']}"
2146
- ]
2147
-
2148
- def _format_heading_issues(self, result: DocumentCheckResult, doc_type: str) -> List[str]:
2149
- """Format heading check issues consistently."""
2150
- output = []
2151
-
2152
- for issue in result.issues:
2153
- if issue.get('type') == 'missing_headings':
2154
- missing = sorted(issue['missing'])
2155
- output.append(f"\n Missing Required Headings for {doc_type}:")
2156
- for heading in missing:
2157
- output.append(f" • {heading}")
2158
- elif issue.get('type') == 'unexpected_headings':
2159
- unexpected = sorted(issue['unexpected'])
2160
- output.append(f"\n Unexpected Headings Found:")
2161
- for heading in unexpected:
2162
- output.append(f" • {heading}")
2163
-
2164
- return output
2165
-
2166
- def _format_period_issues(self, result: DocumentCheckResult) -> List[str]:
2167
- """Format period check issues consistently."""
2168
- output = []
2169
-
2170
- if result.issues:
2171
- output.append(f"\n Heading Period Format Issues:")
2172
- for issue in result.issues:
2173
- if 'message' in issue:
2174
- output.append(f" • {issue['message']}")
2175
-
2176
- return output
2177
-
2178
- def _format_caption_issues(self, issues: List[Dict], doc_type: str) -> List[str]:
2179
- """Format caption check issues with clear replacement instructions."""
2180
- formatted_issues = []
2181
- for issue in issues:
2182
- if 'incorrect_caption' in issue:
2183
- caption_parts = issue['incorrect_caption'].split()
2184
- if len(caption_parts) >= 2:
2185
- caption_type = caption_parts[0] # "Table" or "Figure"
2186
- number = caption_parts[1]
2187
-
2188
- # Determine correct format based on document type
2189
- if doc_type in ["Advisory Circular", "Order"]:
2190
- if '-' not in number:
2191
- correct_format = f"{caption_type} {number}-1"
2192
- else:
2193
- if '-' in number:
2194
- correct_format = f"{caption_type} {number.split('-')[0]}"
2195
- else:
2196
- correct_format = issue['incorrect_caption']
2197
-
2198
- formatted_issues.append(
2199
- f" • Replace '{issue['incorrect_caption']}' with '{correct_format}'"
2200
- )
2201
-
2202
- return formatted_issues
2203
-
2204
- def _format_reference_issues(self, result: DocumentCheckResult) -> List[str]:
2205
- """Format reference-related issues with clear replacement instructions."""
2206
- output = []
2207
-
2208
- if result.issues:
2209
- for issue in result.issues:
2210
- if 'reference' in issue and 'correct_form' in issue:
2211
- output.append(f" • Replace '{issue['reference']}' with '{issue['correct_form']}'")
2212
-
2213
- return output
2214
-
2215
- def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
2216
- """Format standard issues consistently."""
2217
- if isinstance(issue, str):
2218
- return f" • {issue}"
2219
-
2220
- if 'incorrect' in issue and 'correct' in issue:
2221
- return f" • Replace '{issue['incorrect']}' with '{issue['correct']}'"
2222
-
2223
- if 'incorrect_term' in issue and 'correct_term' in issue:
2224
- return f" • Replace '{issue['incorrect_term']}' with '{issue['correct_term']}'"
2225
-
2226
- if 'sentence' in issue:
2227
- return f" • {issue['sentence']}"
2228
-
2229
- if 'description' in issue:
2230
- return f" • {issue['description']}"
2231
-
2232
- # Fallback for other issue formats
2233
- return f" • {str(issue)}"
2234
-
2235
- def _format_unused_acronym_issues(self, result: DocumentCheckResult) -> List[str]:
2236
- """Format unused acronym issues with a simple, clear message.
2237
-
2238
- Args:
2239
- result: DocumentCheckResult containing acronym issues
2240
-
2241
- Returns:
2242
- List[str]: Formatted list of unused acronym issues
2243
- """
2244
- formatted_issues = []
2245
-
2246
- if result.issues:
2247
- for issue in result.issues:
2248
- if isinstance(issue, dict) and 'acronym' in issue:
2249
- formatted_issues.append(f" • Acronym '{issue['acronym']}' was defined but never used.")
2250
- elif isinstance(issue, str):
2251
- # Handle case where issue might be just the acronym
2252
- formatted_issues.append(f" • Acronym '{issue}' was defined but never used.")
2253
-
2254
- return formatted_issues
2255
-
2256
- def _format_parentheses_issues(self, result: DocumentCheckResult) -> List[str]:
2257
- """Format parentheses issues with clear instructions for fixing."""
2258
- formatted_issues = []
2259
-
2260
- if result.issues:
2261
- for issue in result.issues:
2262
- if issue['type'] == 'missing_opening':
2263
- formatted_issues.append(
2264
- f" • Paragraph {issue['paragraph']}: {issue['message']}"
2265
- )
2266
- elif issue['type'] == 'missing_closing':
2267
- formatted_issues.append(
2268
- f" • Paragraph {issue['paragraph']}: {issue['message']}"
2269
- )
2270
-
2271
- return formatted_issues
2272
-
2273
- def _format_section_symbol_issues(self, result: DocumentCheckResult) -> List[str]:
2274
- """Format section symbol issues with clear replacement instructions."""
2275
- formatted_issues = []
2276
-
2277
- if result.issues:
2278
- for issue in result.issues:
2279
- if 'incorrect' in issue and 'correct' in issue:
2280
- if issue.get('is_sentence_start'):
2281
- formatted_issues.append(
2282
- f" • Do not begin sentences with the section symbol. "
2283
- f"Replace '{issue['incorrect']}' with '{issue['correct']}' at the start of the sentence"
2284
- )
2285
- else:
2286
- formatted_issues.append(
2287
- f" • Replace '{issue['incorrect']}' with '{issue['correct']}'"
2288
- )
2289
-
2290
- return formatted_issues
2291
-
2292
  class DocumentCheckResultsFormatter:
2293
  """Formats document check results in a user-friendly way with detailed examples and fixes."""
2294
 
@@ -2375,7 +2028,7 @@ class DocumentCheckResultsFormatter:
2375
  'description': 'Analyzes document spacing patterns to ensure compliance with FAA formatting standards. This includes checking for proper spacing around regulatory references (like "AC 25-1" not "AC25-1"), section symbols (§ 25.1), paragraph references, and multiple spaces between words.',
2376
  'solution': 'Fix spacing issues: remove any missing spaces, double spaces, or inadvertent tabs.',
2377
  'example_fix': {
2378
- 'before': 'AC25.25 states that SFAR88 and §25.981 require... (note double space before SFAR88)',
2379
  'after': 'AC 25.25 states that SFAR 88 and § 25.981 require...'
2380
  }
2381
  },
@@ -2384,8 +2037,8 @@ class DocumentCheckResultsFormatter:
2384
  'description': 'Examines all date references in your document. The check automatically excludes technical reference numbers that may look like dates to ensure accurate validation of true date references. Note, though, there might be instances in the heading of the document where the date is formatted as "MM/DD/YYYY", which is acceptable. This applies mostly to date formats within the document body.',
2385
  'solution': 'Use the format "Month Day, Year" where appropriate.',
2386
  'example_fix': {
2387
- 'before': '01/15/2024 | 2024-01-15 | 15 January 2024 | January 15th, 2024',
2388
- 'after': 'January 15, 2024'
2389
  }
2390
  },
2391
  'placeholders_check': {
@@ -2393,8 +2046,8 @@ class DocumentCheckResultsFormatter:
2393
  'description': 'Identifies incomplete content and temporary placeholders that must be finalized before document publication. This includes common placeholder text (like "TBD" or "To be determined"), draft markers, and incomplete sections.',
2394
  'solution': 'Replace all placeholder content with actual content',
2395
  'example_fix': {
2396
- 'before': 'TBD | To be determined | [Insert text] | [Pending review] | To be added',
2397
- 'after': 'Actual, specific content relevant to the section\'s purpose'
2398
  }
2399
  },
2400
  'parentheses_check': {
@@ -2451,7 +2104,7 @@ class DocumentCheckResultsFormatter:
2451
  for heading in unexpected:
2452
  output.append(f" • {heading}")
2453
 
2454
- return output
2455
 
2456
  def _format_period_issues(self, result: DocumentCheckResult) -> List[str]:
2457
  """Format period check issues consistently."""
@@ -2507,6 +2160,9 @@ class DocumentCheckResultsFormatter:
2507
  if isinstance(issue, str):
2508
  return f" • {issue}"
2509
 
 
 
 
2510
  if 'incorrect_term' in issue and 'correct_term' in issue:
2511
  return f" • Replace '{issue['incorrect_term']}' with '{issue['correct_term']}'"
2512
 
@@ -2546,14 +2202,7 @@ class DocumentCheckResultsFormatter:
2546
 
2547
  if result.issues:
2548
  for issue in result.issues:
2549
- if issue['type'] == 'missing_opening':
2550
- formatted_issues.append(
2551
- f" • Paragraph {issue['paragraph']}: {issue['message']}"
2552
- )
2553
- elif issue['type'] == 'missing_closing':
2554
- formatted_issues.append(
2555
- f" • Paragraph {issue['paragraph']}: {issue['message']}"
2556
- )
2557
 
2558
  return formatted_issues
2559
 
 
42
 
43
  TEMPLATE_TYPES = ["Short AC template AC", "Long AC template AC"]
44
 
 
45
  HEADING_WORDS = {
46
+ 'APPLICABILITY', 'APPENDIX', 'AUTHORITY', 'BACKGROUND', 'CANCELLATION', 'CAUTION',
47
+ 'CHAPTER', 'CONCLUSION', 'DEPARTMENT', 'DEFINITION', 'DEFINITIONS', 'DISCUSSION',
48
+ 'DISTRIBUTION', 'EXCEPTION', 'EXPLANATION', 'FIGURE', 'GENERAL', 'GROUPS',
49
+ 'INFORMATION', 'INSERT', 'INTRODUCTION', 'MATERIAL', 'NOTE', 'PARTS', 'PAST',
50
+ 'POLICY', 'PRACTICE', 'PROCEDURES', 'PURPOSE', 'RELEVANT', 'RELATED',
51
+ 'REQUIREMENTS', 'SCOPE', 'SECTION', 'SUMMARY', 'TABLE', 'WARNING'
 
52
  }
53
 
54
  PREDEFINED_ACRONYMS = {
55
+ 'AGC', 'AIR', 'CFR', 'DC', 'DOT', 'FAA IR-M', 'FAQ', 'i.e.', 'e.g.', 'MA',
56
+ 'MD', 'MIL', 'MO', 'No.', 'PDF', 'SSN', 'TX', 'U.S.', 'U.S.C.', 'USA', 'US',
57
+ 'WA', 'ZIP'
58
  }
59
 
60
  # Configuration Constants
 
462
  ),
463
  PatternConfig(
464
  pattern=r'\bUSC\b',
465
+ description="USC should be U.S.C.", # Per GPO Style Manual
466
  is_error=True,
467
  replacement="U.S.C."
468
  ),
469
  PatternConfig(
470
  pattern=r'\bCFR Part\b',
471
+ description="CFR Part should be CFR part (lowercase)", # Per FAA Order 1320.46
472
  is_error=True,
473
  replacement="CFR part"
474
  ),
475
  PatternConfig(
476
  pattern=r'\bC\.F\.R\.\b',
477
+ description="C.F.R. should be CFR", # GPO Style Manual
478
  is_error=True,
479
  replacement="CFR"
480
  ),
 
492
  ),
493
  PatternConfig(
494
  pattern=r'\bcancelled\b',
495
+ description="'cancelled' should be 'canceled'", # Per GPO Style Manual
496
  is_error=True,
497
  replacement="canceled"
498
  ),
499
  PatternConfig(
500
  pattern=r'\bshall\b',
501
+ description="'shall' should be 'must'", # Per FAA Order 1320.46
502
  is_error=True,
503
  replacement="must"
504
  ),
505
  PatternConfig(
506
  pattern=r'\b\&\b',
507
+ description="'&' should be 'and'", # Per April 17, 2024 Use ampersand instead or 'and' email from Judith Watson
508
  is_error=True,
509
  replacement="and"
510
  ),
511
  PatternConfig(
512
  pattern=r'\bflight crew\b',
513
+ description="'flight crew' should be 'flightcrew'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
514
  is_error=True,
515
  replacement="flightcrew"
516
  ),
517
  PatternConfig(
518
  pattern=r'\bchairman\b',
519
+ description="'chairman' should be 'chair'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
520
  is_error=True,
521
  replacement="chair"
522
  ),
523
  PatternConfig(
524
  pattern=r'\bflagman\b',
525
+ description="'flagman' should be 'flagger' or 'flagperson'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
526
  is_error=True,
527
  replacement="flagperson"
528
  ),
529
  PatternConfig(
530
  pattern=r'\bman\b',
531
+ description="'man' should be 'individual' or 'person'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
532
  is_error=True,
533
  replacement="person"
534
  ),
535
  PatternConfig(
536
  pattern=r'\bmanmade\b',
537
+ description="'manmade' should be 'personmade'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
538
  is_error=True,
539
  replacement="personmade"
540
  ),
541
  PatternConfig(
542
  pattern=r'\bmanpower\b',
543
+ description="'manpower' should be 'labor force'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
544
  is_error=True,
545
  replacement="labor force"
546
  ),
547
  PatternConfig(
548
  pattern=r'\bnotice to airman\b',
549
+ description="'notice to airman' should be 'notice to air missions'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
550
  is_error=True,
551
  replacement="notice to air missions"
552
  ),
553
  PatternConfig(
554
  pattern=r'\bnotice to airmen\b',
555
+ description="'notice to airmen' should be 'notice to air missions'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
556
  is_error=True,
557
  replacement="notice to air missions"
558
  ),
559
  PatternConfig(
560
  pattern=r'\bcockpit\b',
561
+ description="'cockpit' should be 'flight deck'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
562
  is_error=True,
563
  replacement="flight deck"
564
  ),
565
  PatternConfig(
566
  pattern=r'\bA321 neo\b',
567
+ description="'A321 neo' should be 'A321neo'", # Per TCDS
568
  is_error=True,
569
  replacement="A321neo"
570
  )
 
572
  'section_symbol': [
573
  PatternConfig(
574
  pattern=r'^§',
575
+ description="Don't start a sentence with the section symbol. Write out 'Section'",
576
  is_error=True
577
  ),
578
  PatternConfig(
 
598
  ],
599
  'spacing': [
600
  PatternConfig(
601
+ pattern=r'([^\s]+)[ ]{2,}([^\s]+)', # Capture words before and after double space
602
+ description="Remove double spacing between '{0}' and '{1}'",
603
  is_error=True
604
  ),
605
  PatternConfig(
606
+ pattern=r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*[A-Z]?)', # Capture doc type and number
607
+ description="Add space between '{0}' and '{1}'",
608
  is_error=True
609
  ),
610
  PatternConfig(
611
+ pattern=r'(§|§§)(\d+\.\d+)', # Removed (?<!\s) to catch all section symbols
612
+ description="Add space after '{0}' before '{1}'",
613
  is_error=True
614
  ),
615
  PatternConfig(
616
+ pattern=r'(?<!\s)(Part)(\d+)', # Capture 'Part' and number
617
+ description="Add space between '{0}' and '{1}'",
 
 
 
 
 
618
  is_error=True
619
  )
620
  ],
 
1072
 
1073
  @profile_performance
1074
  def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
1075
+ """Check document terminology and output only unique term replacements needed."""
 
 
1076
  if not self.validate_input(doc):
1077
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1078
 
1079
  terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
1080
  prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
1081
 
1082
+ unique_issues = set() # Using a set to avoid duplicate replacements
1083
 
1084
  # Process each sentence
1085
  for paragraph in doc:
 
1089
  if not sentence:
1090
  continue
1091
 
1092
+ # Check terminology patterns
 
1093
  for pattern_config in terminology_patterns:
1094
  matches = list(re.finditer(pattern_config.pattern, sentence))
1095
  for match in matches:
1096
+ if pattern_config.replacement: # Only if there's a replacement term
1097
+ unique_issues.add((match.group(), pattern_config.replacement))
 
 
 
 
1098
 
1099
+ # Check prohibited patterns
1100
  for pattern_config in prohibited_patterns:
1101
  if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
1102
+ if pattern_config.replacement: # Only if there's a replacement term
1103
+ match_text = re.search(pattern_config.pattern, sentence, re.IGNORECASE).group()
1104
+ unique_issues.add((match_text, pattern_config.replacement))
 
 
 
 
 
 
 
1105
 
1106
+ # Format issues as simple replacement instructions
1107
+ formatted_issues = [
1108
+ {'incorrect_term': incorrect, 'correct_term': correct}
1109
+ for incorrect, correct in sorted(unique_issues) # Sort for consistent output
1110
+ ]
 
 
 
 
 
 
 
 
1111
 
1112
+ return DocumentCheckResult(success=not formatted_issues, issues=formatted_issues)
1113
 
1114
  @profile_performance
1115
  def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
 
1141
  matches = compiled_pattern.finditer(sentence)
1142
  for match in matches:
1143
  incorrect = match.group()
1144
+ # Remove § symbol without adding 'Section'
1145
+ correct = incorrect.replace('§ ', '')
1146
  issues.append({
1147
  'incorrect': incorrect,
1148
  'correct': correct
 
1376
  if not self.validate_input(doc):
1377
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1378
 
1379
+ issues = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1380
 
1381
+ try:
1382
+ for paragraph in doc:
1383
+ # Skip empty paragraphs
1384
+ if not paragraph.strip():
1385
+ continue
1386
+
1387
+ # Skip paragraphs with tabs
1388
+ if '\t' in paragraph:
1389
+ continue
1390
+
1391
+ # Check for multiple spaces between words, but ignore spaces around parentheses
1392
+ # First, temporarily replace valid parenthetical patterns to protect them
1393
+ working_text = paragraph
1394
 
1395
+ # Protect common regulatory reference patterns
1396
+ patterns_to_ignore = [
1397
+ r'\d+\(\d+\)\([a-z]\)', # matches patterns like 25(1)(a)
1398
+ r'\d+\([a-z]\)', # matches patterns like 25(a)
1399
+ r'\([a-z]\)\(\d+\)', # matches patterns like (a)(1)
1400
+ r'\(\d+\)\([a-z]\)', # matches patterns like (1)(a)
1401
+ r'\([a-z]\)', # matches single letter references like (a)
1402
+ r'\(\d+\)', # matches number references like (1)
1403
+ ]
1404
+
1405
+ for pattern in patterns_to_ignore:
1406
+ working_text = re.sub(pattern, lambda m: 'PROTECTED' + str(hash(m.group())), working_text)
1407
+
1408
+ # Now check for multiple spaces
1409
+ matches = re.finditer(r'[ ]{2,}', working_text)
1410
+ for match in matches:
1411
+ issues.append({
1412
+ 'incorrect': match.group(),
1413
+ 'context': paragraph.strip(),
1414
+ 'description': "Remove extra spaces"
1415
+ })
1416
+
1417
+ except Exception as e:
1418
+ self.logger.error(f"Error in spacing check: {str(e)}")
1419
+ return DocumentCheckResult(success=False, issues=[{'error': f'Spacing check failed: {str(e)}'}])
1420
 
 
 
 
1421
  return DocumentCheckResult(success=len(issues) == 0, issues=issues)
1422
 
1423
+ def _format_spacing_issues(self, result: DocumentCheckResult) -> List[str]:
1424
+ """Format spacing issues with clear instructions for fixing."""
1425
+ formatted_issues = []
1426
+
1427
+ if result.issues:
1428
+ for issue in result.issues:
1429
+ if 'error' in issue:
1430
+ formatted_issues.append(f" • {issue['error']}")
1431
+ else:
1432
+ formatted_issues.append(
1433
+ f" • {issue['description']} in: \"{issue['context']}\""
1434
+ )
1435
+
1436
+ return formatted_issues
1437
+
1438
  @profile_performance
1439
  def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
1440
  """Check for abbreviation consistency after first definition."""
 
1633
  details={'message': f'No patterns defined for {pattern_category}'}
1634
  )
1635
 
1636
+ # Use custom processing function if provided
1637
  if process_func:
1638
  return process_func(doc, patterns)
1639
 
1640
+ # Default processing with deduplication
1641
+ unique_issues = set() # Using a set to track unique issues
1642
+
1643
  for paragraph in doc:
1644
  sentences = re.split(r'(?<=[.!?])\s+', paragraph)
1645
  for sentence in sentences:
 
1650
  for pattern_config in patterns:
1651
  matches = list(re.finditer(pattern_config.pattern, sentence))
1652
  if matches:
1653
+ # Add each match as a tuple to ensure uniqueness
1654
+ for match in matches:
1655
+ unique_issues.add((
1656
+ match.group(), # The matched text
1657
+ pattern_config.description, # The issue description
1658
+ pattern_config.replacement if hasattr(pattern_config, 'replacement') else None
1659
+ ))
1660
 
1661
+ # Convert unique issues back to the expected format
1662
+ formatted_issues = [
1663
+ {
1664
+ 'incorrect': issue[0],
1665
+ 'description': issue[1],
1666
+ 'replacement': issue[2]
1667
+ }
1668
+ for issue in sorted(unique_issues) # Sort for consistent output
1669
+ ]
1670
+
1671
+ return DocumentCheckResult(success=len(formatted_issues) == 0, issues=formatted_issues)
1672
 
1673
  def run_all_checks(self, doc_path: str, doc_type: str, template_type: Optional[str] = None) -> Dict[str, DocumentCheckResult]:
1674
  """
 
1797
 
1798
  return sentences
1799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1800
  @profile_performance
1801
  def check_parentheses(self, doc: List[str]) -> DocumentCheckResult:
1802
  """
1803
  Check for matching parentheses in the document.
1804
+
1805
  Args:
1806
  doc (List[str]): List of document paragraphs
1807
+
1808
  Returns:
1809
  DocumentCheckResult: Result containing any mismatched parentheses issues
1810
  """
 
1814
  issues = []
1815
 
1816
  for i, paragraph in enumerate(doc, 1):
1817
+ if not paragraph.strip(): # Skip empty paragraphs
 
1818
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1819
 
1820
+ stack = [] # Track unmatched opening parentheses
1821
+ sentences = re.split(r'(?<=[.!?])\s+', paragraph) # Split paragraph into sentences
1822
+ for sentence in sentences:
1823
+ for j, char in enumerate(sentence):
1824
+ if char == '(':
1825
+ stack.append((sentence, j)) # Store sentence and character position
1826
+ elif char == ')':
1827
+ if stack:
1828
+ stack.pop() # Remove matching opening parenthesis
1829
+ else:
1830
+ # No matching opening parenthesis
1831
+ issues.append({
1832
+ 'type': 'missing_opening',
1833
+ 'paragraph': i, # Still tracked but not included in the message
1834
+ 'position': j,
1835
+ 'text': sentence,
1836
+ 'message': f"Add an opening parenthesis to the sentence: \"{sentence.strip()}\""
1837
+ })
1838
+
1839
+ # Check for any unmatched opening parentheses left in the stack
1840
+ for unmatched in stack:
1841
+ sentence, pos = unmatched
1842
  issues.append({
1843
  'type': 'missing_closing',
1844
+ 'paragraph': i, # Still tracked but not included in the message
1845
  'position': pos,
1846
+ 'text': sentence,
1847
+ 'message': f"Add a closing parenthesis to the sentence: \"{sentence.strip()}\""
1848
  })
1849
 
1850
  return DocumentCheckResult(success=len(issues) == 0, issues=issues)
 
1855
  if not self.validate_input(doc):
1856
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
1857
 
 
1858
  spacing_patterns = self.config_manager.pattern_registry.get('spacing', [])
1859
+ issues = []
1860
 
1861
+ try:
1862
+ for paragraph in doc:
1863
+ if not paragraph.strip() or '\t' in paragraph:
1864
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1865
 
1866
+ for pattern_config in spacing_patterns:
1867
+ matches = re.finditer(pattern_config.pattern, paragraph)
1868
+ for match in matches:
1869
+ groups = match.groups()
1870
+ description = pattern_config.description.replace('{0}', groups[0]).replace('{1}', groups[1])
1871
+
1872
+ context_start = max(0, match.start() - 20)
1873
+ context_end = min(len(paragraph), match.end() + 20)
1874
+ context = paragraph[context_start:context_end].strip()
1875
+
1876
+ issues.append({
1877
+ 'type': 'spacing',
1878
+ 'incorrect': match.group(),
1879
+ 'context': context,
1880
+ 'description': description
1881
+ })
1882
+
1883
+ except Exception as e:
1884
+ self.logger.error(f"Error in spacing check: {str(e)}")
1885
+ return DocumentCheckResult(success=False, issues=[{'error': f'Spacing check failed: {str(e)}'}])
1886
 
 
 
 
1887
  return DocumentCheckResult(success=len(issues) == 0, issues=issues)
1888
 
1889
+ def _format_spacing_issues(self, result: DocumentCheckResult) -> List[str]:
1890
+ """Format spacing issues with clear instructions for fixing."""
1891
+ formatted_issues = []
1892
+
1893
+ if result.issues:
1894
+ for issue in result.issues:
1895
+ if 'error' in issue:
1896
+ formatted_issues.append(f" • {issue['error']}")
1897
+ else:
1898
+ formatted_issues.append(
1899
+ f" • {issue['description']} in: \"{issue['context']}\""
1900
+ )
1901
+
1902
+ return formatted_issues
1903
+
1904
  @profile_performance
1905
  def check_abbreviation_usage(self, doc: List[str]) -> DocumentCheckResult:
1906
  """Check for abbreviation consistency after first definition."""
 
1942
  success = len(inconsistent_uses) == 0
1943
  return DocumentCheckResult(success=success, issues=inconsistent_uses)
1944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1945
  class DocumentCheckResultsFormatter:
1946
  """Formats document check results in a user-friendly way with detailed examples and fixes."""
1947
 
 
2028
  'description': 'Analyzes document spacing patterns to ensure compliance with FAA formatting standards. This includes checking for proper spacing around regulatory references (like "AC 25-1" not "AC25-1"), section symbols (§ 25.1), paragraph references, and multiple spaces between words.',
2029
  'solution': 'Fix spacing issues: remove any missing spaces, double spaces, or inadvertent tabs.',
2030
  'example_fix': {
2031
+ 'before': 'AC25.25 states that SFAR88 and §25.981 require...',
2032
  'after': 'AC 25.25 states that SFAR 88 and § 25.981 require...'
2033
  }
2034
  },
 
2037
  'description': 'Examines all date references in your document. The check automatically excludes technical reference numbers that may look like dates to ensure accurate validation of true date references. Note, though, there might be instances in the heading of the document where the date is formatted as "MM/DD/YYYY", which is acceptable. This applies mostly to date formats within the document body.',
2038
  'solution': 'Use the format "Month Day, Year" where appropriate.',
2039
  'example_fix': {
2040
+ 'before': 'This policy statement cancels Policy Statement PS-AIR100-2006-MMPDS, dated 7/25/2006.',
2041
+ 'after': 'This policy statement cancels Policy Statement PS-AIR100-2006-MMPDS, dated July 25, 2006.'
2042
  }
2043
  },
2044
  'placeholders_check': {
 
2046
  'description': 'Identifies incomplete content and temporary placeholders that must be finalized before document publication. This includes common placeholder text (like "TBD" or "To be determined"), draft markers, and incomplete sections.',
2047
  'solution': 'Replace all placeholder content with actual content',
2048
  'example_fix': {
2049
+ 'before': 'Pilots must submit the [Insert text] form to the FAA for approval.',
2050
+ 'after': 'Pilots must submit the Report of Eye Evaluation form 8500-7 to the FAA for approval.'
2051
  }
2052
  },
2053
  'parentheses_check': {
 
2104
  for heading in unexpected:
2105
  output.append(f" • {heading}")
2106
 
2107
+ return output
2108
 
2109
  def _format_period_issues(self, result: DocumentCheckResult) -> List[str]:
2110
  """Format period check issues consistently."""
 
2160
  if isinstance(issue, str):
2161
  return f" • {issue}"
2162
 
2163
+ if 'incorrect' in issue and 'correct' in issue:
2164
+ return f" • Replace '{issue['incorrect']}' with '{issue['correct']}'"
2165
+
2166
  if 'incorrect_term' in issue and 'correct_term' in issue:
2167
  return f" • Replace '{issue['incorrect_term']}' with '{issue['correct_term']}'"
2168
 
 
2202
 
2203
  if result.issues:
2204
  for issue in result.issues:
2205
+ formatted_issues.append(f" • {issue['message']}")
 
 
 
 
 
 
 
2206
 
2207
  return formatted_issues
2208