Hoctar77 committed on
Commit
7a044f4
·
verified ·
1 Parent(s): 023f4fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -19
app.py CHANGED
@@ -84,7 +84,7 @@ DOCUMENT_FORMATTING_RULES = {
84
  "types": ["Advisory Circular"],
85
  "italics": True,
86
  "quotes": False,
87
- "description": "For Advisory Circulars, referenced document titles should be italicized but not quoted",
88
  "example": "See AC 20-135, *Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria* for information on X."
89
  },
90
  "quotes_only": {
@@ -95,14 +95,14 @@ DOCUMENT_FORMATTING_RULES = {
95
  ],
96
  "italics": False,
97
  "quotes": True,
98
- "description": "For this document type, referenced document titles should be in quotes without italics",
99
  "example": 'See AC 20-135, "Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria" for information on X.'
100
  },
101
  "no_formatting": {
102
  "types": ["Policy Statement", "Other"],
103
  "italics": False,
104
  "quotes": False,
105
- "description": "For this document type, referenced document titles should not use italics or quotes",
106
  "example": "See AC 20-135, Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria for information on X."
107
  }
108
  }
@@ -510,6 +510,54 @@ class DocumentCheckerConfig:
510
  description="'flight crew' should be 'flightcrew'",
511
  is_error=True,
512
  replacement="flightcrew"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  )
514
  ],
515
  'section_symbol': [
@@ -945,7 +993,13 @@ class FAADocumentChecker(DocumentChecker):
945
  @profile_performance
946
  def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
947
  """
948
- Check document terminology and output only the sentences needing correction.
 
 
 
 
 
 
949
  """
950
  if not self.validate_input(doc):
951
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
@@ -954,28 +1008,67 @@ class FAADocumentChecker(DocumentChecker):
954
  terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
955
  prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
956
 
957
- # Collect sentences with issues
958
- issue_sentences = []
 
959
 
960
- # Check each paragraph for terminology and prohibited patterns
961
  for paragraph in doc:
962
  sentences = re.split(r'(?<=[.!?])\s+', paragraph)
963
  for sentence in sentences:
 
 
 
 
 
 
 
 
964
  # Check for incorrect terms that need replacement
965
  for pattern_config in terminology_patterns:
966
- if re.search(pattern_config.pattern, sentence):
967
- issue_sentences.append(sentence.strip())
 
 
 
 
 
 
968
 
969
  # Check for prohibited phrases and constructions
970
  for pattern_config in prohibited_patterns:
971
  if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
972
- issue_sentences.append(sentence.strip())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
973
 
974
- # Remove duplicates for cleaner output
975
- issue_sentences = list(set(issue_sentences))
976
 
977
- # Return only the minimal list of issue sentences
978
- return DocumentCheckResult(success=not issue_sentences, issues=issue_sentences)
979
 
980
  @profile_performance
981
  def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
@@ -1830,6 +1923,24 @@ class DocumentCheckResultsFormatter:
1830
  def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
1831
  """Format a standard issue consistently."""
1832
  if isinstance(issue, dict):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1833
  # Handle issues with occurrences list
1834
  if 'occurrences' in issue:
1835
  # Format the first 3 occurrences
@@ -1871,9 +1982,10 @@ class DocumentCheckResultsFormatter:
1871
  )
1872
 
1873
  # Handle terminology issues
1874
- elif 'incorrect_term' in issue and 'correct_term' in issue:
1875
  return textwrap.fill(
1876
- f" • '{issue['incorrect_term']}' should be '{issue['correct_term']}' in: {issue.get('sentence', '')}",
 
1877
  width=76,
1878
  subsequent_indent=' '
1879
  )
@@ -1966,7 +2078,7 @@ class DocumentCheckResultsFormatter:
1966
  "types": ["Advisory Circular"],
1967
  "italics": True,
1968
  "quotes": False,
1969
- "description": "For Advisory Circulars, referenced document titles should be italicized but not quoted.",
1970
  "example": "See AC 25.1309-1B, *System Design and Analysis*, for information on X."
1971
  },
1972
  "quotes_only": {
@@ -1977,14 +2089,14 @@ class DocumentCheckResultsFormatter:
1977
  ],
1978
  "italics": False,
1979
  "quotes": True,
1980
- "description": "For this document type, referenced document titles should be in quotes without italics.",
1981
  "example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
1982
  },
1983
  "no_formatting": {
1984
  "types": ["Policy Statement", "Other"],
1985
  "italics": False,
1986
  "quotes": False,
1987
- "description": "For this document type, referenced document titles should not use italics or quotes.",
1988
  "example": "See AC 25.1309-1B, System Design and Analysis, for information on X."
1989
  }
1990
  }
 
84
  "types": ["Advisory Circular"],
85
  "italics": True,
86
  "quotes": False,
87
+ "description": "For Advisory Circulars, referenced document titles should be italicized but not quoted.",
88
  "example": "See AC 20-135, *Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria* for information on X."
89
  },
90
  "quotes_only": {
 
95
  ],
96
  "italics": False,
97
  "quotes": True,
98
+ "description": "For this document type, referenced document titles should be in quotes without italics.",
99
  "example": 'See AC 20-135, "Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria" for information on X.'
100
  },
101
  "no_formatting": {
102
  "types": ["Policy Statement", "Other"],
103
  "italics": False,
104
  "quotes": False,
105
+ "description": "For this document type, referenced document titles should not use italics or quotes.",
106
  "example": "See AC 20-135, Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria for information on X."
107
  }
108
  }
 
510
  description="'flight crew' should be 'flightcrew'",
511
  is_error=True,
512
  replacement="flightcrew"
513
+ ),
514
+ PatternConfig(
515
+ pattern=r'\bchairman\b',
516
+ description="'chairman' should be 'chair'",
517
+ is_error=True,
518
+ replacement="chair"
519
+ ),
520
+ PatternConfig(
521
+ pattern=r'\bflagman\b',
522
+ description="'flagman' should be 'flagger' or 'flagperson'",
523
+ is_error=True,
524
+ replacement="flagperson"
525
+ ),
526
+ PatternConfig(
527
+ pattern=r'\bman\b',
528
+ description="'man' should be 'individual' or 'person'",
529
+ is_error=True,
530
+ replacement="person"
531
+ ),
532
+ PatternConfig(
533
+ pattern=r'\bmanmade\b',
534
+ description="'manmade' should be 'personmade'",
535
+ is_error=True,
536
+ replacement="personmade"
537
+ ),
538
+ PatternConfig(
539
+ pattern=r'\bmanpower\b',
540
+ description="'manpower' should be 'labor force'",
541
+ is_error=True,
542
+ replacement="labor force"
543
+ ),
544
+ PatternConfig(
545
+ pattern=r'\bnotice to airman\b',
546
+ description="'notice to airman' should be 'notice to air missions'",
547
+ is_error=True,
548
+ replacement="notice to air missions"
549
+ ),
550
+ PatternConfig(
551
+ pattern=r'\bnotice to airmen\b',
552
+ description="'notice to airmen' should be 'notice to air missions'",
553
+ is_error=True,
554
+ replacement="notice to air missions"
555
+ ),
556
+ PatternConfig(
557
+ pattern=r'\bcockpit\b',
558
+ description="'cockpit' should be 'flight deck'",
559
+ is_error=True,
560
+ replacement="flight deck"
561
  )
562
  ],
563
  'section_symbol': [
 
993
  @profile_performance
994
  def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
995
  """
996
+ Check document terminology and output only unique sentences needing correction.
997
+
998
+ Args:
999
+ doc (List[str]): List of document paragraphs
1000
+
1001
+ Returns:
1002
+ DocumentCheckResult: Result containing unique terminology issues with context
1003
  """
1004
  if not self.validate_input(doc):
1005
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
 
1008
  terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
1009
  prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
1010
 
1011
+ # Use a dictionary to track unique issues by sentence
1012
+ # Key: sentence, Value: list of issues in that sentence
1013
+ sentence_issues = {}
1014
 
1015
+ # Check each paragraph for terminology issues
1016
  for paragraph in doc:
1017
  sentences = re.split(r'(?<=[.!?])\s+', paragraph)
1018
  for sentence in sentences:
1019
+ sentence = sentence.strip()
1020
+
1021
+ # Skip empty sentences
1022
+ if not sentence:
1023
+ continue
1024
+
1025
+ current_sentence_issues = []
1026
+
1027
  # Check for incorrect terms that need replacement
1028
  for pattern_config in terminology_patterns:
1029
+ matches = list(re.finditer(pattern_config.pattern, sentence))
1030
+ for match in matches:
1031
+ current_sentence_issues.append({
1032
+ 'incorrect_term': match.group(),
1033
+ 'correct_term': pattern_config.replacement,
1034
+ 'description': pattern_config.description,
1035
+ 'sentence': sentence
1036
+ })
1037
 
1038
  # Check for prohibited phrases and constructions
1039
  for pattern_config in prohibited_patterns:
1040
  if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
1041
+ current_sentence_issues.append({
1042
+ 'description': pattern_config.description,
1043
+ 'sentence': sentence
1044
+ })
1045
+
1046
+ # Only add if we found issues in this sentence
1047
+ if current_sentence_issues:
1048
+ # Use sentence as key to prevent duplicates
1049
+ if sentence not in sentence_issues:
1050
+ sentence_issues[sentence] = current_sentence_issues
1051
+ else:
1052
+ sentence_issues[sentence].extend(current_sentence_issues)
1053
+
1054
+ # Build the issues per sentence
1055
+ unique_issues = []
1056
+ for sentence, sentence_issue_list in sentence_issues.items():
1057
+ incorrect_terms = set()
1058
+ descriptions = set()
1059
+ for issue in sentence_issue_list:
1060
+ if 'incorrect_term' in issue:
1061
+ incorrect_terms.add((issue['incorrect_term'], issue.get('correct_term')))
1062
+ if 'description' in issue:
1063
+ descriptions.add(issue['description'])
1064
+ unique_issues.append({
1065
+ 'sentence': sentence,
1066
+ 'incorrect_terms': list(incorrect_terms),
1067
+ 'descriptions': list(descriptions),
1068
+ })
1069
 
1070
+ return DocumentCheckResult(success=not unique_issues, issues=unique_issues)
 
1071
 
 
 
1072
 
1073
  @profile_performance
1074
  def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
 
1923
  def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
1924
  """Format a standard issue consistently."""
1925
  if isinstance(issue, dict):
1926
+ # Handle grouped issues per sentence
1927
+ if 'incorrect_terms' in issue and 'sentence' in issue:
1928
+ # Build the replacements text
1929
+ replacements = '; '.join(
1930
+ f"'{inc}' with '{corr}'" if corr else f"Remove '{inc}'"
1931
+ for inc, corr in sorted(issue['incorrect_terms'])
1932
+ )
1933
+ # Start building the output lines
1934
+ lines = []
1935
+ lines.append(f" • In: {issue['sentence']}")
1936
+ lines.append(f" Replace {replacements}")
1937
+ # Format each line individually
1938
+ formatted_lines = [
1939
+ textwrap.fill(line, width=76, subsequent_indent=' ')
1940
+ for line in lines
1941
+ ]
1942
+ return '\n'.join(formatted_lines)
1943
+
1944
  # Handle issues with occurrences list
1945
  if 'occurrences' in issue:
1946
  # Format the first 3 occurrences
 
1982
  )
1983
 
1984
  # Handle terminology issues
1985
+ if all(k in issue for k in ['incorrect_term', 'correct_term', 'sentence']):
1986
  return textwrap.fill(
1987
+ f" • Replace '{issue['incorrect_term']}' with '{issue['correct_term']}' in: "
1988
+ f"{issue['sentence']}",
1989
  width=76,
1990
  subsequent_indent=' '
1991
  )
 
2078
  "types": ["Advisory Circular"],
2079
  "italics": True,
2080
  "quotes": False,
2081
+ "description": "For Advisory Circulars, referenced document titles should be italicized but not quoted",
2082
  "example": "See AC 25.1309-1B, *System Design and Analysis*, for information on X."
2083
  },
2084
  "quotes_only": {
 
2089
  ],
2090
  "italics": False,
2091
  "quotes": True,
2092
+ "description": "For this document type, referenced document titles should be in quotes without italics",
2093
  "example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
2094
  },
2095
  "no_formatting": {
2096
  "types": ["Policy Statement", "Other"],
2097
  "italics": False,
2098
  "quotes": False,
2099
+ "description": "For this document type, referenced document titles should not use italics or quotes",
2100
  "example": "See AC 25.1309-1B, System Design and Analysis, for information on X."
2101
  }
2102
  }