Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -84,7 +84,7 @@ DOCUMENT_FORMATTING_RULES = {
|
|
84 |
"types": ["Advisory Circular"],
|
85 |
"italics": True,
|
86 |
"quotes": False,
|
87 |
-
"description": "For Advisory Circulars, referenced document titles should be italicized but not quoted",
|
88 |
"example": "See AC 20-135, *Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria* for information on X."
|
89 |
},
|
90 |
"quotes_only": {
|
@@ -95,14 +95,14 @@ DOCUMENT_FORMATTING_RULES = {
|
|
95 |
],
|
96 |
"italics": False,
|
97 |
"quotes": True,
|
98 |
-
"description": "For this document type, referenced document titles should be in quotes without italics",
|
99 |
"example": 'See AC 20-135, "Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria" for information on X.'
|
100 |
},
|
101 |
"no_formatting": {
|
102 |
"types": ["Policy Statement", "Other"],
|
103 |
"italics": False,
|
104 |
"quotes": False,
|
105 |
-
"description": "For this document type, referenced document titles should not use italics or quotes",
|
106 |
"example": "See AC 20-135, Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria for information on X."
|
107 |
}
|
108 |
}
|
@@ -510,6 +510,54 @@ class DocumentCheckerConfig:
|
|
510 |
description="'flight crew' should be 'flightcrew'",
|
511 |
is_error=True,
|
512 |
replacement="flightcrew"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
513 |
)
|
514 |
],
|
515 |
'section_symbol': [
|
@@ -945,7 +993,13 @@ class FAADocumentChecker(DocumentChecker):
|
|
945 |
@profile_performance
|
946 |
def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
|
947 |
"""
|
948 |
-
Check document terminology and output only
|
|
|
|
|
|
|
|
|
|
|
|
|
949 |
"""
|
950 |
if not self.validate_input(doc):
|
951 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
@@ -954,28 +1008,67 @@ class FAADocumentChecker(DocumentChecker):
|
|
954 |
terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
|
955 |
prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
|
956 |
|
957 |
-
#
|
958 |
-
|
|
|
959 |
|
960 |
-
# Check each paragraph for terminology
|
961 |
for paragraph in doc:
|
962 |
sentences = re.split(r'(?<=[.!?])\s+', paragraph)
|
963 |
for sentence in sentences:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
964 |
# Check for incorrect terms that need replacement
|
965 |
for pattern_config in terminology_patterns:
|
966 |
-
|
967 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
968 |
|
969 |
# Check for prohibited phrases and constructions
|
970 |
for pattern_config in prohibited_patterns:
|
971 |
if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
|
972 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
973 |
|
974 |
-
|
975 |
-
issue_sentences = list(set(issue_sentences))
|
976 |
|
977 |
-
# Return only the minimal list of issue sentences
|
978 |
-
return DocumentCheckResult(success=not issue_sentences, issues=issue_sentences)
|
979 |
|
980 |
@profile_performance
|
981 |
def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
|
@@ -1830,6 +1923,24 @@ class DocumentCheckResultsFormatter:
|
|
1830 |
def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
|
1831 |
"""Format a standard issue consistently."""
|
1832 |
if isinstance(issue, dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1833 |
# Handle issues with occurrences list
|
1834 |
if 'occurrences' in issue:
|
1835 |
# Format the first 3 occurrences
|
@@ -1871,9 +1982,10 @@ class DocumentCheckResultsFormatter:
|
|
1871 |
)
|
1872 |
|
1873 |
# Handle terminology issues
|
1874 |
-
|
1875 |
return textwrap.fill(
|
1876 |
-
f" • '{issue['incorrect_term']}'
|
|
|
1877 |
width=76,
|
1878 |
subsequent_indent=' '
|
1879 |
)
|
@@ -1966,7 +2078,7 @@ class DocumentCheckResultsFormatter:
|
|
1966 |
"types": ["Advisory Circular"],
|
1967 |
"italics": True,
|
1968 |
"quotes": False,
|
1969 |
-
"description": "For Advisory Circulars, referenced document titles should be italicized but not quoted.",
|
1970 |
"example": "See AC 25.1309-1B, *System Design and Analysis*, for information on X."
|
1971 |
},
|
1972 |
"quotes_only": {
|
@@ -1977,14 +2089,14 @@ class DocumentCheckResultsFormatter:
|
|
1977 |
],
|
1978 |
"italics": False,
|
1979 |
"quotes": True,
|
1980 |
-
"description": "For this document type, referenced document titles should be in quotes without italics.",
|
1981 |
"example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
|
1982 |
},
|
1983 |
"no_formatting": {
|
1984 |
"types": ["Policy Statement", "Other"],
|
1985 |
"italics": False,
|
1986 |
"quotes": False,
|
1987 |
-
"description": "For this document type, referenced document titles should not use italics or quotes.",
|
1988 |
"example": "See AC 25.1309-1B, System Design and Analysis, for information on X."
|
1989 |
}
|
1990 |
}
|
|
|
84 |
"types": ["Advisory Circular"],
|
85 |
"italics": True,
|
86 |
"quotes": False,
|
87 |
+
"description": "For Advisory Circulars, referenced document titles should be italicized but not quoted.",
|
88 |
"example": "See AC 20-135, *Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria* for information on X."
|
89 |
},
|
90 |
"quotes_only": {
|
|
|
95 |
],
|
96 |
"italics": False,
|
97 |
"quotes": True,
|
98 |
+
"description": "For this document type, referenced document titles should be in quotes without italics.",
|
99 |
"example": 'See AC 20-135, "Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria" for information on X.'
|
100 |
},
|
101 |
"no_formatting": {
|
102 |
"types": ["Policy Statement", "Other"],
|
103 |
"italics": False,
|
104 |
"quotes": False,
|
105 |
+
"description": "For this document type, referenced document titles should not use italics or quotes.",
|
106 |
"example": "See AC 20-135, Powerplant Installation and Propulsion System Component Fire Protection Test Methods, Standards, and Criteria for information on X."
|
107 |
}
|
108 |
}
|
|
|
510 |
description="'flight crew' should be 'flightcrew'",
|
511 |
is_error=True,
|
512 |
replacement="flightcrew"
|
513 |
+
),
|
514 |
+
PatternConfig(
|
515 |
+
pattern=r'\bchairman\b',
|
516 |
+
description="'chairman' should be 'chair'",
|
517 |
+
is_error=True,
|
518 |
+
replacement="chair"
|
519 |
+
),
|
520 |
+
PatternConfig(
|
521 |
+
pattern=r'\bflagman\b',
|
522 |
+
description="'flagman' should be 'flagger' or 'flagperson'",
|
523 |
+
is_error=True,
|
524 |
+
replacement="flagperson"
|
525 |
+
),
|
526 |
+
PatternConfig(
|
527 |
+
pattern=r'\bman\b',
|
528 |
+
description="'man' should be 'individual' or 'person'",
|
529 |
+
is_error=True,
|
530 |
+
replacement="person"
|
531 |
+
),
|
532 |
+
PatternConfig(
|
533 |
+
pattern=r'\bmanmade\b',
|
534 |
+
description="'manmade' should be 'personmade'",
|
535 |
+
is_error=True,
|
536 |
+
replacement="personmade"
|
537 |
+
),
|
538 |
+
PatternConfig(
|
539 |
+
pattern=r'\bmanpower\b',
|
540 |
+
description="'manpower' should be 'labor force'",
|
541 |
+
is_error=True,
|
542 |
+
replacement="labor force"
|
543 |
+
),
|
544 |
+
PatternConfig(
|
545 |
+
pattern=r'\bnotice to airman\b',
|
546 |
+
description="'notice to airman' should be 'notice to air missions'",
|
547 |
+
is_error=True,
|
548 |
+
replacement="notice to air missions"
|
549 |
+
),
|
550 |
+
PatternConfig(
|
551 |
+
pattern=r'\bnotice to airmen\b',
|
552 |
+
description="'notice to airmen' should be 'notice to air missions'",
|
553 |
+
is_error=True,
|
554 |
+
replacement="notice to air missions"
|
555 |
+
),
|
556 |
+
PatternConfig(
|
557 |
+
pattern=r'\bcockpit\b',
|
558 |
+
description="'cockpit' should be 'flight deck'",
|
559 |
+
is_error=True,
|
560 |
+
replacement="flight deck"
|
561 |
)
|
562 |
],
|
563 |
'section_symbol': [
|
|
|
993 |
@profile_performance
|
994 |
def check_terminology(self, doc: List[str]) -> DocumentCheckResult:
|
995 |
"""
|
996 |
+
Check document terminology and output only unique sentences needing correction.
|
997 |
+
|
998 |
+
Args:
|
999 |
+
doc (List[str]): List of document paragraphs
|
1000 |
+
|
1001 |
+
Returns:
|
1002 |
+
DocumentCheckResult: Result containing unique terminology issues with context
|
1003 |
"""
|
1004 |
if not self.validate_input(doc):
|
1005 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
|
|
1008 |
terminology_patterns = self.config_manager.pattern_registry.get('terminology', [])
|
1009 |
prohibited_patterns = self.config_manager.pattern_registry.get('reference_terms', [])
|
1010 |
|
1011 |
+
# Use a dictionary to track unique issues by sentence
|
1012 |
+
# Key: sentence, Value: list of issues in that sentence
|
1013 |
+
sentence_issues = {}
|
1014 |
|
1015 |
+
# Check each paragraph for terminology issues
|
1016 |
for paragraph in doc:
|
1017 |
sentences = re.split(r'(?<=[.!?])\s+', paragraph)
|
1018 |
for sentence in sentences:
|
1019 |
+
sentence = sentence.strip()
|
1020 |
+
|
1021 |
+
# Skip empty sentences
|
1022 |
+
if not sentence:
|
1023 |
+
continue
|
1024 |
+
|
1025 |
+
current_sentence_issues = []
|
1026 |
+
|
1027 |
# Check for incorrect terms that need replacement
|
1028 |
for pattern_config in terminology_patterns:
|
1029 |
+
matches = list(re.finditer(pattern_config.pattern, sentence))
|
1030 |
+
for match in matches:
|
1031 |
+
current_sentence_issues.append({
|
1032 |
+
'incorrect_term': match.group(),
|
1033 |
+
'correct_term': pattern_config.replacement,
|
1034 |
+
'description': pattern_config.description,
|
1035 |
+
'sentence': sentence
|
1036 |
+
})
|
1037 |
|
1038 |
# Check for prohibited phrases and constructions
|
1039 |
for pattern_config in prohibited_patterns:
|
1040 |
if re.search(pattern_config.pattern, sentence, re.IGNORECASE):
|
1041 |
+
current_sentence_issues.append({
|
1042 |
+
'description': pattern_config.description,
|
1043 |
+
'sentence': sentence
|
1044 |
+
})
|
1045 |
+
|
1046 |
+
# Only add if we found issues in this sentence
|
1047 |
+
if current_sentence_issues:
|
1048 |
+
# Use sentence as key to prevent duplicates
|
1049 |
+
if sentence not in sentence_issues:
|
1050 |
+
sentence_issues[sentence] = current_sentence_issues
|
1051 |
+
else:
|
1052 |
+
sentence_issues[sentence].extend(current_sentence_issues)
|
1053 |
+
|
1054 |
+
# Build the issues per sentence
|
1055 |
+
unique_issues = []
|
1056 |
+
for sentence, sentence_issue_list in sentence_issues.items():
|
1057 |
+
incorrect_terms = set()
|
1058 |
+
descriptions = set()
|
1059 |
+
for issue in sentence_issue_list:
|
1060 |
+
if 'incorrect_term' in issue:
|
1061 |
+
incorrect_terms.add((issue['incorrect_term'], issue.get('correct_term')))
|
1062 |
+
if 'description' in issue:
|
1063 |
+
descriptions.add(issue['description'])
|
1064 |
+
unique_issues.append({
|
1065 |
+
'sentence': sentence,
|
1066 |
+
'incorrect_terms': list(incorrect_terms),
|
1067 |
+
'descriptions': list(descriptions),
|
1068 |
+
})
|
1069 |
|
1070 |
+
return DocumentCheckResult(success=not unique_issues, issues=unique_issues)
|
|
|
1071 |
|
|
|
|
|
1072 |
|
1073 |
@profile_performance
|
1074 |
def check_section_symbol_usage(self, doc: List[str]) -> DocumentCheckResult:
|
|
|
1923 |
def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
|
1924 |
"""Format a standard issue consistently."""
|
1925 |
if isinstance(issue, dict):
|
1926 |
+
# Handle grouped issues per sentence
|
1927 |
+
if 'incorrect_terms' in issue and 'sentence' in issue:
|
1928 |
+
# Build the replacements text
|
1929 |
+
replacements = '; '.join(
|
1930 |
+
f"'{inc}' with '{corr}'" if corr else f"Remove '{inc}'"
|
1931 |
+
for inc, corr in sorted(issue['incorrect_terms'])
|
1932 |
+
)
|
1933 |
+
# Start building the output lines
|
1934 |
+
lines = []
|
1935 |
+
lines.append(f" • In: {issue['sentence']}")
|
1936 |
+
lines.append(f" Replace {replacements}")
|
1937 |
+
# Format each line individually
|
1938 |
+
formatted_lines = [
|
1939 |
+
textwrap.fill(line, width=76, subsequent_indent=' ')
|
1940 |
+
for line in lines
|
1941 |
+
]
|
1942 |
+
return '\n'.join(formatted_lines)
|
1943 |
+
|
1944 |
# Handle issues with occurrences list
|
1945 |
if 'occurrences' in issue:
|
1946 |
# Format the first 3 occurrences
|
|
|
1982 |
)
|
1983 |
|
1984 |
# Handle terminology issues
|
1985 |
+
if all(k in issue for k in ['incorrect_term', 'correct_term', 'sentence']):
|
1986 |
return textwrap.fill(
|
1987 |
+
f" • Replace '{issue['incorrect_term']}' with '{issue['correct_term']}' in: "
|
1988 |
+
f"{issue['sentence']}",
|
1989 |
width=76,
|
1990 |
subsequent_indent=' '
|
1991 |
)
|
|
|
2078 |
"types": ["Advisory Circular"],
|
2079 |
"italics": True,
|
2080 |
"quotes": False,
|
2081 |
+
"description": "For Advisory Circulars, referenced document titles should be italicized but not quoted",
|
2082 |
"example": "See AC 25.1309-1B, *System Design and Analysis*, for information on X."
|
2083 |
},
|
2084 |
"quotes_only": {
|
|
|
2089 |
],
|
2090 |
"italics": False,
|
2091 |
"quotes": True,
|
2092 |
+
"description": "For this document type, referenced document titles should be in quotes without italics",
|
2093 |
"example": 'See AC 25.1309-1B, "System Design and Analysis," for information on X.'
|
2094 |
},
|
2095 |
"no_formatting": {
|
2096 |
"types": ["Policy Statement", "Other"],
|
2097 |
"italics": False,
|
2098 |
"quotes": False,
|
2099 |
+
"description": "For this document type, referenced document titles should not use italics or quotes",
|
2100 |
"example": "See AC 25.1309-1B, System Design and Analysis, for information on X."
|
2101 |
}
|
2102 |
}
|