Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -929,13 +929,16 @@ class FAADocumentChecker(DocumentChecker):
|
|
929 |
if not self.validate_input(doc):
|
930 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
931 |
|
932 |
-
#
|
933 |
heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
|
|
|
|
|
934 |
predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
|
935 |
|
936 |
# Tracking structures
|
937 |
defined_acronyms = {} # Stores definition info
|
938 |
used_acronyms = set() # Stores acronyms used after definition
|
|
|
939 |
issues = []
|
940 |
|
941 |
# Patterns
|
@@ -943,12 +946,12 @@ class FAADocumentChecker(DocumentChecker):
|
|
943 |
acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
|
944 |
|
945 |
for paragraph in doc:
|
946 |
-
# Skip lines that appear to be headings
|
947 |
words = paragraph.strip().split()
|
948 |
if all(word.isupper() for word in words) and any(word in heading_words for word in words):
|
949 |
continue
|
950 |
|
951 |
-
# Check for acronym definitions
|
952 |
defined_matches = defined_pattern.findall(paragraph)
|
953 |
for full_term, acronym in defined_matches:
|
954 |
if acronym not in predefined_acronyms:
|
@@ -956,7 +959,7 @@ class FAADocumentChecker(DocumentChecker):
|
|
956 |
defined_acronyms[acronym] = {
|
957 |
'full_term': full_term.strip(),
|
958 |
'defined_at': paragraph.strip(),
|
959 |
-
'used': False
|
960 |
}
|
961 |
|
962 |
# Check for acronym usage
|
@@ -968,10 +971,17 @@ class FAADocumentChecker(DocumentChecker):
|
|
968 |
if acronym in predefined_acronyms:
|
969 |
continue
|
970 |
|
971 |
-
if
|
972 |
-
|
973 |
-
|
974 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
975 |
# Mark as used
|
976 |
defined_acronyms[acronym]['used'] = True
|
977 |
used_acronyms.add(acronym)
|
@@ -979,7 +989,8 @@ class FAADocumentChecker(DocumentChecker):
|
|
979 |
# Define success based on whether there are any undefined acronyms
|
980 |
success = len(issues) == 0
|
981 |
|
982 |
-
|
|
|
983 |
|
984 |
@profile_performance
|
985 |
def acronym_usage_check(self, doc: List[str]) -> DocumentCheckResult:
|
@@ -1806,11 +1817,11 @@ class DocumentCheckResultsFormatter:
|
|
1806 |
},
|
1807 |
'acronym_usage_check': {
|
1808 |
'title': 'Unused Acronym Definitions',
|
1809 |
-
'description': 'Ensures all acronyms defined in the document are
|
1810 |
-
'solution': '
|
1811 |
'example_fix': {
|
1812 |
-
'before': 'Airworthiness
|
1813 |
-
'after': 'Remove "Airworthiness
|
1814 |
}
|
1815 |
},
|
1816 |
'terminology_check': {
|
@@ -2192,11 +2203,11 @@ class DocumentCheckResultsFormatter:
|
|
2192 |
|
2193 |
self.issue_categories['acronym_usage_check'] = {
|
2194 |
'title': 'Unused Acronym Definitions',
|
2195 |
-
'description': 'Ensures all acronyms defined in the document are
|
2196 |
-
'solution': '
|
2197 |
'example_fix': {
|
2198 |
-
'before': 'Airworthiness
|
2199 |
-
'after': 'Remove "Airworthiness
|
2200 |
}
|
2201 |
}
|
2202 |
|
|
|
929 |
if not self.validate_input(doc):
|
930 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
931 |
|
932 |
+
# Common words that might appear in uppercase but aren't acronyms
|
933 |
heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
|
934 |
+
|
935 |
+
# Standard acronyms that don't need to be defined
|
936 |
predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
|
937 |
|
938 |
# Tracking structures
|
939 |
defined_acronyms = {} # Stores definition info
|
940 |
used_acronyms = set() # Stores acronyms used after definition
|
941 |
+
reported_acronyms = set() # Stores acronyms that have already been noted as issues
|
942 |
issues = []
|
943 |
|
944 |
# Patterns
|
|
|
946 |
acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
|
947 |
|
948 |
for paragraph in doc:
|
949 |
+
# Skip lines that appear to be headings (all uppercase with common heading words)
|
950 |
words = paragraph.strip().split()
|
951 |
if all(word.isupper() for word in words) and any(word in heading_words for word in words):
|
952 |
continue
|
953 |
|
954 |
+
# Check for acronym definitions first
|
955 |
defined_matches = defined_pattern.findall(paragraph)
|
956 |
for full_term, acronym in defined_matches:
|
957 |
if acronym not in predefined_acronyms:
|
|
|
959 |
defined_acronyms[acronym] = {
|
960 |
'full_term': full_term.strip(),
|
961 |
'defined_at': paragraph.strip(),
|
962 |
+
'used': False # Initially not used
|
963 |
}
|
964 |
|
965 |
# Check for acronym usage
|
|
|
971 |
if acronym in predefined_acronyms:
|
972 |
continue
|
973 |
|
974 |
+
# Skip heading words, tokens containing non-letter characters, and tokens longer than 10 characters
|
975 |
+
if (acronym in heading_words or
|
976 |
+
any(not c.isalpha() for c in acronym) or
|
977 |
+
len(acronym) > 10): # Usually acronyms aren't this long
|
978 |
+
continue
|
979 |
+
|
980 |
+
if acronym not in defined_acronyms and acronym not in reported_acronyms:
|
981 |
+
# Undefined acronym used; report only once
|
982 |
+
issues.append(f"Confirm '{acronym}' was defined at its first use.")
|
983 |
+
reported_acronyms.add(acronym) # Add to reported list
|
984 |
+
elif acronym in defined_acronyms:
|
985 |
# Mark as used
|
986 |
defined_acronyms[acronym]['used'] = True
|
987 |
used_acronyms.add(acronym)
|
|
|
989 |
# Define success based on whether there are any undefined acronyms
|
990 |
success = len(issues) == 0
|
991 |
|
992 |
+
# Return the result with detailed issues
|
993 |
+
return DocumentCheckResult(success=success, issues=issues)
|
994 |
|
995 |
@profile_performance
|
996 |
def acronym_usage_check(self, doc: List[str]) -> DocumentCheckResult:
|
|
|
1817 |
},
|
1818 |
'acronym_usage_check': {
|
1819 |
'title': 'Unused Acronym Definitions',
|
1820 |
+
'description': 'Ensures that all acronyms defined in the document are actually used later. If an acronym is defined but never referenced, the definition should be removed to avoid confusion or unnecessary clutter.',
|
1821 |
+
'solution': 'Identify acronyms that are defined but not used later in the document and remove their definitions.',
|
1822 |
'example_fix': {
|
1823 |
+
'before': 'Operators must comply with Airworthiness Directives (AD) to ensure aircraft safety and regulatory compliance.',
|
1824 |
+
'after': 'Remove "(AD)" of "Airworthiness Directives (AD)" if "AD" is not used later in the document.'
|
1825 |
}
|
1826 |
},
|
1827 |
'terminology_check': {
|
|
|
2203 |
|
2204 |
self.issue_categories['acronym_usage_check'] = {
|
2205 |
'title': 'Unused Acronym Definitions',
|
2206 |
+
'description': 'Ensures that all acronyms defined in the document are actually used later. If an acronym is defined but never referenced, the definition should be removed to avoid confusion or unnecessary clutter.',
|
2207 |
+
'solution': 'Identify acronyms that are defined but not used later in the document and remove their definitions.',
|
2208 |
'example_fix': {
|
2209 |
+
'before': 'Operators must comply with Airworthiness Directives (AD) to ensure aircraft safety and regulatory compliance.',
|
2210 |
+
'after': 'Remove "(AD)" of "Airworthiness Directives (AD)" if "AD" is not used later in the document.'
|
2211 |
}
|
2212 |
}
|
2213 |
|