Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -735,6 +735,7 @@ class FAADocumentChecker(DocumentChecker):
|
|
735 |
def __init__(self, config_path: Optional[str] = None):
|
736 |
super().__init__(config_path)
|
737 |
self.HEADING_WORDS = HEADING_WORDS
|
|
|
738 |
|
739 |
# Core Check Methods
|
740 |
@profile_performance
|
@@ -934,10 +935,8 @@ class FAADocumentChecker(DocumentChecker):
|
|
934 |
if not self.validate_input(doc):
|
935 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
936 |
|
937 |
-
#
|
938 |
heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
|
939 |
-
|
940 |
-
# Standard acronyms that don't need to be defined
|
941 |
predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
|
942 |
|
943 |
# Tracking structures
|
@@ -947,16 +946,15 @@ class FAADocumentChecker(DocumentChecker):
|
|
947 |
|
948 |
# Patterns
|
949 |
defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
|
950 |
-
# Modified acronym pattern
|
951 |
acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
|
952 |
|
953 |
for paragraph in doc:
|
954 |
-
# Skip lines that appear to be headings
|
955 |
words = paragraph.strip().split()
|
956 |
if all(word.isupper() for word in words) and any(word in heading_words for word in words):
|
957 |
continue
|
958 |
|
959 |
-
# Check for acronym definitions
|
960 |
defined_matches = defined_pattern.findall(paragraph)
|
961 |
for full_term, acronym in defined_matches:
|
962 |
if acronym not in predefined_acronyms:
|
@@ -964,11 +962,8 @@ class FAADocumentChecker(DocumentChecker):
|
|
964 |
defined_acronyms[acronym] = {
|
965 |
'full_term': full_term.strip(),
|
966 |
'defined_at': paragraph.strip(),
|
967 |
-
'used': False
|
968 |
}
|
969 |
-
else:
|
970 |
-
# Handle duplicate definitions if necessary
|
971 |
-
pass # You may add logic for duplicate definitions
|
972 |
|
973 |
# Check for acronym usage
|
974 |
usage_matches = acronym_pattern.finditer(paragraph)
|
@@ -979,15 +974,9 @@ class FAADocumentChecker(DocumentChecker):
|
|
979 |
if acronym in predefined_acronyms:
|
980 |
continue
|
981 |
|
982 |
-
# Skip if it's part of a heading or contains non-letter characters
|
983 |
-
if (acronym in heading_words or
|
984 |
-
any(not c.isalpha() for c in acronym) or
|
985 |
-
len(acronym) > 10): # Usually acronyms aren't this long
|
986 |
-
continue
|
987 |
-
|
988 |
if acronym not in defined_acronyms:
|
989 |
# Undefined acronym used
|
990 |
-
issues.append(acronym)
|
991 |
else:
|
992 |
# Mark as used
|
993 |
defined_acronyms[acronym]['used'] = True
|
@@ -996,7 +985,6 @@ class FAADocumentChecker(DocumentChecker):
|
|
996 |
# Define success based on whether there are any undefined acronyms
|
997 |
success = len(issues) == 0
|
998 |
|
999 |
-
# Return the result with only undefined acronyms
|
1000 |
return DocumentCheckResult(success=success, issues=list(set(issues)))
|
1001 |
|
1002 |
@profile_performance
|
|
|
735 |
def __init__(self, config_path: Optional[str] = None):
|
736 |
super().__init__(config_path)
|
737 |
self.HEADING_WORDS = HEADING_WORDS
|
738 |
+
self.PREDEFINED_ACRONYMS = PREDEFINED_ACRONYMS
|
739 |
|
740 |
# Core Check Methods
|
741 |
@profile_performance
|
|
|
935 |
if not self.validate_input(doc):
|
936 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
937 |
|
938 |
+
# Use instance variables for heading words and predefined acronyms
|
939 |
heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
|
|
|
|
|
940 |
predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
|
941 |
|
942 |
# Tracking structures
|
|
|
946 |
|
947 |
# Patterns
|
948 |
defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
|
|
|
949 |
acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
|
950 |
|
951 |
for paragraph in doc:
|
952 |
+
# Skip lines that appear to be headings
|
953 |
words = paragraph.strip().split()
|
954 |
if all(word.isupper() for word in words) and any(word in heading_words for word in words):
|
955 |
continue
|
956 |
|
957 |
+
# Check for acronym definitions
|
958 |
defined_matches = defined_pattern.findall(paragraph)
|
959 |
for full_term, acronym in defined_matches:
|
960 |
if acronym not in predefined_acronyms:
|
|
|
962 |
defined_acronyms[acronym] = {
|
963 |
'full_term': full_term.strip(),
|
964 |
'defined_at': paragraph.strip(),
|
965 |
+
'used': False
|
966 |
}
|
|
|
|
|
|
|
967 |
|
968 |
# Check for acronym usage
|
969 |
usage_matches = acronym_pattern.finditer(paragraph)
|
|
|
974 |
if acronym in predefined_acronyms:
|
975 |
continue
|
976 |
|
|
|
|
|
|
|
|
|
|
|
|
|
977 |
if acronym not in defined_acronyms:
|
978 |
# Undefined acronym used
|
979 |
+
issues.append(acronym)
|
980 |
else:
|
981 |
# Mark as used
|
982 |
defined_acronyms[acronym]['used'] = True
|
|
|
985 |
# Define success based on whether there are any undefined acronyms
|
986 |
success = len(issues) == 0
|
987 |
|
|
|
988 |
return DocumentCheckResult(success=success, issues=list(set(issues)))
|
989 |
|
990 |
@profile_performance
|