Hoctar77 committed on
Commit
0e44409
·
verified ·
1 Parent(s): fa98e6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -18
app.py CHANGED
@@ -735,6 +735,7 @@ class FAADocumentChecker(DocumentChecker):
735
  def __init__(self, config_path: Optional[str] = None):
736
  super().__init__(config_path)
737
  self.HEADING_WORDS = HEADING_WORDS
 
738
 
739
  # Core Check Methods
740
  @profile_performance
@@ -934,10 +935,8 @@ class FAADocumentChecker(DocumentChecker):
934
  if not self.validate_input(doc):
935
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
936
 
937
- # Common words that might appear in uppercase but aren't acronyms
938
  heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
939
-
940
- # Standard acronyms that don't need to be defined
941
  predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
942
 
943
  # Tracking structures
@@ -947,16 +946,15 @@ class FAADocumentChecker(DocumentChecker):
947
 
948
  # Patterns
949
  defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
950
- # Modified acronym pattern
951
  acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
952
 
953
  for paragraph in doc:
954
- # Skip lines that appear to be headings (all uppercase with common heading words)
955
  words = paragraph.strip().split()
956
  if all(word.isupper() for word in words) and any(word in heading_words for word in words):
957
  continue
958
 
959
- # Check for acronym definitions first
960
  defined_matches = defined_pattern.findall(paragraph)
961
  for full_term, acronym in defined_matches:
962
  if acronym not in predefined_acronyms:
@@ -964,11 +962,8 @@ class FAADocumentChecker(DocumentChecker):
964
  defined_acronyms[acronym] = {
965
  'full_term': full_term.strip(),
966
  'defined_at': paragraph.strip(),
967
- 'used': False # Initially not used
968
  }
969
- else:
970
- # Handle duplicate definitions if necessary
971
- pass # You may add logic for duplicate definitions
972
 
973
  # Check for acronym usage
974
  usage_matches = acronym_pattern.finditer(paragraph)
@@ -979,15 +974,9 @@ class FAADocumentChecker(DocumentChecker):
979
  if acronym in predefined_acronyms:
980
  continue
981
 
982
- # Skip if it's part of a heading or contains non-letter characters
983
- if (acronym in heading_words or
984
- any(not c.isalpha() for c in acronym) or
985
- len(acronym) > 10): # Usually acronyms aren't this long
986
- continue
987
-
988
  if acronym not in defined_acronyms:
989
  # Undefined acronym used
990
- issues.append(acronym) # Add only the acronym, not the sentence
991
  else:
992
  # Mark as used
993
  defined_acronyms[acronym]['used'] = True
@@ -996,7 +985,6 @@ class FAADocumentChecker(DocumentChecker):
996
  # Define success based on whether there are any undefined acronyms
997
  success = len(issues) == 0
998
 
999
- # Return the result with only undefined acronyms
1000
  return DocumentCheckResult(success=success, issues=list(set(issues)))
1001
 
1002
  @profile_performance
 
735
  def __init__(self, config_path: Optional[str] = None):
736
  super().__init__(config_path)
737
  self.HEADING_WORDS = HEADING_WORDS
738
+ self.PREDEFINED_ACRONYMS = PREDEFINED_ACRONYMS
739
 
740
  # Core Check Methods
741
  @profile_performance
 
935
  if not self.validate_input(doc):
936
  return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
937
 
938
+ # Use instance variables for heading words and predefined acronyms
939
  heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
 
 
940
  predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
941
 
942
  # Tracking structures
 
946
 
947
  # Patterns
948
  defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
 
949
  acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
950
 
951
  for paragraph in doc:
952
+ # Skip lines that appear to be headings
953
  words = paragraph.strip().split()
954
  if all(word.isupper() for word in words) and any(word in heading_words for word in words):
955
  continue
956
 
957
+ # Check for acronym definitions
958
  defined_matches = defined_pattern.findall(paragraph)
959
  for full_term, acronym in defined_matches:
960
  if acronym not in predefined_acronyms:
 
962
  defined_acronyms[acronym] = {
963
  'full_term': full_term.strip(),
964
  'defined_at': paragraph.strip(),
965
+ 'used': False
966
  }
 
 
 
967
 
968
  # Check for acronym usage
969
  usage_matches = acronym_pattern.finditer(paragraph)
 
974
  if acronym in predefined_acronyms:
975
  continue
976
 
 
 
 
 
 
 
977
  if acronym not in defined_acronyms:
978
  # Undefined acronym used
979
+ issues.append(acronym)
980
  else:
981
  # Mark as used
982
  defined_acronyms[acronym]['used'] = True
 
985
  # Define success based on whether there are any undefined acronyms
986
  success = len(issues) == 0
987
 
 
988
  return DocumentCheckResult(success=success, issues=list(set(issues)))
989
 
990
  @profile_performance