Spaces:

Hoctar77
/

DocumentCheckerTool

Sleeping

App Files Community

Hoctar77 committed on Nov 15, 2024

Commit

0e44409

verified ·

1 Parent(s): fa98e6a

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -18

app.py CHANGED Viewed

@@ -735,6 +735,7 @@ class FAADocumentChecker(DocumentChecker):
     def __init__(self, config_path: Optional[str] = None):
         super().__init__(config_path)
         self.HEADING_WORDS = HEADING_WORDS
     # Core Check Methods
     @profile_performance
@@ -934,10 +935,8 @@ class FAADocumentChecker(DocumentChecker):
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
-        # Common words that might appear in uppercase but aren't acronyms
         heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
-        # Standard acronyms that don't need to be defined
         predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
         # Tracking structures
@@ -947,16 +946,15 @@ class FAADocumentChecker(DocumentChecker):
         # Patterns
         defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
-        # Modified acronym pattern
         acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
         for paragraph in doc:
-            # Skip lines that appear to be headings (all uppercase with common heading words)
             words = paragraph.strip().split()
             if all(word.isupper() for word in words) and any(word in heading_words for word in words):
                 continue
-            # Check for acronym definitions first
             defined_matches = defined_pattern.findall(paragraph)
             for full_term, acronym in defined_matches:
                 if acronym not in predefined_acronyms:
@@ -964,11 +962,8 @@ class FAADocumentChecker(DocumentChecker):
                         defined_acronyms[acronym] = {
                             'full_term': full_term.strip(),
                             'defined_at': paragraph.strip(),
-                            'used': False  # Initially not used
                         }
-                    else:
-                        # Handle duplicate definitions if necessary
-                        pass  # You may add logic for duplicate definitions
             # Check for acronym usage
             usage_matches = acronym_pattern.finditer(paragraph)
@@ -979,15 +974,9 @@ class FAADocumentChecker(DocumentChecker):
                 if acronym in predefined_acronyms:
                     continue
-                # Skip if it's part of a heading or contains non-letter characters
-                if (acronym in heading_words or
-                    any(not c.isalpha() for c in acronym) or
-                    len(acronym) > 10):  # Usually acronyms aren't this long
-                    continue
                 if acronym not in defined_acronyms:
                     # Undefined acronym used
-                    issues.append(acronym)  # Add only the acronym, not the sentence
                 else:
                     # Mark as used
                     defined_acronyms[acronym]['used'] = True
@@ -996,7 +985,6 @@ class FAADocumentChecker(DocumentChecker):
         # Define success based on whether there are any undefined acronyms
         success = len(issues) == 0
-        # Return the result with only undefined acronyms
         return DocumentCheckResult(success=success, issues=list(set(issues)))
     @profile_performance

     def __init__(self, config_path: Optional[str] = None):
         super().__init__(config_path)
         self.HEADING_WORDS = HEADING_WORDS
+        self.PREDEFINED_ACRONYMS = PREDEFINED_ACRONYMS
     # Core Check Methods
     @profile_performance
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Use instance variables for heading words and predefined acronyms
         heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
         predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
         # Tracking structures
         # Patterns
         defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
         acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
         for paragraph in doc:
+            # Skip lines that appear to be headings
             words = paragraph.strip().split()
             if all(word.isupper() for word in words) and any(word in heading_words for word in words):
                 continue
+            # Check for acronym definitions
             defined_matches = defined_pattern.findall(paragraph)
             for full_term, acronym in defined_matches:
                 if acronym not in predefined_acronyms:
                         defined_acronyms[acronym] = {
                             'full_term': full_term.strip(),
                             'defined_at': paragraph.strip(),
+                            'used': False
                         }
             # Check for acronym usage
             usage_matches = acronym_pattern.finditer(paragraph)
                 if acronym in predefined_acronyms:
                     continue
                 if acronym not in defined_acronyms:
                     # Undefined acronym used
+                    issues.append(acronym)
                 else:
                     # Mark as used
                     defined_acronyms[acronym]['used'] = True
         # Define success based on whether there are any undefined acronyms
         success = len(issues) == 0
         return DocumentCheckResult(success=success, issues=list(set(issues)))
     @profile_performance