Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,12 +11,8 @@ import io
|
|
11 |
import os
|
12 |
import traceback
|
13 |
from datetime import datetime
|
14 |
-
import gc
|
15 |
|
16 |
-
#
|
17 |
-
from main import FAADocumentChecker, DocumentCheckResult
|
18 |
-
|
19 |
-
# Core data structures and utilities
|
20 |
@dataclass
|
21 |
class DocumentCheckResult:
|
22 |
"""Structured result for document checks."""
|
@@ -187,7 +183,7 @@ class DocumentChecker:
|
|
187 |
return []
|
188 |
|
189 |
class FAADocumentChecker(DocumentChecker):
|
190 |
-
"""Main document checker implementation
|
191 |
def __init__(self, config_path: Optional[str] = None):
|
192 |
super().__init__(config_path)
|
193 |
|
@@ -869,6 +865,33 @@ class FAADocumentChecker(DocumentChecker):
|
|
869 |
|
870 |
return results
|
871 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
872 |
def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
873 |
"""Format check results into a Markdown string for Gradio display."""
|
874 |
output = []
|
@@ -951,35 +974,6 @@ def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: s
|
|
951 |
return "\n".join(output)
|
952 |
|
953 |
def create_interface():
|
954 |
-
|
955 |
-
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
|
956 |
-
"""Process document and run all checks."""
|
957 |
-
try:
|
958 |
-
checker = FAADocumentChecker()
|
959 |
-
|
960 |
-
if isinstance(file_obj, bytes):
|
961 |
-
file_obj = io.BytesIO(file_obj)
|
962 |
-
|
963 |
-
results = checker.run_all_checks(file_obj, doc_type, template_type)
|
964 |
-
return format_markdown_results(results, doc_type)
|
965 |
-
|
966 |
-
except Exception as e:
|
967 |
-
logging.error(f"Error processing document: {str(e)}")
|
968 |
-
traceback.print_exc()
|
969 |
-
return f"""
|
970 |
-
# ❌ Error Processing Document
|
971 |
-
|
972 |
-
**Error Details:** {str(e)}
|
973 |
-
|
974 |
-
Please ensure:
|
975 |
-
1. The file is a valid .docx document
|
976 |
-
2. The file is not corrupted or password protected
|
977 |
-
3. The file is properly formatted
|
978 |
-
|
979 |
-
Try again after checking these issues. If the problem persists, contact support.
|
980 |
-
"""
|
981 |
-
|
982 |
-
def create_interface():
|
983 |
"""Create and configure the Gradio interface."""
|
984 |
document_types = [
|
985 |
"Advisory Circular",
|
@@ -1149,100 +1143,19 @@ def create_interface():
|
|
1149 |
|
1150 |
return demo
|
1151 |
|
1152 |
-
|
1153 |
-
|
1154 |
-
|
1155 |
-
|
1156 |
-
|
1157 |
-
|
1158 |
-
|
1159 |
-
f"## Document Type: {doc_type}",
|
1160 |
-
"---\n"
|
1161 |
-
])
|
1162 |
-
|
1163 |
-
total_issues = sum(1 for r in results.values() if not r.success)
|
1164 |
-
|
1165 |
-
if total_issues == 0:
|
1166 |
-
output.append("✅ **All checks passed successfully!**\n")
|
1167 |
-
return "\n".join(output)
|
1168 |
-
|
1169 |
-
output.append(f"❗ Found issues in {total_issues} check categories\n")
|
1170 |
-
|
1171 |
-
check_categories = {
|
1172 |
-
'heading_title_check': {'title': '📋 Required Headings', 'priority': 1},
|
1173 |
-
'heading_title_period_check': {'title': '🔍 Heading Period Usage', 'priority': 1},
|
1174 |
-
'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 2},
|
1175 |
-
'terminology_check': {'title': '📖 Terminology Usage', 'priority': 2},
|
1176 |
-
'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
|
1177 |
-
'caption_check_table': {'title': '📊 Table Captions', 'priority': 3},
|
1178 |
-
'caption_check_figure': {'title': '🖼️ Figure Captions', 'priority': 3},
|
1179 |
-
'table_figure_reference_check': {'title': '🔗 Table/Figure References', 'priority': 3},
|
1180 |
-
'document_title_check': {'title': '📑 Document Title Format', 'priority': 1},
|
1181 |
-
'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
|
1182 |
-
'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
|
1183 |
-
'abbreviation_usage_check': {'title': '📎 Abbreviation Usage', 'priority': 3},
|
1184 |
-
'date_formats_check': {'title': '📅 Date Formats', 'priority': 3},
|
1185 |
-
'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 1}
|
1186 |
-
}
|
1187 |
-
|
1188 |
-
sorted_checks = sorted(
|
1189 |
-
[(name, result) for name, result in results.items()],
|
1190 |
-
key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
|
1191 |
-
)
|
1192 |
-
|
1193 |
-
for check_name, result in sorted_checks:
|
1194 |
-
if not result.success:
|
1195 |
-
category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
|
1196 |
-
|
1197 |
-
output.append(f"### {category['title']}")
|
1198 |
-
|
1199 |
-
if isinstance(result.issues, list):
|
1200 |
-
for issue in result.issues[:5]:
|
1201 |
-
if isinstance(issue, dict):
|
1202 |
-
for key, value in issue.items():
|
1203 |
-
if isinstance(value, list):
|
1204 |
-
output.extend([f"- {item}" for item in value])
|
1205 |
-
else:
|
1206 |
-
output.append(f"- {key}: {value}")
|
1207 |
-
else:
|
1208 |
-
output.append(f"- {issue}")
|
1209 |
-
|
1210 |
-
if len(result.issues) > 5:
|
1211 |
-
output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
|
1212 |
-
|
1213 |
-
output.append("")
|
1214 |
-
|
1215 |
-
output.extend([
|
1216 |
-
"## 📋 Summary and Recommendations",
|
1217 |
-
"",
|
1218 |
-
"### Priority Order for Fixes:",
|
1219 |
-
"1. 🔴 Critical: Heading formats, required content, and document structure",
|
1220 |
-
"2. 🟡 Important: Terminology, acronyms, and references",
|
1221 |
-
"3. 🟢 Standard: Formatting, spacing, and style consistency",
|
1222 |
-
"",
|
1223 |
-
"### Next Steps:",
|
1224 |
-
"1. Address issues in priority order",
|
1225 |
-
"2. Use search/replace for consistent fixes",
|
1226 |
-
"3. Re-run checker after making changes",
|
1227 |
-
"4. Update your document template if needed",
|
1228 |
-
""
|
1229 |
-
])
|
1230 |
-
|
1231 |
-
return "\n".join(output)
|
1232 |
-
|
1233 |
-
# Initialize and launch the interface
|
1234 |
-
if __name__ == "__main__":
|
1235 |
-
# Setup logging
|
1236 |
-
logging.basicConfig(
|
1237 |
-
level=logging.INFO,
|
1238 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
1239 |
-
)
|
1240 |
|
1241 |
-
|
1242 |
-
|
1243 |
-
|
1244 |
-
|
1245 |
-
|
1246 |
-
|
1247 |
-
|
1248 |
-
|
|
|
11 |
import os
|
12 |
import traceback
|
13 |
from datetime import datetime
|
|
|
14 |
|
15 |
+
# Core data structures
|
|
|
|
|
|
|
16 |
@dataclass
|
17 |
class DocumentCheckResult:
|
18 |
"""Structured result for document checks."""
|
|
|
183 |
return []
|
184 |
|
185 |
class FAADocumentChecker(DocumentChecker):
|
186 |
+
"""Main document checker implementation."""
|
187 |
def __init__(self, config_path: Optional[str] = None):
|
188 |
super().__init__(config_path)
|
189 |
|
|
|
865 |
|
866 |
return results
|
867 |
|
868 |
+
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
|
869 |
+
"""Process document and run all checks."""
|
870 |
+
try:
|
871 |
+
checker = FAADocumentChecker()
|
872 |
+
|
873 |
+
if isinstance(file_obj, bytes):
|
874 |
+
file_obj = io.BytesIO(file_obj)
|
875 |
+
|
876 |
+
results = checker.run_all_checks(file_obj, doc_type, template_type)
|
877 |
+
return format_markdown_results(results, doc_type)
|
878 |
+
|
879 |
+
except Exception as e:
|
880 |
+
logging.error(f"Error processing document: {str(e)}")
|
881 |
+
traceback.print_exc()
|
882 |
+
return f"""
|
883 |
+
# ❌ Error Processing Document
|
884 |
+
|
885 |
+
**Error Details:** {str(e)}
|
886 |
+
|
887 |
+
Please ensure:
|
888 |
+
1. The file is a valid .docx document
|
889 |
+
2. The file is not corrupted or password protected
|
890 |
+
3. The file is properly formatted
|
891 |
+
|
892 |
+
Try again after checking these issues. If the problem persists, contact support.
|
893 |
+
"""
|
894 |
+
|
895 |
def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
896 |
"""Format check results into a Markdown string for Gradio display."""
|
897 |
output = []
|
|
|
974 |
return "\n".join(output)
|
975 |
|
976 |
def create_interface():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
977 |
"""Create and configure the Gradio interface."""
|
978 |
document_types = [
|
979 |
"Advisory Circular",
|
|
|
1143 |
|
1144 |
return demo
|
1145 |
|
1146 |
+
# Initialize and launch the interface
|
1147 |
+
if __name__ == "__main__":
|
1148 |
+
# Setup logging
|
1149 |
+
logging.basicConfig(
|
1150 |
+
level=logging.INFO,
|
1151 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
1152 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1153 |
|
1154 |
+
# Create and launch the interface
|
1155 |
+
demo = create_interface()
|
1156 |
+
demo.launch(
|
1157 |
+
share=False, # Set to True if you want to generate a public link
|
1158 |
+
server_name="0.0.0.0", # Allows external access
|
1159 |
+
server_port=7860, # Default Gradio port
|
1160 |
+
debug=True
|
1161 |
+
)
|