Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from docx import Document
|
|
5 |
import tempfile
|
6 |
import os
|
7 |
import traceback
|
|
|
8 |
|
9 |
def setup_logging():
|
10 |
"""Initialize logging configuration."""
|
@@ -651,27 +652,14 @@ def process_file(file_obj, doc_type, template_type):
|
|
651 |
return "Please upload a document first."
|
652 |
|
653 |
try:
|
654 |
-
#
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
# Check if it's a file-like object
|
659 |
-
if hasattr(file_obj, 'name'):
|
660 |
-
print(f"File name: {file_obj.name}")
|
661 |
-
|
662 |
-
# For Gradio file upload, the file object should have a name attribute
|
663 |
-
# that contains the path to the temporary uploaded file
|
664 |
-
if isinstance(file_obj, str):
|
665 |
-
# If file_obj is a string (filepath)
|
666 |
-
file_path = file_obj
|
667 |
else:
|
668 |
-
|
669 |
-
file_path = file_obj.name
|
670 |
|
671 |
-
print(f"File path: {file_path}")
|
672 |
-
|
673 |
# Process the document and get results
|
674 |
-
results = process_document(
|
675 |
return results
|
676 |
|
677 |
except Exception as e:
|
@@ -692,6 +680,53 @@ Technical details: {str(e)}"""
|
|
692 |
|
693 |
return error_message
|
694 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
695 |
# Create the interface with simplified layout for Hugging Face Spaces
|
696 |
demo = gr.Blocks(title="FAA Document Checker")
|
697 |
|
@@ -749,24 +784,6 @@ with demo:
|
|
749 |
inputs=[file_input, doc_type, template_type],
|
750 |
outputs=[output]
|
751 |
)
|
752 |
-
|
753 |
-
gr.Markdown("""
|
754 |
-
## Instructions
|
755 |
-
1. Upload your Word document (.docx format)
|
756 |
-
2. Select the document type
|
757 |
-
3. If it's an Advisory Circular, select the template type
|
758 |
-
4. Click 'Check Document' to process
|
759 |
-
5. Review the results in the output panel
|
760 |
-
|
761 |
-
The checker will verify:
|
762 |
-
- Required headings
|
763 |
-
- Acronym definitions
|
764 |
-
- Legal terminology
|
765 |
-
- Table and figure captions
|
766 |
-
- Document title styling
|
767 |
-
- Spacing and formatting
|
768 |
-
- And more...
|
769 |
-
""")
|
770 |
|
771 |
# For Hugging Face Spaces, we just need to expose the 'demo'
|
772 |
demo.launch()
|
|
|
5 |
import tempfile
|
6 |
import os
|
7 |
import traceback
|
8 |
+
import io
|
9 |
|
10 |
def setup_logging():
|
11 |
"""Initialize logging configuration."""
|
|
|
652 |
return "Please upload a document first."
|
653 |
|
654 |
try:
|
655 |
+
# Convert bytes to BytesIO object that Document can read
|
656 |
+
if isinstance(file_obj, bytes):
|
657 |
+
doc_bytes = io.BytesIO(file_obj)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
658 |
else:
|
659 |
+
doc_bytes = io.BytesIO(file_obj.read())
|
|
|
660 |
|
|
|
|
|
661 |
# Process the document and get results
|
662 |
+
results = process_document(doc_bytes, doc_type, template_type)
|
663 |
return results
|
664 |
|
665 |
except Exception as e:
|
|
|
680 |
|
681 |
return error_message
|
682 |
|
683 |
+
def process_document(file_obj, doc_type, template_type):
    """Run the full set of document checks and return the formatted report.

    ``file_obj`` is passed straight to ``Document``; ``doc_type`` and
    ``template_type`` select the check configuration and are forwarded to
    the individual checks that take them. Any exception raised along the
    way is printed and then re-raised unchanged.
    """
    try:
        # Load the Word document from the supplied object
        document = Document(file_obj)
        print("Document read successfully.")

        # Look up the headings required for this document/template pair
        check_config = get_document_checks(doc_type, template_type)
        required_headings = check_config.get("required_headings", [])

        # Run every check, in the same order the report expects them
        headings_ok, found_headings = heading_title_check(document, required_headings)
        acronyms_ok, missing_acronym_defs = acronym_check(document)
        legal_ok, bad_legal_refs = legal_check(document)
        tables_ok, bad_table_captions = table_caption_check(document, doc_type)
        figures_ok, bad_figure_captions = figure_caption_check(document, doc_type)
        refs_ok, bad_table_figure_refs = table_figure_reference_check(document, doc_type)
        title_ok, bad_titles = document_title_check(document, doc_type)
        periods_ok, bad_sentences = double_period_check(document)
        spacing_ok, bad_spacing = spacing_check(document)
        abbreviation_findings = check_abbreviation_usage(document)
        date_findings = check_date_formats(document)
        placeholder_findings = check_placeholders(document)

        # Hand everything to the Gradio formatter (positional order matters)
        return format_results_for_gradio(
            headings_ok, found_headings,
            acronyms_ok, missing_acronym_defs,
            legal_ok, bad_legal_refs,
            tables_ok, bad_table_captions,
            figures_ok, bad_figure_captions,
            refs_ok, bad_table_figure_refs,
            title_ok, bad_titles,
            required_headings, doc_type,
            periods_ok, bad_sentences,
            spacing_ok, bad_spacing,
            abbreviation_findings, date_findings,
            placeholder_findings
        )

    except Exception as e:
        # Report the failure here, then let the caller decide how to present it
        print(f"Error in process_document: {str(e)}")
        raise
|
729 |
+
|
730 |
# Create the interface with simplified layout for Hugging Face Spaces
|
731 |
demo = gr.Blocks(title="FAA Document Checker")
|
732 |
|
|
|
784 |
inputs=[file_input, doc_type, template_type],
|
785 |
outputs=[output]
|
786 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
787 |
|
788 |
# For Hugging Face Spaces, we just need to expose the 'demo'
|
789 |
demo.launch()
|