Hoctar77 committed on
Commit
b2f4d9e
·
verified ·
1 Parent(s): 4b2d07d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -126
app.py CHANGED
@@ -280,6 +280,105 @@ def get_document_checks(doc_type, template_type):
280
  # Add other document types as needed
281
  return {"required_headings": []}
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  def format_results_for_gradio(**kwargs):
284
  """Format the results for display in Gradio."""
285
  results = []
@@ -422,132 +521,6 @@ def format_results_for_gradio(**kwargs):
422
 
423
  return "\n".join(results)
424
 
425
- def process_file(file_obj, doc_type, template_type):
426
- """
427
- Process the uploaded file and return results with error handling
428
- """
429
- if file_obj is None:
430
- return "Please upload a document first."
431
-
432
- try:
433
- # Convert bytes to BytesIO object that Document can read
434
- if isinstance(file_obj, bytes):
435
- doc_bytes = io.BytesIO(file_obj)
436
- else:
437
- doc_bytes = io.BytesIO(file_obj.read())
438
-
439
- # Process the document and get results
440
- results = process_document(doc_bytes, doc_type, template_type)
441
- return results
442
-
443
- except Exception as e:
444
- error_trace = traceback.format_exc()
445
- print(f"Error processing file: {str(e)}")
446
- print(f"Full traceback: {error_trace}")
447
-
448
- error_message = f"""An error occurred while processing the document:
449
-
450
- Error: {str(e)}
451
-
452
- Please ensure:
453
- 1. The file is a valid Word document (.docx)
454
- 2. The file is not corrupted
455
- 3. The file is not password protected
456
-
457
- Technical details: {str(e)}"""
458
-
459
- return error_message
460
-
461
- def process_document(file_obj, doc_type, template_type):
462
- """Process the document and perform checks."""
463
- try:
464
- # Read the Word document
465
- doc = Document(file_obj)
466
- print("Document read successfully.")
467
-
468
- # Get required headings based on document type
469
- required_headings = get_document_checks(doc_type, template_type).get("required_headings", [])
470
-
471
- # Perform checks
472
- heading_valid, headings_found = heading_title_check(doc, required_headings)
473
- acronyms_valid, undefined_acronyms = acronym_check(doc)
474
-
475
- # Format results
476
- results = format_results_for_gradio(
477
- heading_valid=heading_valid,
478
- headings_found=headings_found,
479
- acronyms_valid=acronyms_valid,
480
- undefined_acronyms=undefined_acronyms,
481
- legal_valid=True, # Placeholder
482
- incorrect_legal_references=[],
483
- table_valid=True, # Placeholder
484
- incorrect_captions=[],
485
- figure_valid=True, # Placeholder
486
- incorrect_fig_captions=[],
487
- references_valid=True, # Placeholder
488
- incorrect_table_figure_references=[],
489
- title_style_valid=True, # Placeholder
490
- incorrect_titles=[],
491
- required_headings=required_headings,
492
- doc_type=doc_type,
493
- double_period_valid=True, # Placeholder
494
- incorrect_sentences=[],
495
- spacing_valid=True, # Placeholder
496
- incorrect_spacing=[],
497
- abbreviation_issues=[],
498
- date_issues=[],
499
- placeholder_issues=[]
500
- )
501
-
502
- return results
503
-
504
- except Exception as e:
505
- print(f"Error in process_document: {str(e)}")
506
- raise
507
-
508
- def get_document_checks(doc_type, template_type):
509
- """Return the required headings and other checks based on document type."""
510
- if doc_type == "Advisory Circular":
511
- if template_type == "Short AC template AC":
512
- return {
513
- "required_headings": ["Purpose", "Applicability", "Related Reading Material",
514
- "Background", "Discussion"]
515
- }
516
- else: # Long AC template
517
- return {
518
- "required_headings": ["Purpose", "Applicability", "Audience", "Related Reading Material",
519
- "Background", "Discussion", "Conclusion"]
520
- }
521
- # Add other document types as needed
522
- return {"required_headings": []}
523
-
524
- def format_results_for_gradio(**kwargs):
525
- """Format the results for display."""
526
- results = []
527
- results.append("# Document Check Results\n")
528
-
529
- # Required Headings Check
530
- results.append("## Required Headings Check")
531
- if kwargs['heading_valid']:
532
- results.append("✅ All required headings are present.\n")
533
- else:
534
- missing_headings = set(kwargs['required_headings']) - set(kwargs['headings_found'])
535
- results.append("❌ Missing Required Headings:")
536
- for heading in missing_headings:
537
- results.append(f"- {heading}")
538
- results.append("")
539
-
540
- # Acronym Check
541
- results.append("## Acronym Check")
542
- if kwargs['acronyms_valid']:
543
- results.append("✅ All acronyms are properly defined.\n")
544
- else:
545
- results.append("❌ The following acronyms need to be defined at first use:")
546
- for acronym in kwargs['undefined_acronyms']:
547
- results.append(f"- {acronym}")
548
-
549
- return "\n".join(results)
550
-
551
  def process_file(file_obj, doc_type, template_type):
552
  """Process the uploaded file and return results with error handling."""
553
  if file_obj is None:
 
280
  # Add other document types as needed
281
  return {"required_headings": []}
282
 
283
+ def process_file(file_obj, doc_type, template_type):
284
+ """
285
+ Process the uploaded file and return results with error handling
286
+ """
287
+ if file_obj is None:
288
+ return "Please upload a document first."
289
+
290
+ try:
291
+ # Convert bytes to BytesIO object that Document can read
292
+ if isinstance(file_obj, bytes):
293
+ doc_bytes = io.BytesIO(file_obj)
294
+ else:
295
+ doc_bytes = io.BytesIO(file_obj.read())
296
+
297
+ # Process the document and get results
298
+ results = process_document(doc_bytes, doc_type, template_type)
299
+ return results
300
+
301
+ except Exception as e:
302
+ error_trace = traceback.format_exc()
303
+ print(f"Error processing file: {str(e)}")
304
+ print(f"Full traceback: {error_trace}")
305
+
306
+ error_message = f"""An error occurred while processing the document:
307
+
308
+ Error: {str(e)}
309
+
310
+ Please ensure:
311
+ 1. The file is a valid Word document (.docx)
312
+ 2. The file is not corrupted
313
+ 3. The file is not password protected
314
+
315
+ Technical details: {str(e)}"""
316
+
317
+ return error_message
318
+
319
+ def process_document(file_obj, doc_type, template_type):
320
+ """Process the document and perform checks."""
321
+ try:
322
+ # Read the Word document
323
+ doc = Document(file_obj)
324
+ print("Document read successfully.")
325
+
326
+ # Get required headings based on document type
327
+ required_headings = get_document_checks(doc_type, template_type).get("required_headings", [])
328
+
329
+ # Perform checks
330
+ heading_valid, headings_found = heading_title_check(doc, required_headings)
331
+ acronyms_valid, undefined_acronyms = acronym_check(doc)
332
+
333
+ # Format results
334
+ results = format_results_for_gradio(
335
+ heading_valid=heading_valid,
336
+ headings_found=headings_found,
337
+ acronyms_valid=acronyms_valid,
338
+ undefined_acronyms=undefined_acronyms,
339
+ legal_valid=True, # Placeholder
340
+ incorrect_legal_references=[],
341
+ table_valid=True, # Placeholder
342
+ incorrect_captions=[],
343
+ figure_valid=True, # Placeholder
344
+ incorrect_fig_captions=[],
345
+ references_valid=True, # Placeholder
346
+ incorrect_table_figure_references=[],
347
+ title_style_valid=True, # Placeholder
348
+ incorrect_titles=[],
349
+ required_headings=required_headings,
350
+ doc_type=doc_type,
351
+ double_period_valid=True, # Placeholder
352
+ incorrect_sentences=[],
353
+ spacing_valid=True, # Placeholder
354
+ incorrect_spacing=[],
355
+ abbreviation_issues=[],
356
+ date_issues=[],
357
+ placeholder_issues=[]
358
+ )
359
+
360
+ return results
361
+
362
+ except Exception as e:
363
+ print(f"Error in process_document: {str(e)}")
364
+ raise
365
+
366
+ def get_document_checks(doc_type, template_type):
367
+ """Return the required headings and other checks based on document type."""
368
+ if doc_type == "Advisory Circular":
369
+ if template_type == "Short AC template AC":
370
+ return {
371
+ "required_headings": ["Purpose", "Applicability", "Related Reading Material",
372
+ "Background", "Discussion"]
373
+ }
374
+ else: # Long AC template
375
+ return {
376
+ "required_headings": ["Purpose", "Applicability", "Audience", "Related Reading Material",
377
+ "Background", "Discussion", "Conclusion"]
378
+ }
379
+ # Add other document types as needed
380
+ return {"required_headings": []}
381
+
382
  def format_results_for_gradio(**kwargs):
383
  """Format the results for display in Gradio."""
384
  results = []
 
521
 
522
  return "\n".join(results)
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  def process_file(file_obj, doc_type, template_type):
525
  """Process the uploaded file and return results with error handling."""
526
  if file_obj is None: