Hoctar77 committed on
Commit
7173793
·
verified ·
1 Parent(s): 35c2a20

Changed to weasyprint

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -13,13 +13,13 @@ from typing import Dict, List, Any, Tuple, Optional, Pattern, Callable
13
  from dataclasses import dataclass
14
  from functools import wraps
15
  from abc import ABC, abstractmethod
16
- import pdfkit # Import pdfkit for PDF generation
17
  import tempfile # For creating temporary files
18
 
19
  # Third-party imports
20
  import gradio as gr
21
  from docx import Document
22
  from colorama import init, Fore, Style
 
23
 
24
  # Constants
25
  DEFAULT_PORT = 7860
@@ -354,6 +354,11 @@ class DocumentCheckerConfig:
354
  description="Ignore 'title 14, Code of Federal Regulations (14 CFR)'",
355
  is_error=False
356
  ),
 
 
 
 
 
357
  PatternConfig(
358
  pattern=r'\bUSC\b',
359
  description="USC should be U.S.C.", # Per GPO Style Manual
@@ -2783,19 +2788,15 @@ def create_interface():
2783
  # Function to generate PDF and provide it for download
2784
  def generate_pdf(html_content):
2785
  try:
2786
- # Specify the path to wkhtmltopdf
2787
- path_wkhtmltopdf = '/usr/bin/wkhtmltopdf'
2788
- config = pdfkit.configuration(wkhtmltopdf=path_wkhtmltopdf)
2789
-
2790
  # Use a temporary file to store the PDF
2791
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
2792
- # Convert HTML to PDF
2793
- pdfkit.from_string(html_content, tmp_pdf.name, configuration=config)
2794
 
2795
  # Return the path to the PDF file
2796
  return gr.update(value=tmp_pdf.name, visible=True)
2797
  except Exception as e:
2798
- logging.error(f"Error generating PDF: {str(e)}")
2799
  return gr.update(value=None, visible=False)
2800
 
2801
  # When the download button is clicked, generate the PDF
 
13
  from dataclasses import dataclass
14
  from functools import wraps
15
  from abc import ABC, abstractmethod
 
16
  import tempfile # For creating temporary files
17
 
18
  # Third-party imports
19
  import gradio as gr
20
  from docx import Document
21
  from colorama import init, Fore, Style
22
+ from weasyprint import HTML
23
 
24
  # Constants
25
  DEFAULT_PORT = 7860
 
354
  description="Ignore 'title 14, Code of Federal Regulations (14 CFR)'",
355
  is_error=False
356
  ),
357
+ PatternConfig(
358
+ pattern=r'\bAD Compliance Team \(AD CRT\)\b',
359
+ description="Ignore 'AD Compliance Team (AD CRT)'",
360
+ is_error=False
361
+ ),
362
  PatternConfig(
363
  pattern=r'\bUSC\b',
364
  description="USC should be U.S.C.", # Per GPO Style Manual
 
2788
  # Function to generate PDF and provide it for download
2789
  def generate_pdf(html_content):
2790
  try:
 
 
 
 
2791
  # Use a temporary file to store the PDF
2792
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
2793
+ # Convert HTML to PDF using WeasyPrint
2794
+ HTML(string=html_content, base_url='.').write_pdf(tmp_pdf.name)
2795
 
2796
  # Return the path to the PDF file
2797
  return gr.update(value=tmp_pdf.name, visible=True)
2798
  except Exception as e:
2799
+ logging.error(f"Error generating PDF with WeasyPrint: {str(e)}")
2800
  return gr.update(value=None, visible=False)
2801
 
2802
  # When the download button is clicked, generate the PDF