Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -150,43 +150,40 @@ def to_pdf(file_path):
|
|
150 |
return tmp_file_path
|
151 |
|
152 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
|
153 |
-
""
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
txt_content = f.read()
|
182 |
-
time.sleep(0.5)
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
time.sleep(0.5)
|
187 |
|
188 |
-
progress(100, "변환 완료!")
|
189 |
-
return md_content
|
190 |
|
191 |
def init_model():
|
192 |
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|
|
|
150 |
return tmp_file_path
|
151 |
|
152 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
|
153 |
+
progress(0, "νμΌμ PDFλ‘ λ³ν μ€...")
|
154 |
+
file_path = to_pdf(file_path)
|
155 |
+
time.sleep(0.5)
|
156 |
+
|
157 |
+
if end_pages > 20:
|
158 |
+
end_pages = 20
|
159 |
+
|
160 |
+
progress(30, "PDF 파싱 중...")
|
161 |
+
local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
|
162 |
+
layout_mode, formula_enable, table_enable, language)
|
163 |
+
time.sleep(0.5)
|
164 |
+
|
165 |
+
progress(50, "압축(Zip) 생성 중...")
|
166 |
+
archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
|
167 |
+
zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
|
168 |
+
if zip_archive_success == 0:
|
169 |
+
logger.info("압축 성공")
|
170 |
+
else:
|
171 |
+
logger.error("압축 실패")
|
172 |
+
time.sleep(0.5)
|
173 |
+
|
174 |
+
progress(70, "마크다운 읽는 중...")
|
175 |
+
md_path = os.path.join(local_md_dir, file_name + ".md")
|
176 |
+
with open(md_path, 'r', encoding='utf-8') as f:
|
177 |
+
txt_content = f.read()
|
178 |
+
time.sleep(0.5)
|
179 |
|
180 |
+
progress(90, "이미지 base64 변환 중...")
|
181 |
+
md_content = replace_image_with_base64(txt_content, local_md_dir)
|
182 |
+
time.sleep(0.5)
|
|
|
|
|
183 |
|
184 |
+
progress(100, "변환 완료!")
|
185 |
+
return md_content
|
|
|
186 |
|
|
|
|
|
187 |
|
188 |
def init_model():
|
189 |
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|