Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -17,8 +17,6 @@ import numpy as np
|
|
17 |
###############################
|
18 |
os.system('pip uninstall -y magic-pdf')
|
19 |
os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
|
20 |
-
|
21 |
-
# โ
OpenCV ์ค์น (headless ๋ฒ์ )
|
22 |
os.system('pip install opencv-python-headless')
|
23 |
|
24 |
os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
|
@@ -102,6 +100,74 @@ def read_fn(path):
|
|
102 |
disk_rw = FileBasedDataReader(os.path.dirname(path))
|
103 |
return disk_rw.read(os.path.basename(path))
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
|
106 |
os.makedirs(output_dir, exist_ok=True)
|
107 |
try:
|
@@ -146,84 +212,19 @@ def image_to_base64(image_path):
|
|
146 |
|
147 |
def replace_image_with_base64(markdown_text, image_dir_path):
|
148 |
pattern = r'\!\[(?:[^\]]*)\]\(([^)]+)\)'
|
|
|
149 |
def replace(match):
|
150 |
relative_path = match.group(1)
|
151 |
full_path = os.path.join(image_dir_path, relative_path)
|
152 |
base64_image = image_to_base64(full_path)
|
153 |
return f""
|
154 |
-
return re.sub(pattern, replace, markdown_text)
|
155 |
|
156 |
-
|
157 |
-
# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ ํจ์ (Grayscale/Binarization + Deskew)
|
158 |
-
###############################
|
159 |
-
def preprocess_image(image_path):
|
160 |
-
"""
|
161 |
-
1) Grayscale + Binarization(OTSU)
|
162 |
-
2) Deskew(๊ธฐ์ธ์ ๋ณด์ )
|
163 |
-
์ ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง๋ฅผ ์์ ๊ฒฝ๋ก์ ์ ์ฅ ํ ํด๋น ๊ฒฝ๋ก๋ฅผ ๋ฐํ
|
164 |
-
"""
|
165 |
-
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
|
166 |
-
if img is None:
|
167 |
-
# ์ด๋ฏธ์ง ํ์ผ์ด ์๋ ๊ฒฝ์ฐ ํน์ ๋ก๋ฉ ์คํจ ์ ์๋ณธ ๊ฒฝ๋ก ๊ทธ๋๋ก ๋ฐํ
|
168 |
-
return image_path
|
169 |
-
|
170 |
-
# (a) ์ด์งํ(binarization)
|
171 |
-
_, img_bin = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
|
172 |
-
|
173 |
-
# (b) ๊ธฐ์ธ์ ๋ณด์ (deskew)
|
174 |
-
coords = np.column_stack(np.where(img_bin > 0))
|
175 |
-
angle = cv2.minAreaRect(coords)[-1]
|
176 |
-
# OpenCV๋ ํ์ ๊ฐ๋๋ฅผ [-90, 0)๋ก ๋ฐํํ ๋๊ฐ ๋ง์ผ๋ฏ๋ก ๋ณด์
|
177 |
-
if angle < -45:
|
178 |
-
angle = -(90 + angle)
|
179 |
-
else:
|
180 |
-
angle = -angle
|
181 |
-
|
182 |
-
(h, w) = img_bin.shape[:2]
|
183 |
-
center = (w // 2, h // 2)
|
184 |
-
M = cv2.getRotationMatrix2D(center, angle, 1.0)
|
185 |
-
img_rotated = cv2.warpAffine(
|
186 |
-
img_bin,
|
187 |
-
M,
|
188 |
-
(w, h),
|
189 |
-
flags=cv2.INTER_CUBIC,
|
190 |
-
borderMode=cv2.BORDER_CONSTANT,
|
191 |
-
borderValue=255
|
192 |
-
)
|
193 |
-
|
194 |
-
# ์์ ํ์ผ๋ก ์ ์ฅ
|
195 |
-
preprocessed_path = image_path + "_preprocessed.png"
|
196 |
-
cv2.imwrite(preprocessed_path, img_rotated)
|
197 |
-
|
198 |
-
return preprocessed_path
|
199 |
-
|
200 |
-
def to_pdf(file_path):
|
201 |
-
"""
|
202 |
-
์ด๋ฏธ์ง(JPG/PNG ๋ฑ)๋ฅผ PDF๋ก ์ปจ๋ฒํ
ํ๋,
|
203 |
-
์ด๋ฏธ์ง์ผ ๊ฒฝ์ฐ ์ ์ฒ๋ฆฌ(Grayscale/Binarization + Deskew)๋ฅผ ๋จผ์ ์ ์ฉ
|
204 |
-
"""
|
205 |
-
with pymupdf.open(file_path) as f:
|
206 |
-
if f.is_pdf:
|
207 |
-
return file_path
|
208 |
-
else:
|
209 |
-
# ์ด๋ฏธ์ง ํ์ผ์ธ ๊ฒฝ์ฐ, ์ ์ฒ๋ฆฌ ์ํ ํ PDF ์์ฑ
|
210 |
-
f.close()
|
211 |
-
preprocessed_path = preprocess_image(file_path)
|
212 |
-
|
213 |
-
# ์ ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง๋ฅผ ๋ค์ PyMuPDF๋ก ์ด์ด์ PDF ๋ณํ
|
214 |
-
with pymupdf.open(preprocessed_path) as img_doc:
|
215 |
-
pdf_bytes = img_doc.convert_to_pdf()
|
216 |
-
|
217 |
-
unique_filename = f"{uuid.uuid4()}.pdf"
|
218 |
-
tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
|
219 |
-
with open(tmp_file_path, 'wb') as tmp_pdf_file:
|
220 |
-
tmp_pdf_file.write(pdf_bytes)
|
221 |
-
return tmp_file_path
|
222 |
|
223 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
|
224 |
"""
|
225 |
์
๋ก๋๋ PDF/์ด๋ฏธ์ง -> PDF ๋ณํ -> ๋งํฌ๋ค์ด ๋ณํ
|
226 |
-
(ํ๋ก๊ทธ๋ ์ค
|
227 |
"""
|
228 |
progress(0, "PDF๋ก ๋ณํ ์ค...")
|
229 |
file_path = to_pdf(file_path)
|
@@ -347,7 +348,6 @@ def convert_chat_messages_to_gradio_format(messages):
|
|
347 |
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
348 |
"""
|
349 |
Gemini ์๋ต ์คํธ๋ฆฌ๋ฐ
|
350 |
-
(user_message๊ฐ ๊ณต๋ฐฑ์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋ก ๋์ฒด)
|
351 |
"""
|
352 |
if not user_message.strip():
|
353 |
user_message = "...(No content from user)..."
|
@@ -376,7 +376,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
|
376 |
parts = chunk.candidates[0].content.parts
|
377 |
current_chunk = parts[0].text
|
378 |
|
379 |
-
# ๋ง์ฝ parts
|
380 |
if len(parts) == 2 and not thinking_complete:
|
381 |
thought_buffer += current_chunk
|
382 |
messages[-1] = ChatMessage(
|
@@ -413,7 +413,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
|
413 |
|
414 |
def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
|
415 |
"""
|
416 |
-
doc_text(๋งํฌ๋ค์ด) ์ฌ์ฉํด
|
417 |
"""
|
418 |
if doc_text.strip():
|
419 |
user_query = f"๋ค์ ๋ฌธ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ๋ต๋ณ:\n\n{doc_text}\n\n์ง๋ฌธ: {msg}"
|
@@ -435,7 +435,6 @@ def reset_states(_):
|
|
435 |
###############################
|
436 |
# (2) OCR FLEX ์ ์ฉ (์ค๋ํซ)
|
437 |
###############################
|
438 |
-
# ๋ณ๋์ LaTeX ์ค์
|
439 |
latex_delimiters = [
|
440 |
{"left": "$$", "right": "$$", "display": True},
|
441 |
{"left": '$', "right": '$', "display": False}
|
@@ -444,8 +443,7 @@ latex_delimiters = [
|
|
444 |
def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
|
445 |
"""
|
446 |
์ค๋ํซ์์ ์ฌ์ฉ:
|
447 |
-
์
๋ก๋๋ PDF/์ด๋ฏธ์ง -> PDF ๋ณํ -> ๋งํฌ๋ค์ด ๋ณํ
|
448 |
-
(๋งํฌ๋ค์ด ๋ ๋๋ง / ๋งํฌ๋ค์ด ํ
์คํธ / ์์ถํ์ผ / PDF๋ฏธ๋ฆฌ๋ณด๊ธฐ) ๋ฐํ
|
449 |
"""
|
450 |
file_path = to_pdf(file_path)
|
451 |
if end_pages > 20:
|
@@ -475,11 +473,10 @@ def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enab
|
|
475 |
###############################
|
476 |
if __name__ == "__main__":
|
477 |
with gr.Blocks(title="VisionOCR", css=create_css()) as demo:
|
478 |
-
# ํญ ์์ญ
|
479 |
with gr.Tabs():
|
480 |
-
|
481 |
# Tab (1) : PDF -> Markdown ๋ณํ + Chat
|
482 |
-
|
483 |
with gr.Tab("PDF Chat with LLM"):
|
484 |
gr.HTML("""
|
485 |
<div class="title-area">
|
@@ -491,21 +488,19 @@ if __name__ == "__main__":
|
|
491 |
md_state = gr.State("") # ๋ณํ๋ ๋งํฌ๋ค์ด ํ
์คํธ
|
492 |
chat_history = gr.State([]) # ChatMessage ๋ฆฌ์คํธ
|
493 |
|
494 |
-
# ์
๋ก๋ & ๋ณํ
|
495 |
with gr.Row():
|
496 |
file = gr.File(label="PDF/์ด๋ฏธ์ง ์
๋ก๋", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
|
497 |
convert_btn = gr.Button("๋ณํํ๊ธฐ")
|
498 |
|
499 |
chatbot = gr.Chatbot(height=600)
|
500 |
|
501 |
-
# ์ ํ์ผ ์
๋ก๋ ์: ์ด์ ๋ํ/๋งํฌ๋ค์ด/์ฑ๋ด ์ด๊ธฐํ
|
502 |
file.change(
|
503 |
fn=reset_states,
|
504 |
inputs=file,
|
505 |
outputs=[chat_history, md_state, chatbot]
|
506 |
)
|
507 |
|
508 |
-
# ์จ๊น
|
509 |
max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
|
510 |
layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
|
511 |
language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
|
@@ -520,7 +515,6 @@ if __name__ == "__main__":
|
|
520 |
show_progress=True
|
521 |
)
|
522 |
|
523 |
-
# Gemini Chat
|
524 |
gr.Markdown("## ์ถ๋ก LLM๊ณผ ๋ํ")
|
525 |
|
526 |
with gr.Row():
|
@@ -546,9 +540,9 @@ if __name__ == "__main__":
|
|
546 |
outputs=[chat_history, md_state, chatbot]
|
547 |
)
|
548 |
|
549 |
-
|
550 |
-
# Tab (2) : OCR FLEX
|
551 |
-
|
552 |
with gr.Tab("OCR FLEX"):
|
553 |
gr.HTML("""
|
554 |
<div class="title-area">
|
@@ -558,87 +552,38 @@ if __name__ == "__main__":
|
|
558 |
""")
|
559 |
|
560 |
with gr.Row():
|
561 |
-
# ์ผ์ชฝ ํจ๋
|
562 |
with gr.Column(variant='panel', scale=5):
|
563 |
-
file_ocr = gr.File(
|
564 |
-
|
565 |
-
file_types=[".pdf", ".png", ".jpeg", ".jpg"]
|
566 |
-
)
|
567 |
-
|
568 |
-
max_pages_ocr = gr.Slider(
|
569 |
-
1, 20, 10,
|
570 |
-
step=1,
|
571 |
-
label='์ต๋ ๋ณํ ํ์ด์ง ์'
|
572 |
-
)
|
573 |
|
574 |
with gr.Row():
|
575 |
-
layout_mode_ocr = gr.Dropdown(
|
576 |
-
|
577 |
-
label="๋ ์ด์์ ๋ชจ๋ธ",
|
578 |
-
value="doclayout_yolo"
|
579 |
-
)
|
580 |
-
language_ocr = gr.Dropdown(
|
581 |
-
all_lang,
|
582 |
-
label="์ธ์ด",
|
583 |
-
value='auto'
|
584 |
-
)
|
585 |
|
586 |
with gr.Row():
|
587 |
-
formula_enable_ocr = gr.Checkbox(
|
588 |
-
|
589 |
-
|
590 |
-
)
|
591 |
-
is_ocr_ocr = gr.Checkbox(
|
592 |
-
label="OCR ๊ฐ์ ํ์ฑํ",
|
593 |
-
value=False
|
594 |
-
)
|
595 |
-
table_enable_ocr = gr.Checkbox(
|
596 |
-
label="ํ ์ธ์ ํ์ฑํ(ํ
์คํธ)",
|
597 |
-
value=True
|
598 |
-
)
|
599 |
|
600 |
with gr.Row():
|
601 |
change_bu_ocr = gr.Button("๋ณํ")
|
602 |
-
|
603 |
-
# โ
ClearButton ์์ โ
|
604 |
-
# ์ฒซ ๋ฒ์งธ ์ธ์ -> clearํ ๋์(์ปดํฌ๋ํธ),
|
605 |
-
# ๋ฒํผ์ ํ์๋ ํ
์คํธ๋ value="์ด๊ธฐํ"
|
606 |
clear_bu_ocr = gr.ClearButton(
|
607 |
-
components=[
|
608 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
609 |
value="์ด๊ธฐํ"
|
610 |
)
|
611 |
|
612 |
-
pdf_show_ocr = PDF(
|
613 |
-
label='PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ',
|
614 |
-
interactive=False,
|
615 |
-
visible=True,
|
616 |
-
height=800
|
617 |
-
)
|
618 |
-
|
619 |
-
# ์์ ํด๋๊ฐ ์๋ค๋ฉด ์ฌ์ฉ (์ค์ ์คํํ๊ฒฝ์ ๋ฐ๋ผ ์ฃผ์)
|
620 |
-
with gr.Accordion("์์ :", open=False):
|
621 |
-
example_root = (
|
622 |
-
os.path.join(os.path.dirname(__file__), "examples")
|
623 |
-
if "__file__" in globals() else "./examples"
|
624 |
-
)
|
625 |
-
if os.path.exists(example_root):
|
626 |
-
gr.Examples(
|
627 |
-
examples=[
|
628 |
-
os.path.join(example_root, _) for _ in os.listdir(example_root)
|
629 |
-
if _.endswith("pdf")
|
630 |
-
],
|
631 |
-
inputs=file_ocr
|
632 |
-
)
|
633 |
-
else:
|
634 |
-
gr.Markdown("์์ ํด๋๊ฐ ์กด์ฌํ์ง ์์ต๋๋ค.")
|
635 |
|
636 |
-
# ์ค๋ฅธ์ชฝ ํจ๋
|
637 |
with gr.Column(variant='panel', scale=5):
|
638 |
-
output_file_ocr = gr.File(
|
639 |
-
label="๋ณํ ๊ฒฐ๊ณผ",
|
640 |
-
interactive=False
|
641 |
-
)
|
642 |
|
643 |
with gr.Tabs():
|
644 |
with gr.Tab("๋งํฌ๋ค์ด ๋ ๋๋ง"):
|
@@ -646,17 +591,13 @@ if __name__ == "__main__":
|
|
646 |
label="๋งํฌ๋ค์ด ๋ ๋๋ง",
|
647 |
height=1100,
|
648 |
show_copy_button=True,
|
649 |
-
|
650 |
-
|
651 |
)
|
652 |
|
653 |
with gr.Tab("๋งํฌ๋ค์ด ํ
์คํธ"):
|
654 |
-
md_text_ocr = gr.TextArea(
|
655 |
-
lines=45,
|
656 |
-
show_copy_button=True
|
657 |
-
)
|
658 |
|
659 |
-
# ์ด๋ฒคํธ ํธ๋ค๋ฌ (OCR FLEX)
|
660 |
file_ocr.change(
|
661 |
fn=to_pdf,
|
662 |
inputs=file_ocr,
|
@@ -685,5 +626,4 @@ if __name__ == "__main__":
|
|
685 |
]
|
686 |
)
|
687 |
|
688 |
-
# ์ ์ฒด ์ฑ ์คํ
|
689 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)
|
|
|
17 |
###############################
|
18 |
os.system('pip uninstall -y magic-pdf')
|
19 |
os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
|
|
|
|
|
20 |
os.system('pip install opencv-python-headless')
|
21 |
|
22 |
os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
|
|
|
100 |
disk_rw = FileBasedDataReader(os.path.dirname(path))
|
101 |
return disk_rw.read(os.path.basename(path))
|
102 |
|
103 |
+
###############################
|
104 |
+
# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ ํจ์ (์ด์งํ + Deskew)
|
105 |
+
###############################
|
106 |
+
def preprocess_image(image_path):
|
107 |
+
"""
|
108 |
+
1) Grayscale + Otsu Binarization
|
109 |
+
2) Deskew(๊ธฐ์ธ์ ๋ณด์ )
|
110 |
+
"""
|
111 |
+
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
|
112 |
+
if img is None:
|
113 |
+
# ์ด๋ฏธ์ง๊ฐ ์๋๊ฑฐ๋ ๋ก๋ฉ ์คํจ ์ ๊ทธ๋๋ก ๋ฐํ
|
114 |
+
return image_path
|
115 |
+
|
116 |
+
# (a) ์ด์งํ(Otsu)
|
117 |
+
_, img_bin = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
|
118 |
+
|
119 |
+
# (b) ๊ธฐ์ธ์ ๋ณด์ (deskew)
|
120 |
+
coords = np.column_stack(np.where(img_bin > 0))
|
121 |
+
angle = cv2.minAreaRect(coords)[-1]
|
122 |
+
if angle < -45:
|
123 |
+
angle = -(90 + angle)
|
124 |
+
else:
|
125 |
+
angle = -angle
|
126 |
+
|
127 |
+
(h, w) = img_bin.shape[:2]
|
128 |
+
center = (w // 2, h // 2)
|
129 |
+
M = cv2.getRotationMatrix2D(center, angle, 1.0)
|
130 |
+
img_rotated = cv2.warpAffine(
|
131 |
+
img_bin, M, (w, h),
|
132 |
+
flags=cv2.INTER_CUBIC,
|
133 |
+
borderMode=cv2.BORDER_CONSTANT,
|
134 |
+
borderValue=255
|
135 |
+
)
|
136 |
+
|
137 |
+
# ์์ ํ์ผ๋ก ์ ์ฅ
|
138 |
+
preprocessed_path = image_path + "_preprocessed.png"
|
139 |
+
cv2.imwrite(preprocessed_path, img_rotated)
|
140 |
+
return preprocessed_path
|
141 |
+
|
142 |
+
###############################
|
143 |
+
# PDF or Image -> PDF ๋ณํ
|
144 |
+
# (๋ถํ์ํ f.close() ์ ๊ฑฐ)
|
145 |
+
###############################
|
146 |
+
def to_pdf(file_path):
|
147 |
+
"""
|
148 |
+
์ด๋ฏธ์ง(JPG/PNG ๋ฑ)๋ผ๋ฉด ์ ์ฒ๋ฆฌ ํ PDF๋ก ๋ณํ.
|
149 |
+
์ด๋ฏธ PDF๋ผ๋ฉด ๊ทธ๋๋ก ๋ฐํ.
|
150 |
+
"""
|
151 |
+
with pymupdf.open(file_path) as f:
|
152 |
+
# PDF์ธ ๊ฒฝ์ฐ
|
153 |
+
if f.is_pdf:
|
154 |
+
return file_path
|
155 |
+
# ์ด๋ฏธ์ง ํ์ผ์ธ ๊ฒฝ์ฐ
|
156 |
+
# (์ค์ฒฉ๋ with๋ฌธ์ ๋ค์ด๊ฐ๊ธฐ ์ , ๊ทธ๋ฅ ๋ธ๋ก์ด ๋๋๋ฉด ์๋ close)
|
157 |
+
# ๋ธ๋ก ์ข
๋ฃ ์ f๋ ์ด๋ฏธ close๋จ.
|
158 |
+
|
159 |
+
# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ -> ์ ์ด๋ฏธ์ง -> PDF ๋ณํ
|
160 |
+
preprocessed_path = preprocess_image(file_path)
|
161 |
+
with pymupdf.open(preprocessed_path) as img_doc:
|
162 |
+
pdf_bytes = img_doc.convert_to_pdf()
|
163 |
+
|
164 |
+
unique_filename = f"{uuid.uuid4()}.pdf"
|
165 |
+
tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
|
166 |
+
with open(tmp_file_path, 'wb') as tmp_pdf_file:
|
167 |
+
tmp_pdf_file.write(pdf_bytes)
|
168 |
+
|
169 |
+
return tmp_file_path
|
170 |
+
|
171 |
def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
|
172 |
os.makedirs(output_dir, exist_ok=True)
|
173 |
try:
|
|
|
212 |
|
213 |
def replace_image_with_base64(markdown_text, image_dir_path):
|
214 |
pattern = r'\!\[(?:[^\]]*)\]\(([^)]+)\)'
|
215 |
+
|
216 |
def replace(match):
|
217 |
relative_path = match.group(1)
|
218 |
full_path = os.path.join(image_dir_path, relative_path)
|
219 |
base64_image = image_to_base64(full_path)
|
220 |
return f""
|
|
|
221 |
|
222 |
+
return re.sub(pattern, replace, markdown_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
|
225 |
"""
|
226 |
์
๋ก๋๋ PDF/์ด๋ฏธ์ง -> PDF ๋ณํ -> ๋งํฌ๋ค์ด ๋ณํ
|
227 |
+
(ํ๋ก๊ทธ๋ ์ค ํ์)
|
228 |
"""
|
229 |
progress(0, "PDF๋ก ๋ณํ ์ค...")
|
230 |
file_path = to_pdf(file_path)
|
|
|
348 |
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
349 |
"""
|
350 |
Gemini ์๋ต ์คํธ๋ฆฌ๋ฐ
|
|
|
351 |
"""
|
352 |
if not user_message.strip():
|
353 |
user_message = "...(No content from user)..."
|
|
|
376 |
parts = chunk.candidates[0].content.parts
|
377 |
current_chunk = parts[0].text
|
378 |
|
379 |
+
# ๋ง์ฝ parts๊ฐ 2๊ฐ์ด๋ฉด [thinking, ์ต์ข
๋ต๋ณ]
|
380 |
if len(parts) == 2 and not thinking_complete:
|
381 |
thought_buffer += current_chunk
|
382 |
messages[-1] = ChatMessage(
|
|
|
413 |
|
414 |
def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
|
415 |
"""
|
416 |
+
doc_text(๋งํฌ๋ค์ด) ์ฌ์ฉํด ์ง๋ฌธ์ ์ด์ง ๋ณํ
|
417 |
"""
|
418 |
if doc_text.strip():
|
419 |
user_query = f"๋ค์ ๋ฌธ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ๋ต๋ณ:\n\n{doc_text}\n\n์ง๋ฌธ: {msg}"
|
|
|
435 |
###############################
|
436 |
# (2) OCR FLEX ์ ์ฉ (์ค๋ํซ)
|
437 |
###############################
|
|
|
438 |
latex_delimiters = [
|
439 |
{"left": "$$", "right": "$$", "display": True},
|
440 |
{"left": '$', "right": '$', "display": False}
|
|
|
443 |
def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
|
444 |
"""
|
445 |
์ค๋ํซ์์ ์ฌ์ฉ:
|
446 |
+
์
๋ก๋๋ PDF/์ด๋ฏธ์ง -> ์ ์ฒ๋ฆฌ -> PDF ๋ณํ -> ๋งํฌ๋ค์ด ๋ณํ
|
|
|
447 |
"""
|
448 |
file_path = to_pdf(file_path)
|
449 |
if end_pages > 20:
|
|
|
473 |
###############################
|
474 |
if __name__ == "__main__":
|
475 |
with gr.Blocks(title="VisionOCR", css=create_css()) as demo:
|
|
|
476 |
with gr.Tabs():
|
477 |
+
###############################
|
478 |
# Tab (1) : PDF -> Markdown ๋ณํ + Chat
|
479 |
+
###############################
|
480 |
with gr.Tab("PDF Chat with LLM"):
|
481 |
gr.HTML("""
|
482 |
<div class="title-area">
|
|
|
488 |
md_state = gr.State("") # ๋ณํ๋ ๋งํฌ๋ค์ด ํ
์คํธ
|
489 |
chat_history = gr.State([]) # ChatMessage ๋ฆฌ์คํธ
|
490 |
|
|
|
491 |
with gr.Row():
|
492 |
file = gr.File(label="PDF/์ด๋ฏธ์ง ์
๋ก๋", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
|
493 |
convert_btn = gr.Button("๋ณํํ๊ธฐ")
|
494 |
|
495 |
chatbot = gr.Chatbot(height=600)
|
496 |
|
|
|
497 |
file.change(
|
498 |
fn=reset_states,
|
499 |
inputs=file,
|
500 |
outputs=[chat_history, md_state, chatbot]
|
501 |
)
|
502 |
|
503 |
+
# ์จ๊น ์ต์
๋ค
|
504 |
max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
|
505 |
layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
|
506 |
language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
|
|
|
515 |
show_progress=True
|
516 |
)
|
517 |
|
|
|
518 |
gr.Markdown("## ์ถ๋ก LLM๊ณผ ๋ํ")
|
519 |
|
520 |
with gr.Row():
|
|
|
540 |
outputs=[chat_history, md_state, chatbot]
|
541 |
)
|
542 |
|
543 |
+
###############################
|
544 |
+
# Tab (2) : OCR FLEX
|
545 |
+
###############################
|
546 |
with gr.Tab("OCR FLEX"):
|
547 |
gr.HTML("""
|
548 |
<div class="title-area">
|
|
|
552 |
""")
|
553 |
|
554 |
with gr.Row():
|
|
|
555 |
with gr.Column(variant='panel', scale=5):
|
556 |
+
file_ocr = gr.File(label="PDF ๋๋ ์ด๋ฏธ์ง ํ์ผ", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
|
557 |
+
max_pages_ocr = gr.Slider(1, 20, 10, step=1, label='์ต๋ ๋ณํ ํ์ด์ง ์')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
|
559 |
with gr.Row():
|
560 |
+
layout_mode_ocr = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="๋ ์ด์์ ๋ชจ๋ธ", value="doclayout_yolo")
|
561 |
+
language_ocr = gr.Dropdown(all_lang, label="์ธ์ด", value='auto')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
562 |
|
563 |
with gr.Row():
|
564 |
+
formula_enable_ocr = gr.Checkbox(label="์์ ์ธ์ ํ์ฑํ", value=True)
|
565 |
+
is_ocr_ocr = gr.Checkbox(label="OCR ๊ฐ์ ํ์ฑํ", value=False)
|
566 |
+
table_enable_ocr = gr.Checkbox(label="ํ ์ธ์ ํ์ฑํ(ํ
์คํธ)", value=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
567 |
|
568 |
with gr.Row():
|
569 |
change_bu_ocr = gr.Button("๋ณํ")
|
|
|
|
|
|
|
|
|
570 |
clear_bu_ocr = gr.ClearButton(
|
571 |
+
components=[
|
572 |
+
file_ocr,
|
573 |
+
max_pages_ocr,
|
574 |
+
layout_mode_ocr,
|
575 |
+
language_ocr,
|
576 |
+
formula_enable_ocr,
|
577 |
+
is_ocr_ocr,
|
578 |
+
table_enable_ocr
|
579 |
+
],
|
580 |
value="์ด๊ธฐํ"
|
581 |
)
|
582 |
|
583 |
+
pdf_show_ocr = PDF(label='PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ', interactive=False, visible=True, height=800)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
584 |
|
|
|
585 |
with gr.Column(variant='panel', scale=5):
|
586 |
+
output_file_ocr = gr.File(label="๋ณํ ๊ฒฐ๊ณผ", interactive=False)
|
|
|
|
|
|
|
587 |
|
588 |
with gr.Tabs():
|
589 |
with gr.Tab("๋งํฌ๋ค์ด ๋ ๋๋ง"):
|
|
|
591 |
label="๋งํฌ๋ค์ด ๋ ๋๋ง",
|
592 |
height=1100,
|
593 |
show_copy_button=True,
|
594 |
+
line_breaks=True,
|
595 |
+
latex_delimiters=latex_delimiters
|
596 |
)
|
597 |
|
598 |
with gr.Tab("๋งํฌ๋ค์ด ํ
์คํธ"):
|
599 |
+
md_text_ocr = gr.TextArea(lines=45, show_copy_button=True)
|
|
|
|
|
|
|
600 |
|
|
|
601 |
file_ocr.change(
|
602 |
fn=to_pdf,
|
603 |
inputs=file_ocr,
|
|
|
626 |
]
|
627 |
)
|
628 |
|
|
|
629 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)
|