openfree committed on
Commit
ed3742a
·
verified ·
1 Parent(s): 58b1a3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -161
app.py CHANGED
@@ -17,8 +17,6 @@ import numpy as np
17
  ###############################
18
  os.system('pip uninstall -y magic-pdf')
19
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
20
-
21
- # ★ OpenCV 설치 (headless 버전)
22
  os.system('pip install opencv-python-headless')
23
 
24
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
@@ -102,6 +100,74 @@ def read_fn(path):
102
  disk_rw = FileBasedDataReader(os.path.dirname(path))
103
  return disk_rw.read(os.path.basename(path))
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
106
  os.makedirs(output_dir, exist_ok=True)
107
  try:
@@ -146,84 +212,19 @@ def image_to_base64(image_path):
146
 
147
  def replace_image_with_base64(markdown_text, image_dir_path):
148
  pattern = r'\!\[(?:[^\]]*)\]\(([^)]+)\)'
 
149
  def replace(match):
150
  relative_path = match.group(1)
151
  full_path = os.path.join(image_dir_path, relative_path)
152
  base64_image = image_to_base64(full_path)
153
  return f"![{relative_path}](data:image/jpeg;base64,{base64_image})"
154
- return re.sub(pattern, replace, markdown_text)
155
 
156
- ###############################
157
- # 이미지 전처리 함수 (Grayscale/Binarization + Deskew)
158
- ###############################
159
- def preprocess_image(image_path):
160
- """
161
- 1) Grayscale + Binarization(OTSU)
162
- 2) Deskew(๊ธฐ์šธ์ž„ ๋ณด์ •)
163
- ์ „์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€๋ฅผ ์ž„์‹œ ๊ฒฝ๋กœ์— ์ €์žฅ ํ›„ ํ•ด๋‹น ๊ฒฝ๋กœ๋ฅผ ๋ฐ˜ํ™˜
164
- """
165
- img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
166
- if img is None:
167
- # ์ด๋ฏธ์ง€ ํŒŒ์ผ์ด ์•„๋‹Œ ๊ฒฝ์šฐ ํ˜น์€ ๋กœ๋”ฉ ์‹คํŒจ ์‹œ ์›๋ณธ ๊ฒฝ๋กœ ๊ทธ๋Œ€๋กœ ๋ฐ˜ํ™˜
168
- return image_path
169
-
170
- # (a) ์ด์ง„ํ™”(binarization)
171
- _, img_bin = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
172
-
173
- # (b) ๊ธฐ์šธ์ž„ ๋ณด์ •(deskew)
174
- coords = np.column_stack(np.where(img_bin > 0))
175
- angle = cv2.minAreaRect(coords)[-1]
176
- # OpenCV๋Š” ํšŒ์ „ ๊ฐ๋„๋ฅผ [-90, 0)๋กœ ๋ฐ˜ํ™˜ํ•  ๋•Œ๊ฐ€ ๋งŽ์œผ๋ฏ€๋กœ ๋ณด์ •
177
- if angle < -45:
178
- angle = -(90 + angle)
179
- else:
180
- angle = -angle
181
-
182
- (h, w) = img_bin.shape[:2]
183
- center = (w // 2, h // 2)
184
- M = cv2.getRotationMatrix2D(center, angle, 1.0)
185
- img_rotated = cv2.warpAffine(
186
- img_bin,
187
- M,
188
- (w, h),
189
- flags=cv2.INTER_CUBIC,
190
- borderMode=cv2.BORDER_CONSTANT,
191
- borderValue=255
192
- )
193
-
194
- # ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
195
- preprocessed_path = image_path + "_preprocessed.png"
196
- cv2.imwrite(preprocessed_path, img_rotated)
197
-
198
- return preprocessed_path
199
-
200
- def to_pdf(file_path):
201
- """
202
- ์ด๋ฏธ์ง€(JPG/PNG ๋“ฑ)๋ฅผ PDF๋กœ ์ปจ๋ฒ„ํŒ…ํ•˜๋˜,
203
- ์ด๋ฏธ์ง€์ผ ๊ฒฝ์šฐ ์ „์ฒ˜๋ฆฌ(Grayscale/Binarization + Deskew)๋ฅผ ๋จผ์ € ์ ์šฉ
204
- """
205
- with pymupdf.open(file_path) as f:
206
- if f.is_pdf:
207
- return file_path
208
- else:
209
- # ์ด๋ฏธ์ง€ ํŒŒ์ผ์ธ ๊ฒฝ์šฐ, ์ „์ฒ˜๋ฆฌ ์ˆ˜ํ–‰ ํ›„ PDF ์ƒ์„ฑ
210
- f.close()
211
- preprocessed_path = preprocess_image(file_path)
212
-
213
- # ์ „์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€๋ฅผ ๋‹ค์‹œ PyMuPDF๋กœ ์—ด์–ด์„œ PDF ๋ณ€ํ™˜
214
- with pymupdf.open(preprocessed_path) as img_doc:
215
- pdf_bytes = img_doc.convert_to_pdf()
216
-
217
- unique_filename = f"{uuid.uuid4()}.pdf"
218
- tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
219
- with open(tmp_file_path, 'wb') as tmp_pdf_file:
220
- tmp_pdf_file.write(pdf_bytes)
221
- return tmp_file_path
222
 
223
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
224
  """
225
  ์—…๋กœ๋“œ๋œ PDF/์ด๋ฏธ์ง€ -> PDF ๋ณ€ํ™˜ -> ๋งˆํฌ๋‹ค์šด ๋ณ€ํ™˜
226
- (ํ”„๋กœ๊ทธ๋ ˆ์Šค ๋ฐ” ํ‘œ์‹œ์šฉ)
227
  """
228
  progress(0, "PDF๋กœ ๋ณ€ํ™˜ ์ค‘...")
229
  file_path = to_pdf(file_path)
@@ -347,7 +348,6 @@ def convert_chat_messages_to_gradio_format(messages):
347
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
348
  """
349
  Gemini ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ
350
- (user_message๊ฐ€ ๊ณต๋ฐฑ์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋กœ ๋Œ€์ฒด)
351
  """
352
  if not user_message.strip():
353
  user_message = "...(No content from user)..."
@@ -376,7 +376,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
376
  parts = chunk.candidates[0].content.parts
377
  current_chunk = parts[0].text
378
 
379
- # ๋งŒ์•ฝ parts ๊ฐ€ 2๊ฐœ๋ผ๋ฉด, parts[0]๋Š” thinking, parts[1]์€ ์ตœ์ข…๋‹ต๋ณ€
380
  if len(parts) == 2 and not thinking_complete:
381
  thought_buffer += current_chunk
382
  messages[-1] = ChatMessage(
@@ -413,7 +413,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
413
 
414
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
415
  """
416
- doc_text(๋งˆํฌ๋‹ค์šด) ์‚ฌ์šฉํ•ด ์งˆ๋ฌธ ์ž๋™ ๋ณ€ํ˜•
417
  """
418
  if doc_text.strip():
419
  user_query = f"๋‹ค์Œ ๋ฌธ์„œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ๋‹ต๋ณ€:\n\n{doc_text}\n\n์งˆ๋ฌธ: {msg}"
@@ -435,7 +435,6 @@ def reset_states(_):
435
  ###############################
436
  # (2) OCR FLEX 전용 (스니펫)
437
  ###############################
438
- # ๋ณ„๋„์˜ LaTeX ์„ค์ •
439
  latex_delimiters = [
440
  {"left": "$$", "right": "$$", "display": True},
441
  {"left": '$', "right": '$', "display": False}
@@ -444,8 +443,7 @@ latex_delimiters = [
444
  def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
445
  """
446
  ์Šค๋‹ˆํŽซ์—์„œ ์‚ฌ์šฉ:
447
- ์—…๋กœ๋“œ๋œ PDF/์ด๋ฏธ์ง€ -> PDF ๋ณ€ํ™˜ -> ๋งˆํฌ๋‹ค์šด ๋ณ€ํ™˜
448
- (๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง / ๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ / ์••์ถ•ํŒŒ์ผ / PDF๋ฏธ๋ฆฌ๋ณด๊ธฐ) ๋ฐ˜ํ™˜
449
  """
450
  file_path = to_pdf(file_path)
451
  if end_pages > 20:
@@ -475,11 +473,10 @@ def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enab
475
  ###############################
476
  if __name__ == "__main__":
477
  with gr.Blocks(title="VisionOCR", css=create_css()) as demo:
478
- # ํƒญ ์˜์—ญ
479
  with gr.Tabs():
480
- #########################################################
481
  # Tab (1) : PDF -> Markdown 변환 + Chat
482
- #########################################################
483
  with gr.Tab("PDF Chat with LLM"):
484
  gr.HTML("""
485
  <div class="title-area">
@@ -491,21 +488,19 @@ if __name__ == "__main__":
491
  md_state = gr.State("") # ๋ณ€ํ™˜๋œ ๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ
492
  chat_history = gr.State([]) # ChatMessage ๋ฆฌ์ŠคํŠธ
493
 
494
- # ์—…๋กœ๋“œ & ๋ณ€ํ™˜
495
  with gr.Row():
496
  file = gr.File(label="PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
497
  convert_btn = gr.Button("๋ณ€ํ™˜ํ•˜๊ธฐ")
498
 
499
  chatbot = gr.Chatbot(height=600)
500
 
501
- # ์ƒˆ ํŒŒ์ผ ์—…๋กœ๋“œ ์‹œ: ์ด์ „ ๋Œ€ํ™”/๋งˆํฌ๋‹ค์šด/์ฑ—๋ด‡ ์ดˆ๊ธฐํ™”
502
  file.change(
503
  fn=reset_states,
504
  inputs=file,
505
  outputs=[chat_history, md_state, chatbot]
506
  )
507
 
508
- # ์ˆจ๊น€ ์š”์†Œ๋“ค
509
  max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
510
  layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
511
  language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
@@ -520,7 +515,6 @@ if __name__ == "__main__":
520
  show_progress=True
521
  )
522
 
523
- # Gemini Chat
524
  gr.Markdown("## ์ถ”๋ก  LLM๊ณผ ๋Œ€ํ™”")
525
 
526
  with gr.Row():
@@ -546,9 +540,9 @@ if __name__ == "__main__":
546
  outputs=[chat_history, md_state, chatbot]
547
  )
548
 
549
- #########################################################
550
- # Tab (2) : OCR FLEX (스니펫 코드)
551
- #########################################################
552
  with gr.Tab("OCR FLEX"):
553
  gr.HTML("""
554
  <div class="title-area">
@@ -558,87 +552,38 @@ if __name__ == "__main__":
558
  """)
559
 
560
  with gr.Row():
561
- # ์™ผ์ชฝ ํŒจ๋„
562
  with gr.Column(variant='panel', scale=5):
563
- file_ocr = gr.File(
564
- label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”",
565
- file_types=[".pdf", ".png", ".jpeg", ".jpg"]
566
- )
567
-
568
- max_pages_ocr = gr.Slider(
569
- 1, 20, 10,
570
- step=1,
571
- label='์ตœ๋Œ€ ๋ณ€ํ™˜ ํŽ˜์ด์ง€ ์ˆ˜'
572
- )
573
 
574
  with gr.Row():
575
- layout_mode_ocr = gr.Dropdown(
576
- ["layoutlmv3", "doclayout_yolo"],
577
- label="๋ ˆ์ด์•„์›ƒ ๋ชจ๋ธ",
578
- value="doclayout_yolo"
579
- )
580
- language_ocr = gr.Dropdown(
581
- all_lang,
582
- label="์–ธ์–ด",
583
- value='auto'
584
- )
585
 
586
  with gr.Row():
587
- formula_enable_ocr = gr.Checkbox(
588
- label="์ˆ˜์‹ ์ธ์‹ ํ™œ์„ฑํ™”",
589
- value=True
590
- )
591
- is_ocr_ocr = gr.Checkbox(
592
- label="OCR ๊ฐ•์ œ ํ™œ์„ฑํ™”",
593
- value=False
594
- )
595
- table_enable_ocr = gr.Checkbox(
596
- label="ํ‘œ ์ธ์‹ ํ™œ์„ฑํ™”(ํ…Œ์ŠคํŠธ)",
597
- value=True
598
- )
599
 
600
  with gr.Row():
601
  change_bu_ocr = gr.Button("๋ณ€ํ™˜")
602
-
603
- # โ˜… ClearButton ์ˆ˜์ • โ˜…
604
- # ์ฒซ ๋ฒˆ์งธ ์ธ์ž -> clearํ•  ๋Œ€์ƒ(์ปดํฌ๋„ŒํŠธ),
605
- # ๋ฒ„ํŠผ์— ํ‘œ์‹œ๋  ํ…์ŠคํŠธ๋Š” value="์ดˆ๊ธฐํ™”"
606
  clear_bu_ocr = gr.ClearButton(
607
- components=[file_ocr, max_pages_ocr, layout_mode_ocr, language_ocr,
608
- formula_enable_ocr, is_ocr_ocr, table_enable_ocr],
 
 
 
 
 
 
 
609
  value="์ดˆ๊ธฐํ™”"
610
  )
611
 
612
- pdf_show_ocr = PDF(
613
- label='PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ',
614
- interactive=False,
615
- visible=True,
616
- height=800
617
- )
618
-
619
- # ์˜ˆ์ œ ํด๋”๊ฐ€ ์žˆ๋‹ค๋ฉด ์‚ฌ์šฉ (์‹ค์ œ ์‹คํ–‰ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ์ฃผ์˜)
620
- with gr.Accordion("์˜ˆ์ œ:", open=False):
621
- example_root = (
622
- os.path.join(os.path.dirname(__file__), "examples")
623
- if "__file__" in globals() else "./examples"
624
- )
625
- if os.path.exists(example_root):
626
- gr.Examples(
627
- examples=[
628
- os.path.join(example_root, _) for _ in os.listdir(example_root)
629
- if _.endswith("pdf")
630
- ],
631
- inputs=file_ocr
632
- )
633
- else:
634
- gr.Markdown("์˜ˆ์ œ ํด๋”๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
635
 
636
- # ์˜ค๋ฅธ์ชฝ ํŒจ๋„
637
  with gr.Column(variant='panel', scale=5):
638
- output_file_ocr = gr.File(
639
- label="๋ณ€ํ™˜ ๊ฒฐ๊ณผ",
640
- interactive=False
641
- )
642
 
643
  with gr.Tabs():
644
  with gr.Tab("๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง"):
@@ -646,17 +591,13 @@ if __name__ == "__main__":
646
  label="๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง",
647
  height=1100,
648
  show_copy_button=True,
649
- latex_delimiters=latex_delimiters,
650
- line_breaks=True
651
  )
652
 
653
  with gr.Tab("๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ"):
654
- md_text_ocr = gr.TextArea(
655
- lines=45,
656
- show_copy_button=True
657
- )
658
 
659
- # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ (OCR FLEX)
660
  file_ocr.change(
661
  fn=to_pdf,
662
  inputs=file_ocr,
@@ -685,5 +626,4 @@ if __name__ == "__main__":
685
  ]
686
  )
687
 
688
- # ์ „์ฒด ์•ฑ ์‹คํ–‰
689
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)
 
17
  ###############################
18
  os.system('pip uninstall -y magic-pdf')
19
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 
 
20
  os.system('pip install opencv-python-headless')
21
 
22
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 
100
  disk_rw = FileBasedDataReader(os.path.dirname(path))
101
  return disk_rw.read(os.path.basename(path))
102
 
103
+ ###############################
104
+ # 이미지 전처리 함수 (이진화 + Deskew)
105
+ ###############################
106
+ def preprocess_image(image_path):
107
+ """
108
+ 1) Grayscale + Otsu Binarization
109
+ 2) Deskew(๊ธฐ์šธ์ž„ ๋ณด์ •)
110
+ """
111
+ img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
112
+ if img is None:
113
+ # ์ด๋ฏธ์ง€๊ฐ€ ์•„๋‹ˆ๊ฑฐ๋‚˜ ๋กœ๋”ฉ ์‹คํŒจ ์‹œ ๊ทธ๋Œ€๋กœ ๋ฐ˜ํ™˜
114
+ return image_path
115
+
116
+ # (a) ์ด์ง„ํ™”(Otsu)
117
+ _, img_bin = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
118
+
119
+ # (b) ๊ธฐ์šธ์ž„ ๋ณด์ •(deskew)
120
+ coords = np.column_stack(np.where(img_bin > 0))
121
+ angle = cv2.minAreaRect(coords)[-1]
122
+ if angle < -45:
123
+ angle = -(90 + angle)
124
+ else:
125
+ angle = -angle
126
+
127
+ (h, w) = img_bin.shape[:2]
128
+ center = (w // 2, h // 2)
129
+ M = cv2.getRotationMatrix2D(center, angle, 1.0)
130
+ img_rotated = cv2.warpAffine(
131
+ img_bin, M, (w, h),
132
+ flags=cv2.INTER_CUBIC,
133
+ borderMode=cv2.BORDER_CONSTANT,
134
+ borderValue=255
135
+ )
136
+
137
+ # ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
138
+ preprocessed_path = image_path + "_preprocessed.png"
139
+ cv2.imwrite(preprocessed_path, img_rotated)
140
+ return preprocessed_path
141
+
142
+ ###############################
143
+ # PDF or Image -> PDF 변환
144
+ # (불필요한 f.close() 제거)
145
+ ###############################
146
+ def to_pdf(file_path):
147
+ """
148
+ ์ด๋ฏธ์ง€(JPG/PNG ๋“ฑ)๋ผ๋ฉด ์ „์ฒ˜๋ฆฌ ํ›„ PDF๋กœ ๋ณ€ํ™˜.
149
+ ์ด๋ฏธ PDF๋ผ๋ฉด ๊ทธ๋Œ€๋กœ ๋ฐ˜ํ™˜.
150
+ """
151
+ with pymupdf.open(file_path) as f:
152
+ # PDF์ธ ๊ฒฝ์šฐ
153
+ if f.is_pdf:
154
+ return file_path
155
+ # ์ด๋ฏธ์ง€ ํŒŒ์ผ์ธ ๊ฒฝ์šฐ
156
+ # (์ค‘์ฒฉ๋œ with๋ฌธ์— ๋“ค์–ด๊ฐ€๊ธฐ ์ „, ๊ทธ๋ƒฅ ๋ธ”๋ก์ด ๋๋‚˜๋ฉด ์ž๋™ close)
157
+ # ๋ธ”๋ก ์ข…๋ฃŒ ์‹œ f๋Š” ์ด๋ฏธ close๋จ.
158
+
159
+ # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ -> ์ƒˆ ์ด๋ฏธ์ง€ -> PDF ๋ณ€ํ™˜
160
+ preprocessed_path = preprocess_image(file_path)
161
+ with pymupdf.open(preprocessed_path) as img_doc:
162
+ pdf_bytes = img_doc.convert_to_pdf()
163
+
164
+ unique_filename = f"{uuid.uuid4()}.pdf"
165
+ tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
166
+ with open(tmp_file_path, 'wb') as tmp_pdf_file:
167
+ tmp_pdf_file.write(pdf_bytes)
168
+
169
+ return tmp_file_path
170
+
171
  def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
172
  os.makedirs(output_dir, exist_ok=True)
173
  try:
 
212
 
213
  def replace_image_with_base64(markdown_text, image_dir_path):
214
  pattern = r'\!\[(?:[^\]]*)\]\(([^)]+)\)'
215
+
216
  def replace(match):
217
  relative_path = match.group(1)
218
  full_path = os.path.join(image_dir_path, relative_path)
219
  base64_image = image_to_base64(full_path)
220
  return f"![{relative_path}](data:image/jpeg;base64,{base64_image})"
 
221
 
222
+ return re.sub(pattern, replace, markdown_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
225
  """
226
  ์—…๋กœ๋“œ๋œ PDF/์ด๋ฏธ์ง€ -> PDF ๋ณ€ํ™˜ -> ๋งˆํฌ๋‹ค์šด ๋ณ€ํ™˜
227
+ (ํ”„๋กœ๊ทธ๋ ˆ์Šค ํ‘œ์‹œ)
228
  """
229
  progress(0, "PDF๋กœ ๋ณ€ํ™˜ ์ค‘...")
230
  file_path = to_pdf(file_path)
 
348
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
349
  """
350
  Gemini ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ
 
351
  """
352
  if not user_message.strip():
353
  user_message = "...(No content from user)..."
 
376
  parts = chunk.candidates[0].content.parts
377
  current_chunk = parts[0].text
378
 
379
+ # ๋งŒ์•ฝ parts๊ฐ€ 2๊ฐœ์ด๋ฉด [thinking, ์ตœ์ข…๋‹ต๋ณ€]
380
  if len(parts) == 2 and not thinking_complete:
381
  thought_buffer += current_chunk
382
  messages[-1] = ChatMessage(
 
413
 
414
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
415
  """
416
+ doc_text(๋งˆํฌ๋‹ค์šด) ์‚ฌ์šฉํ•ด ์งˆ๋ฌธ์„ ์‚ด์ง ๋ณ€ํ˜•
417
  """
418
  if doc_text.strip():
419
  user_query = f"๋‹ค์Œ ๋ฌธ์„œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ๋‹ต๋ณ€:\n\n{doc_text}\n\n์งˆ๋ฌธ: {msg}"
 
435
  ###############################
436
  # (2) OCR FLEX 전용 (스니펫)
437
  ###############################
 
438
  latex_delimiters = [
439
  {"left": "$$", "right": "$$", "display": True},
440
  {"left": '$', "right": '$', "display": False}
 
443
  def to_markdown_ocr_flex(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
444
  """
445
  ์Šค๋‹ˆํŽซ์—์„œ ์‚ฌ์šฉ:
446
+ ์—…๋กœ๋“œ๋œ PDF/์ด๋ฏธ์ง€ -> ์ „์ฒ˜๋ฆฌ -> PDF ๋ณ€ํ™˜ -> ๋งˆํฌ๋‹ค์šด ๋ณ€ํ™˜
 
447
  """
448
  file_path = to_pdf(file_path)
449
  if end_pages > 20:
 
473
  ###############################
474
  if __name__ == "__main__":
475
  with gr.Blocks(title="VisionOCR", css=create_css()) as demo:
 
476
  with gr.Tabs():
477
+ ###############################
478
  # Tab (1) : PDF -> Markdown 변환 + Chat
479
+ ###############################
480
  with gr.Tab("PDF Chat with LLM"):
481
  gr.HTML("""
482
  <div class="title-area">
 
488
  md_state = gr.State("") # ๋ณ€ํ™˜๋œ ๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ
489
  chat_history = gr.State([]) # ChatMessage ๋ฆฌ์ŠคํŠธ
490
 
 
491
  with gr.Row():
492
  file = gr.File(label="PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
493
  convert_btn = gr.Button("๋ณ€ํ™˜ํ•˜๊ธฐ")
494
 
495
  chatbot = gr.Chatbot(height=600)
496
 
 
497
  file.change(
498
  fn=reset_states,
499
  inputs=file,
500
  outputs=[chat_history, md_state, chatbot]
501
  )
502
 
503
+ # ์ˆจ๊น€ ์˜ต์…˜๋“ค
504
  max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
505
  layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
506
  language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
 
515
  show_progress=True
516
  )
517
 
 
518
  gr.Markdown("## ์ถ”๋ก  LLM๊ณผ ๋Œ€ํ™”")
519
 
520
  with gr.Row():
 
540
  outputs=[chat_history, md_state, chatbot]
541
  )
542
 
543
+ ###############################
544
+ # Tab (2) : OCR FLEX
545
+ ###############################
546
  with gr.Tab("OCR FLEX"):
547
  gr.HTML("""
548
  <div class="title-area">
 
552
  """)
553
 
554
  with gr.Row():
 
555
  with gr.Column(variant='panel', scale=5):
556
+ file_ocr = gr.File(label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ํŒŒ์ผ", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
557
+ max_pages_ocr = gr.Slider(1, 20, 10, step=1, label='์ตœ๋Œ€ ๋ณ€ํ™˜ ํŽ˜์ด์ง€ ์ˆ˜')
 
 
 
 
 
 
 
 
558
 
559
  with gr.Row():
560
+ layout_mode_ocr = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="๋ ˆ์ด์•„์›ƒ ๋ชจ๋ธ", value="doclayout_yolo")
561
+ language_ocr = gr.Dropdown(all_lang, label="์–ธ์–ด", value='auto')
 
 
 
 
 
 
 
 
562
 
563
  with gr.Row():
564
+ formula_enable_ocr = gr.Checkbox(label="์ˆ˜์‹ ์ธ์‹ ํ™œ์„ฑํ™”", value=True)
565
+ is_ocr_ocr = gr.Checkbox(label="OCR ๊ฐ•์ œ ํ™œ์„ฑํ™”", value=False)
566
+ table_enable_ocr = gr.Checkbox(label="ํ‘œ ์ธ์‹ ํ™œ์„ฑํ™”(ํ…Œ์ŠคํŠธ)", value=True)
 
 
 
 
 
 
 
 
 
567
 
568
  with gr.Row():
569
  change_bu_ocr = gr.Button("๋ณ€ํ™˜")
 
 
 
 
570
  clear_bu_ocr = gr.ClearButton(
571
+ components=[
572
+ file_ocr,
573
+ max_pages_ocr,
574
+ layout_mode_ocr,
575
+ language_ocr,
576
+ formula_enable_ocr,
577
+ is_ocr_ocr,
578
+ table_enable_ocr
579
+ ],
580
  value="์ดˆ๊ธฐํ™”"
581
  )
582
 
583
+ pdf_show_ocr = PDF(label='PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ', interactive=False, visible=True, height=800)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
 
 
585
  with gr.Column(variant='panel', scale=5):
586
+ output_file_ocr = gr.File(label="๋ณ€ํ™˜ ๊ฒฐ๊ณผ", interactive=False)
 
 
 
587
 
588
  with gr.Tabs():
589
  with gr.Tab("๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง"):
 
591
  label="๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง",
592
  height=1100,
593
  show_copy_button=True,
594
+ line_breaks=True,
595
+ latex_delimiters=latex_delimiters
596
  )
597
 
598
  with gr.Tab("๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ"):
599
+ md_text_ocr = gr.TextArea(lines=45, show_copy_button=True)
 
 
 
600
 
 
601
  file_ocr.change(
602
  fn=to_pdf,
603
  inputs=file_ocr,
 
626
  ]
627
  )
628
 
 
629
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)