openfree commited on
Commit
cf7458b
·
verified ·
1 Parent(s): 16a2fde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -47
app.py CHANGED
@@ -8,9 +8,7 @@ import re
8
  import uuid
9
  import pymupdf
10
 
11
- # =======================================================
12
- # magic-pdf & MinerU 설치 관련 (기존 코드 그대로 예시)
13
- # =======================================================
14
  os.system('pip uninstall -y magic-pdf')
15
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
16
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
@@ -29,6 +27,10 @@ with open('/home/user/magic-pdf.json', 'w') as file:
29
 
30
  os.system('cp -r paddleocr /home/user/.paddleocr')
31
 
 
 
 
 
32
  import gradio as gr
33
  from loguru import logger
34
 
@@ -36,14 +38,11 @@ from magic_pdf.data.data_reader_writer import FileBasedDataReader
36
  from magic_pdf.libs.hash_utils import compute_sha256
37
  from magic_pdf.tools.common import do_parse, prepare_env
38
 
39
-
40
- ###########################################
41
- # 1) UI ์Šคํƒ€์ผ(CSS) + PDF์ฒ˜๋ฆฌ ๊ด€๋ จ ํ•จ์ˆ˜๋“ค
42
- ###########################################
43
  def create_css():
44
- """
45
- 화면을 가득 채우고 스크롤 가능하도록 설정
46
- """
47
  return """
48
  .gradio-container {
49
  width: 100vw !important;
@@ -152,10 +151,7 @@ def to_pdf(file_path):
152
  return tmp_file_path
153
 
154
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
155
- """
156
- - PDF 변환 과정에서 'progress(...)' 로 진행률 업데이트
157
- - Gradio 버전이 낮아도 'with progress:' 를 쓰지 않으면 __enter__ 오류가 안뜸
158
- """
159
  progress(0, "PDF๋กœ ๋ณ€ํ™˜ ์ค‘...")
160
  file_path = to_pdf(file_path)
161
  time.sleep(0.5)
@@ -177,13 +173,13 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
177
  logger.error("์••์ถ• ์‹คํŒจ")
178
  time.sleep(0.5)
179
 
180
- progress(70, "๋งˆํฌ๋‹ค์šด ๋กœ๋“œ ์ค‘...")
181
  md_path = os.path.join(local_md_dir, file_name + ".md")
182
  with open(md_path, 'r', encoding='utf-8') as f:
183
  txt_content = f.read()
184
  time.sleep(0.5)
185
 
186
- progress(90, "์ด๋ฏธ์ง€(base64) ๋ณ€ํ™˜ ์ค‘...")
187
  md_content = replace_image_with_base64(txt_content, local_md_dir)
188
  time.sleep(0.5)
189
 
@@ -191,9 +187,9 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
191
  return md_content
192
 
193
 
194
- ###############################
195
  # magic_pdf 모델 초기화
196
- ###############################
197
  def init_model():
198
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
199
  try:
@@ -210,6 +206,9 @@ def init_model():
210
  model_init = init_model()
211
  logger.info(f"model_init: {model_init}")
212
 
 
 
 
213
  latin_lang = [
214
  'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
215
  'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
@@ -224,9 +223,9 @@ all_lang = ['', 'auto']
224
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
225
 
226
 
227
- #################################
228
  # 2) Gemini (google.generativeai)
229
- #################################
230
  import google.generativeai as genai
231
  from gradio import ChatMessage
232
  from typing import Iterator
@@ -239,7 +238,7 @@ model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
239
 
240
  def format_chat_history(messages: list) -> list:
241
  """
242
- Gemini๊ฐ€ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋Š” ํ˜•์‹ (role, content)
243
  """
244
  formatted_history = []
245
  for message in messages:
@@ -252,15 +251,13 @@ def format_chat_history(messages: list) -> list:
252
 
253
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
254
  """
255
- Gemini ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ: user_message๊ฐ€ ๋น„์–ด์žˆ์œผ๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋กœ ๋Œ€์ฒด
256
  """
257
- # 빈 문자열이면 기본 문구로 교체(오류 방지)
258
  if not user_message.strip():
259
- user_message = "โ€ฆ(No content from user)โ€ฆ"
260
 
261
  try:
262
  print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
263
-
264
  chat_history = format_chat_history(messages)
265
  chat = model.start_chat(history=chat_history)
266
  response = chat.send_message(user_message, stream=True)
@@ -302,7 +299,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
302
  response_buffer += current_chunk
303
  messages[-1] = ChatMessage(role="assistant", content=response_buffer)
304
  else:
305
- # Still thinking
306
  thought_buffer += current_chunk
307
  messages[-1] = ChatMessage(
308
  role="assistant",
@@ -328,7 +325,6 @@ def convert_chat_messages_to_gradio_format(messages):
328
 
329
  for msg in messages:
330
  if msg.role == "user":
331
- # 이전 턴 저장
332
  if user_text is not None or assistant_text is not None:
333
  gradio_chat.append((user_text or "", assistant_text or ""))
334
  user_text = msg.content
@@ -340,9 +336,8 @@ def convert_chat_messages_to_gradio_format(messages):
340
  if assistant_text is None:
341
  assistant_text = msg.content
342
  else:
343
- assistant_text += msg.content # 스트리밍 시 누적
344
 
345
- # 마지막 턴
346
  if user_text is not None or assistant_text is not None:
347
  gradio_chat.append((user_text or "", assistant_text or ""))
348
 
@@ -350,7 +345,7 @@ def convert_chat_messages_to_gradio_format(messages):
350
 
351
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
352
  """
353
- doc_text(마크다운) 참고 문구를 자동 삽입
354
  """
355
  if doc_text.strip():
356
  user_query = f"๋‹ค์Œ ๋ฌธ์„œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ๋‹ต๋ณ€:\n\n{doc_text}\n\n์งˆ๋ฌธ: {msg}"
@@ -360,34 +355,60 @@ def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
360
  history.append(ChatMessage(role="user", content=user_query))
361
  return "", history
362
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
  ################################
365
- # 3) 통합 Gradio 앱 구성 & 실행
366
  ################################
367
  with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
368
  gr.HTML("""
369
  <div class="title-area">
370
  <h1>OCR FLEX + Gemini Chat</h1>
371
- <p>PDF/์ด๋ฏธ์ง€ -> ํ…์ŠคํŠธ(๋งˆํฌ๋‹ค์šด) ๋ณ€ํ™˜ ํ›„, Gemini LLM ๋Œ€ํ™”</p>
372
  </div>
373
  """)
374
 
 
375
  md_state = gr.State("")
376
  chat_history = gr.State([])
377
 
 
378
  with gr.Row():
379
- file = gr.File(label="PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
 
 
 
 
380
  convert_btn = gr.Button("๋ณ€ํ™˜ํ•˜๊ธฐ")
381
 
382
- # 숨긴 컴포넌트들
383
- max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
384
- layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
 
 
 
 
 
 
 
 
385
  language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
386
  formula_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
387
  is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
388
  table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
389
 
390
- # 변환 클릭 -> to_markdown (progress)
391
  convert_btn.click(
392
  fn=to_markdown,
393
  inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
@@ -395,14 +416,15 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
395
  show_progress=True # 프로그레스바+로딩 표시
396
  )
397
 
398
- # Gemini Chat
399
  gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
400
- chatbot = gr.Chatbot(height=600)
 
401
  with gr.Row():
402
  chat_input = gr.Textbox(lines=1, placeholder="์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”...")
403
  clear_btn = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
404
 
405
- # 프롬프트 전송 -> user_message -> stream_gemini_response
406
  chat_input.submit(
407
  fn=user_message,
408
  inputs=[chat_input, chat_history, md_state],
@@ -413,19 +435,15 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
413
  outputs=chatbot
414
  )
415
 
 
416
  def clear_states():
417
- return [], ""
418
 
419
  clear_btn.click(
420
  fn=clear_states,
421
  inputs=[],
422
- outputs=[chat_history, md_state]
423
- ).then(
424
- fn=lambda: [],
425
- inputs=[],
426
- outputs=chatbot
427
  )
428
 
429
-
430
  if __name__ == "__main__":
431
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
 
8
  import uuid
9
  import pymupdf
10
 
11
+ # (기존 magic-pdf 설치 및 설정 로직)
 
 
12
  os.system('pip uninstall -y magic-pdf')
13
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
14
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 
27
 
28
  os.system('cp -r paddleocr /home/user/.paddleocr')
29
 
30
+
31
+ ###############################
32
+ # Gradio 및 기타 라이브러리
33
+ ###############################
34
  import gradio as gr
35
  from loguru import logger
36
 
 
38
  from magic_pdf.libs.hash_utils import compute_sha256
39
  from magic_pdf.tools.common import do_parse, prepare_env
40
 
41
+ #######################
42
+ # 1) UI CSS + PDF funcs
43
+ #######################
 
44
  def create_css():
45
+ """화면 가득 사용 + 스크롤 허용"""
 
 
46
  return """
47
  .gradio-container {
48
  width: 100vw !important;
 
151
  return tmp_file_path
152
 
153
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
154
+ """PDF 변환 함수 (프로그레스바 표시)"""
 
 
 
155
  progress(0, "PDF๋กœ ๋ณ€ํ™˜ ์ค‘...")
156
  file_path = to_pdf(file_path)
157
  time.sleep(0.5)
 
173
  logger.error("์••์ถ• ์‹คํŒจ")
174
  time.sleep(0.5)
175
 
176
+ progress(70, "๋งˆํฌ๋‹ค์šด ์ฝ๋Š” ์ค‘...")
177
  md_path = os.path.join(local_md_dir, file_name + ".md")
178
  with open(md_path, 'r', encoding='utf-8') as f:
179
  txt_content = f.read()
180
  time.sleep(0.5)
181
 
182
+ progress(90, "์ด๋ฏธ์ง€ base64 ๋ณ€ํ™˜ ์ค‘...")
183
  md_content = replace_image_with_base64(txt_content, local_md_dir)
184
  time.sleep(0.5)
185
 
 
187
  return md_content
188
 
189
 
190
+ ################################
191
  # magic_pdf 모델 초기화
192
+ ################################
193
  def init_model():
194
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
195
  try:
 
206
  model_init = init_model()
207
  logger.info(f"model_init: {model_init}")
208
 
209
+ ##################
210
+ # 언어 목록
211
+ ##################
212
  latin_lang = [
213
  'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
214
  'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
 
223
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
224
 
225
 
226
+ ##################################
227
  # 2) Gemini (google.generativeai)
228
+ ##################################
229
  import google.generativeai as genai
230
  from gradio import ChatMessage
231
  from typing import Iterator
 
238
 
239
  def format_chat_history(messages: list) -> list:
240
  """
241
+ Gemini๊ฐ€ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋Š” (role, parts[]) ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
242
  """
243
  formatted_history = []
244
  for message in messages:
 
251
 
252
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
253
  """
254
+ Gemini ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ (user_message๊ฐ€ ๊ณต๋ฐฑ์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋กœ ๊ต์ฒด)
255
  """
 
256
  if not user_message.strip():
257
+ user_message = "...(No content from user)..."
258
 
259
  try:
260
  print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
 
261
  chat_history = format_chat_history(messages)
262
  chat = model.start_chat(history=chat_history)
263
  response = chat.send_message(user_message, stream=True)
 
299
  response_buffer += current_chunk
300
  messages[-1] = ChatMessage(role="assistant", content=response_buffer)
301
  else:
302
+ # Still in "thinking"
303
  thought_buffer += current_chunk
304
  messages[-1] = ChatMessage(
305
  role="assistant",
 
325
 
326
  for msg in messages:
327
  if msg.role == "user":
 
328
  if user_text is not None or assistant_text is not None:
329
  gradio_chat.append((user_text or "", assistant_text or ""))
330
  user_text = msg.content
 
336
  if assistant_text is None:
337
  assistant_text = msg.content
338
  else:
339
+ assistant_text += msg.content
340
 
 
341
  if user_text is not None or assistant_text is not None:
342
  gradio_chat.append((user_text or "", assistant_text or ""))
343
 
 
345
 
346
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
347
  """
348
+ doc_text(마크다운) 사용해 질문 자동 변형
349
  """
350
  if doc_text.strip():
351
  user_query = f"๋‹ค์Œ ๋ฌธ์„œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ๋‹ต๋ณ€:\n\n{doc_text}\n\n์งˆ๋ฌธ: {msg}"
 
355
  history.append(ChatMessage(role="user", content=user_query))
356
  return "", history
357
 
358
+ ########################
359
+ # 새 파일 업로드시 초기화
360
+ ########################
361
+ def reset_states(_):
362
+ """
363
+ 파일이 변경되면(새 파일 업로드되면)
364
+ - 대화 이력(chat_history)
365
+ - 마크다운 상태(md_state)
366
+ - 챗봇(기존 대화)
367
+ 모두 초기화
368
+ """
369
+ return [], "", []
370
 
371
  ################################
372
+ # 3) 통합 Gradio 앱 구성
373
  ################################
374
  with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
375
  gr.HTML("""
376
  <div class="title-area">
377
  <h1>OCR FLEX + Gemini Chat</h1>
378
+ <p>PDF/์ด๋ฏธ์ง€ -> ํ…์ŠคํŠธ(๋งˆํฌ๋‹ค์šด) ๋ณ€ํ™˜ ํ›„, Gemini LLM๊ณผ ๋Œ€ํ™”</p>
379
  </div>
380
  """)
381
 
382
+ # 상태
383
  md_state = gr.State("")
384
  chat_history = gr.State([])
385
 
386
+ # 1) 업로드 & 변환
387
  with gr.Row():
388
+ file = gr.File(
389
+ label="PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
390
+ file_types=[".pdf", ".png", ".jpeg", ".jpg"],
391
+ interactive=True
392
+ )
393
  convert_btn = gr.Button("๋ณ€ํ™˜ํ•˜๊ธฐ")
394
 
395
+ # 새 파일 업로드 시: 이전 대화/마크다운/챗봇 초기화
396
+ # => 만약 "파일 변경 이벤트"로 초기화하길 원치 않으면 주석 처리
397
+ file.change(
398
+ fn=reset_states,
399
+ inputs=file,
400
+ outputs=[chat_history, md_state, "chatbot"]
401
+ )
402
+
403
+ # 숨긴 컴포넌트
404
+ max_pages = gr.Slider(1,20,10, visible=False, elem_classes="invisible")
405
+ layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"],value="doclayout_yolo",visible=False,elem_classes="invisible")
406
  language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
407
  formula_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
408
  is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
409
  table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
410
 
411
+ # 변환 버튼 → to_markdown
412
  convert_btn.click(
413
  fn=to_markdown,
414
  inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
 
416
  show_progress=True # 프로그레스바+로딩 표시
417
  )
418
 
419
+ # 2) Gemini Chat
420
  gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
421
+ chatbot = gr.Chatbot(height=600, elem_id="chatbot") # elem_id ์ง€์ • (reset์—์„œ outputs๋กœ ์‚ฌ์šฉ)
422
+
423
  with gr.Row():
424
  chat_input = gr.Textbox(lines=1, placeholder="์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”...")
425
  clear_btn = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
426
 
427
+ # 채팅 흐름
428
  chat_input.submit(
429
  fn=user_message,
430
  inputs=[chat_input, chat_history, md_state],
 
435
  outputs=chatbot
436
  )
437
 
438
+ # 초기화 버튼: 대화/상태/챗봇 모두 초기화
439
  def clear_states():
440
+ return [], "", []
441
 
442
  clear_btn.click(
443
  fn=clear_states,
444
  inputs=[],
445
+ outputs=[chat_history, md_state, chatbot]
 
 
 
 
446
  )
447
 
 
448
  if __name__ == "__main__":
449
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)