openfree committed on
Commit
21d018a
·
verified ·
1 Parent(s): cf7458b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -64
app.py CHANGED
@@ -27,7 +27,6 @@ with open('/home/user/magic-pdf.json', 'w') as file:
27
 
28
  os.system('cp -r paddleocr /home/user/.paddleocr')
29
 
30
-
31
  ###############################
32
  # Gradio 및 기타 라이브러리
33
  ###############################
@@ -38,9 +37,6 @@ from magic_pdf.data.data_reader_writer import FileBasedDataReader
38
  from magic_pdf.libs.hash_utils import compute_sha256
39
  from magic_pdf.tools.common import do_parse, prepare_env
40
 
41
- #######################
42
- # 1) UI CSS + PDF funcs
43
- #######################
44
  def create_css():
45
  """화면 가득 사용 + 스크롤 허용"""
46
  return """
@@ -186,10 +182,6 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
186
  progress(100, "변환 완료!")
187
  return md_content
188
 
189
-
190
- ################################
191
- # magic_pdf 모델 초기화
192
- ################################
193
  def init_model():
194
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
195
  try:
@@ -206,9 +198,6 @@ def init_model():
206
  model_init = init_model()
207
  logger.info(f"model_init: {model_init}")
208
 
209
- ##################
210
- # 언어 목록
211
- ##################
212
  latin_lang = [
213
  'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
214
  'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
@@ -222,10 +211,6 @@ other_lang = ['ch','en','korean','japan','chinese_cht','ta','te','ka']
222
  all_lang = ['', 'auto']
223
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
224
 
225
-
226
- ##################################
227
- # 2) Gemini (google.generativeai)
228
- ##################################
229
  import google.generativeai as genai
230
  from gradio import ChatMessage
231
  from typing import Iterator
@@ -233,13 +218,10 @@ import time
233
 
234
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
235
  genai.configure(api_key=GEMINI_API_KEY)
236
-
237
  model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
238
 
239
  def format_chat_history(messages: list) -> list:
240
- """
241
- Gemini๊ฐ€ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋Š” (role, parts[]) ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
242
- """
243
  formatted_history = []
244
  for message in messages:
245
  if not (message.role == "assistant" and hasattr(message, "metadata")):
@@ -250,9 +232,7 @@ def format_chat_history(messages: list) -> list:
250
  return formatted_history
251
 
252
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
253
- """
254
- Gemini ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ (user_message๊ฐ€ ๊ณต๋ฐฑ์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋กœ ๊ต์ฒด)
255
- """
256
  if not user_message.strip():
257
  user_message = "...(No content from user)..."
258
 
@@ -266,7 +246,6 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
266
  response_buffer = ""
267
  thinking_complete = False
268
 
269
- # "Thinking" 메시지 추가
270
  messages.append(
271
  ChatMessage(
272
  role="assistant",
@@ -281,7 +260,6 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
281
  current_chunk = parts[0].text
282
 
283
  if len(parts) == 2 and not thinking_complete:
284
- # Complete thought
285
  thought_buffer += current_chunk
286
  messages[-1] = ChatMessage(
287
  role="assistant",
@@ -290,16 +268,13 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
290
  )
291
  yield convert_chat_messages_to_gradio_format(messages)
292
 
293
- # Start final response
294
  response_buffer = parts[1].text
295
  messages.append(ChatMessage(role="assistant", content=response_buffer))
296
  thinking_complete = True
297
  elif thinking_complete:
298
- # Response ongoing
299
  response_buffer += current_chunk
300
  messages[-1] = ChatMessage(role="assistant", content=response_buffer)
301
  else:
302
- # Still in "thinking"
303
  thought_buffer += current_chunk
304
  messages[-1] = ChatMessage(
305
  role="assistant",
@@ -317,9 +292,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
317
  yield convert_chat_messages_to_gradio_format(messages)
318
 
319
  def convert_chat_messages_to_gradio_format(messages):
320
- """
321
- ChatMessage list -> [ (유저발화, 봇응답), (...), ... ]
322
- """
323
  gradio_chat = []
324
  user_text, assistant_text = None, None
325
 
@@ -330,7 +303,6 @@ def convert_chat_messages_to_gradio_format(messages):
330
  user_text = msg.content
331
  assistant_text = None
332
  else:
333
- # assistant
334
  if user_text is None:
335
  user_text = ""
336
  if assistant_text is None:
@@ -344,9 +316,7 @@ def convert_chat_messages_to_gradio_format(messages):
344
  return gradio_chat
345
 
346
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
347
- """
348
- doc_text(마크다운) 사용해 질문 자동 변형
349
- """
350
  if doc_text.strip():
351
  user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
352
  else:
@@ -355,22 +325,15 @@ def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
355
  history.append(ChatMessage(role="user", content=user_query))
356
  return "", history
357
 
358
- ########################
359
- # 새 파일 업로드시 초기화
360
- ########################
361
  def reset_states(_):
362
  """
363
- 파일이 변경되면(새 파일 업로드되면)
364
- - 대화 이력(chat_history)
365
- - 마크다운 상태(md_state)
366
- - 챗봇(기존 대화)
367
- 모두 초기화
368
  """
369
  return [], "", []
370
 
371
- ################################
372
- # 3) 통합 Gradio 앱 구성
373
- ################################
374
  with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
375
  gr.HTML("""
376
  <div class="title-area">
@@ -379,25 +342,21 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
379
  </div>
380
  """)
381
 
382
- # 상태
383
- md_state = gr.State("")
384
- chat_history = gr.State([])
385
 
386
- # 1) 업로드 & 변환
387
  with gr.Row():
388
- file = gr.File(
389
- label="PDF/이미지 업로드",
390
- file_types=[".pdf", ".png", ".jpeg", ".jpg"],
391
- interactive=True
392
- )
393
  convert_btn = gr.Button("변환하기")
394
 
395
  # 새 파일 업로드 시: 이전 대화/마크다운/챗봇 초기화
396
- # => 만약 "파일 변경 이벤트"로 초기화하길 원치 않으면 주석 처리
 
397
  file.change(
398
  fn=reset_states,
399
  inputs=file,
400
- outputs=[chat_history, md_state, "chatbot"]
401
  )
402
 
403
  # 숨긴 컴포넌트
@@ -408,23 +367,20 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
408
  is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
409
  table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
410
 
411
- # 변환 버튼 → to_markdown
412
  convert_btn.click(
413
  fn=to_markdown,
414
  inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
415
  outputs=md_state,
416
- show_progress=True # 프로그레스바+로딩 표시
417
  )
418
 
419
- # 2) Gemini Chat
420
  gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
421
- chatbot = gr.Chatbot(height=600, elem_id="chatbot") # elem_id ์ง€์ • (reset์—์„œ outputs๋กœ ์‚ฌ์šฉ)
422
 
423
  with gr.Row():
424
  chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
425
  clear_btn = gr.Button("대화 초기화")
426
 
427
- # 채팅 흐름
428
  chat_input.submit(
429
  fn=user_message,
430
  inputs=[chat_input, chat_history, md_state],
@@ -435,12 +391,11 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
435
  outputs=chatbot
436
  )
437
 
438
- # 초기화 버튼: 대화/상태/챗봇 모두 초기화
439
- def clear_states():
440
  return [], "", []
441
 
442
  clear_btn.click(
443
- fn=clear_states,
444
  inputs=[],
445
  outputs=[chat_history, md_state, chatbot]
446
  )
 
27
 
28
  os.system('cp -r paddleocr /home/user/.paddleocr')
29
 
 
30
  ###############################
31
  # Gradio 및 기타 라이브러리
32
  ###############################
 
37
  from magic_pdf.libs.hash_utils import compute_sha256
38
  from magic_pdf.tools.common import do_parse, prepare_env
39
 
 
 
 
40
  def create_css():
41
  """화면 가득 사용 + 스크롤 허용"""
42
  return """
 
182
  progress(100, "변환 완료!")
183
  return md_content
184
 
 
 
 
 
185
  def init_model():
186
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
187
  try:
 
198
  model_init = init_model()
199
  logger.info(f"model_init: {model_init}")
200
 
 
 
 
201
  latin_lang = [
202
  'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
203
  'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
 
211
  all_lang = ['', 'auto']
212
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
213
 
 
 
 
 
214
  import google.generativeai as genai
215
  from gradio import ChatMessage
216
  from typing import Iterator
 
218
 
219
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
220
  genai.configure(api_key=GEMINI_API_KEY)
 
221
  model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
222
 
223
  def format_chat_history(messages: list) -> list:
224
+ """Gemini๊ฐ€ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋Š” (role, parts[]) ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜"""
 
 
225
  formatted_history = []
226
  for message in messages:
227
  if not (message.role == "assistant" and hasattr(message, "metadata")):
 
232
  return formatted_history
233
 
234
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
235
+ """Gemini ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ (user_message๊ฐ€ ๊ณต๋ฐฑ์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋กœ ๊ต์ฒด)"""
 
 
236
  if not user_message.strip():
237
  user_message = "...(No content from user)..."
238
 
 
246
  response_buffer = ""
247
  thinking_complete = False
248
 
 
249
  messages.append(
250
  ChatMessage(
251
  role="assistant",
 
260
  current_chunk = parts[0].text
261
 
262
  if len(parts) == 2 and not thinking_complete:
 
263
  thought_buffer += current_chunk
264
  messages[-1] = ChatMessage(
265
  role="assistant",
 
268
  )
269
  yield convert_chat_messages_to_gradio_format(messages)
270
 
 
271
  response_buffer = parts[1].text
272
  messages.append(ChatMessage(role="assistant", content=response_buffer))
273
  thinking_complete = True
274
  elif thinking_complete:
 
275
  response_buffer += current_chunk
276
  messages[-1] = ChatMessage(role="assistant", content=response_buffer)
277
  else:
 
278
  thought_buffer += current_chunk
279
  messages[-1] = ChatMessage(
280
  role="assistant",
 
292
  yield convert_chat_messages_to_gradio_format(messages)
293
 
294
  def convert_chat_messages_to_gradio_format(messages):
295
+ """ChatMessage list -> [ (유저발화, 봇응답), (...), ... ]"""
 
 
296
  gradio_chat = []
297
  user_text, assistant_text = None, None
298
 
 
303
  user_text = msg.content
304
  assistant_text = None
305
  else:
 
306
  if user_text is None:
307
  user_text = ""
308
  if assistant_text is None:
 
316
  return gradio_chat
317
 
318
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
319
+ """doc_text(마크다운) 사용해 질문 자동 변형"""
 
 
320
  if doc_text.strip():
321
  user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
322
  else:
 
325
  history.append(ChatMessage(role="user", content=user_query))
326
  return "", history
327
 
 
 
 
328
  def reset_states(_):
329
  """
330
+ 새 파일 업로드 시
331
+ - chat_history -> 빈 리스트
332
+ - md_state -> 빈 문자열
333
+ - chatbot -> 빈 list of tuples
 
334
  """
335
  return [], "", []
336
 
 
 
 
337
  with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
338
  gr.HTML("""
339
  <div class="title-area">
 
342
  </div>
343
  """)
344
 
345
+ md_state = gr.State("") # 변환된 마크다운 텍스트
346
+ chat_history = gr.State([]) # ChatMessage 리스트
 
347
 
348
+ # 업로드 & 변환
349
  with gr.Row():
350
+ file = gr.File(label="PDF/이미지 업로드", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
 
 
 
 
351
  convert_btn = gr.Button("변환하기")
352
 
353
  # 새 파일 업로드 시: 이전 대화/마크다운/챗봇 초기화
354
+ chatbot = gr.Chatbot(height=600) # 실제 Chatbot 컴포넌트
355
+
356
  file.change(
357
  fn=reset_states,
358
  inputs=file,
359
+ outputs=[chat_history, md_state, chatbot] # <--- 문자열 "chatbot" 아니라 chatbot 컴포넌트 객체
360
  )
361
 
362
  # 숨긴 컴포넌트
 
367
  is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
368
  table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
369
 
 
370
  convert_btn.click(
371
  fn=to_markdown,
372
  inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
373
  outputs=md_state,
374
+ show_progress=True # 프로그레스바 표시
375
  )
376
 
377
+ # Gemini Chat
378
  gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
 
379
 
380
  with gr.Row():
381
  chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
382
  clear_btn = gr.Button("대화 초기화")
383
 
 
384
  chat_input.submit(
385
  fn=user_message,
386
  inputs=[chat_input, chat_history, md_state],
 
391
  outputs=chatbot
392
  )
393
 
394
+ def clear_all():
 
395
  return [], "", []
396
 
397
  clear_btn.click(
398
+ fn=clear_all,
399
  inputs=[],
400
  outputs=[chat_history, md_state, chatbot]
401
  )