openfree committed on
Commit
00039aa
·
verified ·
1 Parent(s): 630dc5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -58
app.py CHANGED
@@ -1,7 +1,3 @@
1
- ##############################
2
- # 1) 기존 PDF 처리 코드
3
- ##############################
4
-
5
  import base64
6
  import json
7
  import os
@@ -12,6 +8,9 @@ import re
12
  import uuid
13
  import pymupdf
14
 
 
 
 
15
  os.system('pip uninstall -y magic-pdf')
16
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
17
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
@@ -37,9 +36,13 @@ from magic_pdf.data.data_reader_writer import FileBasedDataReader
37
  from magic_pdf.libs.hash_utils import compute_sha256
38
  from magic_pdf.tools.common import do_parse, prepare_env
39
 
 
 
 
 
40
  def create_css():
41
  """
42
- 화면을 채우고 스크롤 가능하도록 설정
43
  """
44
  return """
45
  .gradio-container {
@@ -50,7 +53,7 @@ def create_css():
50
  background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
51
  display: flex;
52
  flex-direction: column;
53
- overflow-y: auto !important;
54
  }
55
  .title-area {
56
  text-align: center;
@@ -87,7 +90,6 @@ def read_fn(path):
87
 
88
  def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
89
  os.makedirs(output_dir, exist_ok=True)
90
-
91
  try:
92
  file_name = f"{str(Path(doc_path).stem)}_{time.time()}"
93
  pdf_data = read_fn(doc_path)
@@ -105,7 +107,7 @@ def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_en
105
  formula_enable=formula_enable,
106
  table_enable=table_enable,
107
  lang=language,
108
- f_dump_orig_pdf=False,
109
  )
110
  return local_md_dir, file_name
111
  except Exception as e:
@@ -150,19 +152,23 @@ def to_pdf(file_path):
150
  return tmp_file_path
151
 
152
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
153
- progress(0, "파일을 PDF로 변환 중...")
 
 
 
 
154
  file_path = to_pdf(file_path)
155
  time.sleep(0.5)
156
 
157
  if end_pages > 20:
158
  end_pages = 20
159
 
160
- progress(30, "PDF 파싱 중...")
161
  local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
162
  layout_mode, formula_enable, table_enable, language)
163
  time.sleep(0.5)
164
 
165
- progress(50, "압축(Zip) 생성 중...")
166
  archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
167
  zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
168
  if zip_archive_success == 0:
@@ -171,13 +177,13 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
171
  logger.error("압축 실패")
172
  time.sleep(0.5)
173
 
174
- progress(70, "마크다운 읽는 중...")
175
  md_path = os.path.join(local_md_dir, file_name + ".md")
176
  with open(md_path, 'r', encoding='utf-8') as f:
177
  txt_content = f.read()
178
  time.sleep(0.5)
179
 
180
- progress(90, "이미지 base64 변환 중...")
181
  md_content = replace_image_with_base64(txt_content, local_md_dir)
182
  time.sleep(0.5)
183
 
@@ -185,6 +191,9 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
185
  return md_content
186
 
187
 
 
 
 
188
  def init_model():
189
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
190
  try:
@@ -215,9 +224,9 @@ all_lang = ['', 'auto']
215
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
216
 
217
 
218
- ##############################
219
- # 2) Gemini LLM 챗 코드
220
- ##############################
221
  import google.generativeai as genai
222
  from gradio import ChatMessage
223
  from typing import Iterator
@@ -230,13 +239,10 @@ model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
230
 
231
  def format_chat_history(messages: list) -> list:
232
  """
233
- Gemini가 이해할 수 있는 형식
234
  """
235
  formatted_history = []
236
  for message in messages:
237
- # ChatMessage => role, content
238
- # metadata는 "thinking" 표시 용도
239
- # Gemini input에서는 제외
240
  if not (message.role == "assistant" and hasattr(message, "metadata")):
241
  formatted_history.append({
242
  "role": "user" if message.role == "user" else "assistant",
@@ -246,14 +252,11 @@ def format_chat_history(messages: list) -> list:
246
 
247
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
248
  """
249
- Gemini 응답 스트리밍
250
- - user_message가 공백이어도 오류 X
251
  """
252
- # 1) 공백 입력도 그냥 통과(오류 안내 메시지 제거)
253
- # if not user_message.strip():
254
- # messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message."))
255
- # yield convert_chat_messages_to_gradio_format(messages)
256
- # return
257
 
258
  try:
259
  print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
@@ -266,7 +269,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
266
  response_buffer = ""
267
  thinking_complete = False
268
 
269
- # "Thinking" 메시지
270
  messages.append(
271
  ChatMessage(
272
  role="assistant",
@@ -299,7 +302,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
299
  response_buffer += current_chunk
300
  messages[-1] = ChatMessage(role="assistant", content=response_buffer)
301
  else:
302
- # Still in "Thinking"
303
  thought_buffer += current_chunk
304
  messages[-1] = ChatMessage(
305
  role="assistant",
@@ -318,28 +321,28 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
318
 
319
  def convert_chat_messages_to_gradio_format(messages):
320
  """
321
- ChatMessage 리스트 => Gradio (유저, ) 튜플 리스트
322
  """
323
  gradio_chat = []
324
  user_text, assistant_text = None, None
325
 
326
  for msg in messages:
327
- role = msg.role
328
- content = msg.content
329
- if role == "user":
330
  if user_text is not None or assistant_text is not None:
331
  gradio_chat.append((user_text or "", assistant_text or ""))
332
- user_text = content
333
  assistant_text = None
334
  else:
335
  # assistant
336
  if user_text is None:
337
  user_text = ""
338
  if assistant_text is None:
339
- assistant_text = content
340
  else:
341
- assistant_text += content # 스트리밍 시 이어붙임
342
 
 
343
  if user_text is not None or assistant_text is not None:
344
  gradio_chat.append((user_text or "", assistant_text or ""))
345
 
@@ -347,8 +350,7 @@ def convert_chat_messages_to_gradio_format(messages):
347
 
348
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
349
  """
350
- - doc_text(마크다운) 참고 문구를 자동으로 추가
351
- - 공백 입력도 에러 없이 진행
352
  """
353
  if doc_text.strip():
354
  user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
@@ -359,44 +361,38 @@ def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
359
  return "", history
360
 
361
 
362
- ##############################
363
- # 3) 통합 Gradio 앱 구성
364
- ##############################
365
  with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
366
  gr.HTML("""
367
  <div class="title-area">
368
  <h1>OCR FLEX + Gemini Chat</h1>
369
- <p>PDF/이미지 -> 텍스트(마크다운) 변환 후, Gemini LLM 대화</p>
370
  </div>
371
  """)
372
 
373
- # 변환된 마크다운, 채팅 이력
374
  md_state = gr.State("")
375
  chat_history = gr.State([])
376
 
377
- # 업로드 & 변환
378
  with gr.Row():
379
- file = gr.File(
380
- label="PDF/이미지 업로드",
381
- file_types=[".pdf", ".png", ".jpeg", ".jpg"],
382
- interactive=True
383
- )
384
  convert_btn = gr.Button("변환하기")
385
 
386
- # 숨김 컴포넌트
387
  max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
388
- layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
389
  language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
390
  formula_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
391
  is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
392
  table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
393
 
394
- # 변환 버튼 to_markdown + progress
395
  convert_btn.click(
396
  fn=to_markdown,
397
  inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
398
  outputs=md_state,
399
- show_progress=True # Gradio 자체 로딩 스피너도 표시
400
  )
401
 
402
  # Gemini Chat
@@ -404,9 +400,9 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
404
  chatbot = gr.Chatbot(height=600)
405
  with gr.Row():
406
  chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
407
- clear_button = gr.Button("대화 초기화")
408
 
409
- # 프롬프트 입력 -> user_message -> stream_gemini_response
410
  chat_input.submit(
411
  fn=user_message,
412
  inputs=[chat_input, chat_history, md_state],
@@ -417,10 +413,10 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
417
  outputs=chatbot
418
  )
419
 
420
- # 초기화: 상태 리셋 + 챗봇 초기화
421
  def clear_states():
422
  return [], ""
423
- clear_button.click(
 
424
  fn=clear_states,
425
  inputs=[],
426
  outputs=[chat_history, md_state]
@@ -432,4 +428,4 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
432
 
433
 
434
  if __name__ == "__main__":
435
- demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
1
  import base64
2
  import json
3
  import os
 
8
  import uuid
9
  import pymupdf
10
 
11
+ # =======================================================
12
+ # magic-pdf & MinerU 설치 관련 (기존 코드 그대로 예시)
13
+ # =======================================================
14
  os.system('pip uninstall -y magic-pdf')
15
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
16
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 
36
  from magic_pdf.libs.hash_utils import compute_sha256
37
  from magic_pdf.tools.common import do_parse, prepare_env
38
 
39
+
40
+ ###########################################
41
+ # 1) UI 스타일(CSS) + PDF처리 관련 함수들
42
+ ###########################################
43
  def create_css():
44
  """
45
+ 화면을 가득 채우고 스크롤 가능하도록 설정
46
  """
47
  return """
48
  .gradio-container {
 
53
  background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
54
  display: flex;
55
  flex-direction: column;
56
+ overflow-y: auto !important;
57
  }
58
  .title-area {
59
  text-align: center;
 
90
 
91
  def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_enable, table_enable, language):
92
  os.makedirs(output_dir, exist_ok=True)
 
93
  try:
94
  file_name = f"{str(Path(doc_path).stem)}_{time.time()}"
95
  pdf_data = read_fn(doc_path)
 
107
  formula_enable=formula_enable,
108
  table_enable=table_enable,
109
  lang=language,
110
+ f_dump_orig_pdf=False
111
  )
112
  return local_md_dir, file_name
113
  except Exception as e:
 
152
  return tmp_file_path
153
 
154
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
155
+ """
156
+ - PDF 변환 과정에서 'progress(...)' 로 진행률 업데이트
157
+ - Gradio 버전이 낮아도 'with progress:' 를 쓰지 않으면 __enter__ 오류가 안뜸
158
+ """
159
+ progress(0, "PDF로 변환 중...")
160
  file_path = to_pdf(file_path)
161
  time.sleep(0.5)
162
 
163
  if end_pages > 20:
164
  end_pages = 20
165
 
166
+ progress(20, "문서 파싱 중...")
167
  local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
168
  layout_mode, formula_enable, table_enable, language)
169
  time.sleep(0.5)
170
 
171
+ progress(50, "압축(zip) 생성 중...")
172
  archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
173
  zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
174
  if zip_archive_success == 0:
 
177
  logger.error("압축 실패")
178
  time.sleep(0.5)
179
 
180
+ progress(70, "마크다운 로드 중...")
181
  md_path = os.path.join(local_md_dir, file_name + ".md")
182
  with open(md_path, 'r', encoding='utf-8') as f:
183
  txt_content = f.read()
184
  time.sleep(0.5)
185
 
186
+ progress(90, "이미지(base64) 변환 중...")
187
  md_content = replace_image_with_base64(txt_content, local_md_dir)
188
  time.sleep(0.5)
189
 
 
191
  return md_content
192
 
193
 
194
+ ###############################
195
+ # magic_pdf 모델 초기화
196
+ ###############################
197
  def init_model():
198
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
199
  try:
 
224
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
225
 
226
 
227
+ #################################
228
+ # 2) Gemini (google.generativeai)
229
+ #################################
230
  import google.generativeai as genai
231
  from gradio import ChatMessage
232
  from typing import Iterator
 
239
 
240
  def format_chat_history(messages: list) -> list:
241
  """
242
+ Gemini가 이해할 수 있는 형식 (role, content)
243
  """
244
  formatted_history = []
245
  for message in messages:
 
 
 
246
  if not (message.role == "assistant" and hasattr(message, "metadata")):
247
  formatted_history.append({
248
  "role": "user" if message.role == "user" else "assistant",
 
252
 
253
  def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
254
  """
255
+ Gemini 응답 스트리밍: user_message가 비어있으면 기본 문구로 대체
 
256
  """
257
+ # 빈 문자열이면 기본 문구로 교체(오류 방지)
258
+ if not user_message.strip():
259
+ user_message = "…(No content from user)…"
 
 
260
 
261
  try:
262
  print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
 
269
  response_buffer = ""
270
  thinking_complete = False
271
 
272
+ # "Thinking" 메시지 추가
273
  messages.append(
274
  ChatMessage(
275
  role="assistant",
 
302
  response_buffer += current_chunk
303
  messages[-1] = ChatMessage(role="assistant", content=response_buffer)
304
  else:
305
+ # Still thinking
306
  thought_buffer += current_chunk
307
  messages[-1] = ChatMessage(
308
  role="assistant",
 
321
 
322
  def convert_chat_messages_to_gradio_format(messages):
323
  """
324
+ ChatMessage list -> [ (유저발화, 봇응답), (...), ... ]
325
  """
326
  gradio_chat = []
327
  user_text, assistant_text = None, None
328
 
329
  for msg in messages:
330
+ if msg.role == "user":
331
+ # 이전 턴 저장
 
332
  if user_text is not None or assistant_text is not None:
333
  gradio_chat.append((user_text or "", assistant_text or ""))
334
+ user_text = msg.content
335
  assistant_text = None
336
  else:
337
  # assistant
338
  if user_text is None:
339
  user_text = ""
340
  if assistant_text is None:
341
+ assistant_text = msg.content
342
  else:
343
+ assistant_text += msg.content # 스트리밍 시 누적
344
 
345
+ # 마지막 턴
346
  if user_text is not None or assistant_text is not None:
347
  gradio_chat.append((user_text or "", assistant_text or ""))
348
 
 
350
 
351
  def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
352
  """
353
+ doc_text(마크다운) 참고 문구를 자동 삽입
 
354
  """
355
  if doc_text.strip():
356
  user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
 
361
  return "", history
362
 
363
 
364
+ ################################
365
+ # 3) 통합 Gradio 앱 구성 & 실행
366
+ ################################
367
  with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
368
  gr.HTML("""
369
  <div class="title-area">
370
  <h1>OCR FLEX + Gemini Chat</h1>
371
+ <p>PDF/이미지 -> 텍스트(마크다운) 변환 후, Gemini LLM 대화</p>
372
  </div>
373
  """)
374
 
 
375
  md_state = gr.State("")
376
  chat_history = gr.State([])
377
 
 
378
  with gr.Row():
379
+ file = gr.File(label="PDF/이미지 업로드", file_types=[".pdf", ".png", ".jpeg", ".jpg"], interactive=True)
 
 
 
 
380
  convert_btn = gr.Button("변환하기")
381
 
382
+ # 숨긴 컴포넌트들
383
  max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
384
+ layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
385
  language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
386
  formula_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
387
  is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
388
  table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
389
 
390
+ # 변환 클릭 -> to_markdown (progress)
391
  convert_btn.click(
392
  fn=to_markdown,
393
  inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
394
  outputs=md_state,
395
+ show_progress=True # 프로그레스바+로딩 표시
396
  )
397
 
398
  # Gemini Chat
 
400
  chatbot = gr.Chatbot(height=600)
401
  with gr.Row():
402
  chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
403
+ clear_btn = gr.Button("대화 초기화")
404
 
405
+ # 프롬프트 전송 -> user_message -> stream_gemini_response
406
  chat_input.submit(
407
  fn=user_message,
408
  inputs=[chat_input, chat_history, md_state],
 
413
  outputs=chatbot
414
  )
415
 
 
416
  def clear_states():
417
  return [], ""
418
+
419
+ clear_btn.click(
420
  fn=clear_states,
421
  inputs=[],
422
  outputs=[chat_history, md_state]
 
428
 
429
 
430
  if __name__ == "__main__":
431
+ demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)