Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,9 +8,7 @@ import re
|
|
8 |
import uuid
|
9 |
import pymupdf
|
10 |
|
11 |
-
#
|
12 |
-
# magic-pdf & MinerU ์ค์น ๊ด๋ จ (๊ธฐ์กด ์ฝ๋ ๊ทธ๋๋ก ์์)
|
13 |
-
# =======================================================
|
14 |
os.system('pip uninstall -y magic-pdf')
|
15 |
os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
|
16 |
os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
|
@@ -29,6 +27,10 @@ with open('/home/user/magic-pdf.json', 'w') as file:
|
|
29 |
|
30 |
os.system('cp -r paddleocr /home/user/.paddleocr')
|
31 |
|
|
|
|
|
|
|
|
|
32 |
import gradio as gr
|
33 |
from loguru import logger
|
34 |
|
@@ -36,14 +38,11 @@ from magic_pdf.data.data_reader_writer import FileBasedDataReader
|
|
36 |
from magic_pdf.libs.hash_utils import compute_sha256
|
37 |
from magic_pdf.tools.common import do_parse, prepare_env
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
###########################################
|
43 |
def create_css():
|
44 |
-
"""
|
45 |
-
ํ๋ฉด์ ๊ฐ๋ ์ฑ์ฐ๊ณ ์คํฌ๋กค ๊ฐ๋ฅํ๋๋ก ์ค์
|
46 |
-
"""
|
47 |
return """
|
48 |
.gradio-container {
|
49 |
width: 100vw !important;
|
@@ -152,10 +151,7 @@ def to_pdf(file_path):
|
|
152 |
return tmp_file_path
|
153 |
|
154 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
|
155 |
-
"""
|
156 |
-
- PDF ๋ณํ ๊ณผ์ ์์ 'progress(...)' ๋ก ์งํ๋ฅ ์
๋ฐ์ดํธ
|
157 |
-
- Gradio ๋ฒ์ ์ด ๋ฎ์๋ 'with progress:' ๋ฅผ ์ฐ์ง ์์ผ๋ฉด __enter__ ์ค๋ฅ๊ฐ ์๋ธ
|
158 |
-
"""
|
159 |
progress(0, "PDF๋ก ๋ณํ ์ค...")
|
160 |
file_path = to_pdf(file_path)
|
161 |
time.sleep(0.5)
|
@@ -177,13 +173,13 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
|
|
177 |
logger.error("์์ถ ์คํจ")
|
178 |
time.sleep(0.5)
|
179 |
|
180 |
-
progress(70, "๋งํฌ๋ค์ด
|
181 |
md_path = os.path.join(local_md_dir, file_name + ".md")
|
182 |
with open(md_path, 'r', encoding='utf-8') as f:
|
183 |
txt_content = f.read()
|
184 |
time.sleep(0.5)
|
185 |
|
186 |
-
progress(90, "์ด๋ฏธ์ง
|
187 |
md_content = replace_image_with_base64(txt_content, local_md_dir)
|
188 |
time.sleep(0.5)
|
189 |
|
@@ -191,9 +187,9 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
|
|
191 |
return md_content
|
192 |
|
193 |
|
194 |
-
|
195 |
# magic_pdf 모델 초기화
|
196 |
-
|
197 |
def init_model():
|
198 |
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|
199 |
try:
|
@@ -210,6 +206,9 @@ def init_model():
|
|
210 |
model_init = init_model()
|
211 |
logger.info(f"model_init: {model_init}")
|
212 |
|
|
|
|
|
|
|
213 |
latin_lang = [
|
214 |
'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
|
215 |
'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
|
@@ -224,9 +223,9 @@ all_lang = ['', 'auto']
|
|
224 |
all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
|
225 |
|
226 |
|
227 |
-
|
228 |
# 2) Gemini (google.generativeai)
|
229 |
-
|
230 |
import google.generativeai as genai
|
231 |
from gradio import ChatMessage
|
232 |
from typing import Iterator
|
@@ -239,7 +238,7 @@ model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
|
|
239 |
|
240 |
def format_chat_history(messages: list) -> list:
|
241 |
"""
|
242 |
-
Gemini๊ฐ ์ดํดํ ์ ์๋
|
243 |
"""
|
244 |
formatted_history = []
|
245 |
for message in messages:
|
@@ -252,15 +251,13 @@ def format_chat_history(messages: list) -> list:
|
|
252 |
|
253 |
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
254 |
"""
|
255 |
-
Gemini ์๋ต
|
256 |
"""
|
257 |
-
# ๋น ๋ฌธ์์ด์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋ก ๊ต์ฒด(์ค๋ฅ ๋ฐฉ์ง)
|
258 |
if not user_message.strip():
|
259 |
-
user_message = "
|
260 |
|
261 |
try:
|
262 |
print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
|
263 |
-
|
264 |
chat_history = format_chat_history(messages)
|
265 |
chat = model.start_chat(history=chat_history)
|
266 |
response = chat.send_message(user_message, stream=True)
|
@@ -302,7 +299,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
|
302 |
response_buffer += current_chunk
|
303 |
messages[-1] = ChatMessage(role="assistant", content=response_buffer)
|
304 |
else:
|
305 |
-
# Still thinking
|
306 |
thought_buffer += current_chunk
|
307 |
messages[-1] = ChatMessage(
|
308 |
role="assistant",
|
@@ -328,7 +325,6 @@ def convert_chat_messages_to_gradio_format(messages):
|
|
328 |
|
329 |
for msg in messages:
|
330 |
if msg.role == "user":
|
331 |
-
# ์ด์ ํด ์ ์ฅ
|
332 |
if user_text is not None or assistant_text is not None:
|
333 |
gradio_chat.append((user_text or "", assistant_text or ""))
|
334 |
user_text = msg.content
|
@@ -340,9 +336,8 @@ def convert_chat_messages_to_gradio_format(messages):
|
|
340 |
if assistant_text is None:
|
341 |
assistant_text = msg.content
|
342 |
else:
|
343 |
-
assistant_text += msg.content
|
344 |
|
345 |
-
# ๋ง์ง๋ง ํด
|
346 |
if user_text is not None or assistant_text is not None:
|
347 |
gradio_chat.append((user_text or "", assistant_text or ""))
|
348 |
|
@@ -350,7 +345,7 @@ def convert_chat_messages_to_gradio_format(messages):
|
|
350 |
|
351 |
def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
|
352 |
"""
|
353 |
-
doc_text(๋งํฌ๋ค์ด)
|
354 |
"""
|
355 |
if doc_text.strip():
|
356 |
user_query = f"๋ค์ ๋ฌธ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ๋ต๋ณ:\n\n{doc_text}\n\n์ง๋ฌธ: {msg}"
|
@@ -360,34 +355,60 @@ def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
|
|
360 |
history.append(ChatMessage(role="user", content=user_query))
|
361 |
return "", history
|
362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
|
364 |
################################
|
365 |
-
# 3) 통합 Gradio 앱 구성
|
366 |
################################
|
367 |
with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
|
368 |
gr.HTML("""
|
369 |
<div class="title-area">
|
370 |
<h1>OCR FLEX + Gemini Chat</h1>
|
371 |
-
<p>PDF/์ด๋ฏธ์ง -> ํ
์คํธ(๋งํฌ๋ค์ด) ๋ณํ ํ, Gemini LLM ๋ํ</p>
|
372 |
</div>
|
373 |
""")
|
374 |
|
|
|
375 |
md_state = gr.State("")
|
376 |
chat_history = gr.State([])
|
377 |
|
|
|
378 |
with gr.Row():
|
379 |
-
file = gr.File(
|
|
|
|
|
|
|
|
|
380 |
convert_btn = gr.Button("๋ณํํ๊ธฐ")
|
381 |
|
382 |
-
#
|
383 |
-
|
384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
|
386 |
formula_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
|
387 |
is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
|
388 |
table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
|
389 |
|
390 |
-
# ๋ณํ
|
391 |
convert_btn.click(
|
392 |
fn=to_markdown,
|
393 |
inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
|
@@ -395,14 +416,15 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
|
|
395 |
show_progress=True # ํ๋ก๊ทธ๋ ์ค๋ฐ+๋ก๋ฉ ํ์
|
396 |
)
|
397 |
|
398 |
-
# Gemini Chat
|
399 |
gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
|
400 |
-
chatbot = gr.Chatbot(height=600)
|
|
|
401 |
with gr.Row():
|
402 |
chat_input = gr.Textbox(lines=1, placeholder="์ง๋ฌธ์ ์
๋ ฅํ์ธ์...")
|
403 |
clear_btn = gr.Button("๋ํ ์ด๊ธฐํ")
|
404 |
|
405 |
-
#
|
406 |
chat_input.submit(
|
407 |
fn=user_message,
|
408 |
inputs=[chat_input, chat_history, md_state],
|
@@ -413,19 +435,15 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
|
|
413 |
outputs=chatbot
|
414 |
)
|
415 |
|
|
|
416 |
def clear_states():
|
417 |
-
return [], ""
|
418 |
|
419 |
clear_btn.click(
|
420 |
fn=clear_states,
|
421 |
inputs=[],
|
422 |
-
outputs=[chat_history, md_state]
|
423 |
-
).then(
|
424 |
-
fn=lambda: [],
|
425 |
-
inputs=[],
|
426 |
-
outputs=chatbot
|
427 |
)
|
428 |
|
429 |
-
|
430 |
if __name__ == "__main__":
|
431 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|
|
|
8 |
import uuid
|
9 |
import pymupdf
|
10 |
|
11 |
+
# (기존 magic-pdf 설치 및 설정 로직)
|
|
|
|
|
12 |
os.system('pip uninstall -y magic-pdf')
|
13 |
os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
|
14 |
os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
|
|
|
27 |
|
28 |
os.system('cp -r paddleocr /home/user/.paddleocr')
|
29 |
|
30 |
+
|
31 |
+
###############################
|
32 |
+
# Gradio 및 기타 라이브러리
|
33 |
+
###############################
|
34 |
import gradio as gr
|
35 |
from loguru import logger
|
36 |
|
|
|
38 |
from magic_pdf.libs.hash_utils import compute_sha256
|
39 |
from magic_pdf.tools.common import do_parse, prepare_env
|
40 |
|
41 |
+
#######################
|
42 |
+
# 1) UI CSS + PDF funcs
|
43 |
+
#######################
|
|
|
44 |
def create_css():
|
45 |
+
"""ํ๋ฉด ๊ฐ๋ ์ฌ์ฉ + ์คํฌ๋กค ํ์ฉ"""
|
|
|
|
|
46 |
return """
|
47 |
.gradio-container {
|
48 |
width: 100vw !important;
|
|
|
151 |
return tmp_file_path
|
152 |
|
153 |
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
|
154 |
+
"""PDF ๋ณํ ํจ์ (ํ๋ก๊ทธ๋ ์ค๋ฐ ํ์)"""
|
|
|
|
|
|
|
155 |
progress(0, "PDF๋ก ๋ณํ ์ค...")
|
156 |
file_path = to_pdf(file_path)
|
157 |
time.sleep(0.5)
|
|
|
173 |
logger.error("์์ถ ์คํจ")
|
174 |
time.sleep(0.5)
|
175 |
|
176 |
+
progress(70, "๋งํฌ๋ค์ด ์ฝ๋ ์ค...")
|
177 |
md_path = os.path.join(local_md_dir, file_name + ".md")
|
178 |
with open(md_path, 'r', encoding='utf-8') as f:
|
179 |
txt_content = f.read()
|
180 |
time.sleep(0.5)
|
181 |
|
182 |
+
progress(90, "์ด๋ฏธ์ง base64 ๋ณํ ์ค...")
|
183 |
md_content = replace_image_with_base64(txt_content, local_md_dir)
|
184 |
time.sleep(0.5)
|
185 |
|
|
|
187 |
return md_content
|
188 |
|
189 |
|
190 |
+
################################
|
191 |
# magic_pdf 모델 초기화
|
192 |
+
################################
|
193 |
def init_model():
|
194 |
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|
195 |
try:
|
|
|
206 |
model_init = init_model()
|
207 |
logger.info(f"model_init: {model_init}")
|
208 |
|
209 |
+
##################
|
210 |
+
# 언어 목록
|
211 |
+
##################
|
212 |
latin_lang = [
|
213 |
'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
|
214 |
'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
|
|
|
223 |
all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
|
224 |
|
225 |
|
226 |
+
##################################
|
227 |
# 2) Gemini (google.generativeai)
|
228 |
+
##################################
|
229 |
import google.generativeai as genai
|
230 |
from gradio import ChatMessage
|
231 |
from typing import Iterator
|
|
|
238 |
|
239 |
def format_chat_history(messages: list) -> list:
|
240 |
"""
|
241 |
+
Gemini๊ฐ ์ดํดํ ์ ์๋ (role, parts[]) ํ์์ผ๋ก ๋ณํ
|
242 |
"""
|
243 |
formatted_history = []
|
244 |
for message in messages:
|
|
|
251 |
|
252 |
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
|
253 |
"""
|
254 |
+
Gemini ์๋ต ์คํธ๋ฆฌ๋ฐ (user_message๊ฐ ๊ณต๋ฐฑ์ด๋ฉด ๊ธฐ๋ณธ ๋ฌธ๊ตฌ๋ก ๊ต์ฒด)
|
255 |
"""
|
|
|
256 |
if not user_message.strip():
|
257 |
+
user_message = "...(No content from user)..."
|
258 |
|
259 |
try:
|
260 |
print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
|
|
|
261 |
chat_history = format_chat_history(messages)
|
262 |
chat = model.start_chat(history=chat_history)
|
263 |
response = chat.send_message(user_message, stream=True)
|
|
|
299 |
response_buffer += current_chunk
|
300 |
messages[-1] = ChatMessage(role="assistant", content=response_buffer)
|
301 |
else:
|
302 |
+
# Still in "thinking"
|
303 |
thought_buffer += current_chunk
|
304 |
messages[-1] = ChatMessage(
|
305 |
role="assistant",
|
|
|
325 |
|
326 |
for msg in messages:
|
327 |
if msg.role == "user":
|
|
|
328 |
if user_text is not None or assistant_text is not None:
|
329 |
gradio_chat.append((user_text or "", assistant_text or ""))
|
330 |
user_text = msg.content
|
|
|
336 |
if assistant_text is None:
|
337 |
assistant_text = msg.content
|
338 |
else:
|
339 |
+
assistant_text += msg.content
|
340 |
|
|
|
341 |
if user_text is not None or assistant_text is not None:
|
342 |
gradio_chat.append((user_text or "", assistant_text or ""))
|
343 |
|
|
|
345 |
|
346 |
def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
|
347 |
"""
|
348 |
+
doc_text(๋งํฌ๋ค์ด) ์ฌ์ฉํด ์ง๋ฌธ ์๋ ๋ณํ
|
349 |
"""
|
350 |
if doc_text.strip():
|
351 |
user_query = f"๋ค์ ๋ฌธ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ๋ต๋ณ:\n\n{doc_text}\n\n์ง๋ฌธ: {msg}"
|
|
|
355 |
history.append(ChatMessage(role="user", content=user_query))
|
356 |
return "", history
|
357 |
|
358 |
+
########################
|
359 |
+
# 새 파일 업로드시 초기화
|
360 |
+
########################
|
361 |
+
def reset_states(_):
|
362 |
+
"""
|
363 |
+
ํ์ผ์ด ๋ณ๊ฒฝ๋๋ฉด(์ ํ์ผ ์
๋ก๋๋๋ฉด)
|
364 |
+
- ๋ํ ์ด๋ ฅ(chat_history)
|
365 |
+
- ๋งํฌ๋ค์ด ์ํ(md_state)
|
366 |
+
- ์ฑ๋ด(๊ธฐ์กด ๋ํ)
|
367 |
+
๋ชจ๋ ์ด๊ธฐํ
|
368 |
+
"""
|
369 |
+
return [], "", []
|
370 |
|
371 |
################################
|
372 |
+
# 3) 통합 Gradio 앱 구성
|
373 |
################################
|
374 |
with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
|
375 |
gr.HTML("""
|
376 |
<div class="title-area">
|
377 |
<h1>OCR FLEX + Gemini Chat</h1>
|
378 |
+
<p>PDF/์ด๋ฏธ์ง -> ํ
์คํธ(๋งํฌ๋ค์ด) ๋ณํ ํ, Gemini LLM๊ณผ ๋ํ</p>
|
379 |
</div>
|
380 |
""")
|
381 |
|
382 |
+
# 상태
|
383 |
md_state = gr.State("")
|
384 |
chat_history = gr.State([])
|
385 |
|
386 |
+
# 1) 업로드 & 변환
|
387 |
with gr.Row():
|
388 |
+
file = gr.File(
|
389 |
+
label="PDF/์ด๋ฏธ์ง ์
๋ก๋",
|
390 |
+
file_types=[".pdf", ".png", ".jpeg", ".jpg"],
|
391 |
+
interactive=True
|
392 |
+
)
|
393 |
convert_btn = gr.Button("๋ณํํ๊ธฐ")
|
394 |
|
395 |
+
# 새 파일 업로드 시: 이전 대화/마크다운/챗봇 초기화
|
396 |
+
# => 만약 "파일 변경 이벤트"로 초기화하길 원치 않으면 주석 처리
|
397 |
+
file.change(
|
398 |
+
fn=reset_states,
|
399 |
+
inputs=file,
|
400 |
+
outputs=[chat_history, md_state, "chatbot"]
|
401 |
+
)
|
402 |
+
|
403 |
+
# 숨긴 컴포넌트
|
404 |
+
max_pages = gr.Slider(1,20,10, visible=False, elem_classes="invisible")
|
405 |
+
layout_mode = gr.Dropdown(["layoutlmv3","doclayout_yolo"],value="doclayout_yolo",visible=False,elem_classes="invisible")
|
406 |
language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
|
407 |
formula_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
|
408 |
is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
|
409 |
table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
|
410 |
|
411 |
+
# 변환 버튼 → to_markdown
|
412 |
convert_btn.click(
|
413 |
fn=to_markdown,
|
414 |
inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
|
|
|
416 |
show_progress=True # ํ๋ก๊ทธ๋ ์ค๋ฐ+๋ก๋ฉ ํ์
|
417 |
)
|
418 |
|
419 |
+
# 2) Gemini Chat
|
420 |
gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
|
421 |
+
chatbot = gr.Chatbot(height=600, elem_id="chatbot") # elem_id ์ง์ (reset์์ outputs๋ก ์ฌ์ฉ)
|
422 |
+
|
423 |
with gr.Row():
|
424 |
chat_input = gr.Textbox(lines=1, placeholder="์ง๋ฌธ์ ์
๋ ฅํ์ธ์...")
|
425 |
clear_btn = gr.Button("๋ํ ์ด๊ธฐํ")
|
426 |
|
427 |
+
# 채팅 흐름
|
428 |
chat_input.submit(
|
429 |
fn=user_message,
|
430 |
inputs=[chat_input, chat_history, md_state],
|
|
|
435 |
outputs=chatbot
|
436 |
)
|
437 |
|
438 |
+
# 초기화 버튼: 대화/상태/챗봇 모두 초기화
|
439 |
def clear_states():
|
440 |
+
return [], "", []
|
441 |
|
442 |
clear_btn.click(
|
443 |
fn=clear_states,
|
444 |
inputs=[],
|
445 |
+
outputs=[chat_history, md_state, chatbot]
|
|
|
|
|
|
|
|
|
446 |
)
|
447 |
|
|
|
448 |
if __name__ == "__main__":
|
449 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|