Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,10 +8,16 @@ from typhoon_ocr import prepare_ocr_messages
|
|
8 |
import gradio as gr
|
9 |
from PIL import Image
|
10 |
|
|
|
11 |
load_dotenv()
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
|
|
|
15 |
theme = gr.themes.Soft(
|
16 |
primary_hue=gr.themes.Color(
|
17 |
c50="#f7f7fd",
|
@@ -30,6 +36,20 @@ theme = gr.themes.Soft(
|
|
30 |
neutral_hue="stone",
|
31 |
)
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
def process_pdf(pdf_or_image_file, task_type, page_number):
|
34 |
if pdf_or_image_file is None:
|
35 |
return None, "No file uploaded"
|
@@ -37,7 +57,7 @@ def process_pdf(pdf_or_image_file, task_type, page_number):
|
|
37 |
orig_filename = pdf_or_image_file.name
|
38 |
|
39 |
try:
|
40 |
-
#
|
41 |
messages = prepare_ocr_messages(
|
42 |
pdf_or_image_path=orig_filename,
|
43 |
task_type=task_type,
|
@@ -46,12 +66,12 @@ def process_pdf(pdf_or_image_file, task_type, page_number):
|
|
46 |
page_num=page_number if page_number else 1
|
47 |
)
|
48 |
|
49 |
-
#
|
50 |
image_url = messages[0]["content"][1]["image_url"]["url"]
|
51 |
image_base64 = image_url.replace("data:image/png;base64,", "")
|
52 |
image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
|
53 |
|
54 |
-
#
|
55 |
response = openai.chat.completions.create(
|
56 |
model=os.getenv("TYPHOON_OCR_MODEL"),
|
57 |
messages=messages,
|
@@ -64,20 +84,22 @@ def process_pdf(pdf_or_image_file, task_type, page_number):
|
|
64 |
)
|
65 |
text_output = response.choices[0].message.content
|
66 |
|
67 |
-
#
|
68 |
try:
|
69 |
json_data = json.loads(text_output)
|
70 |
markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
|
71 |
except Exception as e:
|
72 |
markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
|
73 |
|
74 |
-
|
|
|
|
|
|
|
75 |
|
76 |
except Exception as e:
|
77 |
-
return None, f"Error processing file: {str(e)}"
|
78 |
-
|
79 |
|
80 |
-
#
|
81 |
with gr.Blocks(theme=theme) as demo:
|
82 |
title = gr.HTML("""
|
83 |
<h1>Typhoon OCR</h1>
|
@@ -87,7 +109,7 @@ with gr.Blocks(theme=theme) as demo:
|
|
87 |
<br />
|
88 |
<details>
|
89 |
<summary><strong>Disclaimer</strong></summary>
|
90 |
-
The responses generated by this
|
91 |
</details>
|
92 |
<br />
|
93 |
<details>
|
@@ -99,12 +121,14 @@ with gr.Blocks(theme=theme) as demo:
|
|
99 |
<summary><strong>License</strong></summary>
|
100 |
This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
|
101 |
</details>
|
102 |
-
""")
|
|
|
103 |
with gr.Row():
|
104 |
with gr.Column(scale=1):
|
105 |
-
#
|
106 |
pdf_input = gr.File(label="📄 Upload Image file or PDF file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
|
107 |
|
|
|
108 |
with gr.Group(elem_classes=["task-background"]):
|
109 |
task_dropdown = gr.Radio(["default", "structure"], label="🎯 Select Task", value="default")
|
110 |
gr.HTML("""
|
@@ -115,7 +139,6 @@ with gr.Blocks(theme=theme) as demo:
|
|
115 |
demo.css = """
|
116 |
.task-background {
|
117 |
background: var(--block-background-fill) !important;
|
118 |
-
|
119 |
}
|
120 |
.task-background > * {
|
121 |
background: var(--block-background-fill) !important;
|
@@ -125,19 +148,31 @@ with gr.Blocks(theme=theme) as demo:
|
|
125 |
font-size: 12px;
|
126 |
}
|
127 |
"""
|
|
|
128 |
page_number = gr.Number(label="📄 Page Number (for PDFs only)", value=1, minimum=1, step=1)
|
|
|
|
|
129 |
run_button = gr.Button("🚀 Run")
|
|
|
|
|
130 |
image_output = gr.Image(label="📸 Preview Image", type="pil")
|
|
|
131 |
with gr.Column(scale=2):
|
|
|
132 |
markdown_output = gr.Markdown(label='Markdown Result', show_label=True)
|
|
|
|
|
|
|
133 |
|
134 |
-
|
135 |
-
# Connect the UI inputs to the processing function.
|
136 |
run_button.click(
|
137 |
fn=process_pdf,
|
138 |
inputs=[pdf_input, task_dropdown, page_number],
|
139 |
-
outputs=[image_output, markdown_output]
|
140 |
)
|
141 |
|
142 |
-
#
|
143 |
-
|
|
|
|
|
|
|
|
8 |
import gradio as gr
|
9 |
from PIL import Image
|
10 |
|
11 |
+
# โหลด environment variables จาก .env
|
12 |
load_dotenv()
|
13 |
|
14 |
+
# ตั้งค่า OpenAI (ใช้ API ของ Typhoon OCR)
|
15 |
+
openai = OpenAI(
|
16 |
+
base_url=os.getenv("TYPHOON_BASE_URL"),
|
17 |
+
api_key=os.getenv("TYPHOON_API_KEY")
|
18 |
+
)
|
19 |
|
20 |
+
# ตั้งค่า Theme (ใช้ของเดิม)
|
21 |
theme = gr.themes.Soft(
|
22 |
primary_hue=gr.themes.Color(
|
23 |
c50="#f7f7fd",
|
|
|
36 |
neutral_hue="stone",
|
37 |
)
|
38 |
|
39 |
+
# ตัวแปรสำหรับบันทึกผลลัพธ์
|
40 |
+
OUTPUT_FILE = "ocr_results.txt"
|
41 |
+
|
42 |
+
def save_ocr_result(text):
    """Append one OCR result to the shared results file.

    The text is written to ``OUTPUT_FILE`` in append mode (UTF-8), followed
    by two newline characters so consecutive results are separated.

    Args:
        text: The OCR output string to record.

    Returns:
        The value of ``OUTPUT_FILE``, i.e. the path that was written to.
    """
    with open(OUTPUT_FILE, mode="a", encoding="utf-8") as results:
        # Two trailing newlines separate this entry from the next one.
        entry = text + "\n\n"
        results.write(entry)
    return OUTPUT_FILE
|
47 |
+
|
48 |
+
def clear_output_file():
    """Empty the results file so a new session starts from a blank file.

    Intended to be called once at start-up, before any results are appended.
    """
    # Opening in "w" mode truncates the file (creating it if absent);
    # nothing needs to be written afterwards.
    with open(OUTPUT_FILE, mode="w", encoding="utf-8"):
        pass
|
52 |
+
|
53 |
def process_pdf(pdf_or_image_file, task_type, page_number):
|
54 |
if pdf_or_image_file is None:
|
55 |
return None, "No file uploaded"
|
|
|
57 |
orig_filename = pdf_or_image_file.name
|
58 |
|
59 |
try:
|
60 |
+
# ใช้ prepare_ocr_messages ตามเดิม
|
61 |
messages = prepare_ocr_messages(
|
62 |
pdf_or_image_path=orig_filename,
|
63 |
task_type=task_type,
|
|
|
66 |
page_num=page_number if page_number else 1
|
67 |
)
|
68 |
|
69 |
+
# ดึงภาพจากผลลัพธ์
|
70 |
image_url = messages[0]["content"][1]["image_url"]["url"]
|
71 |
image_base64 = image_url.replace("data:image/png;base64,", "")
|
72 |
image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
|
73 |
|
74 |
+
# ส่งไป API
|
75 |
response = openai.chat.completions.create(
|
76 |
model=os.getenv("TYPHOON_OCR_MODEL"),
|
77 |
messages=messages,
|
|
|
84 |
)
|
85 |
text_output = response.choices[0].message.content
|
86 |
|
87 |
+
# ดึง natural_text
|
88 |
try:
|
89 |
json_data = json.loads(text_output)
|
90 |
markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
|
91 |
except Exception as e:
|
92 |
markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
|
93 |
|
94 |
+
# บันทึกผลลัพธ์ต่อเนื่องในไฟล์
|
95 |
+
save_ocr_result(markdown_out)
|
96 |
+
|
97 |
+
return image_pil, markdown_out, gr.File.update(value=OUTPUT_FILE)
|
98 |
|
99 |
except Exception as e:
|
100 |
+
return None, f"Error processing file: {str(e)}", None
|
|
|
101 |
|
102 |
+
# สร้าง UI
|
103 |
with gr.Blocks(theme=theme) as demo:
|
104 |
title = gr.HTML("""
|
105 |
<h1>Typhoon OCR</h1>
|
|
|
109 |
<br />
|
110 |
<details>
|
111 |
<summary><strong>Disclaimer</strong></summary>
|
112 |
+
The responses generated by this AI system are autonomously constructed and do not necessarily reflect the views or positions of the developing organizations, their affiliates, or any of their employees. These AI-generated responses do not represent those of the organizations. The organizations do not endorse, support, sanction, encourage, verify, or agree with the comments, opinions, or statements generated by this AI. The information produced by this AI is not intended to malign any religion, ethnic group, club, organization, company, individual, anyone, or anything. It is not the intent of the organizations to malign any group or individual. The AI operates based on its programming and training data and its responses should not be interpreted as the explicit intent or opinion of the organizations.
|
113 |
</details>
|
114 |
<br />
|
115 |
<details>
|
|
|
121 |
<summary><strong>License</strong></summary>
|
122 |
This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
|
123 |
</details>
|
124 |
+
""")
|
125 |
+
|
126 |
with gr.Row():
|
127 |
with gr.Column(scale=1):
|
128 |
+
# อัปโหลดไฟล์
|
129 |
pdf_input = gr.File(label="📄 Upload Image file or PDF file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
|
130 |
|
131 |
+
# เลือก Task
|
132 |
with gr.Group(elem_classes=["task-background"]):
|
133 |
task_dropdown = gr.Radio(["default", "structure"], label="🎯 Select Task", value="default")
|
134 |
gr.HTML("""
|
|
|
139 |
demo.css = """
|
140 |
.task-background {
|
141 |
background: var(--block-background-fill) !important;
|
|
|
142 |
}
|
143 |
.task-background > * {
|
144 |
background: var(--block-background-fill) !important;
|
|
|
148 |
font-size: 12px;
|
149 |
}
|
150 |
"""
|
151 |
+
# เลือกเพจ
|
152 |
page_number = gr.Number(label="📄 Page Number (for PDFs only)", value=1, minimum=1, step=1)
|
153 |
+
|
154 |
+
# ปุ่มรัน
|
155 |
run_button = gr.Button("🚀 Run")
|
156 |
+
|
157 |
+
# แสดงภาพ
|
158 |
image_output = gr.Image(label="📸 Preview Image", type="pil")
|
159 |
+
|
160 |
with gr.Column(scale=2):
|
161 |
+
# แสดงผลลัพธ์ Markdown
|
162 |
markdown_output = gr.Markdown(label='Markdown Result', show_label=True)
|
163 |
+
|
164 |
+
# ปุ่มดาวน์โหลดไฟล์
|
165 |
+
download_button = gr.File(label="📥 ดาวน์โหลดผลลัพธ์ทั้งหมด (Text File)", interactive=False)
|
166 |
|
167 |
+
# เชื่อมต่อ UI กับฟังก์ชัน
|
|
|
168 |
run_button.click(
|
169 |
fn=process_pdf,
|
170 |
inputs=[pdf_input, task_dropdown, page_number],
|
171 |
+
outputs=[image_output, markdown_output, download_button]
|
172 |
)
|
173 |
|
174 |
+
# เรียกครั้งเดียวเมื่อเริ่มเพื่อล้างไฟล์เก่า
|
175 |
+
clear_output_file()
|
176 |
+
|
177 |
+
# รันแอป
|
178 |
+
demo.launch(share=False)
|