protae5544 committed on
Commit
fe3ba7e
·
verified ·
1 Parent(s): c65aaa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -18
app.py CHANGED
@@ -8,10 +8,16 @@ from typhoon_ocr import prepare_ocr_messages
8
  import gradio as gr
9
  from PIL import Image
10
 
 
11
  load_dotenv()
12
 
13
- openai = OpenAI(base_url=os.getenv("TYPHOON_BASE_URL"), api_key=os.getenv("TYPHOON_API_KEY"))
 
 
 
 
14
 
 
15
  theme = gr.themes.Soft(
16
  primary_hue=gr.themes.Color(
17
  c50="#f7f7fd",
@@ -30,6 +36,20 @@ theme = gr.themes.Soft(
30
  neutral_hue="stone",
31
  )
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def process_pdf(pdf_or_image_file, task_type, page_number):
34
  if pdf_or_image_file is None:
35
  return None, "No file uploaded"
@@ -37,7 +57,7 @@ def process_pdf(pdf_or_image_file, task_type, page_number):
37
  orig_filename = pdf_or_image_file.name
38
 
39
  try:
40
- # Use the new simplified function to prepare OCR messages with page number
41
  messages = prepare_ocr_messages(
42
  pdf_or_image_path=orig_filename,
43
  task_type=task_type,
@@ -46,12 +66,12 @@ def process_pdf(pdf_or_image_file, task_type, page_number):
46
  page_num=page_number if page_number else 1
47
  )
48
 
49
- # Extract the image from the message content for display
50
  image_url = messages[0]["content"][1]["image_url"]["url"]
51
  image_base64 = image_url.replace("data:image/png;base64,", "")
52
  image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
53
 
54
- # Send messages to OpenAI compatible API
55
  response = openai.chat.completions.create(
56
  model=os.getenv("TYPHOON_OCR_MODEL"),
57
  messages=messages,
@@ -64,20 +84,22 @@ def process_pdf(pdf_or_image_file, task_type, page_number):
64
  )
65
  text_output = response.choices[0].message.content
66
 
67
- # Try to parse the output assuming it is a Python dictionary containing 'natural_text'
68
  try:
69
  json_data = json.loads(text_output)
70
  markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
71
  except Exception as e:
72
  markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
73
 
74
- return image_pil, markdown_out
 
 
 
75
 
76
  except Exception as e:
77
- return None, f"Error processing file: {str(e)}"
78
-
79
 
80
- # Build the Gradio UI.
81
  with gr.Blocks(theme=theme) as demo:
82
  title = gr.HTML("""
83
  <h1>Typhoon OCR</h1>
@@ -87,7 +109,7 @@ with gr.Blocks(theme=theme) as demo:
87
  <br />
88
  <details>
89
  <summary><strong>Disclaimer</strong></summary>
90
- The responses generated by this Artificial Intelligence (AI) system are autonomously constructed and do not necessarily reflect the views or positions of the developing organizations, their affiliates, or any of their employees. These AI-generated responses do not represent those of the organizations. The organizations do not endorse, support, sanction, encourage, verify, or agree with the comments, opinions, or statements generated by this AI. The information produced by this AI is not intended to malign any religion, ethnic group, club, organization, company, individual, anyone, or anything. It is not the intent of the organizations to malign any group or individual. The AI operates based on its programming and training data and its responses should not be interpreted as the explicit intent or opinion of the organizations.
91
  </details>
92
  <br />
93
  <details>
@@ -99,12 +121,14 @@ with gr.Blocks(theme=theme) as demo:
99
  <summary><strong>License</strong></summary>
100
  This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
101
  </details>
102
- """)
 
103
  with gr.Row():
104
  with gr.Column(scale=1):
105
- # Update file_types to accept PDF as well as common image formats.
106
  pdf_input = gr.File(label="📄 Upload Image file or PDF file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
107
 
 
108
  with gr.Group(elem_classes=["task-background"]):
109
  task_dropdown = gr.Radio(["default", "structure"], label="🎯 Select Task", value="default")
110
  gr.HTML("""
@@ -115,7 +139,6 @@ with gr.Blocks(theme=theme) as demo:
115
  demo.css = """
116
  .task-background {
117
  background: var(--block-background-fill) !important;
118
-
119
  }
120
  .task-background > * {
121
  background: var(--block-background-fill) !important;
@@ -125,19 +148,31 @@ with gr.Blocks(theme=theme) as demo:
125
  font-size: 12px;
126
  }
127
  """
 
128
  page_number = gr.Number(label="📄 Page Number (for PDFs only)", value=1, minimum=1, step=1)
 
 
129
  run_button = gr.Button("🚀 Run")
 
 
130
  image_output = gr.Image(label="📸 Preview Image", type="pil")
 
131
  with gr.Column(scale=2):
 
132
  markdown_output = gr.Markdown(label='Markdown Result', show_label=True)
 
 
 
133
 
134
-
135
- # Connect the UI inputs to the processing function.
136
  run_button.click(
137
  fn=process_pdf,
138
  inputs=[pdf_input, task_dropdown, page_number],
139
- outputs=[image_output, markdown_output]
140
  )
141
 
142
- # Launch the Gradio demo (temporary public share for 72 hours)
143
- demo.launch(share=False)
 
 
 
 
8
  import gradio as gr
9
  from PIL import Image
10
 
11
+ # โหลด environment variables จาก .env
12
  load_dotenv()
13
 
14
+ # ตั้งค่า OpenAI (ใช้ API ของ Typhoon OCR)
15
+ openai = OpenAI(
16
+ base_url=os.getenv("TYPHOON_BASE_URL"),
17
+ api_key=os.getenv("TYPHOON_API_KEY")
18
+ )
19
 
20
+ # ตั้งค่า Theme (ใช้ของเดิม)
21
  theme = gr.themes.Soft(
22
  primary_hue=gr.themes.Color(
23
  c50="#f7f7fd",
 
36
  neutral_hue="stone",
37
  )
38
 
39
+ # ตัวแปรสำหรับบันทึกผลลัพธ์
40
+ OUTPUT_FILE = "ocr_results.txt"
41
+
42
def save_ocr_result(text, output_path=None):
    """Append one OCR result to the cumulative results file.

    The text is written followed by two newline characters, so consecutive
    results in the file are separated by a blank line.

    Args:
        text: OCR output string to append.
        output_path: File to append to. When omitted or ``None``, the
            module-level ``OUTPUT_FILE`` is used.

    Returns:
        The path of the file that was written.
    """
    # Resolve the default at call time so the module constant is only
    # consulted when the caller does not supply an explicit path.
    path = OUTPUT_FILE if output_path is None else output_path
    with open(path, "a", encoding="utf-8") as f:
        f.write(text + "\n\n")
    return path
47
+
48
def clear_output_file(output_path=None):
    """Empty the results file, creating it if it does not exist yet.

    Intended to be called once at start-up so results from an earlier
    session are not mixed into the new one.

    Args:
        output_path: File to truncate. When omitted or ``None``, the
            module-level ``OUTPUT_FILE`` is used.
    """
    path = OUTPUT_FILE if output_path is None else output_path
    # Opening in "w" mode truncates the file; nothing needs to be written.
    with open(path, "w", encoding="utf-8"):
        pass
52
+
53
  def process_pdf(pdf_or_image_file, task_type, page_number):
54
  if pdf_or_image_file is None:
55
  return None, "No file uploaded"
 
57
  orig_filename = pdf_or_image_file.name
58
 
59
  try:
60
+ # ใช้ prepare_ocr_messages ตามเดิม
61
  messages = prepare_ocr_messages(
62
  pdf_or_image_path=orig_filename,
63
  task_type=task_type,
 
66
  page_num=page_number if page_number else 1
67
  )
68
 
69
+ # ดึงภาพจากผลลัพธ์
70
  image_url = messages[0]["content"][1]["image_url"]["url"]
71
  image_base64 = image_url.replace("data:image/png;base64,", "")
72
  image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
73
 
74
+ # ส่งไป API
75
  response = openai.chat.completions.create(
76
  model=os.getenv("TYPHOON_OCR_MODEL"),
77
  messages=messages,
 
84
  )
85
  text_output = response.choices[0].message.content
86
 
87
+ # ดึง natural_text
88
  try:
89
  json_data = json.loads(text_output)
90
  markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
91
  except Exception as e:
92
  markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
93
 
94
+ # บันทึกผลลัพธ์ต่อเนื่องในไฟล์
95
+ save_ocr_result(markdown_out)
96
+
97
+ return image_pil, markdown_out, gr.File.update(value=OUTPUT_FILE)
98
 
99
  except Exception as e:
100
+ return None, f"Error processing file: {str(e)}", None
 
101
 
102
+ # สร้าง UI
103
  with gr.Blocks(theme=theme) as demo:
104
  title = gr.HTML("""
105
  <h1>Typhoon OCR</h1>
 
109
  <br />
110
  <details>
111
  <summary><strong>Disclaimer</strong></summary>
112
+ The responses generated by this AI system are autonomously constructed and do not necessarily reflect the views or positions of the developing organizations, their affiliates, or any of their employees. These AI-generated responses do not represent those of the organizations. The organizations do not endorse, support, sanction, encourage, verify, or agree with the comments, opinions, or statements generated by this AI. The information produced by this AI is not intended to malign any religion, ethnic group, club, organization, company, individual, anyone, or anything. It is not the intent of the organizations to malign any group or individual. The AI operates based on its programming and training data and its responses should not be interpreted as the explicit intent or opinion of the organizations.
113
  </details>
114
  <br />
115
  <details>
 
121
  <summary><strong>License</strong></summary>
122
  This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
123
  </details>
124
+ """)
125
+
126
  with gr.Row():
127
  with gr.Column(scale=1):
128
+ # อัปโหลดไฟล์
129
  pdf_input = gr.File(label="📄 Upload Image file or PDF file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
130
 
131
+ # เลือก Task
132
  with gr.Group(elem_classes=["task-background"]):
133
  task_dropdown = gr.Radio(["default", "structure"], label="🎯 Select Task", value="default")
134
  gr.HTML("""
 
139
  demo.css = """
140
  .task-background {
141
  background: var(--block-background-fill) !important;
 
142
  }
143
  .task-background > * {
144
  background: var(--block-background-fill) !important;
 
148
  font-size: 12px;
149
  }
150
  """
151
+ # เลือกเพจ
152
  page_number = gr.Number(label="📄 Page Number (for PDFs only)", value=1, minimum=1, step=1)
153
+
154
+ # ปุ่มรัน
155
  run_button = gr.Button("🚀 Run")
156
+
157
+ # แสดงภาพ
158
  image_output = gr.Image(label="📸 Preview Image", type="pil")
159
+
160
  with gr.Column(scale=2):
161
+ # แสดงผลลัพธ์ Markdown
162
  markdown_output = gr.Markdown(label='Markdown Result', show_label=True)
163
+
164
+ # ปุ่มดาวน์โหลดไฟล์
165
+ download_button = gr.File(label="📥 ดาวน์โหลดผลลัพธ์ทั้งหมด (Text File)", interactive=False)
166
 
167
+ # เชื่อมต่อ UI กับฟังก์ชัน
 
168
  run_button.click(
169
  fn=process_pdf,
170
  inputs=[pdf_input, task_dropdown, page_number],
171
+ outputs=[image_output, markdown_output, download_button]
172
  )
173
 
174
+ # เรียกครั้งเดียวเมื่อเริ่มเพื่อล้างไฟล์เก่า
175
+ clear_output_file()
176
+
177
+ # รันแอป
178
+ demo.launch(share=False)