Spaces:
Running
Running
File size: 5,701 Bytes
a383d0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import detect_text.ocr as ocr
from detect_text.Text import Text
import numpy as np
import cv2
import json
import time
import os
from os.path import join as pjoin
def save_detection_json(file_path, texts, img_shape):
    """Serialize detected texts (id, content, bounding box, size) to a JSON file.

    :param file_path: destination path for the JSON output
    :param texts: iterable of Text-like objects exposing id, content,
                  location (dict with left/top/right/bottom), width, height
    :param img_shape: shape of the source image, stored for downstream scaling
    """
    output = {'img_shape': img_shape, 'texts': []}
    for text in texts:
        loc = text.location
        entry = {'id': text.id,
                 'content': text.content,
                 'column_min': loc['left'],
                 'row_min': loc['top'],
                 'column_max': loc['right'],
                 'row_max': loc['bottom'],
                 'width': text.width,
                 'height': text.height}
        output['texts'].append(entry)
    # with-statement guarantees the handle is closed (original leaked the file object)
    with open(file_path, 'w') as f_out:
        json.dump(output, f_out, indent=4)
def visualize_texts(org_img, texts, shown_resize_height=None, show=False, write_path=None):
    """Draw every text's bounding box on a copy of org_img.

    :param org_img: source image (numpy array); not modified
    :param texts: Text elements providing visualize_element(img, line=...)
    :param shown_resize_height: if given, the on-screen preview is scaled to this
                                height (aspect ratio preserved); the written file
                                stays full resolution
    :param show: display the annotated image in a blocking OpenCV window
    :param write_path: if given, write the full-resolution annotated image there
    """
    canvas = org_img.copy()
    for txt in texts:
        txt.visualize_element(canvas, line=2)
    preview = canvas
    if shown_resize_height is not None:
        scaled_width = int(shown_resize_height * (canvas.shape[1] / canvas.shape[0]))
        preview = cv2.resize(canvas, (scaled_width, shown_resize_height))
    if show:
        cv2.imshow('texts', preview)
        cv2.waitKey(0)
        cv2.destroyWindow('texts')
    if write_path is not None:
        cv2.imwrite(write_path, canvas)
def text_sentences_recognition(texts):
    '''
    Merge separate words detected by Google ocr into a sentence
    '''
    merged_any = True
    # Repeat until a full pass produces no merge, since a merge can enable further merges.
    while merged_any:
        merged_any = False
        grouped = []
        for word in texts:
            for sentence in grouped:
                justify = 0.2 * min(word.height, sentence.height)
                gap = 2 * max(word.word_width, sentence.word_width)
                if word.is_on_same_line(sentence, 'h', bias_justify=justify, bias_gap=gap):
                    sentence.merge_text(word)
                    merged_any = True
                    break
            else:
                grouped.append(word)
        texts = grouped.copy()
    # Re-number the surviving sentences sequentially.
    for idx, sentence in enumerate(texts):
        sentence.id = idx
    return texts
def merge_intersected_texts(texts):
    '''
    Merge intersected texts (sentences or words)
    '''
    stable = False
    # Keep sweeping until one pass completes with no merges.
    while not stable:
        stable = True
        kept = []
        for candidate in texts:
            for target in kept:
                if candidate.is_intersected(target, bias=2):
                    target.merge_text(candidate)
                    stable = False
                    break
            else:
                kept.append(candidate)
        texts = kept.copy()
    return texts
def text_cvt_orc_format(ocr_result):
    """Convert a Google OCR response into a list of Text elements.

    Entries whose bounding polygon is missing an 'x' or 'y' coordinate
    are skipped. Returns an empty list when ocr_result is None.
    """
    texts = []
    if ocr_result is None:
        return texts
    for idx, entry in enumerate(ocr_result):
        vertices = entry['boundingPoly']['vertices']
        content = entry['description']
        if any('x' not in v or 'y' not in v for v in vertices):
            continue  # incomplete vertex — drop this detection
        xs = [v['x'] for v in vertices]
        ys = [v['y'] for v in vertices]
        location = {'left': min(xs), 'top': min(ys),
                    'right': max(xs), 'bottom': max(ys)}
        texts.append(Text(idx, content, location))
    return texts
def text_cvt_orc_format_paddle(paddle_result):
    """Convert a PaddleOCR result into a list of Text elements.

    Each entry is (polygon_points, (content, confidence)); the polygon
    is collapsed to its axis-aligned bounding box.
    """
    texts = []
    for idx, line in enumerate(paddle_result):
        box = np.asarray(line[0])
        xs, ys = box[:, 0], box[:, 1]
        location = {'left': int(xs.min()), 'top': int(ys.min()),
                    'right': int(xs.max()), 'bottom': int(ys.max())}
        texts.append(Text(idx, line[1][0], location))
    return texts
def text_filter_noise(texts):
    """Drop single-character (or empty) detections unless the character is
    one of a small allow-list of meaningful symbols."""
    keep_chars = {'a', ',', '.', '!', '?', '$', '%', ':', '&', '+'}
    return [text for text in texts
            if len(text.content) > 1 or text.content.lower() in keep_chars]
def text_detection(input_file='../data/input/30800.jpg', output_file='../data/output', show=False, method='paddle', paddle_model=None):
    '''
    Detect text in an image via OCR, then save an annotated image and a JSON
    summary under <output_file>/ocr.

    :param input_file: path to the input image
    :param output_file: root output directory (an 'ocr' subfolder is created)
    :param show: if True, display the annotated image in a window
    :param method: google or paddle
    :param paddle_model: the preload paddle model for paddle ocr
    :raises FileNotFoundError: if the input image cannot be read
    :raises ValueError: if method is neither "google" nor "paddle"
    '''
    start = time.perf_counter()
    # splitext handles extensions of any length (original '[:-4]' assumed 3 chars);
    # basename handles platform path separators as well as '/'
    name = os.path.splitext(os.path.basename(input_file))[0]
    ocr_root = pjoin(output_file, 'ocr')
    os.makedirs(ocr_root, exist_ok=True)  # original crashed if the folder was missing
    img = cv2.imread(input_file)
    if img is None:
        # cv2.imread returns None silently on failure; fail loudly and early instead
        raise FileNotFoundError('Cannot read input image: %s' % input_file)
    if method == 'google':
        print('*** Detect Text through Google OCR ***')
        ocr_result = ocr.ocr_detection_google(input_file)
        texts = text_cvt_orc_format(ocr_result)
        texts = merge_intersected_texts(texts)
        texts = text_filter_noise(texts)
        texts = text_sentences_recognition(texts)
    elif method == 'paddle':
        # The import of the paddle ocr can be separate to the beginning of the program if you decide to use this method
        from paddleocr import PaddleOCR
        print('*** Detect Text through Paddle OCR ***')
        if paddle_model is None:
            paddle_model = PaddleOCR(use_angle_cls=True, lang="ch")
        result = paddle_model.ocr(input_file)
        texts = text_cvt_orc_format_paddle(result)
    else:
        raise ValueError('Method has to be "google" or "paddle"')
    visualize_texts(img, texts, shown_resize_height=800, show=show, write_path=pjoin(ocr_root, name+'.png'))
    save_detection_json(pjoin(ocr_root, name+'.json'), texts, img.shape)
    print("[Text Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_file, pjoin(ocr_root, name+'.json')))
# text_detection()
|