hytos / DTI_PID / DTI_PID / TextDetector.py @ 82be49cb
# coding: utf-8
"""
This is the text detector module
"""
import sys
import os
import cv2
import numpy as np
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from PyQt5.QtSvg import *

from AppDocData import *
import TextInfo as ti
import tesseract_ocr_module as TOCR

MIN_TEXT_SIZE = 10
THREAD_MAX_WORKER = os.cpu_count()


class TextDetector:
    '''
    @brief constructor
    @author humkyung
    @date 2018.07.11
    '''

    def __init__(self):
        self.textInfoList = []
        self.otherTextInfoList = []
        self.titleBlockTextInfoList = []

    '''
    @brief detect text areas
    @author humkyung
    @date 2018.06.16
    '''

    def detectTextAreas(self, img, offset):
        try:
            app_doc_data = AppDocData.instance()

            configs = app_doc_data.getConfigs('Engine', 'Text Area')
            if (configs and int(configs[0].value) == 1) or not configs:
                # get text boxes the original way
                return self.getTextAreaInfo(img, offset[0], offset[1])
            else:
                # using CRAFT
                return self.get_text_box_using_craft(img, offset[0], offset[1], web=True)

        except Exception as ex:
            from App import App
            from AppDocData import MessageType
            message = 'error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)

        return [], None

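    # Note: the 'Engine' / 'Text Area' config selects the detector: a value of 1
    # (or no config row at all) keeps the contour-based getTextAreaInfo path,
    # while any other value routes detection to the CRAFT web service.
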
    def decode_predictions(self, scores, geometry):
        # grab the number of rows and columns from the scores volume, then
        # initialize our set of bounding box rectangles and corresponding
        # confidence scores
        (numRows, numCols) = scores.shape[2:4]
        rects = []
        confidences = []

        # loop over the number of rows
        for y in range(0, numRows):
            # extract the scores (probabilities), followed by the
            # geometrical data used to derive potential bounding box
            # coordinates that surround text
            scoresData = scores[0, 0, y]
            xData0 = geometry[0, 0, y]
            xData1 = geometry[0, 1, y]
            xData2 = geometry[0, 2, y]
            xData3 = geometry[0, 3, y]
            anglesData = geometry[0, 4, y]

            # loop over the number of columns
            for x in range(0, numCols):
                # if our score does not have sufficient probability, ignore it
                if scoresData[x] < 0.5:  # args["min_confidence"]
                    continue

                # compute the offset factor as our resulting feature
                # maps will be 4x smaller than the input image
                (offsetX, offsetY) = (x * 4.0, y * 4.0)

                # extract the rotation angle for the prediction and
                # then compute the sine and cosine
                angle = anglesData[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                # use the geometry volume to derive the width and height
                # of the bounding box
                h = xData0[x] + xData2[x]
                w = xData1[x] + xData3[x]

                # compute both the starting and ending (x, y)-coordinates
                # for the text prediction bounding box
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
                endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
                startX = int(endX - w)
                startY = int(endY - h)

                # add the bounding box coordinates and probability score
                # to our respective lists
                rects.append((startX, startY, endX, endY))
                confidences.append(scoresData[x])

        # return a tuple of the bounding boxes and associated confidences
        return (rects, confidences)

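    # decode_predictions is only consumed by the EAST branch that is currently
    # commented out in getTextBox below. A minimal sketch of how its output
    # would be used there, assuming imutils is available for non-maxima
    # suppression:
    #
    #   from imutils.object_detection import non_max_suppression
    #   (rects, confidences) = self.decode_predictions(scores, geometry)
    #   boxes = non_max_suppression(np.array(rects), probs=confidences)
    #   for (startX, startY, endX, endY) in boxes:
    #       ...  # each box is a text candidate in image coordinates
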
    '''
    @brief Get Text Area info by contour
    @author Jeongwoo
    @date 2018.06.05
    @history 2018.06.08 Jeongwoo Add angle
             humkyung 2018.06.18 fixed logic to detect text area
    '''
    def getTextAreaInfo(self, imgGray, offset_x, offset_y):
        #from imutils.object_detection import non_max_suppression
        from AppDocData import AppDocData

        res_list = []
        ocr_image = None
        try:
            app_doc_data = AppDocData.instance()
            project = app_doc_data.getCurrentProject()

            configs = app_doc_data.getConfigs('Text Size', 'Max Text Size')
            maxTextSize = int(configs[0].value) if 1 == len(configs) else 100
            configs = app_doc_data.getConfigs('Text Size', 'Min Text Size')
            minSize = int(configs[0].value) if 1 == len(configs) else 15

            ocr_image = imgGray.copy()  # np.ones(imgGray.shape, np.uint8) * 255

            not_containing_bbox, binary_image = self.getTextBox(ocr_image, imgGray, maxTextSize, minSize)

            rects = []

            for bbox in not_containing_bbox:
                x, y = bbox.left(), bbox.top()
                w, h = bbox.width(), bbox.height()
                img = binary_image[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
                img = cv2.dilate(img, np.ones((2, 2), np.uint8))
                img = cv2.bitwise_not(img)

                # vote on the orientation of the box: a tall glyph is typically a
                # character of horizontal text, a wide glyph one of vertical text
                horizontal, max_width = 0, 0
                vertical, max_height = 0, 0
                _contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                for xx in _contours:
                    [_x, _y, _w, _h] = cv2.boundingRect(xx)

                    if min(_w, _h) / max(_w, _h) < 0.3:
                        continue

                    max_width = _x if _x > max_width else max_width
                    max_height = _y if _y > max_height else max_height

                    if (_w < _h) or (_w > maxTextSize > _h):  # count characters that look horizontal
                        horizontal += 1  # + (_w * _h) / (w * h)
                    else:
                        vertical += 1  # + (_w * _h) / (w * h)

                if (w < minSize and h < minSize) or (max_width > maxTextSize and max_height > maxTextSize):
                    continue  # skip too small or too big one

                rects.append([0 if horizontal >= vertical else 90, QRect(x, y, w, h)])

            configs = app_doc_data.getConfigs('Text Recognition', 'Merge Size')
            mergeSize = int(configs[0].value) if 1 == len(configs) else 10
            # merge rectangles
            interestings = []
            while rects:
                rect = rects.pop()

                if 0 == rect[0]:  # x-direction text
                    rectExpand = rect[1].adjusted(-mergeSize, 0, mergeSize, 0)
                    matches = [x for x in rects if (x[0] == rect[0]) and
                               abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height()) * 0.5 and
                               abs(x[1].center().y() - rect[1].center().y()) < rect[1].height() * 0.25 and
                               rectExpand.intersects(x[1])]
                else:  # y-direction text
                    rectExpand = rect[1].adjusted(0, -mergeSize, 0, mergeSize)
                    matches = [x for x in rects if (x[0] == rect[0]) and
                               abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width()) * 0.5 and
                               abs(x[1].center().x() - rect[1].center().x()) < rect[1].width() * 0.25 and
                               rectExpand.intersects(x[1])]

                if matches:
                    for _rect in matches:
                        rect[1] = rect[1].united(_rect[1])
                        if _rect in rects:
                            rects.remove(_rect)
                    rects.append(rect)
                else:
                    interestings.append(rect)

            for rect in interestings:
                matches = [_rect for _rect in interestings if rect != _rect and _rect[1].contains(rect[1])]
                # if there are no boxes which contain it
                if not matches:
                    angle = rect[0]
                    res_list.append(ti.TextInfo('', round(offset_x) + rect[1].x(), round(offset_y) + rect[1].y(),
                                                rect[1].width(), rect[1].height(), angle))
        except Exception as ex:
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            print(message)

        return res_list, ocr_image

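    # A minimal usage sketch of the contour-based path (the image path is
    # hypothetical, and a project must be open so the configs resolve):
    #
    #   detector = TextDetector()
    #   gray = cv2.imread('drawing.png', cv2.IMREAD_GRAYSCALE)
    #   text_infos, ocr_img = detector.getTextAreaInfo(gray, 0, 0)
    #   for info in text_infos:
    #       print(info.getX(), info.getY(), info.getW(), info.getH(), info.getAngle())
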
    def get_text_image_tile(self, img, size=[1300, 1300], overlap=100):
        """ return image tiles covering the given image, padded to a multiple of the tile size """
        width, height = img.shape[1], img.shape[0]
        width_count, height_count = width // size[0] + 1, height // size[1] + 1
        b_width, b_height = width_count * size[0], height_count * size[1]
        b_img = np.zeros((b_height, b_width), np.uint8) + 255
        b_img[:height, :width] = img[:, :]

        tile_info_list = []
        for row in range(height_count):
            for col in range(width_count):
                # tile width depends on the column position and tile height on the
                # row position; interior tiles carry the overlap on both sides
                t_width = size[0] if width_count == 1 else (
                    size[0] + overlap * 2 if col != 0 and col != width_count - 1 else size[0] + overlap)
                t_height = size[1] if height_count == 1 else (
                    size[1] + overlap * 2 if row != 0 and row != height_count - 1 else size[1] + overlap)

                t_y = 0 if row == 0 else row * size[1] - overlap
                t_x = 0 if col == 0 else col * size[0] - overlap
                t_img = b_img[t_y:t_y + t_height, t_x:t_x + t_width]

                tile_info_list.append([row, col, t_x, t_y, t_img.copy()])

                #Image.fromarray(tile_info_list[-1][4]).show()

        return tile_info_list

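    # Tile geometry sketch: each entry above is [row, col, t_x, t_y, tile_image]
    # with (t_x, t_y) the tile origin inside the white-padded image. For a
    # 2000x1500 px drawing with the default 1300 px tiles and 100 px overlap,
    # width_count == height_count == 2, so the first column spans x in [0, 1400)
    # and the second x in [1200, 2600); neighboring tiles share a 200 px band,
    # which is why the per-tile CRAFT boxes are offset and merged afterwards.
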
    def get_text_box_using_craft(self, imgGray, offset_x, offset_y, web=False):
        """ get text boxes by using CRAFT """

        from AppWebService import AppWebService
        from AppDocData import AppDocData

        app_doc_data = AppDocData.instance()
        project = app_doc_data.getCurrentProject()

        ocr_image = imgGray.copy()

        configs = app_doc_data.getConfigs('Text Size', 'Max Text Size')
        maxTextSize = int(configs[0].value) if 1 == len(configs) else 100
        configs = app_doc_data.getConfigs('Text Size', 'Min Text Size')
        minSize = int(configs[0].value) if 1 == len(configs) else 15

        binary_image = cv2.threshold(ocr_image, 200, 255, cv2.THRESH_BINARY)[1]

        score_path = os.path.join(project.getTempPath(), 'OCR_CRAFT_SCORE_{}.png'.format(app_doc_data.imgName))
        img_path = os.path.join(project.getTempPath(), 'OCR_CRAFT_{}.png'.format(app_doc_data.imgName))

        if not web:
            sys.path.insert(0, os.path.dirname(os.path.realpath('./')) + '\\WebServer\\CRAFT_pytorch_master')
            import text_craft

            boxes = text_craft.get_text_box(ocr_image, img_path, score_path,
                                            os.path.dirname(os.path.realpath('./')) +
                                            '\\WebServer\\CRAFT_pytorch_master\\weights\\craft_mlt_25k.pth')
        elif False:  # disabled: whole-image web request
            app_web_service = AppWebService()
            boxes = app_web_service.request_text_box(ocr_image, img_path, score_path)
        else:
            app_web_service = AppWebService()
            #boxes = app_web_service.request_text_box(ocr_image, img_path, score_path)

            tile_image_infos = self.get_text_image_tile(ocr_image)
            img_infos = app_web_service.request_text_box_tile(tile_image_infos, img_path, score_path)

            # translate each tile-local box into drawing coordinates
            boxes = []
            for info in img_infos:
                for box in info[5]:
                    box[0] = box[0] + info[2]
                    box[1] = box[1] + info[3]
                    box[4] = box[4] + info[2]
                    box[5] = box[5] + info[3]

                boxes.extend(info[5])

        rects = []

        for box in boxes:
            rects.append(QRect(box[0], box[1], box[4] - box[0], box[5] - box[1]))

        # merge tile text boxes: group rectangles whose x- and y-intervals both
        # overlap (i.e. the rectangles intersect), then replace each group with
        # its bounding rectangle
        overlap_merges = []
        for rect1 in rects:
            for rect2 in rects:
                if rect1 is rect2:
                    continue
                l1, l2 = rect1.left(), rect2.left()
                r1, r2 = rect1.right(), rect2.right()
                l_x, s_x = [l1, r1], [l2, r2]
                t1, t2 = rect1.top(), rect2.top()
                b1, b2 = rect1.bottom(), rect2.bottom()
                l_y, s_y = [t1, b1], [t2, b2]
                if not (max(l_x) < min(s_x) or max(s_x) < min(l_x)) and \
                        not (max(l_y) < min(s_y) or max(s_y) < min(l_y)):
                    inserted = False
                    for merge in overlap_merges:
                        if (rect1 in merge) and (rect2 in merge):
                            inserted = True
                            break
                        elif (rect1 in merge) and (rect2 not in merge):
                            merge.append(rect2)
                            inserted = True
                            break
                        elif (rect2 in merge) and (rect1 not in merge):
                            merge.append(rect1)
                            inserted = True
                            break
                    if not inserted:
                        overlap_merges.append([rect1, rect2])

        for merge in overlap_merges:
            for rect in merge:
                if rect in rects:
                    rects.remove(rect)
                else:
                    pass
                    #print(str(rect))

        for merge in overlap_merges:
            max_x, max_y, min_x, min_y = 0, 0, sys.maxsize, sys.maxsize
            for rect in merge:
                if rect.left() < min_x:
                    min_x = rect.left()
                if rect.right() > max_x:
                    max_x = rect.right()
                if rect.top() < min_y:
                    min_y = rect.top()
                if rect.bottom() > max_y:
                    max_y = rect.bottom()

            rect = QRect(min_x, min_y, max_x - min_x, max_y - min_y)
            rects.append(rect)
        # up to here

        # merge adjacent text boxes
        configs = app_doc_data.getConfigs('Text Recognition', 'Merge Size')
        mergeSize = int(configs[0].value) if 1 == len(configs) else 10
        #gap_size = mergeSize / 2
        gap_size = 3

        # classify boxes into horizontal/vertical text and drop slivers
        verticals = []
        horizontals = []
        invalid_rects = []
        for rect in rects:
            if rect.width() < minSize and rect.height() < maxTextSize:
                rect._vertical = False
                horizontals.append(rect)
            elif rect.height() < minSize and rect.width() < maxTextSize:
                rect._vertical = True
                verticals.append(rect)
            elif rect.width() < minSize or rect.height() < minSize:
                invalid_rects.append(rect)
            elif rect.height() > rect.width():
                rect._vertical = True
                verticals.append(rect)
            else:
                rect._vertical = False
                horizontals.append(rect)

        for rect in invalid_rects:
            rects.remove(rect)

        v_merges = []
        for vertical1 in verticals:
            for vertical2 in verticals:
                if vertical1 is vertical2:
                    continue
                if abs(vertical1.center().x() - vertical2.center().x()) < gap_size:
                    t1, t2 = vertical1.top() - mergeSize, vertical2.top() - mergeSize
                    b1, b2 = vertical1.bottom() + mergeSize, vertical2.bottom() + mergeSize
                    l_x_y, s_x_y = [t1, b1], [t2, b2]
                    if not (max(l_x_y) < min(s_x_y) or max(s_x_y) < min(l_x_y)):
                        inserted = False
                        for merge in v_merges:
                            if vertical1 in merge and vertical2 in merge:
                                inserted = True
                                break
                            elif vertical1 in merge and vertical2 not in merge:
                                merge.append(vertical2)
                                inserted = True
                                break
                            elif vertical2 in merge and vertical1 not in merge:
                                merge.append(vertical1)
                                inserted = True
                                break
                        if not inserted:
                            v_merges.append([vertical1, vertical2])

        h_merges = []
        for horizontal1 in horizontals:
            for horizontal2 in horizontals:
                if horizontal1 is horizontal2:
                    continue
                if abs(horizontal1.center().y() - horizontal2.center().y()) < gap_size:
                    l1, l2 = horizontal1.left() - mergeSize, horizontal2.left() - mergeSize
                    r1, r2 = horizontal1.right() + mergeSize, horizontal2.right() + mergeSize
                    l_x_y, s_x_y = [l1, r1], [l2, r2]
                    if not (max(l_x_y) < min(s_x_y) or max(s_x_y) < min(l_x_y)):
                        inserted = False
                        for merge in h_merges:
                            if horizontal1 in merge and horizontal2 in merge:
                                inserted = True
                                break
                            elif horizontal1 in merge and horizontal2 not in merge:
                                merge.append(horizontal2)
                                inserted = True
                                break
                            elif horizontal2 in merge and horizontal1 not in merge:
                                merge.append(horizontal1)
                                inserted = True
                                break
                        if not inserted:
                            h_merges.append([horizontal1, horizontal2])

        for merge in v_merges + h_merges:
            for rect in merge:
                if rect in rects:
                    rects.remove(rect)
                else:
                    pass
                    #print(str(rect))

        for merge in v_merges + h_merges:
            max_x, max_y, min_x, min_y = 0, 0, sys.maxsize, sys.maxsize
            for rect in merge:
                if rect.left() < min_x:
                    min_x = rect.left()
                if rect.right() > max_x:
                    max_x = rect.right()
                if rect.top() < min_y:
                    min_y = rect.top()
                if rect.bottom() > max_y:
                    max_y = rect.bottom()

            rect = QRect(min_x, min_y, max_x - min_x, max_y - min_y)
            if merge in v_merges:
                rect._vertical = True
            else:
                rect._vertical = False
            rects.append(rect)
        # up to here

        res_rects = []
        for rect in rects:
            res_rects.append(ti.TextInfo('', round(offset_x) + rect.x(), round(offset_y) + rect.y(), rect.width(),
                                         rect.height(), 90 if rect._vertical else 0))

        return res_rects, binary_image

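    # A minimal usage sketch of the CRAFT path, assuming the text-detection web
    # service wrapped by AppWebService is reachable for the current project:
    #
    #   detector = TextDetector()
    #   text_infos, binary = detector.get_text_box_using_craft(gray, 0, 0, web=True)
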
    def getTextBox(self, ocr_image, imgGray, maxTextSize, minSize):
        """ get text boxes """
        from AppDocData import AppDocData

        app_doc_data = AppDocData.instance()
        project = app_doc_data.getCurrentProject()

        cv2.rectangle(ocr_image, (0, 0), ocr_image.shape[::-1], (255, 255, 255), -1)

        mask = cv2.threshold(imgGray, 200, 255, cv2.THRESH_BINARY)[1]

        contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        for contour in contours:
            # remove too big ones or horizontal/vertical lines
            [x, y, w, h] = cv2.boundingRect(contour)
            area = cv2.contourArea(contour, True)

            # skip one whose size is greater than the max size or less than the minimum size
            if (w > maxTextSize or h > maxTextSize) or (w <= minSize and h <= minSize):
                #cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), -1)
                continue

            if area >= 0:
                cv2.drawContours(ocr_image, [contour], -1, (0, 0, 0), -1)
                #cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), 1)
            #else:
            #    cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), -1)

        path = os.path.join(project.getTempPath(), 'OCR_{}.png'.format(app_doc_data.imgName))
        cv2.imwrite(path, ocr_image)

        """
        east = False
        if east:
            # define the two output layer names for the EAST detector model that
            # we are interested in -- the first is the output probabilities and the
            # second can be used to derive the bounding box coordinates of text
            layerNames = [
                "feature_fusion/Conv_7/Sigmoid",
                "feature_fusion/concat_3"]

            # load the pre-trained EAST text detector
            net = cv2.dnn.readNet("C:\\ProgramData\\Digital PID\\frozen_east_text_detection.pb")

            (H, W) = ocr_image.shape[:2]
            # construct a blob from the image and then perform a forward pass of
            # the model to obtain the two output layer sets
            blob = cv2.dnn.blobFromImage(ocr_image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
            net.setInput(blob)
            (scores, geometry) = net.forward(layerNames)

            # decode the predictions, then apply non-maxima suppression to
            # suppress weak, overlapping bounding boxes
            (rects, confidences) = self.decode_predictions(scores, geometry)
            boxes = non_max_suppression(np.array(rects), probs=confidences)
            # loop over the bounding boxes
            for (startX, startY, endX, endY) in boxes:
                pass
        else:
        """
        configs = app_doc_data.getConfigs('Text Recognition', 'Expand Size')
        expand_size = int(configs[0].value) if 1 == len(configs) else 10
        configs = app_doc_data.getConfigs('Text Recognition', 'Shrink Size')
        shrinkSize = int(configs[0].value) if 1 == len(configs) else 0

        binary_image = cv2.threshold(ocr_image, 200, 255, cv2.THRESH_BINARY)[1]
        eroded = cv2.erode(binary_image, np.ones((expand_size, expand_size), np.uint8))
        eroded = cv2.bitwise_not(eroded)

        bboxes = []
        contours, hierarchy = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            area = cv2.contourArea(contour, True)
            if area < 0:
                [x, y, w, h] = cv2.boundingRect(contour)
                bboxes.append(QRect(x, y, w, h))

        # exclude bounding boxes that contain a child bounding box
        not_containing_bbox = []
        for bbox in bboxes:
            matches = [_bbox for _bbox in bboxes if bbox != _bbox and bbox.contains(_bbox)]
            if not matches:
                not_containing_bbox.append(bbox)
        # up to here

        return not_containing_bbox, binary_image

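    # Note on cv2.contourArea(contour, True) above: with oriented=True OpenCV
    # returns a signed area whose sign encodes the contour winding direction,
    # which is what the 'area < 0' / 'area >= 0' tests rely on to separate one
    # contour orientation from the other after thresholding and erosion.
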
    '''
    @brief recognize text of given text info
    @author humkyung
    @date 2018.07.24
    @history change parameter updateProgressSignal to worker
             2018.11.08 euisung add white char list check process on db
    '''
    @staticmethod
    def recognizeTextFromImage(tInfos, imgOCR, offset, searchedSymbolList, worker, listWidget, maxProgressValue):
        import re
        res = []

        app_doc_data = AppDocData.instance()

        try:
            for tInfo in tInfos:
                x = tInfo.getX() - round(offset[0])
                y = tInfo.getY() - round(offset[1])
                img = imgOCR[y:y + tInfo.getH(), x:x + tInfo.getW()]

                # set angle to 0 if the symbol containing the text area is instrumentation
                category = None
                if searchedSymbolList:
                    contains = [symbol for symbol in searchedSymbolList if symbol.contains(tInfo)]
                    if contains:
                        _type = contains[0].getType()
                        category = app_doc_data.getSymbolCategoryByType(_type)
                        if 'Instrumentation' == category:
                            tInfo.setAngle(0)
                # up to here

                white_char_list = app_doc_data.getConfigs('Text Recognition', 'White Character List')
                resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), language=app_doc_data.OCRData,
                                                  conf=white_char_list[0].value if white_char_list else '')

                if resultTextInfo and len(resultTextInfo) > 0:
                    for result in resultTextInfo:
                        result.setX(result.getX() + round(offset[0]))
                        result.setY(result.getY() + round(offset[1]))
                        if 'Instrumentation' == category:
                            text = re.sub('[^a-zA-Z0-9]+', '', result.getText())
                            result.setText(text)

                    res.extend(resultTextInfo)

                    if listWidget is not None:
                        item = QListWidgetItem(
                            '{},{},{} is recognized'.format(resultTextInfo[0].getX(), resultTextInfo[0].getY(),
                                                            resultTextInfo[0].getText()))
                        listWidget.addItem(item)
                else:
                    pass

                if worker is not None:
                    worker.updateProgress.emit(maxProgressValue,
                                               resultTextInfo[0].getText() if resultTextInfo is not None and 1 == len(
                                                   resultTextInfo) else None)
        except Exception as ex:
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            if worker is not None:
                worker.displayLog.emit(MessageType.Error, message)

        return res

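    # A minimal sketch of a single OCR call as issued above (the signature
    # follows the uses of TOCR.getTextInfo in this module; the crop coordinates
    # are illustrative):
    #
    #   crop = imgOCR[y:y + h, x:x + w]
    #   infos = TOCR.getTextInfo(crop, (x, y), 0, language=app_doc_data.OCRData)
    #   texts = [info.getText() for info in infos]
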
    '''
    @brief read image drawing and then remove text
    @author jwkim
    @date
    @history humkyung 2018.04.06 check if file exists
             Jeongwoo 2018.05.09 Use Tesseract OCR after Azure OCR (Azure OCR : Getting text area)
             Jeongwoo 2018.05.25 Add condition on if-statement
             Jeongwoo 2018.06.05 Get text area data list by config.type
             Jeongwoo 2018.06.08 Add angle Parameter on TOCR.getTextInfo
             humkyung 2018.06.16 update progress bar while recognizing text
             humkyung 2018.07.03 remove white space and replace given oldStr with newStr
             humkyung 2018.07.07 change method name to recognizeText
             euisung 2018.11.08 add white char list check process on db
             euisung 2018.11.12 add title block properties
    '''

    def recognizeText(self, imgSrc, offset, tInfoList, searchedSymbolList, worker, listWidget, maxProgressValue,
                      onlyTextArea=False):
        import concurrent.futures as futures
        from App import App
        from Area import Area

        try:
            self.otherTextInfoList = []
            self.titleBlockTextInfoList = []
            self.textInfoList = []

            app_doc_data = AppDocData.instance()
            project = app_doc_data.getCurrentProject()

            # split the text areas into roughly equal chunks, one per worker thread
            text_info_array = np.array_split(tInfoList, App.THREAD_MAX_WORKER
                                             if len(tInfoList) > App.THREAD_MAX_WORKER else len(tInfoList))
            with futures.ThreadPoolExecutor(max_workers=App.THREAD_MAX_WORKER) as pool:
                future_text = {pool.submit(TextDetector.recognizeTextFromImage, tInfo, imgSrc, offset,
                                           searchedSymbolList, worker, listWidget, maxProgressValue):
                                   tInfo for tInfo in text_info_array}

                for future in futures.as_completed(future_text):
                    try:
                        data = future.result()
                        if data:
                            self.textInfoList.extend(data)
                    except Exception as ex:
                        message = 'error occurred({}) in {}:{}'.format(repr(ex),
                                                                       sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                                       sys.exc_info()[-1].tb_lineno)
                        if worker:
                            worker.displayLog.emit(MessageType.Error, message)

            if onlyTextArea:
                return
            # parse texts in areas except the Drawing area
            whiteCharList = app_doc_data.getConfigs('Text Recognition', 'White Character List')
            for area in app_doc_data.getAreaList():
                if area.name == 'Drawing': continue

                if area.name == 'Note':
                    if area is not None and hasattr(area, 'img') and area.img is not None:
                        if len(whiteCharList) == 0:
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), 0, language='eng')
                        else:
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), 0, language='eng',
                                                     conf=whiteCharList[0].value)
                        self.otherTextInfoList.append([area.name, texts])
                else:
                    img = app_doc_data.imgSrc[round(area.y):round(area.y + area.height),
                                              round(area.x):round(area.x + area.width)]
                    if len(whiteCharList) == 0:
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng')
                    else:
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng',
                                                 conf=whiteCharList[0].value)
                    if texts is not None and len(texts) > 0:
                        if area.name == 'Unit':
                            app_doc_data.activeDrawing.setAttr('Unit', texts[0].getText())
                        self.otherTextInfoList.append([area.name, texts])

            titleBlockProps = app_doc_data.getTitleBlockProperties()
            if titleBlockProps:
                for titleBlockProp in titleBlockProps:
                    area = Area(titleBlockProp[0])
                    area.parse(titleBlockProp[2])
                    if not (titleBlockProp[3] and titleBlockProp[3] != ''):
                        img = app_doc_data.imgSrc[round(area.y):round(area.y + area.height),
                                                  round(area.x):round(area.x + area.width)]
                        if len(whiteCharList) == 0:
                            texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=app_doc_data.OCRData)
                        else:
                            texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng',
                                                     conf=whiteCharList[0].value)
                        texts = [ti.TextInfo('\n'.join([textInfo.getText() for textInfo in texts]), area.x, area.y,
                                             area.width, area.height, 0)]
                    else:
                        texts = [ti.TextInfo(titleBlockProp[3], area.x, area.y, area.width, area.height, 0)]
                    self.titleBlockTextInfoList.append([area.name, texts])

            if worker is not None: worker.updateProgress.emit(maxProgressValue, None)

            """
            for text_box in tInfoList:
                x = text_box.getX()
                y = text_box.getY()
                cv2.rectangle(imgSrc, (x - offset[0], y - offset[1]),
                              (x - offset[0] + text_box.getW(), y - offset[1] + text_box.getH()), 1, 1)
            cv2.imwrite('c:\\Temp\\text_box.png', imgSrc)
            """
        except Exception as ex:
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            if worker:
                worker.displayLog.emit(MessageType.Error, message)

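    # The np.array_split call above chunks tInfoList into at most
    # App.THREAD_MAX_WORKER roughly equal sublists, one per pooled thread; e.g.
    # np.array_split(list(range(10)), 4) yields chunks of sizes 3, 3, 2, 2.
    # Note it raises ValueError when tInfoList is empty (split count of 0).
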
    '''
    @brief remove text from image
    @author humkyung
    @date 2018.07.24
    '''

    def remove_text_from_image(self, imgSrc, offset):
        # remove recognized text from the image by filling each box with white
        for text in self.textInfoList:
            x = round(text.getX() - offset[0])
            y = round(text.getY() - offset[1])
            width = round(text.getW())
            height = round(text.getH())
            cv2.rectangle(imgSrc, (x, y), (x + width, y + height), 255, -1)
        # up to here

        # DEBUG
        #cv2.imwrite("c:\\temp\\remove_texts.png", imgSrc)

if __name__ == "__main__":
    image = cv2.imread('d:\\Projects\\DTIPID\\Projects\\IX3\\Temp\\OCR_Document_2_Page1.png')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    output = gray.copy()
    gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]

    expand_size = 5
    eroded = cv2.erode(gray, np.ones((expand_size, expand_size), np.uint8))
    eroded = cv2.bitwise_not(eroded)
    cv2.imwrite('c:\\temp\\eroded.png', eroded)

    bboxes = []
    contours, hierarchy = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        area = cv2.contourArea(contour, True)
        if area < 0:
            [x, y, w, h] = cv2.boundingRect(contour)
            bboxes.append(QRect(x, y, w, h))

    # exclude bounding boxes that contain a child bounding box
    not_containing_bbox = []
    for bbox in bboxes:
        matches = [_bbox for _bbox in bboxes if bbox != _bbox and bbox.contains(_bbox)]
        if not matches:
            not_containing_bbox.append(bbox)
    # up to here

    rects = []
    for bbox in not_containing_bbox:
        x, y = bbox.left(), bbox.top()
        w, h = bbox.width(), bbox.height()
        img = gray[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
        img = cv2.bitwise_not(img)

        horizontal, max_width = 0, 0
        vertical, max_height = 0, 0
        _contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for xx in _contours:
            [_x, _y, _w, _h] = cv2.boundingRect(xx)

            max_width = _x if _x > max_width else max_width
            max_height = _y if _y > max_height else max_height

            if (_w * 0.9 < _h) or (_w > 80 > _h):  # tall glyph or long flat run: votes for horizontal text
                horizontal += 1 + (_w * _h) / (w * h)
            else:
                vertical += 1 + (_w * _h) / (w * h)

        if (w < 5 and h < 5) or (max_width > 80 and max_height > 80):
            continue  # skip too small or too big one

        rects.append([0 if horizontal > vertical else 90, QRect(x, y, w, h)])

    merge_size = 10
    # merge rectangles
    interestings = []
    while rects:
        rect = rects.pop()

        if 0 == rect[0]:  # x-direction text
            rect_expand = rect[1].adjusted(-merge_size, 0, merge_size, 0)
            matches = [x for x in rects if (x[0] == rect[0]) and
                       abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height()) * 0.5 and
                       abs(x[1].center().y() - rect[1].center().y()) < rect[1].height() * 0.25 and
                       rect_expand.intersects(x[1].adjusted(-merge_size, 0, merge_size, 0))]
        else:  # y-direction text
            rect_expand = rect[1].adjusted(0, -merge_size, 0, merge_size)
            matches = [x for x in rects if (x[0] == rect[0]) and
                       abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width()) * 0.5 and
                       abs(x[1].center().x() - rect[1].center().x()) < rect[1].width() * 0.25 and
                       rect_expand.intersects(x[1].adjusted(0, -merge_size, 0, merge_size))]

        if matches:
            for _rect in matches:
                rect[1] = rect[1].united(_rect[1])
                if _rect in rects:
                    rects.remove(_rect)
            rects.append(rect)
        else:
            interestings.append(rect)

    for orientation, bbox in interestings:
        cv2.rectangle(output, (bbox.x(), bbox.y()), (bbox.right(), bbox.bottom()), (0, 255, 0), 1)

    """
    mser = cv2.MSER_create(_min_area=10)
    regions, _ = mser.detectRegions(gray)  # Get the text area
    hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]  # Drawing text areas
    # Processing irregular detection boxes into rectangular boxes
    keep = []
    for c in hulls:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 1)
    """
    #cv2.polylines(output, hulls, 1, (0, 255, 0))
    cv2.imwrite('c:\\temp\\mser.png', output)