
        return None, None

    def decode_predictions(self, scores, geometry):
        # grab the number of rows and columns from the scores volume, then
        # initialize our set of bounding box rectangles and corresponding
        # confidence scores
        (numRows, numCols) = scores.shape[2:4]
        rects = []
        confidences = []

        # loop over the number of rows
        for y in range(0, numRows):
            # extract the scores (probabilities), followed by the
            # geometrical data used to derive potential bounding box
            # coordinates that surround text
            scoresData = scores[0, 0, y]
            xData0 = geometry[0, 0, y]
            xData1 = geometry[0, 1, y]
            xData2 = geometry[0, 2, y]
            xData3 = geometry[0, 3, y]
            anglesData = geometry[0, 4, y]

            # loop over the number of columns
            for x in range(0, numCols):
                # if our score does not have sufficient probability,
                # ignore it
                if scoresData[x] < 0.5:  # args["min_confidence"]:
                    continue

                # compute the offset factor as our resulting feature
                # maps will be 4x smaller than the input image
                (offsetX, offsetY) = (x * 4.0, y * 4.0)

                # extract the rotation angle for the prediction and
                # then compute the sin and cosine
                angle = anglesData[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                # use the geometry volume to derive the width and height
                # of the bounding box
                h = xData0[x] + xData2[x]
                w = xData1[x] + xData3[x]

                # compute both the starting and ending (x, y)-coordinates
                # for the text prediction bounding box
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
                endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
                startX = int(endX - w)
                startY = int(endY - h)

                # add the bounding box coordinates and probability score
                # to our respective lists
                rects.append((startX, startY, endX, endY))
                confidences.append(scoresData[x])

        # return a tuple of the bounding boxes and associated confidences
        return (rects, confidences)

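    # How decode_predictions is meant to be consumed (this mirrors the
    # disabled EAST branch in getTextAreaInfo below): the raw candidate boxes
    # are collapsed with non-maxima suppression from imutils, e.g.
    #
    #   from imutils.object_detection import non_max_suppression
    #   (rects, confidences) = self.decode_predictions(scores, geometry)
    #   boxes = non_max_suppression(np.array(rects), probs=confidences)
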
    '''
    @brief Get Text Area info by contour
    @author Jeongwoo
    ...
    humkyung 2018.06.18 fixed logic to detect text area
    '''

    def getTextAreaInfo(self, imgGray, offset_x, offset_y):
        #from imutils.object_detection import non_max_suppression
        from AppDocData import AppDocData

        list = []  # NOTE: shadows the built-in 'list'; kept as-is because later code references it
        ...
        maxTextSize = int(configs[0].value) if 1 == len(configs) else 100
        minSize = 5

        ocr_image = imgGray.copy()  # np.ones(imgGray.shape, np.uint8) * 255
        cv2.rectangle(ocr_image, (0, 0), ocr_image.shape[::-1], (255, 255, 255), -1)

        mask = cv2.threshold(imgGray, 200, 255, cv2.THRESH_BINARY)[1]
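        # At this point ocr_image is an all-white canvas the size of the
        # input, and mask keeps pixels brighter than 200 at 255 (everything
        # darker becomes 0); the contour pass below re-draws only plausible
        # text strokes onto the canvas in black.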
        contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        for contour in contours:
            ...
            area = cv2.contourArea(contour, True)

            # skip contours larger than the max size or smaller than the minimum size
            if (w > maxTextSize or h > maxTextSize) or (w <= minSize and h <= minSize):
                cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), -1)
                continue

            if area >= 0:
                cv2.drawContours(ocr_image, [contour], -1, (0, 0, 0), -1)
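            # NOTE: contourArea(..., True) returns a signed area whose sign
            # follows the contour's winding direction, so outer boundaries and
            # holes carry opposite signs; area >= 0 here presumably selects
            # the glyph regions, which are filled black for OCR.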
            ...
        path = os.path.join(project.getTempPath(), 'OCR_{}.png'.format(app_doc_data.imgName))
        cv2.imwrite(path, ocr_image)

        """
        east = False
        if east:
            # define the two output layer names for the EAST detector model that
            # we are interested in -- the first is the output probabilities and the
            # second can be used to derive the bounding box coordinates of text
            layerNames = [
                "feature_fusion/Conv_7/Sigmoid",
                "feature_fusion/concat_3"]

            # load the pre-trained EAST text detector
            net = cv2.dnn.readNet("C:\\ProgramData\\Digital PID\\frozen_east_text_detection.pb")

            (H, W) = ocr_image.shape[:2]
            # construct a blob from the image and then perform a forward pass of
            # the model to obtain the two output layer sets
            blob = cv2.dnn.blobFromImage(ocr_image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
            net.setInput(blob)
            (scores, geometry) = net.forward(layerNames)

            # decode the predictions, then apply non-maxima suppression to
            # suppress weak, overlapping bounding boxes
            (rects, confidences) = self.decode_predictions(scores, geometry)
            boxes = non_max_suppression(np.array(rects), probs=confidences)
            # loop over the bounding boxes
            for (startX, startY, endX, endY) in boxes:
                pass
        else:
        """
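        # NOTE: the entire EAST branch above is disabled by wrapping it in a
        # string literal; the dangling 'else:' belongs to that string, so the
        # contour-based fallback below always runs.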
        rects = []
        configs = app_doc_data.getConfigs('Text Recognition', 'Expand Size')
        expand_size = int(configs[0].value) if 1 == len(configs) else 10
        configs = app_doc_data.getConfigs('Text Recognition', 'Shrink Size')
        shrinkSize = int(configs[0].value) if 1 == len(configs) else 0

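        # Sketch of what follows: eroding the binarized image with an
        # expand_size x expand_size kernel grows the dark strokes, so glyphs
        # closer than roughly expand_size pixels merge into single blobs;
        # inverting afterwards yields white blobs on black, the polarity
        # findContours expects.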
        binary_image = cv2.threshold(ocr_image, 200, 255, cv2.THRESH_BINARY)[1]
        eroded = cv2.erode(binary_image, np.ones((expand_size, expand_size), np.uint8))
        eroded = cv2.bitwise_not(eroded)

        bboxes = []
        ...
        for bbox in not_containing_bbox:
            x, y = bbox.left(), bbox.top()
            w, h = bbox.width(), bbox.height()
            img = binary_image[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
            img = cv2.bitwise_not(img)

            horizontal, max_width = 0, 0
            ...
                max_width = _x if _x > max_width else max_width
                max_height = _y if _y > max_height else max_height

                if (_w*0.9 < _h) or (_w > maxTextSize > _h):  # glyph taller than wide (or a long low strip) -> horizontal text
                    horizontal += 1 + (_w * _h) / (w * h)
                else:
                    vertical += 1 + (_w * _h) / (w * h)
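                # Each inner contour casts a vote for the text direction,
                # weighted by the fraction of the box it covers; whichever sum
                # wins decides (in code elided here, mirrored by the __main__
                # harness below) whether the box is tagged 0 or 90 degree text.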
            ...
                rectExpand = rect[1].adjusted(-mergeSize, 0, mergeSize, 0)
                matches = [x for x in rects if (x[0] == rect[0]) and
                           abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height())*0.5 and
                           abs(x[1].center().y() - rect[1].center().y()) < rect[1].height()*0.25 and
                           rectExpand.intersects(x[1])]
            else:  # y-direction text
                rectExpand = rect[1].adjusted(0, -mergeSize, 0, mergeSize)
                matches = [x for x in rects if (x[0] == rect[0]) and
                           abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width())*0.5 and
                           abs(x[1].center().x() - rect[1].center().x()) < rect[1].width()*0.25 and
                           rectExpand.intersects(x[1])]

            if matches:
            ...
                # if there are no boxes which contain it
                if not matches:
                    angle = rect[0]
                    list.append(ti.TextInfo('', round(offset_x) + rect[1].x(), round(offset_y) + rect[1].y(), rect[1].width(),
                                            rect[1].height(), angle))
        except Exception as ex:
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
        ...
        # DEBUG
        #cv2.imwrite("c:\\temp\\remove_texts.png", imgSrc)


if __name__ == "__main__":
    image = cv2.imread('d:\\Projects\\DTIPID\\Projects\\IX3\\Temp\\OCR_Document_2_Page1.png')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    output = gray.copy()
    gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]

    expand_size = 5
    eroded = cv2.erode(gray, np.ones((expand_size, expand_size), np.uint8))
    eroded = cv2.bitwise_not(eroded)
    cv2.imwrite('c:\\temp\\eroded.png', eroded)

    bboxes = []
    contours, hierarchy = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        area = cv2.contourArea(contour, True)
        if area < 0:
            [x, y, w, h] = cv2.boundingRect(contour)
            bboxes.append(QRect(x, y, w, h))  # QRect is assumed to be imported at module level

    # exclude bounding boxes which contain a child bounding box
    not_containing_bbox = []
    for bbox in bboxes:
        matches = [_bbox for _bbox in bboxes if bbox != _bbox and bbox.contains(_bbox)]
        if not matches:
            not_containing_bbox.append(bbox)
    # up to here

    rects = []
    for bbox in not_containing_bbox:
        x, y = bbox.left(), bbox.top()
        w, h = bbox.width(), bbox.height()
        img = gray[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
        img = cv2.bitwise_not(img)

        horizontal, max_width = 0, 0
        vertical, max_height = 0, 0
        _contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for xx in _contours:
            [_x, _y, _w, _h] = cv2.boundingRect(xx)

            max_width = _x if _x > max_width else max_width
            max_height = _y if _y > max_height else max_height

            if (_w*0.9 < _h) or (_w > 80 > _h):  # glyph taller than wide (or a long low strip) -> horizontal text
                horizontal += 1 + (_w * _h) / (w * h)
            else:
                vertical += 1 + (_w * _h) / (w * h)

        if (w < 5 and h < 5) or (max_width > 80 and max_height > 80):
            continue  # skip boxes that are too small or too big

        rects.append([0 if horizontal > vertical else 90, QRect(x, y, w, h)])

    merge_size = 10
    # merge rectangles
    interestings = []
    while rects:
        rect = rects.pop()

        if 0 == rect[0]:  # x-direction text
            rect_expand = rect[1].adjusted(-merge_size, 0, merge_size, 0)
            matches = [x for x in rects if (x[0] == rect[0]) and
                       abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height()) * 0.5 and
                       abs(x[1].center().y() - rect[1].center().y()) < rect[1].height() * 0.25 and
                       rect_expand.intersects(x[1].adjusted(-merge_size, 0, merge_size, 0))]
        else:  # y-direction text
            rect_expand = rect[1].adjusted(0, -merge_size, 0, merge_size)
            matches = [x for x in rects if (x[0] == rect[0]) and
                       abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width()) * 0.5 and
                       abs(x[1].center().x() - rect[1].center().x()) < rect[1].width() * 0.25 and
                       rect_expand.intersects(x[1].adjusted(0, -merge_size, 0, merge_size))]

        if matches:
            for _rect in matches:
                rect[1] = rect[1].united(_rect[1])
                if _rect in rects:
                    rects.remove(_rect)
            rects.append(rect)
        else:
            interestings.append(rect)

    for orientation, bbox in interestings:
        cv2.rectangle(output, (bbox.x(), bbox.y()), (bbox.right(), bbox.bottom()), (0, 255, 0), 1)
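    # NOTE: output is single-channel here, so OpenCV uses only the first
    # component of (0, 255, 0) when drawing -- the boxes come out black
    # rather than green.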

    """
    mser = cv2.MSER_create(_min_area=10)
    regions, _ = mser.detectRegions(gray)  # Get the text area
    hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]  # Drawing text areas
    # Processing irregular detection boxes into rectangular boxes
    keep = []
    for c in hulls:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 1)
    """
    #cv2.polylines(output, hulls, 1, (0, 255, 0))
    cv2.imwrite('c:\\temp\\mser.png', output)