프로젝트

일반

사용자정보

개정판 8744138e

ID: 8744138eb90f9f74e8306c34f7b9264f20d61d81
상위 37c32a96
하위 71e34951

백흠경이(가) 약 5년 전에 추가함

issue #478: 텍스트 영역 인식 로직 수정

Change-Id: I9e112fa9603309ab64e593ded5c01cebdb3907f9

차이점 보기:

DTI_PID/DTI_PID/MainWindow.py
1469 1469
                with open(save_file_path, 'w', encoding='utf-8') as output_file:
1470 1470
                    output_file.write(self.prettify(svg))
1471 1471

  
1472
                """
1472
                """ create a svg file by using QSvgGenerator of pyqt5
1473 1473
                svg_gen = QSvgGenerator()
1474 1474

  
1475 1475
                svg_gen.setFileName(save_file_path)
DTI_PID/DTI_PID/Shapes/SymbolSvgItem.py
1611 1611
        try:
1612 1612
            node = Element('g')
1613 1613
            node.attrib['ID'] = str(self.uid)
1614
            node.attrib['class'] = self.name
1614 1615
            trans = self.sceneTransform()
1615 1616
            node.attrib['transform'] = f"matrix(" \
1616 1617
                                       f"{trans.m11()},{trans.m12()}," \
DTI_PID/DTI_PID/TextDetector.py
46 46

  
47 47
        return None, None
48 48

  
49
    def decode_predictions(self, scores, geometry):
50
        # grab the number of rows and columns from the scores volume, then
51
        # initialize our set of bounding box rectangles and corresponding
52
        # confidence scores
53
        (numRows, numCols) = scores.shape[2:4]
54
        rects = []
55
        confidences = []
56

  
57
        # loop over the number of rows
58
        for y in range(0, numRows):
59
            # extract the scores (probabilities), followed by the
60
            # geometrical data used to derive potential bounding box
61
            # coordinates that surround text
62
            scoresData = scores[0, 0, y]
63
            xData0 = geometry[0, 0, y]
64
            xData1 = geometry[0, 1, y]
65
            xData2 = geometry[0, 2, y]
66
            xData3 = geometry[0, 3, y]
67
            anglesData = geometry[0, 4, y]
68

  
69
            # loop over the number of columns
70
            for x in range(0, numCols):
71
                # if our score does not have sufficient probability,
72
                # ignore it
73
                if scoresData[x] < 0.5:  # args["min_confidence"]:
74
                    continue
75

  
76
                # compute the offset factor as our resulting feature
77
                # maps will be 4x smaller than the input image
78
                (offsetX, offsetY) = (x * 4.0, y * 4.0)
79

  
80
                # extract the rotation angle for the prediction and
81
                # then compute the sin and cosine
82
                angle = anglesData[x]
83
                cos = np.cos(angle)
84
                sin = np.sin(angle)
85

  
86
                # use the geometry volume to derive the width and height
87
                # of the bounding box
88
                h = xData0[x] + xData2[x]
89
                w = xData1[x] + xData3[x]
90

  
91
                # compute both the starting and ending (x, y)-coordinates
92
                # for the text prediction bounding box
93
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
94
                endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
95
                startX = int(endX - w)
96
                startY = int(endY - h)
97

  
98
                # add the bounding box coordinates and probability score
99
                # to our respective lists
100
                rects.append((startX, startY, endX, endY))
101
                confidences.append(scoresData[x])
102

  
103
        # return a tuple of the bounding boxes and associated confidences
104
        return (rects, confidences)
105

  
49 106
    '''
50 107
        @brief      Get Text Area info by contour
51 108
        @author     Jeongwoo
......
54 111
                    humkyung 2018.06.18 fixed logic to detect text area
55 112
    '''
56 113

  
57
    def getTextAreaInfo(self, imgGray, offsetX, offsetY):
114
    def getTextAreaInfo(self, imgGray, offset_x, offset_y):
115
        #from imutils.object_detection import non_max_suppression
58 116
        from AppDocData import AppDocData
59 117

  
60 118
        list = []
......
67 125
            maxTextSize = int(configs[0].value) if 1 == len(configs) else 100
68 126
            minSize = 5
69 127

  
70
            ocr_image = np.ones(imgGray.shape, np.uint8) * 255
71
            # binaryImg, mask = cv2.threshold(imgGray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
72
            binaryImg, mask = cv2.threshold(imgGray, 200, 255, cv2.THRESH_BINARY)
128
            ocr_image = imgGray.copy()  # np.ones(imgGray.shape, np.uint8) * 255
129
            cv2.rectangle(ocr_image, (0, 0), ocr_image.shape[::-1], (255, 255, 255), -1)
130

  
131
            mask = cv2.threshold(imgGray, 200, 255, cv2.THRESH_BINARY)[1]
73 132

  
74 133
            contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
75 134
            for contour in contours:
......
78 137
                area = cv2.contourArea(contour, True)
79 138

  
80 139
                # skip one which size is greater than max size or less then minimum size
81
                if area >= 0:
82
                    if (w > maxTextSize or h > maxTextSize) or (w <= minSize and h <= minSize):
83
                        continue
140
                if (w > maxTextSize or h > maxTextSize) or (w <= minSize and h <= minSize):
141
                    cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), -1)
142
                    continue
84 143

  
85 144
                if area >= 0:
86 145
                    cv2.drawContours(ocr_image, [contour], -1, (0, 0, 0), -1)
......
91 150
            path = os.path.join(project.getTempPath(), 'OCR_{}.png'.format(app_doc_data.imgName))
92 151
            cv2.imwrite(path, ocr_image)
93 152

  
153
            """
154
            east = False
155
            if east:
156
                # define the two output layer names for the EAST detector model that
157
                # we are interested -- the first is the output probabilities and the
158
                # second can be used to derive the bounding box coordinates of text
159
                layerNames = [
160
                    "feature_fusion/Conv_7/Sigmoid",
161
                    "feature_fusion/concat_3"]
162

  
163
                # load the pre-trained EAST text detector
164
                net = cv2.dnn.readNet("C:\\ProgramData\\Digital PID\\frozen_east_text_detection.pb")
165

  
166
                (H, W) = ocr_image.shape[:2]
167
                # construct a blob from the image and then perform a forward pass of
168
                # the model to obtain the two output layer sets
169
                blob = cv2.dnn.blobFromImage(ocr_image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
170
                net.setInput(blob)
171
                (scores, geometry) = net.forward(layerNames)
172

  
173
                # decode the predictions, then  apply non-maxima suppression to
174
                # suppress weak, overlapping bounding boxes
175
                (rects, confidences) = self.decode_predictions(scores, geometry)
176
                boxes = non_max_suppression(np.array(rects), probs=confidences)
177
                # loop over the bounding boxes
178
                for (startX, startY, endX, endY) in boxes:
179
                    pass
180
            else:
181
            """
94 182
            rects = []
95 183
            configs = app_doc_data.getConfigs('Text Recognition', 'Expand Size')
96
            expandSize = int(configs[0].value) if 1 == len(configs) else 10
184
            expand_size = int(configs[0].value) if 1 == len(configs) else 10
97 185
            configs = app_doc_data.getConfigs('Text Recognition', 'Shrink Size')
98 186
            shrinkSize = int(configs[0].value) if 1 == len(configs) else 0
99 187

  
100
            eroded = cv2.erode(ocr_image, np.ones((expandSize, expandSize), np.uint8))
188
            binary_image = cv2.threshold(ocr_image, 200, 255, cv2.THRESH_BINARY)[1]
189
            eroded = cv2.erode(binary_image, np.ones((expand_size, expand_size), np.uint8))
101 190
            eroded = cv2.bitwise_not(eroded)
102 191

  
103 192
            bboxes = []
......
119 208
            for bbox in not_containing_bbox:
120 209
                x, y = bbox.left(), bbox.top()
121 210
                w, h = bbox.width(), bbox.height()
122
                img = ocr_image[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
211
                img = binary_image[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
123 212
                img = cv2.bitwise_not(img)
124 213

  
125 214
                horizontal, max_width = 0, 0
......
131 220
                    max_width = _x if _x > max_width else max_width
132 221
                    max_height = _y if _y > max_height else max_height
133 222

  
134
                    if (_w < _h) or (_w > maxTextSize > _h):  # width is greater than height
223
                    if (_w*0.9 < _h) or (_w > maxTextSize > _h):  # width is greater than height
135 224
                        horizontal += 1 + (_w * _h) / (w * h)
136 225
                    else:
137 226
                        vertical += 1 + (_w * _h) / (w * h)
......
152 241
                    rectExpand = rect[1].adjusted(-mergeSize, 0, mergeSize, 0)
153 242
                    matches = [x for x in rects if (x[0] == rect[0]) and
154 243
                               abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height())*0.5 and
155
                               abs(x[1].center().y() - rect[1].center().y()) < rect[1].height()*0.5 and
244
                               abs(x[1].center().y() - rect[1].center().y()) < rect[1].height()*0.25 and
156 245
                               rectExpand.intersects(x[1])]
157 246
                else:               # y -direction text
158 247
                    rectExpand = rect[1].adjusted(0, -mergeSize, 0, mergeSize)
159 248
                    matches = [x for x in rects if (x[0] == rect[0]) and
160 249
                               abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width())*0.5 and
161
                               abs(x[1].center().x() - rect[1].center().x()) < rect[1].width()*0.5 and
250
                               abs(x[1].center().x() - rect[1].center().x()) < rect[1].width()*0.25 and
162 251
                               rectExpand.intersects(x[1])]
163 252

  
164 253
                if matches:
......
175 264
                # if there is no boxes which contains
176 265
                if not matches:
177 266
                    angle = rect[0]
178
                    list.append(ti.TextInfo('', round(offsetX) + rect[1].x(), round(offsetY) + rect[1].y(), rect[1].width(),
267
                    list.append(ti.TextInfo('', round(offset_x) + rect[1].x(), round(offset_y) + rect[1].y(), rect[1].width(),
179 268
                                            rect[1].height(), angle))
180 269
        except Exception as ex:
181 270
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
......
380 469
        # DEBUG
381 470
        #cv2.imwrite("c:\\temp\\remove_texts.png", imgSrc)
382 471

  
472

  
473
if __name__ == "__main__":
474
    image = cv2.imread('d:\\Projects\\DTIPID\\Projects\\IX3\\Temp\\OCR_Document_2_Page1.png')
475
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
476
    output = gray.copy()
477
    gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
478

  
479
    expand_size = 5
480
    eroded = cv2.erode(gray, np.ones((expand_size, expand_size), np.uint8))
481
    eroded = cv2.bitwise_not(eroded)
482
    cv2.imwrite('c:\\temp\\eroded.png', eroded)
483

  
484
    bboxes = []
485
    contours, hierarchy = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
486
    for contour in contours:
487
        area = cv2.contourArea(contour, True)
488
        if area < 0:
489
            [x, y, w, h] = cv2.boundingRect(contour)
490
            bboxes.append(QRect(x, y, w, h))
491

  
492
    # exclude bounding boxes contains child bounding box
493
    not_containing_bbox = []
494
    for bbox in bboxes:
495
        matches = [_bbox for _bbox in bboxes if bbox != _bbox and bbox.contains(_bbox)]
496
        if not matches:
497
            not_containing_bbox.append(bbox)
498
    # up to here
499

  
500
    rects = []
501
    for bbox in not_containing_bbox:
502
        x, y = bbox.left(), bbox.top()
503
        w, h = bbox.width(), bbox.height()
504
        img = gray[bbox.top():bbox.bottom(), bbox.left():bbox.right()]
505
        img = cv2.bitwise_not(img)
506

  
507
        horizontal, max_width = 0, 0
508
        vertical, max_height = 0, 0
509
        _contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
510
        for xx in _contours:
511
            [_x, _y, _w, _h] = cv2.boundingRect(xx)
512

  
513
            max_width = _x if _x > max_width else max_width
514
            max_height = _y if _y > max_height else max_height
515

  
516
            if (_w*0.9 < _h) or (_w > 80 > _h):  # width is greater than height
517
                horizontal += 1 + (_w * _h) / (w * h)
518
            else:
519
                vertical += 1 + (_w * _h) / (w * h)
520

  
521
        if (w < 5 and h < 5) or (max_width > 80 and max_height > 80):
522
            continue  # skip too small or big one
523

  
524
        rects.append([0 if horizontal > vertical else 90, QRect(x, y, w, h)])
525

  
526
    merge_size = 10
527
    # merge rectangles
528
    interestings = []
529
    while rects:
530
        rect = rects.pop()
531

  
532
        if 0 == rect[0]:  # x-direction text
533
            rect_expand = rect[1].adjusted(-merge_size, 0, merge_size, 0)
534
            matches = [x for x in rects if (x[0] == rect[0]) and
535
                       abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height()) * 0.5 and
536
                       abs(x[1].center().y() - rect[1].center().y()) < rect[1].height() * 0.25 and
537
                       rect_expand.intersects(x[1].adjusted(-merge_size, 0, merge_size, 0))]
538
        else:  # y -direction text
539
            rect_expand = rect[1].adjusted(0, -merge_size, 0, merge_size)
540
            matches = [x for x in rects if (x[0] == rect[0]) and
541
                       abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width()) * 0.5 and
542
                       abs(x[1].center().x() - rect[1].center().x()) < rect[1].width() * 0.25 and
543
                       rect_expand.intersects(x[1].adjusted(0, -merge_size, 0, merge_size))]
544

  
545
        if matches:
546
            for _rect in matches:
547
                rect[1] = rect[1].united(_rect[1])
548
                if _rect in rects:
549
                    rects.remove(_rect)
550
            rects.append(rect)
551
        else:
552
            interestings.append(rect)
553

  
554
    for orientation, bbox in interestings:
555
        cv2.rectangle(output, (bbox.x(), bbox.y()), (bbox.right(), bbox.bottom()), (0, 255, 0), 1)
556

  
557
    """
558
    mser = cv2.MSER_create(_min_area=10)
559
    regions, _ = mser.detectRegions(gray)  # Get the text area
560
    hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]  # Drawing text areas
561
    # Processing irregular detection boxes into rectangular boxes
562
    keep = []
563
    for c in hulls:
564
        x, y, w, h = cv2.boundingRect(c)
565
        cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 1)
566
    """
567
    #cv2.polylines(output, hulls, 1, (0, 255, 0))
568
    cv2.imwrite('c:\\temp\\mser.png', output)
569

  
DTI_PID/DTI_PID/TextInfo.py
1
class TextInfo():
1
# coding: utf-8
2

  
3
class TextInfo:
2 4
    def __init__(self, text, x, y, w, h, angle):
3
        self._text = text.replace('—', '-')
5
        self._text = text.replace('—', '-')  # TODO: check this code!!!
4 6
        self.x = x
5 7
        self.y = y
6 8
        self.w = w
......
9 11

  
10 12
    @property
11 13
    def area(self):
12
        """
13
        return area of area
14
        """
15
        return self.w*self.h
14
        """ return area of area """
15
        return self.w * self.h
16 16

  
17 17
    def contains(self, pt):
18 18
        if len(pt) == 2:
......
35 35

  
36 36
    @property
37 37
    def center(self):
38
        """
39
        return center of text
40
        """
41
        return (self.getX() + self.getW()*0.5, self.getY() + self.getH()*0.5)
38
        """ return center of text """
39
        return (self.getX() + self.getW() * 0.5, self.getY() + self.getH() * 0.5)
42 40

  
43 41
    def setText(self, text):
44 42
        self._text = text
......
74 72
        self.angle = angle
75 73

  
76 74
    def getAngle(self):
77
        return self.angle
75
        return self.angle
DTI_PID/DTI_PID/pagefold.qss
78 78
    height: 13px;
79 79
}
80 80

  
81
/* Customize arrows. */
82

  
83
*::down-arrow, *::menu-indicator {
84
    width: 7px;
85
    height: 7px;
86
}
87

  
88
*::up-arrow {
89
    width: 7px;
90
    height: 7px;
91
}
92

  
93 81
/* Customize push buttons and comboboxes. Our read-only combobox
94 82
   is very similar to a push button, so they share the same border image. */
95 83

  

내보내기 Unified diff

클립보드 이미지 추가 (최대 크기: 500 MB)