hytos / DTI_PID / DTI_PID / TextDetector.py @ d559e1d7
이력 | 보기 | 이력해설 | 다운로드 (32.5 KB)
1 |
# coding: utf-8
|
---|---|
2 |
"""
|
3 |
This is text detector module
|
4 |
"""
|
5 |
import sys |
6 |
import os |
7 |
import cv2 |
8 |
import numpy as np |
9 |
from PyQt5.QtCore import * |
10 |
from PyQt5.QtGui import * |
11 |
from PyQt5.QtWidgets import * |
12 |
from PyQt5.QtSvg import * |
13 | |
14 |
from AppDocData import * |
15 |
import TextInfo as ti |
16 |
import tesseract_ocr_module as TOCR |
17 | |
18 |
# Minimum pixel size for a region to be considered as text
# (not referenced in this module's visible code — presumably used by importers; TODO confirm)
MIN_TEXT_SIZE = 10

# Worker-thread count for parallel OCR; defaults to the machine's CPU count
THREAD_MAX_WORKER = os.cpu_count()
20 | |
21 | |
22 |
class TextDetector: |
23 |
'''
|
24 |
@brief constructor
|
25 |
@author humkyung
|
26 |
@date 2018.07.11
|
27 |
'''
|
28 | |
29 |
def __init__(self): |
30 |
self.textInfoList = []
|
31 |
self.otherTextInfoList = []
|
32 |
self.titleBlockTextInfoList = []
|
33 | |
34 |
'''
|
35 |
@brief detect text areas
|
36 |
@author humkyung
|
37 |
@date 2018.06.16
|
38 |
'''
|
39 | |
40 |
def detectTextAreas(self, img, offset): |
41 |
try:
|
42 |
return self.getTextAreaInfo(img, offset[0], offset[1]) |
43 |
except Exception as ex: |
44 |
print('error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, |
45 |
sys.exc_info()[-1].tb_lineno))
|
46 | |
47 |
return None, None |
48 | |
49 |
def decode_predictions(self, scores, geometry): |
50 |
# grab the number of rows and columns from the scores volume, then
|
51 |
# initialize our set of bounding box rectangles and corresponding
|
52 |
# confidence scores
|
53 |
(numRows, numCols) = scores.shape[2:4] |
54 |
rects = [] |
55 |
confidences = [] |
56 | |
57 |
# loop over the number of rows
|
58 |
for y in range(0, numRows): |
59 |
# extract the scores (probabilities), followed by the
|
60 |
# geometrical data used to derive potential bounding box
|
61 |
# coordinates that surround text
|
62 |
scoresData = scores[0, 0, y] |
63 |
xData0 = geometry[0, 0, y] |
64 |
xData1 = geometry[0, 1, y] |
65 |
xData2 = geometry[0, 2, y] |
66 |
xData3 = geometry[0, 3, y] |
67 |
anglesData = geometry[0, 4, y] |
68 | |
69 |
# loop over the number of columns
|
70 |
for x in range(0, numCols): |
71 |
# if our score does not have sufficient probability,
|
72 |
# ignore it
|
73 |
if scoresData[x] < 0.5: # args["min_confidence"]: |
74 |
continue
|
75 | |
76 |
# compute the offset factor as our resulting feature
|
77 |
# maps will be 4x smaller than the input image
|
78 |
(offsetX, offsetY) = (x * 4.0, y * 4.0) |
79 | |
80 |
# extract the rotation angle for the prediction and
|
81 |
# then compute the sin and cosine
|
82 |
angle = anglesData[x] |
83 |
cos = np.cos(angle) |
84 |
sin = np.sin(angle) |
85 | |
86 |
# use the geometry volume to derive the width and height
|
87 |
# of the bounding box
|
88 |
h = xData0[x] + xData2[x] |
89 |
w = xData1[x] + xData3[x] |
90 | |
91 |
# compute both the starting and ending (x, y)-coordinates
|
92 |
# for the text prediction bounding box
|
93 |
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
|
94 |
endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
|
95 |
startX = int(endX - w)
|
96 |
startY = int(endY - h)
|
97 | |
98 |
# add the bounding box coordinates and probability score
|
99 |
# to our respective lists
|
100 |
rects.append((startX, startY, endX, endY)) |
101 |
confidences.append(scoresData[x]) |
102 | |
103 |
# return a tuple of the bounding boxes and associated confidences
|
104 |
return (rects, confidences)
|
105 | |
106 |
'''
|
107 |
@brief Get Text Area info by contour
|
108 |
@author Jeongwoo
|
109 |
@date 2018.06.05
|
110 |
@history 2018.06.08 Jeongwoo Add angle
|
111 |
humkyung 2018.06.18 fixed logic to detect text area
|
112 |
'''
|
113 |
def getTextAreaInfo(self, imgGray, offset_x, offset_y): |
114 |
#from imutils.object_detection import non_max_suppression
|
115 |
from AppDocData import AppDocData |
116 | |
117 |
res_list = [] |
118 |
ocr_image = None
|
119 |
try:
|
120 |
app_doc_data = AppDocData.instance() |
121 |
project = app_doc_data.getCurrentProject() |
122 | |
123 |
configs = app_doc_data.getConfigs('Text Size', 'Max Text Size') |
124 |
maxTextSize = int(configs[0].value) if 1 == len(configs) else 100 |
125 |
configs = app_doc_data.getConfigs('Text Size', 'Min Text Size') |
126 |
minSize = int(configs[0].value) if 1 == len(configs) else 15 |
127 | |
128 |
ocr_image = imgGray.copy() # np.ones(imgGray.shape, np.uint8) * 255
|
129 | |
130 |
configs = app_doc_data.getConfigs('Engine', 'Text Area') |
131 |
if configs and int(configs[0].value) is 1: |
132 |
# get text box original way
|
133 |
not_containing_bbox, binary_image = self.getTextBox(ocr_image, imgGray, maxTextSize, minSize)
|
134 |
else:
|
135 |
# using craft
|
136 |
return self.getTextBox_craft(ocr_image, maxTextSize, minSize, offset_x, offset_y, web=True) |
137 | |
138 |
rects = [] |
139 | |
140 |
for bbox in not_containing_bbox: |
141 |
x, y = bbox.left(), bbox.top() |
142 |
w, h = bbox.width(), bbox.height() |
143 |
img = binary_image[bbox.top():bbox.bottom(), bbox.left():bbox.right()] |
144 |
img = cv2.dilate(img, np.ones((2, 2), np.uint8)) |
145 |
img = cv2.bitwise_not(img) |
146 | |
147 |
horizontal, max_width = 0, 0 |
148 |
vertical, max_height = 0, 0 |
149 |
_contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) |
150 |
for xx in _contours: |
151 |
[_x, _y, _w, _h] = cv2.boundingRect(xx) |
152 | |
153 |
if min(_w, _h) / max(_w, _h) < 0.3: |
154 |
continue
|
155 | |
156 |
max_width = _x if _x > max_width else max_width |
157 |
max_height = _y if _y > max_height else max_height |
158 | |
159 |
if (_w < _h) or (_w > maxTextSize > _h): # count character that looks like horizontal |
160 |
horizontal += 1# + (_w * _h) / (w * h) |
161 |
else:
|
162 |
vertical += 1# + (_w * _h) / (w * h) |
163 | |
164 |
if (w < minSize and h < minSize) or (max_width > maxTextSize and max_height > maxTextSize): |
165 |
continue # skip too small or big one |
166 | |
167 |
rects.append([0 if horizontal >= vertical else 90, QRect(x, y, w, h)]) |
168 | |
169 |
configs = app_doc_data.getConfigs('Text Recognition', 'Merge Size') |
170 |
mergeSize = int(configs[0].value) if 1 == len(configs) else 10 |
171 |
# merge rectangles
|
172 |
interestings = [] |
173 |
while rects:
|
174 |
rect = rects.pop() |
175 | |
176 |
if 0 == rect[0]: # x-direction text |
177 |
rectExpand = rect[1].adjusted(-mergeSize, 0, mergeSize, 0) |
178 |
matches = [x for x in rects if (x[0] == rect[0]) and |
179 |
abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height())*0.5 and |
180 |
abs(x[1].center().y() - rect[1].center().y()) < rect[1].height()*0.25 and |
181 |
rectExpand.intersects(x[1])]
|
182 |
else: # y -direction text |
183 |
rectExpand = rect[1].adjusted(0, -mergeSize, 0, mergeSize) |
184 |
matches = [x for x in rects if (x[0] == rect[0]) and |
185 |
abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width())*0.5 and |
186 |
abs(x[1].center().x() - rect[1].center().x()) < rect[1].width()*0.25 and |
187 |
rectExpand.intersects(x[1])]
|
188 | |
189 |
if matches:
|
190 |
for _rect in matches: |
191 |
rect[1] = rect[1].united(_rect[1]) |
192 |
if _rect in rects: |
193 |
rects.remove(_rect) |
194 |
rects.append(rect) |
195 |
else:
|
196 |
interestings.append(rect) |
197 | |
198 |
for rect in interestings: |
199 |
matches = [_rect for _rect in interestings if rect != _rect and _rect[1].contains(rect[1])] |
200 |
# if there is no boxes which contains
|
201 |
if not matches: |
202 |
angle = rect[0]
|
203 |
res_list.append(ti.TextInfo('', round(offset_x) + rect[1].x(), round(offset_y) + rect[1].y(), rect[1].width(), |
204 |
rect[1].height(), angle))
|
205 |
except Exception as ex: |
206 |
message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename, |
207 |
sys.exc_info()[-1].tb_lineno)
|
208 |
print(message) |
209 | |
210 |
return res_list, ocr_image
|
211 | |
212 |
    def getTextBox_craft(self, ocr_image, maxTextSize, minSize, offset_x, offset_y, web=False):
        """ get text box by using craft

        Runs the CRAFT text detector (locally or via the web service), tags each
        returned box as horizontal or vertical, merges boxes that line up along
        the text direction, and returns the resulting TextInfo list together
        with a thresholded copy of the input image.

        @param ocr_image: grayscale image the detector runs on
        @param maxTextSize: upper size bound (pixels) for a text box
        @param minSize: lower size bound (pixels) for a text box
        @param offset_x: x offset of ocr_image inside the whole drawing
        @param offset_y: y offset of ocr_image inside the whole drawing
        @param web: when True, request boxes from the web service; otherwise
                    import and run the local CRAFT_pytorch_master package
        @return: tuple (list of TextInfo, binary image)
        """

        from AppWebService import AppWebService
        from AppDocData import AppDocData

        app_doc_data = AppDocData.instance()
        project = app_doc_data.getCurrentProject()

        binary_image = cv2.threshold(ocr_image, 200, 255, cv2.THRESH_BINARY)[1]

        # debug image paths written by the detector
        score_path = os.path.join(project.getTempPath(), 'OCR_CRAFT_SCORE_{}.png'.format(app_doc_data.imgName))
        img_path = os.path.join(project.getTempPath(), 'OCR_CRAFT_{}.png'.format(app_doc_data.imgName))

        if not web:
            # run CRAFT locally with its bundled weights
            sys.path.insert(0, os.path.dirname(os.path.realpath('./')) + '\\WebServer\\CRAFT_pytorch_master')
            import text_craft

            boxes = text_craft.get_text_box(ocr_image, img_path, score_path, os.path.dirname(os.path.realpath('./')) + '\\WebServer\\CRAFT_pytorch_master\\weights\\craft_mlt_25k.pth')
        else:
            app_web_service = AppWebService()
            boxes = app_web_service.request_text_box(ocr_image, img_path, score_path)

        rects = []

        # each box is a flattened quad; indices 0,1 and 4,5 are used as the
        # top-left and bottom-right corners of the axis-aligned rectangle
        for box in boxes:
            rects.append(QRect(box[0], box[1], box[4] - box[0], box[5] - box[1]))

        configs = app_doc_data.getConfigs('Text Recognition', 'Merge Size')
        mergeSize = int(configs[0].value) if 1 == len(configs) else 10
        #gap_size = mergeSize / 2
        # max center-line distance for two boxes to be merge candidates
        gap_size = 3

        # classify each box as horizontal or vertical text; the orientation is
        # stashed on the QRect as a dynamic attribute `_vertical`
        # NOTE(review): the first branch tags narrow (width < minSize) boxes as
        # horizontal and the second tags short boxes as vertical — this looks
        # inverted relative to the later branches; confirm against the detector's output
        verticals = []
        horizontals = []
        for rect in rects:
            if rect.width() < minSize and rect.height() < maxTextSize:
                rect._vertical = False
                horizontals.append(rect)
            elif rect.height() < minSize and rect.width() < maxTextSize:
                rect._vertical = True
                verticals.append(rect)
            elif rect.width() < minSize or rect.height() < minSize:
                # too thin in one direction and too long in the other: discard
                continue
            elif rect.height() > rect.width():
                rect._vertical = True
                verticals.append(rect)
            else:
                rect._vertical = False
                horizontals.append(rect)

        # group vertical boxes whose x-centers align and whose (mergeSize
        # expanded) vertical spans overlap; each group in v_merges is merged later
        v_merges = []
        for vertical1 in verticals:
            for vertical2 in verticals:
                if vertical1 is vertical2:
                    continue
                if abs(vertical1.center().x() - vertical2.center().x()) < gap_size:
                    t1, t2 = vertical1.top() - mergeSize, vertical2.top() - mergeSize
                    b1, b2 = vertical1.bottom() + mergeSize, vertical2.bottom() + mergeSize
                    l_x_y, s_x_y = [t1, b1], [t2, b2]
                    # 1-D interval overlap test on the expanded spans
                    if not (max(l_x_y) < min(s_x_y) or max(s_x_y) < min(l_x_y)):
                        inserted = False
                        for merge in v_merges:
                            if vertical1 in merge and vertical2 in merge:
                                inserted = True
                                break
                            elif vertical1 in merge and vertical2 not in merge:
                                merge.append(vertical2)
                                inserted = True
                                break
                            elif vertical2 in merge and vertical1 not in merge:
                                merge.append(vertical1)
                                inserted = True
                                break
                        if not inserted:
                            v_merges.append([vertical1, vertical2])

        # same grouping for horizontal boxes, along the y-center line
        h_merges = []
        for horizontal1 in horizontals:
            for horizontal2 in horizontals:
                if horizontal1 is horizontal2:
                    continue
                if abs(horizontal1.center().y() - horizontal2.center().y()) < gap_size:
                    l1, l2 = horizontal1.left() - mergeSize, horizontal2.left() - mergeSize
                    r1, r2 = horizontal1.right() + mergeSize, horizontal2.right() + mergeSize
                    l_x_y, s_x_y = [l1, r1], [l2, r2]
                    if not (max(l_x_y) < min(s_x_y) or max(s_x_y) < min(l_x_y)):
                        inserted = False
                        for merge in h_merges:
                            if horizontal1 in merge and horizontal2 in merge:
                                inserted = True
                                break
                            elif horizontal1 in merge and horizontal2 not in merge:
                                merge.append(horizontal2)
                                inserted = True
                                break
                            elif horizontal2 in merge and horizontal1 not in merge:
                                merge.append(horizontal1)
                                inserted = True
                                break
                        if not inserted:
                            h_merges.append([horizontal1, horizontal2])

        # remove every grouped box from the flat list (merged replacements are
        # appended below); a box missing from rects is only logged
        for merge in v_merges + h_merges:
            for rect in merge:
                if rect in rects:
                    rects.remove(rect)
                else:
                    print(str(rect))

        # replace each vertical group with its bounding rectangle
        for merge in v_merges:
            max_x, max_y, min_x, min_y = 0, 0, sys.maxsize, sys.maxsize
            for rect in merge:
                if rect.left() < min_x:
                    min_x = rect.left()
                if rect.right() > max_x:
                    max_x = rect.right()
                if rect.top() < min_y:
                    min_y = rect.top()
                if rect.bottom() > max_y:
                    max_y = rect.bottom()

            rect = QRect(min_x, min_y, max_x - min_x, max_y - min_y)
            rect._vertical = True
            rects.append(rect)

        # replace each horizontal group with its bounding rectangle
        for merge in h_merges:
            max_x, max_y, min_x, min_y = 0, 0, sys.maxsize, sys.maxsize
            for rect in merge:
                if rect.left() < min_x:
                    min_x = rect.left()
                if rect.right() > max_x:
                    max_x = rect.right()
                if rect.top() < min_y:
                    min_y = rect.top()
                if rect.bottom() > max_y:
                    max_y = rect.bottom()

            rect = QRect(min_x, min_y, max_x - min_x, max_y - min_y)
            rect._vertical = False
            rects.append(rect)

        # convert to TextInfo in whole-drawing coordinates; vertical text gets angle 90
        res_rects = []
        for rect in rects:
            res_rects.append(ti.TextInfo('', round(offset_x) + rect.x(), round(offset_y) + rect.y(), rect.width(),
                                         rect.height(), 90 if rect._vertical else 0))

        return res_rects, binary_image
|
360 | |
361 |
    def getTextBox(self, ocr_image, imgGray, maxTextSize, minSize):
        """ get text box

        Contour-based text box detection: paints character-sized contours of
        imgGray onto a blanked ocr_image, erodes the result so nearby
        characters fuse into blobs, and returns the outer bounding boxes of
        those blobs (excluding boxes that contain another box).

        @param ocr_image: working image; fully overwritten inside this method
        @param imgGray: grayscale source image the contours are taken from
        @param maxTextSize: upper size bound (pixels) for a character contour
        @param minSize: lower size bound (pixels) for a character contour
        @return: tuple (list of QRect candidates, binary image)
        """
        from AppDocData import AppDocData

        app_doc_data = AppDocData.instance()
        project = app_doc_data.getCurrentProject()

        # blank the working image with white (shape[::-1] converts the
        # (rows, cols) shape into an (x, y) corner for cv2.rectangle)
        cv2.rectangle(ocr_image, (0, 0), ocr_image.shape[::-1], (255, 255, 255), -1)

        mask = cv2.threshold(imgGray, 200, 255, cv2.THRESH_BINARY)[1]

        contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        for contour in contours:
            # remove too big one or horizontal/vertical line
            [x, y, w, h] = cv2.boundingRect(contour)
            # signed area: the sign encodes the contour orientation
            area = cv2.contourArea(contour, True)

            # skip one which size is greater than max size or less then minimum size
            if (w > maxTextSize or h > maxTextSize) or (w <= minSize and h <= minSize):
                #cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), -1)
                continue

            if area >= 0:
                # draw character-sized contours filled in black
                cv2.drawContours(ocr_image, [contour], -1, (0, 0, 0), -1)
                #cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), 1)
            #else:
            #    cv2.drawContours(ocr_image, [contour], -1, (255, 255, 255), -1)

        # dump the intermediate OCR image for debugging
        path = os.path.join(project.getTempPath(), 'OCR_{}.png'.format(app_doc_data.imgName))
        cv2.imwrite(path, ocr_image)

        # disabled experiment: EAST-based detection kept for reference
        """
        east = False
        if east:
            # define the two output layer names for the EAST detector model that
            # we are interested -- the first is the output probabilities and the
            # second can be used to derive the bounding box coordinates of text
            layerNames = [
                "feature_fusion/Conv_7/Sigmoid",
                "feature_fusion/concat_3"]

            # load the pre-trained EAST text detector
            net = cv2.dnn.readNet("C:\\ProgramData\\Digital PID\\frozen_east_text_detection.pb")

            (H, W) = ocr_image.shape[:2]
            # construct a blob from the image and then perform a forward pass of
            # the model to obtain the two output layer sets
            blob = cv2.dnn.blobFromImage(ocr_image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
            net.setInput(blob)
            (scores, geometry) = net.forward(layerNames)

            # decode the predictions, then apply non-maxima suppression to
            # suppress weak, overlapping bounding boxes
            (rects, confidences) = self.decode_predictions(scores, geometry)
            boxes = non_max_suppression(np.array(rects), probs=confidences)
            # loop over the bounding boxes
            for (startX, startY, endX, endY) in boxes:
                pass
        else:
        """
        configs = app_doc_data.getConfigs('Text Recognition', 'Expand Size')
        expand_size = int(configs[0].value) if 1 == len(configs) else 10
        configs = app_doc_data.getConfigs('Text Recognition', 'Shrink Size')
        # shrinkSize is read from the configuration but not used below
        shrinkSize = int(configs[0].value) if 1 == len(configs) else 0

        # erode (on white background this thickens the black strokes) so that
        # neighbouring characters merge into one connected blob, then invert
        binary_image = cv2.threshold(ocr_image, 200, 255, cv2.THRESH_BINARY)[1]
        eroded = cv2.erode(binary_image, np.ones((expand_size, expand_size), np.uint8))
        eroded = cv2.bitwise_not(eroded)

        bboxes = []
        contours, hierarchy = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            area = cv2.contourArea(contour, True)
            # keep only contours with negative signed area (one orientation)
            if area < 0:
                [x, y, w, h] = cv2.boundingRect(contour)
                bboxes.append(QRect(x, y, w, h))

        # exclude bounding boxes contains child bounding box
        not_containing_bbox = []
        for bbox in bboxes:
            matches = [_bbox for _bbox in bboxes if bbox != _bbox and bbox.contains(_bbox)]
            if not matches:
                not_containing_bbox.append(bbox)
        # up to here

        return not_containing_bbox, binary_image
|
447 | |
448 | |
449 |
'''
|
450 |
@brief recognize text of given text info
|
451 |
@author humkyung
|
452 |
@date 2018.07.24
|
453 |
@history change parameter updateProgressSignal to worker
|
454 |
2018.11.08 euisung add white char list check process on db
|
455 |
'''
|
456 |
    @staticmethod
    def recognizeTextFromImage(tInfos, imgOCR, offset, searchedSymbolList, worker, listWidget, maxProgressValue):
        """Run OCR on the given text areas and return the recognized TextInfo list.

        @brief   recognize text of given text info
        @author  humkyung
        @date    2018.07.24
        @param tInfos: text areas (TextInfo) to recognize
        @param imgOCR: image the text is read from
        @param offset: (x, y) offset of imgOCR inside the whole drawing
        @param searchedSymbolList: recognized symbols; a text inside an
               Instrumentation symbol is forced to angle 0
        @param worker: worker thread for progress signals (may be None)
        @param listWidget: list widget for logging (may be None)
        @param maxProgressValue: value emitted with each progress update
        @return: list of recognized TextInfo (coordinates in drawing space)
        """
        import re
        res = []

        app_doc_data = AppDocData.instance()

        try:
            for tInfo in tInfos:
                # crop the text area out of imgOCR (local coordinates)
                x = tInfo.getX() - round(offset[0])
                y = tInfo.getY() - round(offset[1])
                img = imgOCR[y:y + tInfo.getH(), x:x + tInfo.getW()]

                # set angle 0 if symbol contains the text area is instrumentation
                category = None
                if searchedSymbolList:
                    contains = [symbol for symbol in searchedSymbolList if symbol.contains(tInfo)]
                    if contains:
                        _type = contains[0].getType()
                        category = app_doc_data.getSymbolCategoryByType(_type)
                        if 'Instrumentation' == category:
                            tInfo.setAngle(0)
                # up to here

                # optional whitelist of characters passed to the OCR engine
                white_char_list = app_doc_data.getConfigs('Text Recognition', 'White Character List')
                resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), language=app_doc_data.OCRData,
                                                  conf=white_char_list[0].value if white_char_list else '')

                if resultTextInfo and len(resultTextInfo) > 0:
                    for result in resultTextInfo:
                        # translate results back into drawing coordinates
                        result.setX(result.getX() + round(offset[0]))
                        result.setY(result.getY() + round(offset[1]))
                        if 'Instrumentation' == category:
                            # instrumentation tags keep only alphanumerics
                            text = re.sub('[^a-zA-Z0-9]+', '', result.getText())
                            result.setText(text)

                    res.extend(resultTextInfo)

                    if listWidget is not None:
                        item = QListWidgetItem(
                            '{},{},{} is recognized'.format(resultTextInfo[0].getX(), resultTextInfo[0].getY(),
                                                            resultTextInfo[0].getText()))
                        listWidget.addItem(item)
                else:
                    pass

                if worker is not None:
                    # emit the text only when exactly one result was recognized
                    worker.updateProgress.emit(maxProgressValue,
                                               resultTextInfo[0].getText() if resultTextInfo is not None and 1 == len(
                                                   resultTextInfo) else None)
        except Exception as ex:
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            if worker is not None:
                worker.displayLog.emit(MessageType.Error, message)

        return res
|
513 | |
514 |
'''
|
515 |
@brief read image drawing and then remove text
|
516 |
@author jwkim
|
517 |
@date
|
518 |
@history humkyung 2018.04.06 check if file exists
|
519 |
Jeongwoo 2018.05.09 Use Tesseract OCR after Azure OCR (Azure OCR : Getting text area)
|
520 |
Jeongwoo 2018.05.25 Add condition on if-statement
|
521 |
Jeongwoo 2018.06.05 Get text area data list by config.type
|
522 |
Jeongwoo 2018.06.08 Add angle Parameter on TOCR.getTextInfo
|
523 |
humkyung 2018.06.16 update proessbar while recognizing text
|
524 |
humkyung 2018.07.03 remove white space and replace given oldStr with newStr
|
525 |
humkyung 2018.07.07 change method name to recognizeText
|
526 |
euisung 2018.11.08 add white char list check process on db
|
527 |
euisung 2018.11.12 add title block properties
|
528 |
'''
|
529 | |
530 |
def recognizeText(self, imgSrc, offset, tInfoList, searchedSymbolList, worker, listWidget, maxProgressValue, |
531 |
onlyTextArea=False):
|
532 |
import concurrent.futures as futures |
533 |
from App import App |
534 |
from Area import Area |
535 | |
536 |
try:
|
537 |
self.otherTextInfoList = []
|
538 |
self.titleBlockTextInfoList = []
|
539 |
self.textInfoList = []
|
540 | |
541 |
app_doc_data = AppDocData.instance() |
542 |
project = app_doc_data.getCurrentProject() |
543 | |
544 |
text_info_array = np.array_split(tInfoList, App.THREAD_MAX_WORKER |
545 |
if len(tInfoList) > App.THREAD_MAX_WORKER else len(tInfoList)) |
546 |
with futures.ThreadPoolExecutor(max_workers=App.THREAD_MAX_WORKER) as pool: |
547 |
future_text = {pool.submit(TextDetector.recognizeTextFromImage, tInfo, imgSrc, offset, |
548 |
searchedSymbolList, worker, listWidget, maxProgressValue): |
549 |
tInfo for tInfo in text_info_array} |
550 | |
551 |
for future in futures.as_completed(future_text): |
552 |
try:
|
553 |
data = future.result() |
554 |
if data:
|
555 |
self.textInfoList.extend(data)
|
556 |
except Exception as ex: |
557 |
message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename, |
558 |
sys.exc_info()[-1].tb_lineno)
|
559 |
if worker:
|
560 |
worker.displayLog.emit(MessageType.Error, message) |
561 | |
562 |
if onlyTextArea:
|
563 |
return
|
564 |
# parse texts in area except Drawing area
|
565 |
whiteCharList = app_doc_data.getConfigs('Text Recognition', 'White Character List') |
566 |
for area in app_doc_data.getAreaList(): |
567 |
if area.name == 'Drawing': continue |
568 | |
569 |
if area.name == 'Note': |
570 |
if area is not None and hasattr(area, 'img') and area.img is not None: |
571 |
if len(whiteCharList) is 0: |
572 |
texts = TOCR.getTextInfo(area.img, (area.x, area.y), 0, language='eng') |
573 |
else:
|
574 |
texts = TOCR.getTextInfo(area.img, (area.x, area.y), 0, language='eng', |
575 |
conf=whiteCharList[0].value)
|
576 |
self.otherTextInfoList.append([area.name, texts])
|
577 |
else:
|
578 |
img = app_doc_data.imgSrc[round(area.y):round(area.y + area.height), |
579 |
round(area.x):round(area.x + area.width)] |
580 |
if len(whiteCharList) is 0: |
581 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng') |
582 |
else:
|
583 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng', |
584 |
conf=whiteCharList[0].value)
|
585 |
if texts is not None and len(texts) > 0: |
586 |
if area.name == 'Unit': |
587 |
app_doc_data.activeDrawing.setAttr('Unit', texts[0].getText()) |
588 |
self.otherTextInfoList.append([area.name, texts])
|
589 | |
590 |
titleBlockProps = app_doc_data.getTitleBlockProperties() |
591 |
if titleBlockProps:
|
592 |
for titleBlockProp in titleBlockProps: |
593 |
area = Area(titleBlockProp[0])
|
594 |
area.parse(titleBlockProp[2])
|
595 |
if not (titleBlockProp[3] and titleBlockProp[3] != ''): |
596 |
img = app_doc_data.imgSrc[round(area.y):round(area.y + area.height), |
597 |
round(area.x):round(area.x + area.width)] |
598 |
if len(whiteCharList) is 0: |
599 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=app_doc_data.OCRData)
|
600 |
else:
|
601 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng', |
602 |
conf=whiteCharList[0].value)
|
603 |
texts = [ti.TextInfo('\n'.join([textInfo.getText() for textInfo in texts]), area.x, area.y, |
604 |
area.width, area.height, 0)]
|
605 |
else:
|
606 |
texts = [ti.TextInfo(titleBlockProp[3], area.x, area.y, area.width, area.height, 0)] |
607 |
self.titleBlockTextInfoList.append([area.name, texts])
|
608 | |
609 |
if worker is not None: worker.updateProgress.emit(maxProgressValue, None) |
610 | |
611 |
"""
|
612 |
for text_box in tInfoList:
|
613 |
x = text_box.getX()
|
614 |
y = text_box.getY()
|
615 |
cv2.rectangle(imgSrc, (x - offset[0], y - offset[1]),
|
616 |
(x - offset[0] + text_box.getW(), y - offset[1] + text_box.getH()), 1, 1)
|
617 |
cv2.imwrite('c:\\Temp\\text_box.png', imgSrc)
|
618 |
"""
|
619 |
except Exception as ex: |
620 |
message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename, |
621 |
sys.exc_info()[-1].tb_lineno)
|
622 |
if worker:
|
623 |
worker.displayLog.emit(MessageType.Error, message) |
624 | |
625 |
'''
|
626 |
@brief remove text from image
|
627 |
@author humkyung
|
628 |
@date 2018.07.24
|
629 |
'''
|
630 | |
631 |
def remove_text_from_image(self, imgSrc, offset): |
632 |
# remove recognized text from image
|
633 |
for text in self.textInfoList: |
634 |
x = round(text.getX() - offset[0]) |
635 |
y = round(text.getY() - offset[1]) |
636 |
width = round(text.getW())
|
637 |
height = round(text.getH())
|
638 |
cv2.rectangle(imgSrc, (x, y), (x + width, y + height), 255, -1) |
639 |
# up to here
|
640 | |
641 |
# DEBUG
|
642 |
#cv2.imwrite("c:\\temp\\remove_texts.png", imgSrc)
|
643 | |
644 | |
645 |
if __name__ == "__main__": |
646 |
image = cv2.imread('d:\\Projects\\DTIPID\\Projects\\IX3\\Temp\\OCR_Document_2_Page1.png')
|
647 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) |
648 |
output = gray.copy() |
649 |
gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1] |
650 | |
651 |
expand_size = 5
|
652 |
eroded = cv2.erode(gray, np.ones((expand_size, expand_size), np.uint8)) |
653 |
eroded = cv2.bitwise_not(eroded) |
654 |
cv2.imwrite('c:\\temp\\eroded.png', eroded)
|
655 | |
656 |
bboxes = [] |
657 |
contours, hierarchy = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) |
658 |
for contour in contours: |
659 |
area = cv2.contourArea(contour, True)
|
660 |
if area < 0: |
661 |
[x, y, w, h] = cv2.boundingRect(contour) |
662 |
bboxes.append(QRect(x, y, w, h)) |
663 | |
664 |
# exclude bounding boxes contains child bounding box
|
665 |
not_containing_bbox = [] |
666 |
for bbox in bboxes: |
667 |
matches = [_bbox for _bbox in bboxes if bbox != _bbox and bbox.contains(_bbox)] |
668 |
if not matches: |
669 |
not_containing_bbox.append(bbox) |
670 |
# up to here
|
671 | |
672 |
rects = [] |
673 |
for bbox in not_containing_bbox: |
674 |
x, y = bbox.left(), bbox.top() |
675 |
w, h = bbox.width(), bbox.height() |
676 |
img = gray[bbox.top():bbox.bottom(), bbox.left():bbox.right()] |
677 |
img = cv2.bitwise_not(img) |
678 | |
679 |
horizontal, max_width = 0, 0 |
680 |
vertical, max_height = 0, 0 |
681 |
_contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) |
682 |
for xx in _contours: |
683 |
[_x, _y, _w, _h] = cv2.boundingRect(xx) |
684 | |
685 |
max_width = _x if _x > max_width else max_width |
686 |
max_height = _y if _y > max_height else max_height |
687 | |
688 |
if (_w*0.9 < _h) or (_w > 80 > _h): # width is greater than height |
689 |
horizontal += 1 + (_w * _h) / (w * h)
|
690 |
else:
|
691 |
vertical += 1 + (_w * _h) / (w * h)
|
692 | |
693 |
if (w < 5 and h < 5) or (max_width > 80 and max_height > 80): |
694 |
continue # skip too small or big one |
695 | |
696 |
rects.append([0 if horizontal > vertical else 90, QRect(x, y, w, h)]) |
697 | |
698 |
merge_size = 10
|
699 |
# merge rectangles
|
700 |
interestings = [] |
701 |
while rects:
|
702 |
rect = rects.pop() |
703 | |
704 |
if 0 == rect[0]: # x-direction text |
705 |
rect_expand = rect[1].adjusted(-merge_size, 0, merge_size, 0) |
706 |
matches = [x for x in rects if (x[0] == rect[0]) and |
707 |
abs(x[1].height() - rect[1].height()) < (x[1].height() + rect[1].height()) * 0.5 and |
708 |
abs(x[1].center().y() - rect[1].center().y()) < rect[1].height() * 0.25 and |
709 |
rect_expand.intersects(x[1].adjusted(-merge_size, 0, merge_size, 0))] |
710 |
else: # y -direction text |
711 |
rect_expand = rect[1].adjusted(0, -merge_size, 0, merge_size) |
712 |
matches = [x for x in rects if (x[0] == rect[0]) and |
713 |
abs(x[1].width() - rect[1].width()) < (x[1].width() + rect[1].width()) * 0.5 and |
714 |
abs(x[1].center().x() - rect[1].center().x()) < rect[1].width() * 0.25 and |
715 |
rect_expand.intersects(x[1].adjusted(0, -merge_size, 0, merge_size))] |
716 | |
717 |
if matches:
|
718 |
for _rect in matches: |
719 |
rect[1] = rect[1].united(_rect[1]) |
720 |
if _rect in rects: |
721 |
rects.remove(_rect) |
722 |
rects.append(rect) |
723 |
else:
|
724 |
interestings.append(rect) |
725 | |
726 |
for orientation, bbox in interestings: |
727 |
cv2.rectangle(output, (bbox.x(), bbox.y()), (bbox.right(), bbox.bottom()), (0, 255, 0), 1) |
728 | |
729 |
"""
|
730 |
mser = cv2.MSER_create(_min_area=10)
|
731 |
regions, _ = mser.detectRegions(gray) # Get the text area
|
732 |
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions] # Drawing text areas
|
733 |
# Processing irregular detection boxes into rectangular boxes
|
734 |
keep = []
|
735 |
for c in hulls:
|
736 |
x, y, w, h = cv2.boundingRect(c)
|
737 |
cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 1)
|
738 |
"""
|
739 |
#cv2.polylines(output, hulls, 1, (0, 255, 0))
|
740 |
cv2.imwrite('c:\\temp\\mser.png', output)
|
741 |