프로젝트

일반

사용자정보

개정판 9e64682b

ID: 9e64682b06b02763441d74216c281e7195d464b8
상위 ff3d1c7a
하위 0d0dfc38

김정우 님이 약 7년 전에 추가함

DTI_PID의 drawFoundSymbols 내 Tesseract 사용 구문을 tesseract_ocr_module(getTextInfoInSymbol)로 병합하고, tesseract_ocr_module 각 함수의 conf 기본값을 전역 변수(DEFAULT_CONF)로 통일

차이점 보기:

DTI_PID/DTI_PID/DTI_PID.py
552 552

  
553 553
'''
554 554
    @history    2018.04.27  Jeongwoo    Remove Tesseract Log on listWidget
555
                2018.05.04  Jeongwoo    Change method to OCR with tesseract_ocr_module.py
555 556
'''
556 557
def drawFoundSymbols(symbol, listWidget):
557 558
    global src
......
599 600
        #kernel2 = np.ones((1, 1), np.uint8)
600 601
        #bitImg = cv2.erode(bitImg, kernel2)
601 602

  
602
        bitImg = cv2.resize(bitImg, None, fx = 2.0, fy = 2.0)
603

  
604
        im = Image.fromarray(bitImg)
605

  
606
        ocrData = pytesseract.image_to_boxes(im, config='-c tessedit_char_whitelist="-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" -psm 6')
607

  
608
        ### For image_to_boxes()
609
        if ocrData:
610
            splitOcrData = ocrData.split('\n')
611
            tList = []
612
            lastCoord = (-1, -1) # Top-Right Coord
613
            tText = []
614
            ftSp = (-1, -1)
603
        try:
615 604
            threadLock.acquire()
616
            for data in splitOcrData:
617
                sData = data.split(' ')
618
                text = sData[0]
619
                tsx = int(sData[1]) // 2
620
                tsy = int(sData[2]) // 2
621
                tex = int(sData[3]) // 2
622
                tey = int(sData[4]) // 2
623
                tw = tex - tsx
624
                th = tey - tsy
625

  
626
                global MIN_TEXT_SIZE
627
                if WHITE_LIST_CHARS.find(text) >= 0:
628
                    if tw >= MIN_TEXT_SIZE or th >= MIN_TEXT_SIZE:
629
                        realTextSp = (-1, -1)
630
                        if symbolOcrOption == SymbolBase.OCR_OPTION_HALF_AND_HALF:
631
                            realTextSp = (symbolSp[0]+inSqX+tsx, symbolSp[1]+((h//2) - tsy + inSqY))
632
                        else:
633
                            realTextSp = (symbolSp[0]+inSqX+tsx, symbolSp[1]+tsy)
634
                        #cv2.rectangle(src, realTextSp, (realTextSp[0] + tw, realTextSp[1] + th), (0, 255, 255), 3)
635
                        removeText(srcGray, text, realTextSp[0], realTextSp[1], tw, th)
636
                        if lastCoord == (-1, -1):
637
                            tText.append(text)
638
                            ftSp = (tsx, tsy)
639
                        else:
640
                            COORD_ADJUSTMENT = 15
641
                            if (abs(lastCoord[1] - tsy) <= COORD_ADJUSTMENT and lastCoord[0] >= tsx - COORD_ADJUSTMENT and lastCoord[0] <= tsx + COORD_ADJUSTMENT) or (abs(lastCoord[0] - tsx) <= COORD_ADJUSTMENT and lastCoord[1] >= tsy - COORD_ADJUSTMENT and lastCoord[1] <= tsy + COORD_ADJUSTMENT):
642
                                tText.append(text)
643
                            else:
644
                                if symbolOcrOption == SymbolBase.OCR_OPTION_ALL_FIND or symbolOcrOption == SymbolBase.OCR_OPTION_HALF_AND_HALF:
645
                                    tText.append(',')
646
                                tText.append(text)
647
                        
648
                        lastCoord = (tex, tsy) # Top-Right Coord
649

  
650
            realSp = (symbolSp[0]+inSqX+ftSp[0], symbolSp[1]+inSqY+ftSp[1])
651
            realEp = (symbolSp[0]+inSqX+lastCoord[0], symbolSp[1]+inSqY+lastCoord[1])
652
            resultText = ''.join(tText)
653
            cv2.putText(canvas, resultText, (realSp[0], realSp[1]+th), 2, 1.0, (0, 0, 0)) # cv2.FONT_HERSHEY_SIMPLEX
654

  
655
            #textInfoList.append(ti.TextInfo(resultText, str(realSp[0]), str(realSp[1]), str(realEp[0]), str(realEp[1])))
656

  
657
            # text value in symbol object update
658
            index = [i for i, item in enumerate(searchedSymbolList) if item.getSp() == symbolSp]
659
            if len(index) > 0:
660
                searchedSymbolList[index[0]].setText(resultText)
605
            im = Image.fromarray(bitImg)
606
            sp = (0, 0)
607
            if symbolOcrOption == SymbolBase.OCR_OPTION_HALF_AND_HALF:
608
                sp = (symbolSp[0]+inSqX, symbolSp[1]+ inSqY)
609
            else:
610
                sp = (symbolSp[0]+inSqX, symbolSp[1] + inSqY)
611
            tList = TOCR.getTextInfoInSymbol(bitImg, sp)
612

  
613
            resultText = ''
614
            if tList is not None:
615
                for index in range(len(tList)):
616
                    textInfo = tList[index]
617
                    if index != 0:
618
                        resultText = resultText + ","
619
                    resultText = resultText + textInfo.getText()
620
                    cv2.putText(canvas, textInfo.getText(), (textInfo.getX(), textInfo.getY()), 2, 1.0, (0, 0, 0)) # cv2.FONT_HERSHEY_SIMPLEX
621

  
622
                # text value in symbol object update
623
                index = [i for i, item in enumerate(searchedSymbolList) if item.getSp() == symbolSp]
624
                if len(index) > 0:
625
                    searchedSymbolList[index[0]].setText(resultText)
626
        except Exception as ex:
627
            print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
628
        finally:
661 629
            threadLock.release()
662 630

  
663 631

  
DTI_PID/DTI_PID/tesseract_ocr_module.py
26 26
pytesseract.pytesseract.tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe'
27 27
tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe'
28 28

  
29
DEFAULT_CONF = """
30
    --psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}<>:;+=?\\"\\'
31
"""
32

  
33
'''
34
    @brief      Get Text info in Symbol (Instrumentation, OPC, etc...)
35
    @author     Jeongwoo
36
    @date       2018.05.04
37
'''
38
def getTextInfoInSymbol(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
39
    im = Image.fromarray(img)
40
    imgWidth = im.width
41
    imgHeight = im.height
42
    textInfoList = []
43

  
44
    WHITE_LIST_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-"
45
    MIN_TEXT_SIZE = 10
46

  
47
    boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')
48
    textGroupIndex = 0
49

  
50
    lastRT = (-1, -1)
51
    tempText = ''
52
    charWidth = -1
53
    charHeight = -1
54
    lastCharHeight = -1
55
    lineSp = (-1, -1)
56
    lineRect = None
57
    for index in range(len(boundaryOcrData.split('\n'))):
58
        data = boundaryOcrData.split('\n')[index]
59
        sData = data.split(' ')
60
        char = sData[0]
61

  
62
        if WHITE_LIST_CHARS.find(char) >= 0:
63
            csx = int(sData[1])
64
            csy = imgHeight - int(sData[4])
65
            cex = int(sData[3])
66
            cey = int(sData[2])
67
            cw = cex - csx
68
            ch = abs(cey - int(sData[4]))
69
            
70
            if cw >= MIN_TEXT_SIZE or ch >= MIN_TEXT_SIZE:
71
                charWidth = max(charWidth, cw)
72
                charHeight = max(charHeight, ch)
73
                currentRect = None
74
                if lastRT != (-1, -1):
75
                    currentRect = QRect(csx, csy, csx + cw, csy + ch)
76
                if lastRT == (-1, -1) and lineRect is None:
77
                    tempText = tempText + char
78
                    lastRT = (csx+cw, csy)
79
                    lineSp = (csx, csy)
80
                    lastCharHeight = charHeight
81
                    lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], charHeight)
82
                elif (lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect):
83
                    tempText = tempText + char
84
                    lastRT = (csx+cw, min(lineSp[1], csy))
85
                    lineSp = (lineSp[0], min(lineSp[1], csy))
86
                    lastCharHeight = charHeight
87
                    lineRect.setHeight(charHeight)
88
                else:
89
                    # Save previous line
90
                    prevLineText = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], lastCharHeight, 0)
91
                    textInfoList.append(prevLineText)
92
                    textGroupIndex = textGroupIndex + 1
93

  
94
                    # Start new line
95
                    tempText = char
96
                    charWidth = cw
97
                    charHeight = ch
98
                    lastRT = (csx + cw, csy)
99
                    lineSp = (csx, csy)
100
                    lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], ch)
101

  
102
    if not (len(textInfoList) > textGroupIndex) and (tempText is not None and len(tempText) > 0):
103
        textInfo = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], charHeight, 0)
104
        if textInfo not in textInfoList:
105
            textInfoList.append(textInfo)
106
    else:
107
        return None
108

  
109
    return textInfoList
110

  
29 111
'''
30 112
    @brief      Get info about each character
31 113
    @author     Jeongwoo
32 114
    @date       2018.05.03
33 115
'''
34
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = """--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}<>:;+=?\\"\\'"""):
116
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
35 117
    im = Image.fromarray(img)
36 118
    imgWidth = im.width
37 119
    imgHeight = im.height
......
58 140
    @history    2018.04.26  Jeongwoo    Make TextInfo object with Calculated Coords (with BoundBox Coords)
59 141
                2018.04.30  Jeongwoo    Add QRect.setHeight() in if-statement [(lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect)]
60 142
'''
61
def getTextInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = """--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}<>:;+=?\\"\\'"""):
143
def getTextInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
62 144
    im = Image.fromarray(img)
63 145
    imgWidth = im.width
64 146
    imgHeight = im.height
......
131 213
    return textInfoList
132 214

  
133 215

  
134
def removeTextFromNpArray(img, flag = FLAG_IMAGE_TO_BOXES, conf = '-c tessedit_char_whitelist="-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" -psm 6'):
216
def removeTextFromNpArray(img, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
135 217
    retImg = img.copy()
136 218

  
137 219
    for i in range(4):

내보내기 Unified diff

클립보드 이미지 추가 (최대 크기: 500 MB)