개정판 9e64682b
DTI_PID의 drawFoundSymbols 안에 있던 Tesseract 사용 구문을 tesseract_ocr_module로 병합 / tesseract_ocr_module 각 함수의 conf 기본값을 전역 변수(DEFAULT_CONF)로 통일
DTI_PID/DTI_PID/DTI_PID.py | ||
---|---|---|
552 | 552 |
|
553 | 553 |
''' |
554 | 554 |
@history 2018.04.27 Jeongwoo Remove Tesseract Log on listWidget |
555 |
2018.05.04 Jeongwoo Change method to OCR with tesseract_ocr_module.py |
|
555 | 556 |
''' |
556 | 557 |
def drawFoundSymbols(symbol, listWidget): |
557 | 558 |
global src |
... | ... | |
599 | 600 |
#kernel2 = np.ones((1, 1), np.uint8) |
600 | 601 |
#bitImg = cv2.erode(bitImg, kernel2) |
601 | 602 |
|
602 |
bitImg = cv2.resize(bitImg, None, fx = 2.0, fy = 2.0) |
|
603 |
|
|
604 |
im = Image.fromarray(bitImg) |
|
605 |
|
|
606 |
ocrData = pytesseract.image_to_boxes(im, config='-c tessedit_char_whitelist="-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" -psm 6') |
|
607 |
|
|
608 |
### For image_to_boxes() |
|
609 |
if ocrData: |
|
610 |
splitOcrData = ocrData.split('\n') |
|
611 |
tList = [] |
|
612 |
lastCoord = (-1, -1) # Top-Right Coord |
|
613 |
tText = [] |
|
614 |
ftSp = (-1, -1) |
|
603 |
try: |
|
615 | 604 |
threadLock.acquire() |
616 |
for data in splitOcrData: |
|
617 |
sData = data.split(' ') |
|
618 |
text = sData[0] |
|
619 |
tsx = int(sData[1]) // 2 |
|
620 |
tsy = int(sData[2]) // 2 |
|
621 |
tex = int(sData[3]) // 2 |
|
622 |
tey = int(sData[4]) // 2 |
|
623 |
tw = tex - tsx |
|
624 |
th = tey - tsy |
|
625 |
|
|
626 |
global MIN_TEXT_SIZE |
|
627 |
if WHITE_LIST_CHARS.find(text) >= 0: |
|
628 |
if tw >= MIN_TEXT_SIZE or th >= MIN_TEXT_SIZE: |
|
629 |
realTextSp = (-1, -1) |
|
630 |
if symbolOcrOption == SymbolBase.OCR_OPTION_HALF_AND_HALF: |
|
631 |
realTextSp = (symbolSp[0]+inSqX+tsx, symbolSp[1]+((h//2) - tsy + inSqY)) |
|
632 |
else: |
|
633 |
realTextSp = (symbolSp[0]+inSqX+tsx, symbolSp[1]+tsy) |
|
634 |
#cv2.rectangle(src, realTextSp, (realTextSp[0] + tw, realTextSp[1] + th), (0, 255, 255), 3) |
|
635 |
removeText(srcGray, text, realTextSp[0], realTextSp[1], tw, th) |
|
636 |
if lastCoord == (-1, -1): |
|
637 |
tText.append(text) |
|
638 |
ftSp = (tsx, tsy) |
|
639 |
else: |
|
640 |
COORD_ADJUSTMENT = 15 |
|
641 |
if (abs(lastCoord[1] - tsy) <= COORD_ADJUSTMENT and lastCoord[0] >= tsx - COORD_ADJUSTMENT and lastCoord[0] <= tsx + COORD_ADJUSTMENT) or (abs(lastCoord[0] - tsx) <= COORD_ADJUSTMENT and lastCoord[1] >= tsy - COORD_ADJUSTMENT and lastCoord[1] <= tsy + COORD_ADJUSTMENT): |
|
642 |
tText.append(text) |
|
643 |
else: |
|
644 |
if symbolOcrOption == SymbolBase.OCR_OPTION_ALL_FIND or symbolOcrOption == SymbolBase.OCR_OPTION_HALF_AND_HALF: |
|
645 |
tText.append(',') |
|
646 |
tText.append(text) |
|
647 |
|
|
648 |
lastCoord = (tex, tsy) # Top-Right Coord |
|
649 |
|
|
650 |
realSp = (symbolSp[0]+inSqX+ftSp[0], symbolSp[1]+inSqY+ftSp[1]) |
|
651 |
realEp = (symbolSp[0]+inSqX+lastCoord[0], symbolSp[1]+inSqY+lastCoord[1]) |
|
652 |
resultText = ''.join(tText) |
|
653 |
cv2.putText(canvas, resultText, (realSp[0], realSp[1]+th), 2, 1.0, (0, 0, 0)) # cv2.FONT_HERSHEY_SIMPLEX |
|
654 |
|
|
655 |
#textInfoList.append(ti.TextInfo(resultText, str(realSp[0]), str(realSp[1]), str(realEp[0]), str(realEp[1]))) |
|
656 |
|
|
657 |
# text value in symbol object update |
|
658 |
index = [i for i, item in enumerate(searchedSymbolList) if item.getSp() == symbolSp] |
|
659 |
if len(index) > 0: |
|
660 |
searchedSymbolList[index[0]].setText(resultText) |
|
605 |
im = Image.fromarray(bitImg) |
|
606 |
sp = (0, 0) |
|
607 |
if symbolOcrOption == SymbolBase.OCR_OPTION_HALF_AND_HALF: |
|
608 |
sp = (symbolSp[0]+inSqX, symbolSp[1]+ inSqY) |
|
609 |
else: |
|
610 |
sp = (symbolSp[0]+inSqX, symbolSp[1] + inSqY) |
|
611 |
tList = TOCR.getTextInfoInSymbol(bitImg, sp) |
|
612 |
|
|
613 |
resultText = '' |
|
614 |
if tList is not None: |
|
615 |
for index in range(len(tList)): |
|
616 |
textInfo = tList[index] |
|
617 |
if index != 0: |
|
618 |
resultText = resultText + "," |
|
619 |
resultText = resultText + textInfo.getText() |
|
620 |
cv2.putText(canvas, textInfo.getText(), (textInfo.getX(), textInfo.getY()), 2, 1.0, (0, 0, 0)) # cv2.FONT_HERSHEY_SIMPLEX |
|
621 |
|
|
622 |
# text value in symbol object update |
|
623 |
index = [i for i, item in enumerate(searchedSymbolList) if item.getSp() == symbolSp] |
|
624 |
if len(index) > 0: |
|
625 |
searchedSymbolList[index[0]].setText(resultText) |
|
626 |
except Exception as ex: |
|
627 |
print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)) |
|
628 |
finally: |
|
661 | 629 |
threadLock.release() |
662 | 630 |
|
663 | 631 |
|
DTI_PID/DTI_PID/tesseract_ocr_module.py | ||
---|---|---|
26 | 26 |
pytesseract.pytesseract.tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe' |
27 | 27 |
tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe' |
28 | 28 |
|
29 |
DEFAULT_CONF = """ |
|
30 |
--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}<>:;+=?\\"\\' |
|
31 |
""" |
|
32 |
|
|
33 |
'''
    @brief      Get Text info in Symbol (Instrumentation, OPC, etc...)
                Runs Tesseract on the given image, keeps whitelisted characters that are
                large enough, and groups them into text lines.
    @author     Jeongwoo
    @date       2018.05.04
    @param      img         array accepted by PIL's Image.fromarray()
    @param      startPoint  (x, y) offset added to every reported line position
    @param      flag        kept for signature parity with the sibling OCR helpers; not read here
    @param      conf        Tesseract config string passed to pytesseract.image_to_boxes()
    @return     list of ti.TextInfo (one per detected line), or None when no text was found
'''
def getTextInfoInSymbol(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
    im = Image.fromarray(img)
    imgWidth = im.width
    imgHeight = im.height
    textInfoList = []

    WHITE_LIST_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-"
    MIN_TEXT_SIZE = 10

    boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')

    lastRT = (-1, -1)       # right-top corner of the last character on the current line
    tempText = ''           # characters collected for the current line
    charHeight = -1         # tallest character seen so far on the current line
    lastCharHeight = -1     # height of the current line *before* the character being examined
    lineSp = (-1, -1)       # left-top corner of the current line
    lineRect = None         # band from the line start to the right image edge

    # Split once up front instead of re-splitting the whole OCR output on every iteration.
    for data in boundaryOcrData.splitlines():
        sData = data.split(' ')
        # Empty or truncated rows (e.g. when Tesseract finds nothing) used to raise IndexError,
        # because ''.find('') == 0 let an empty row pass the whitelist test.
        if len(sData) < 5:
            continue

        char = sData[0]
        if not char or char not in WHITE_LIST_CHARS:
            continue

        try:
            # Box rows are "<char> <left> <bottom> <right> <top> <page>".
            left, bottom, right, top = (int(value) for value in sData[1:5])
        except ValueError:
            # Not a coordinate row; ignore it rather than aborting the whole symbol.
            continue

        csx = left
        csy = imgHeight - top   # box coordinates are measured from the bottom of the image
        cw = right - left
        ch = abs(bottom - top)

        # Drop specks that are too small in both dimensions.
        if cw < MIN_TEXT_SIZE and ch < MIN_TEXT_SIZE:
            continue

        charHeight = max(charHeight, ch)

        if lineRect is None:
            # Very first accepted character: open the first line.
            tempText = tempText + char
            lastRT = (csx + cw, csy)
            lineSp = (csx, csy)
            lastCharHeight = charHeight
            lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], charHeight)
            continue

        # NOTE(review): QRect takes (x, y, width, height); these arguments look like right/bottom
        # coordinates. Kept as in the original because changing it alters line grouping - confirm.
        currentRect = QRect(csx, csy, csx + cw, csy + ch)

        if lineRect.intersects(currentRect):
            # Same line: extend it.
            tempText = tempText + char
            lastRT = (csx + cw, min(lineSp[1], csy))
            lineSp = (lineSp[0], min(lineSp[1], csy))
            lastCharHeight = charHeight
            lineRect.setHeight(charHeight)
        else:
            # Save previous line
            prevLineText = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], lastCharHeight, 0)
            textInfoList.append(prevLineText)

            # Start new line
            tempText = char
            charHeight = ch
            # Reset together with charHeight; otherwise a one-character line would later be
            # saved with the height of the line before it.
            lastCharHeight = ch
            lastRT = (csx + cw, csy)
            lineSp = (csx, csy)
            lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], ch)

    # tempText is only empty when no character was accepted at all.
    if not tempText:
        return None

    # Flush the line that was still open when the OCR data ended.
    textInfo = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], charHeight, 0)
    if textInfo not in textInfoList:
        textInfoList.append(textInfo)

    return textInfoList
|
110 |
|
|
29 | 111 |
''' |
30 | 112 |
@brief Get info about each character |
31 | 113 |
@author Jeongwoo |
32 | 114 |
@date 2018.05.03 |
33 | 115 |
''' |
34 |
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = """--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}<>:;+=?\\"\\'"""):
|
|
116 |
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
|
35 | 117 |
im = Image.fromarray(img) |
36 | 118 |
imgWidth = im.width |
37 | 119 |
imgHeight = im.height |
... | ... | |
58 | 140 |
@history 2018.04.26 Jeongwoo Make TextInfo object with Calculated Coords (with BoundBox Coords) |
59 | 141 |
2018.04.30 Jeongwoo Add QRect.setHeight() in if-statement [(lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect)] |
60 | 142 |
''' |
61 |
def getTextInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = """--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}<>:;+=?\\"\\'"""):
|
|
143 |
def getTextInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
|
62 | 144 |
im = Image.fromarray(img) |
63 | 145 |
imgWidth = im.width |
64 | 146 |
imgHeight = im.height |
... | ... | |
131 | 213 |
return textInfoList |
132 | 214 |
|
133 | 215 |
|
134 |
def removeTextFromNpArray(img, flag = FLAG_IMAGE_TO_BOXES, conf = '-c tessedit_char_whitelist="-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" -psm 6'):
|
|
216 |
def removeTextFromNpArray(img, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
|
135 | 217 |
retImg = img.copy() |
136 | 218 |
|
137 | 219 |
for i in range(4): |
내보내기 Unified diff