38 |
38 |
--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"
|
39 |
39 |
"""
|
40 |
40 |
|
41 |
|
'''
|
42 |
|
@brief Get Text info in Symbol (Instrumentation, OPC, etc...)
|
43 |
|
@author Jeongwoo
|
44 |
|
@date 2018.05.04
|
45 |
|
@history 2018.05.09 Jeongwoo Check split text' length
|
46 |
|
2018.06.14 Jeongwoo Add try-except. If exception occurred, return None
|
47 |
|
2018.06.19 Jeongwoo Move text size check if-statement
|
48 |
|
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
|
49 |
|
2018.10.19 euisung verifing doesn't used
|
50 |
|
'''
|
51 |
|
# getTextInfoInSymbol: OCR the given image with pytesseract.image_to_boxes and
# merge the per-character boxes into per-line ti.TextInfo entries.
#   img        - image data; converted with Image.fromarray before OCR.
#   startPoint - (x, y) offset added to every reported text position.
#   flag       - accepted but never read inside this function.
#   conf       - passed through as the Tesseract `config` argument.
# Returns the list of ti.TextInfo objects, or None when the trailing `else`
# branch is taken or when any exception is raised (see the except clause).
# NOTE(review): indentation was lost in this listing, so which `if` the final
# `else: return None` belongs to cannot be determined from here - confirm
# against the real file before restructuring this function.
def getTextInfoInSymbol(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
52 |
|
try:
|
53 |
|
docData = AppDocData.instance()
|
54 |
|
configs = docData.getConfigs('Text Size', 'Min Text Size')
|
55 |
|
# NOTE(review): this function has no `self` parameter, so the fallback
# expression presumably raises NameError whenever the config does not hold
# exactly one entry (the except clause below would then turn that into a
# None result) - confirm; a plain default such as 30 looks intended.
minSize = int(configs[0].value) if 1 == len(configs) else self.ui.minTextSizeSpinBox.setValue(30)
|
56 |
|
configs = docData.getConfigs('Text Size', 'Max Text Size')
|
57 |
|
# NOTE(review): same `self` problem as for minSize; 60 looks like the intended default.
maxSize = int(configs[0].value) if 1 == len(configs) else self.ui.maxTextSizeSpinBox.setValue(60)
|
58 |
|
|
59 |
|
im = Image.fromarray(img)
|
60 |
|
imgWidth = im.width
|
61 |
|
imgHeight = im.height
|
62 |
|
textInfoList = []
|
63 |
|
|
64 |
|
# Only characters found in this string are grouped into text lines.
WHITE_LIST_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-"
|
65 |
|
# Assigned but not read anywhere in this function.
MIN_TEXT_SIZE = 10
|
66 |
|
|
67 |
|
boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')
|
68 |
|
# Incremented each time a finished line is appended to textInfoList.
textGroupIndex = 0
|
69 |
|
|
70 |
|
# State of the line currently being assembled; (-1, -1) / None mean "no line yet".
lastRT = (-1, -1)
|
71 |
|
tempText = ''
|
72 |
|
charWidth = -1
|
73 |
|
charHeight = -1
|
74 |
|
lineSp = (-1, -1)
|
75 |
|
lineRect = None
|
76 |
|
# The OCR output is re-split on every iteration; each entry is one text line of the result.
for index in range(len(boundaryOcrData.split('\n'))):
|
77 |
|
data = boundaryOcrData.split('\n')[index]
|
78 |
|
sData = data.split(' ')
|
79 |
|
# A usable entry has at least 5 space-separated fields: the character
# followed by four integer coordinates.
if len(sData) >= 5:
|
80 |
|
char = sData[0]
|
81 |
|
|
82 |
|
if WHITE_LIST_CHARS.find(char) >= 0:
|
83 |
|
# The two y fields are subtracted from the image height, i.e. converted to a
# top-left origin (presumably Tesseract reports them from the bottom edge - confirm).
csx = int(sData[1])
|
84 |
|
csy = imgHeight - int(sData[4])
|
85 |
|
cex = int(sData[3])
|
86 |
|
cey = imgHeight - int(sData[2])
|
87 |
|
cw = cex - csx
|
88 |
|
ch = abs(cey - csy)
|
89 |
|
|
90 |
|
charWidth = max(charWidth, cw)
|
91 |
|
charHeight = max(charHeight, ch)
|
92 |
|
currentRect = None
|
93 |
|
if lastRT != (-1, -1):
|
94 |
|
# NOTE(review): the 3rd/4th arguments are right/bottom coordinates; if QRect is
# Qt's (x, y, width, height) constructor this rectangle is larger than the
# character box - confirm intent.
currentRect = QRect(csx, csy, csx + cw, csy + ch)
|
95 |
|
# First accepted character: open the first line, spanning to the right image edge.
if lastRT == (-1, -1) and lineRect is None:
|
96 |
|
tempText = tempText + char
|
97 |
|
lastRT = (csx+cw, csy)
|
98 |
|
lineSp = (csx, csy)
|
99 |
|
lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], charHeight)
|
100 |
|
# Character overlaps the current line rectangle: append it and grow the line.
elif (lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect):
|
101 |
|
tempText = tempText + char
|
102 |
|
lastRT = (csx+cw, min(lineSp[1], csy))
|
103 |
|
lineSp = (lineSp[0], min(lineSp[1], csy))
|
104 |
|
lineHeight = max(max(lineRect.height(), abs(cey - lineSp[1])), abs((lineRect.y()+lineRect.height()) - lineSp[1]))
|
105 |
|
lineRect.setX(lineSp[0])
|
106 |
|
lineRect.setY(lineSp[1])
|
107 |
|
lineRect.setHeight(lineHeight)
|
108 |
|
# Otherwise the current line is closed (kept only when its height lies within
# [minSize, maxSize]) and a new line is started with this character.
else:
|
109 |
|
# Save previous line
|
110 |
|
if lineRect.height() >= minSize and lineRect.height() <= maxSize:
|
111 |
|
prevLineText = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], lineRect.height(), 0)
|
112 |
|
textInfoList.append(prevLineText)
|
113 |
|
textGroupIndex = textGroupIndex + 1
|
114 |
|
|
115 |
|
# Start new line
|
116 |
|
tempText = char
|
117 |
|
charWidth = cw
|
118 |
|
charHeight = ch
|
119 |
|
lastRT = (csx + cw, csy)
|
120 |
|
lineSp = (csx, csy)
|
121 |
|
lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], ch)
|
122 |
|
|
123 |
|
# Emit the line still pending in tempText, subject to the same height limits.
if not (len(textInfoList) > textGroupIndex) and (tempText is not None and len(tempText) > 0):
|
124 |
|
if lineRect.height() >= minSize and lineRect.height() <= maxSize:
|
125 |
|
textInfo = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], lineRect.height(), 0)
|
126 |
|
if textInfo not in textInfoList:
|
127 |
|
textInfoList.append(textInfo)
|
128 |
|
# NOTE(review): the owning `if` of this else is ambiguous in this listing (see header note).
else:
|
129 |
|
return None
|
130 |
|
|
131 |
|
return textInfoList
|
132 |
|
# Any exception is swallowed and reported to the caller as None.
except Exception as ex:
|
133 |
|
return None
|
134 |
|
|
135 |
|
'''
|
136 |
|
@brief Get info about each character
|
137 |
|
@author Jeongwoo
|
138 |
|
@date 2018.05.03
|
139 |
|
@history 2018.05.09 Jeongwoo Check split text' length
|
140 |
|
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch]
|
141 |
|
2018.10.19 euisung verifing doesn't used
|
142 |
|
'''
|
143 |
|
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
    '''
        @brief      Get one TextInfo per OCR character whose height is within the configured range
        @param      img         image data; converted with Image.fromarray before OCR
        @param      startPoint  (x, y) offset added to every reported character position
        @param      flag        unused; kept so existing callers keep working
        @param      conf        Tesseract config string handed to pytesseract.image_to_boxes
        @return     list of ti.TextInfo(char, x, y, width, height, 0); empty when nothing qualifies
    '''
    docData = AppDocData.instance()

    # The original fallback referenced `self`, which does not exist in this
    # module-level function (NameError). Use the evident defaults directly.
    configs = docData.getConfigs('Text Size', 'Min Text Size')
    minSize = int(configs[0].value) if 1 == len(configs) else 30
    configs = docData.getConfigs('Text Size', 'Max Text Size')
    maxSize = int(configs[0].value) if 1 == len(configs) else 60

    im = Image.fromarray(img)
    imgHeight = im.height
    textInfoList = []
    boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')

    # Split the OCR output once instead of on every loop iteration.
    for data in boundaryOcrData.split('\n'):
        sData = data.split(' ')
        # Skip entries that do not carry the character plus four coordinates.
        if len(sData) < 5:
            continue

        char = sData[0]
        # The y fields are flipped against the image height so that positions
        # are measured from the top of the image.
        csx = int(sData[1])
        csy = imgHeight - int(sData[4])
        cex = int(sData[3])
        cey = imgHeight - int(sData[2])
        cw = cex - csx
        ch = abs(cey - csy)

        # Drop characters whose height is outside [minSize, maxSize].
        if not (minSize <= ch <= maxSize):
            continue

        textInfoList.append(ti.TextInfo(char, startPoint[0]+csx, startPoint[1]+csy, cw, ch, 0))

    return textInfoList
|
174 |
|
|
175 |
|
|
176 |
41 |
"""
|
177 |
42 |
@history 2018.04.26 Jeongwoo Make TextInfo object with Calculated Coords (with BoundBox Coords)
|
178 |
43 |
2018.04.30 Jeongwoo Add QRect.setHeight() in if-statement [(lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect)]
|
... | ... | |
367 |
232 |
if ocrData:
|
368 |
233 |
print(ocrData)
|
369 |
234 |
|
370 |
|
return img
|
|
235 |
return img
|
|
236 |
|
|
237 |
'''
|
|
238 |
@brief Get Text info in Symbol (Instrumentation, OPC, etc...)
|
|
239 |
@author Jeongwoo
|
|
240 |
@date 2018.05.04
|
|
241 |
@history 2018.05.09 Jeongwoo Check split text' length
|
|
242 |
2018.06.14 Jeongwoo Add try-except. If exception occurred, return None
|
|
243 |
2018.06.19 Jeongwoo Move text size check if-statement
|
|
244 |
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
|
|
245 |
2018.10.19 euisung verifing doesn't used
|
|
246 |
'''
|
|
247 |
# getTextInfoInSymbol: OCR the given image with pytesseract.image_to_boxes and
# merge the per-character boxes into per-line ti.TextInfo entries.
#   img        - image data; converted with Image.fromarray before OCR.
#   startPoint - (x, y) offset added to every reported text position.
#   flag       - accepted but never read inside this function.
#   conf       - passed through as the Tesseract `config` argument.
# Returns the list of ti.TextInfo objects, or None when the trailing `else`
# branch is taken or when any exception is raised (see the except clause).
# NOTE(review): indentation was lost in this listing, so which `if` the final
# `else: return None` belongs to cannot be determined from here - confirm
# against the real file before restructuring this function.
def getTextInfoInSymbol(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|

248 |
try:
|

249 |
docData = AppDocData.instance()
|

250 |
configs = docData.getConfigs('Text Size', 'Min Text Size')
|

251 |
# NOTE(review): this function has no `self` parameter, so the fallback
# expression presumably raises NameError whenever the config does not hold
# exactly one entry (the except clause below would then turn that into a
# None result) - confirm; a plain default such as 30 looks intended.
minSize = int(configs[0].value) if 1 == len(configs) else self.ui.minTextSizeSpinBox.setValue(30)
|

252 |
configs = docData.getConfigs('Text Size', 'Max Text Size')
|

253 |
# NOTE(review): same `self` problem as for minSize; 60 looks like the intended default.
maxSize = int(configs[0].value) if 1 == len(configs) else self.ui.maxTextSizeSpinBox.setValue(60)
|

254 |
|

255 |
im = Image.fromarray(img)
|

256 |
imgWidth = im.width
|

257 |
imgHeight = im.height
|

258 |
textInfoList = []
|

259 |
|

260 |
# Only characters found in this string are grouped into text lines.
WHITE_LIST_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-"
|

261 |
# Assigned but not read anywhere in this function.
MIN_TEXT_SIZE = 10
|

262 |
|

263 |
boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')
|

264 |
# Incremented each time a finished line is appended to textInfoList.
textGroupIndex = 0
|

265 |
|

266 |
# State of the line currently being assembled; (-1, -1) / None mean "no line yet".
lastRT = (-1, -1)
|

267 |
tempText = ''
|

268 |
charWidth = -1
|

269 |
charHeight = -1
|

270 |
lineSp = (-1, -1)
|

271 |
lineRect = None
|

272 |
# The OCR output is re-split on every iteration; each entry is one text line of the result.
for index in range(len(boundaryOcrData.split('\n'))):
|

273 |
data = boundaryOcrData.split('\n')[index]
|

274 |
sData = data.split(' ')
|

275 |
# A usable entry has at least 5 space-separated fields: the character
# followed by four integer coordinates.
if len(sData) >= 5:
|

276 |
char = sData[0]
|

277 |
|

278 |
if WHITE_LIST_CHARS.find(char) >= 0:
|

279 |
# The two y fields are subtracted from the image height, i.e. converted to a
# top-left origin (presumably Tesseract reports them from the bottom edge - confirm).
csx = int(sData[1])
|

280 |
csy = imgHeight - int(sData[4])
|

281 |
cex = int(sData[3])
|

282 |
cey = imgHeight - int(sData[2])
|

283 |
cw = cex - csx
|

284 |
ch = abs(cey - csy)
|

285 |
|

286 |
charWidth = max(charWidth, cw)
|

287 |
charHeight = max(charHeight, ch)
|

288 |
currentRect = None
|

289 |
if lastRT != (-1, -1):
|

290 |
# NOTE(review): the 3rd/4th arguments are right/bottom coordinates; if QRect is
# Qt's (x, y, width, height) constructor this rectangle is larger than the
# character box - confirm intent.
currentRect = QRect(csx, csy, csx + cw, csy + ch)
|

291 |
# First accepted character: open the first line, spanning to the right image edge.
if lastRT == (-1, -1) and lineRect is None:
|

292 |
tempText = tempText + char
|

293 |
lastRT = (csx+cw, csy)
|

294 |
lineSp = (csx, csy)
|

295 |
lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], charHeight)
|

296 |
# Character overlaps the current line rectangle: append it and grow the line.
elif (lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect):
|

297 |
tempText = tempText + char
|

298 |
lastRT = (csx+cw, min(lineSp[1], csy))
|

299 |
lineSp = (lineSp[0], min(lineSp[1], csy))
|

300 |
lineHeight = max(max(lineRect.height(), abs(cey - lineSp[1])), abs((lineRect.y()+lineRect.height()) - lineSp[1]))
|

301 |
lineRect.setX(lineSp[0])
|

302 |
lineRect.setY(lineSp[1])
|

303 |
lineRect.setHeight(lineHeight)
|

304 |
# Otherwise the current line is closed (kept only when its height lies within
# [minSize, maxSize]) and a new line is started with this character.
else:
|

305 |
# Save previous line
|

306 |
if lineRect.height() >= minSize and lineRect.height() <= maxSize:
|

307 |
prevLineText = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], lineRect.height(), 0)
|

308 |
textInfoList.append(prevLineText)
|

309 |
textGroupIndex = textGroupIndex + 1
|

310 |
|

311 |
# Start new line
|

312 |
tempText = char
|

313 |
charWidth = cw
|

314 |
charHeight = ch
|

315 |
lastRT = (csx + cw, csy)
|

316 |
lineSp = (csx, csy)
|

317 |
lineRect = QRect(lineSp[0], lineSp[1], imgWidth - lineSp[0], ch)
|

318 |
|

319 |
# Emit the line still pending in tempText, subject to the same height limits.
if not (len(textInfoList) > textGroupIndex) and (tempText is not None and len(tempText) > 0):
|

320 |
if lineRect.height() >= minSize and lineRect.height() <= maxSize:
|

321 |
textInfo = ti.TextInfo(tempText, startPoint[0]+lineSp[0], startPoint[1]+lineSp[1], lastRT[0] - lineSp[0], lineRect.height(), 0)
|

322 |
if textInfo not in textInfoList:
|

323 |
textInfoList.append(textInfo)
|

324 |
# NOTE(review): the owning `if` of this else is ambiguous in this listing (see header note).
else:
|

325 |
return None
|

326 |
|

327 |
return textInfoList
|

328 |
# Any exception is swallowed and reported to the caller as None.
except Exception as ex:
|

329 |
return None
|
|
330 |
|
|
331 |
'''
|
|
332 |
@brief Get info about each character
|
|
333 |
@author Jeongwoo
|
|
334 |
@date 2018.05.03
|
|
335 |
@history 2018.05.09 Jeongwoo Check split text' length
|
|
336 |
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch]
|
|
337 |
2018.10.19 euisung verifing doesn't used
|
|
338 |
'''
|
|
339 |
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
    '''
        @brief      Get one TextInfo per OCR character whose height is within the configured range
        @param      img         image data; converted with Image.fromarray before OCR
        @param      startPoint  (x, y) offset added to every reported character position
        @param      flag        unused; kept so existing callers keep working
        @param      conf        Tesseract config string handed to pytesseract.image_to_boxes
        @return     list of ti.TextInfo(char, x, y, width, height, 0); empty when nothing qualifies
    '''
    docData = AppDocData.instance()

    # The original fallback referenced `self`, which does not exist in this
    # module-level function (NameError). Use the evident defaults directly.
    configs = docData.getConfigs('Text Size', 'Min Text Size')
    minSize = int(configs[0].value) if 1 == len(configs) else 30
    configs = docData.getConfigs('Text Size', 'Max Text Size')
    maxSize = int(configs[0].value) if 1 == len(configs) else 60

    im = Image.fromarray(img)
    imgHeight = im.height
    textInfoList = []
    boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')

    # Split the OCR output once instead of on every loop iteration.
    for data in boundaryOcrData.split('\n'):
        sData = data.split(' ')
        # Skip entries that do not carry the character plus four coordinates.
        if len(sData) < 5:
            continue

        char = sData[0]
        # The y fields are flipped against the image height so that positions
        # are measured from the top of the image.
        csx = int(sData[1])
        csy = imgHeight - int(sData[4])
        cex = int(sData[3])
        cey = imgHeight - int(sData[2])
        cw = cex - csx
        ch = abs(cey - csy)

        # Drop characters whose height is outside [minSize, maxSize].
        if not (minSize <= ch <= maxSize):
            continue

        textInfoList.append(ti.TextInfo(char, startPoint[0]+csx, startPoint[1]+csy, cw, ch, 0))

    return textInfoList
|