개정판 eea44d8f
issue #663: fixed the routine that recognizes text from an image
Change-Id: I6360bae8fe30c0863a5fb0dfa15fee64f2970af3
DTI_PID/DTI_PID/TextDetector.py | ||
---|---|---|
39 | 39 |
try: |
40 | 40 |
tInfoList = self.getTextAreaInfo(img, offset[0], offset[1]) |
41 | 41 |
except Exception as ex: |
42 |
print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)) |
|
42 |
print('error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
|
|
43 | 43 |
|
44 | 44 |
return tInfoList |
45 | 45 |
|
... | ... | |
202 | 202 |
2018.11.08 euisung add white char list check process on db |
203 | 203 |
''' |
204 | 204 |
@staticmethod |
205 |
def recognizeTextFromImage(tInfo, imgOCR, offset, searchedSymbolList, worker, listWidget, maxProgressValue): |
|
205 |
def recognizeTextFromImage(tInfos, imgOCR, offset, searchedSymbolList, worker, listWidget, maxProgressValue):
|
|
206 | 206 |
import re |
207 | 207 |
res = [] |
208 | 208 |
|
209 |
appDocData = AppDocData.instance()
|
|
209 |
app_doc_data = AppDocData.instance()
|
|
210 | 210 |
|
211 | 211 |
try: |
212 |
x = tInfo.getX() - round(offset[0]) |
|
213 |
y = tInfo.getY() - round(offset[1]) |
|
214 |
img = imgOCR[y:y+tInfo.getH(), x:x+tInfo.getW()] |
|
215 |
|
|
216 |
# set angle 0 if symbol contains the text area is instrumentation |
|
217 |
category = None |
|
218 |
contains = [symbol for symbol in searchedSymbolList if symbol.contains(tInfo)] |
|
219 |
if contains: |
|
220 |
_type = contains[0].getType() |
|
221 |
category = appDocData.getSymbolCategoryByType(_type) |
|
222 |
if 'Instrumentation' == category: tInfo.setAngle(0) |
|
223 |
# up to here |
|
212 |
for tInfo in tInfos: |
|
213 |
x = tInfo.getX() - round(offset[0]) |
|
214 |
y = tInfo.getY() - round(offset[1]) |
|
215 |
img = imgOCR[y:y+tInfo.getH(), x:x+tInfo.getW()] |
|
216 |
|
|
217 |
# set angle 0 if symbol contains the text area is instrumentation |
|
218 |
category = None |
|
219 |
contains = [symbol for symbol in searchedSymbolList if symbol.contains(tInfo)] |
|
220 |
if contains: |
|
221 |
_type = contains[0].getType() |
|
222 |
category = app_doc_data.getSymbolCategoryByType(_type) |
|
223 |
if 'Instrumentation' == category: |
|
224 |
tInfo.setAngle(0) |
|
225 |
# up to here |
|
224 | 226 |
|
225 |
whiteCharList = appDocData.getConfigs('Text Recognition', 'White Character List')
|
|
226 |
if len(whiteCharList) is 0: |
|
227 |
resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), language=appDocData.OCRData)
|
|
228 |
else: |
|
229 |
resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), language=appDocData.OCRData, conf = whiteCharList[0].value)
|
|
227 |
whiteCharList = app_doc_data.getConfigs('Text Recognition', 'White Character List')
|
|
228 |
if len(whiteCharList) is 0:
|
|
229 |
resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), language=app_doc_data.OCRData)
|
|
230 |
else:
|
|
231 |
resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), language=app_doc_data.OCRData, conf=whiteCharList[0].value)
|
|
230 | 232 |
|
231 |
if resultTextInfo is not None and len(resultTextInfo) > 0: |
|
232 |
for result in resultTextInfo: |
|
233 |
result.setX(result.getX() + round(offset[0])) |
|
234 |
result.setY(result.getY() + round(offset[1])) |
|
235 |
if 'Instrumentation' == category: |
|
236 |
text = re.sub('[^a-zA-Z0-9]+', '', result.getText()) |
|
237 |
result.setText(text) |
|
238 |
res.extend(resultTextInfo) |
|
239 |
|
|
240 |
if listWidget is not None: |
|
241 |
item = QListWidgetItem('{},{},{} is recognized'.format(resultTextInfo[0].getX(), resultTextInfo[0].getY(), resultTextInfo[0].getText())) |
|
242 |
listWidget.addItem(item) |
|
243 |
else: |
|
244 |
pass |
|
233 |
if resultTextInfo is not None and len(resultTextInfo) > 0: |
|
234 |
for result in resultTextInfo: |
|
235 |
result.setX(result.getX() + round(offset[0])) |
|
236 |
result.setY(result.getY() + round(offset[1])) |
|
237 |
if 'Instrumentation' == category: |
|
238 |
text = re.sub('[^a-zA-Z0-9]+', '', result.getText()) |
|
239 |
result.setText(text) |
|
240 |
|
|
241 |
res.extend(resultTextInfo) |
|
245 | 242 |
|
246 |
if worker is not None: worker.updateProgress.emit(maxProgressValue, resultTextInfo[0].getText() if resultTextInfo is not None and 1 == len(resultTextInfo) else None) |
|
243 |
if listWidget is not None: |
|
244 |
item = QListWidgetItem('{},{},{} is recognized'.format(resultTextInfo[0].getX(), resultTextInfo[0].getY(), resultTextInfo[0].getText())) |
|
245 |
listWidget.addItem(item) |
|
246 |
else: |
|
247 |
pass |
|
248 |
|
|
249 |
if worker is not None: |
|
250 |
worker.updateProgress.emit(maxProgressValue, resultTextInfo[0].getText() if resultTextInfo is not None and 1 == len(resultTextInfo) else None) |
|
247 | 251 |
except Exception as ex: |
248 |
message = 'error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno) |
|
249 |
worker.displayLog.emit(MessageType.Error, message) |
|
252 |
message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno) |
|
253 |
if worker is not None: |
|
254 |
worker.displayLog.emit(MessageType.Error, message) |
|
250 | 255 |
|
251 | 256 |
return res |
252 | 257 |
|
... | ... | |
267 | 272 |
''' |
268 | 273 |
def recognizeText(self, imgSrc, offset, tInfoList, searchedSymbolList, worker, listWidget, maxProgressValue, onlyTextArea = False): |
269 | 274 |
import concurrent.futures as futures |
275 |
from multiprocessing import Process, Queue |
|
270 | 276 |
from Area import Area |
271 | 277 |
|
272 | 278 |
try: |
... | ... | |
282 | 288 |
imgOCR = cv2.imread(path, 1) |
283 | 289 |
imgOCR = cv2.threshold(cv2.cvtColor(imgOCR, cv2.COLOR_BGR2GRAY), 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] |
284 | 290 |
|
285 |
pool = futures.ThreadPoolExecutor(max_workers = THREAD_MAX_WORKER) |
|
286 |
for tInfo in tInfoList: |
|
287 |
future = pool.submit(TextDetector.recognizeTextFromImage, tInfo, imgOCR, offset, searchedSymbolList, worker, listWidget, maxProgressValue) |
|
291 |
text_info_array = np.array_split(tInfoList, THREAD_MAX_WORKER) |
|
292 |
pool = futures.ThreadPoolExecutor(max_workers=THREAD_MAX_WORKER) |
|
293 |
for tInfo in text_info_array: |
|
294 |
future = pool.submit(TextDetector.recognizeTextFromImage, tInfo, imgOCR, offset, searchedSymbolList, |
|
295 |
worker, listWidget, maxProgressValue) |
|
288 | 296 |
data = future.result() |
289 |
if data: self.textInfoList.extend(data) |
|
290 |
pool.shutdown(wait = True) |
|
297 |
if data: |
|
298 |
self.textInfoList.extend(data) |
|
299 |
pool.shutdown(wait=True) |
|
291 | 300 |
|
292 |
## remove text item has only 1 character
|
|
293 |
#for index in range(len(self.textInfoList) - 1, -1, -1): |
|
301 |
# remove text item has only 1 character |
|
302 |
# for index in range(len(self.textInfoList) - 1, -1, -1):
|
|
294 | 303 |
# if len(self.textInfoList[index].getText()) is 1: |
295 | 304 |
# self.textInfoList.pop(index) |
296 | 305 |
|
... | ... | |
331 | 340 |
|
332 | 341 |
if worker is not None: worker.updateProgress.emit(maxProgressValue, None) |
333 | 342 |
except Exception as ex: |
334 |
message = 'error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)
|
|
343 |
message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)
|
|
335 | 344 |
worker.displayLog.emit(MessageType.Error, message) |
336 | 345 |
|
337 | 346 |
''' |
내보내기 Unified diff