개정판 0a40c9a5
issue #655: fix OCR language bug (pass the configured OCR language to TOCR.getTextInfo instead of the hard-coded/default 'eng')
DTI_PID/DTI_PID/OcrResultDialog.py | ||
---|---|---|
88 | 88 |
@history 2018.04.26 Jeongwoo Add Rectangle with modified Coords |
89 | 89 |
2018.06.20 Jeongwoo Remove test code |
90 | 90 |
2018.11.08 euisung add white char list check process on db |
91 |
2018.11.22 euisung OCR lang apply fixed |
|
91 | 92 |
''' |
92 | 93 |
def detectText(self): |
93 | 94 |
try: |
... | ... | |
140 | 141 |
docData = AppDocData.instance() |
141 | 142 |
whiteCharList = docData.getConfigs('Text Recognition', 'White Character List') |
142 | 143 |
if len(whiteCharList) is 0: |
143 |
self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y()))) |
|
144 |
self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), language=docData.OCRData)
|
|
144 | 145 |
else: |
145 |
self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), conf = whiteCharList[0].value) |
|
146 |
self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), language=docData.OCRData, conf = whiteCharList[0].value)
|
|
146 | 147 |
|
147 | 148 |
#self.textInfoList = TOCR.getTextInfo(img, (int(self.boundingBox.x()), int(self.boundingBox.y()))) |
148 | 149 |
if self.textInfoList is not None and len(self.textInfoList) > 0: |
DTI_PID/DTI_PID/TextDetector.py | ||
---|---|---|
275 | 275 |
if area.name == 'Unit': |
276 | 276 |
img = imgSrc[round(area.y):round(area.y+area.height), round(area.x):round(area.x+area.width)] |
277 | 277 |
if len(whiteCharList) is 0: |
278 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng')
|
|
278 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData)
|
|
279 | 279 |
else: |
280 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng', conf = whiteCharList[0].value)
|
|
280 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData, conf = whiteCharList[0].value)
|
|
281 | 281 |
if texts is not None and len(texts) > 0: |
282 | 282 |
appDocData.activeDrawing.setAttr('Unit', texts[0].getText()) |
283 | 283 |
self.otherTextInfoList.append([area.name, texts]) |
284 | 284 |
else: |
285 | 285 |
if area is not None and hasattr(area, 'img') and area.img is not None: |
286 | 286 |
if len(whiteCharList) is 0: |
287 |
texts = TOCR.getTextInfo(area.img, (area.x, area.y), language='eng')
|
|
287 |
texts = TOCR.getTextInfo(area.img, (area.x, area.y), language=appDocData.OCRData)
|
|
288 | 288 |
else: |
289 |
texts = TOCR.getTextInfo(area.img, (area.x, area.y), language='eng', conf=whiteCharList[0].value)
|
|
289 |
texts = TOCR.getTextInfo(area.img, (area.x, area.y), language=appDocData.OCRData, conf=whiteCharList[0].value)
|
|
290 | 290 |
self.otherTextInfoList.append([area.name, texts]) |
291 | 291 |
|
292 | 292 |
titleBlockProps = appDocData.getTitleBlockProperties() |
... | ... | |
295 | 295 |
area.parse(titleBlockProp[2]) |
296 | 296 |
img = imgSrc[round(area.y):round(area.y+area.height), round(area.x):round(area.x+area.width)] |
297 | 297 |
if len(whiteCharList) is 0: |
298 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng')
|
|
298 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData)
|
|
299 | 299 |
else: |
300 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng', conf=whiteCharList[0].value)
|
|
300 |
texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData, conf=whiteCharList[0].value)
|
|
301 | 301 |
self.titleBlockTextInfoList.append([area.name, texts]) |
302 | 302 |
|
303 | 303 |
if worker is not None: worker.updateProgress.emit(maxProgressValue, None) |
DTI_PID/DTI_PID/TrainingImageListDialog.py | ||
---|---|---|
600 | 600 |
if len(whiteCharList) is 0: |
601 | 601 |
boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF, lang=oCRLang) |
602 | 602 |
else: |
603 |
boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF[:40] + whiteCharList[0].value, lang=oCRLang)
|
|
603 |
boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF_COMM + whiteCharList[0].value, lang=oCRLang)
|
|
604 | 604 |
|
605 | 605 |
except Exception as ex: |
606 | 606 |
print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)) |
DTI_PID/DTI_PID/tesseract_ocr_module.py | ||
---|---|---|
39 | 39 |
DEFAULT_CONF = """ |
40 | 40 |
--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\" |
41 | 41 |
""" |
42 |
DEFAULT_CONF_COMM = "--psm 6 -c tessedit_char_whitelist=" |
|
42 | 43 |
|
43 | 44 |
def existTrainedData(): |
44 | 45 |
''' |
... | ... | |
70 | 71 |
""" |
71 | 72 |
def getTextInfo(img, startPoint, angle = 0, language='eng', flag = FLAG_IMAGE_TO_BOXES, conf = None): |
72 | 73 |
try: |
74 |
if img.shape is (0,0): |
|
75 |
return |
|
73 | 76 |
os.environ['TESSDATA_PREFIX'] = os.path.join(tesseract_path, 'tessdata') |
74 | 77 |
textInfoList = [] |
75 | 78 |
|
76 | 79 |
if conf == None: |
77 | 80 |
conf = DEFAULT_CONF |
78 | 81 |
else: |
79 |
conf = '--psm 6 -c tessedit_char_whitelist=' + conf
|
|
82 |
conf = DEFAULT_CONF_COMM + conf
|
|
80 | 83 |
|
81 | 84 |
docData = AppDocData.instance() |
82 | 85 |
oCRLang = language |
내보내기 Unified diff