개정판 7e6981ae
issue #655: box area and syntax fixed
DTI_PID/DTI_PID/TrainingImageListDialog.py | ||
---|---|---|
494 | 494 |
finally: |
495 | 495 |
self.deleteMidProcessFile() |
496 | 496 |
os.chdir(originPath) |
497 |
|
|
498 |
''' |
|
499 |
def isNoisable(self, char): |
|
500 |
''' |
|
501 |
@brief return True if char need noise reduce process |
|
502 |
@author euisung |
|
503 |
@date 2018.11.20 |
|
504 |
''' |
|
505 |
for passChar in noisePassList: |
|
506 |
if char == passChar: |
|
507 |
return False |
|
508 |
return True |
|
509 |
''' |
|
497 |
|
|
498 |
#def isNoisable(self, char): |
|
499 |
# ''' |
|
500 |
# @brief return True if char need noise reduce process |
|
501 |
# @author euisung |
|
502 |
# @date 2018.11.20 |
|
503 |
# ''' |
|
504 |
# for passChar in noisePassList: |
|
505 |
# if char == passChar: |
|
506 |
# return False |
|
507 |
# return True |
|
510 | 508 |
|
511 | 509 |
def makeChart(self): |
512 | 510 |
''' |
... | ... | |
599 | 597 |
docData = AppDocData.instance() |
600 | 598 |
oCRLang = docData.getCurrentProject().getName() if TOCR.existTrainedData() else 'eng' |
601 | 599 |
whiteCharList = docData.getConfigs('Text Recognition', 'White Character List') |
602 |
if len(whiteCharList.charList) is 0:
|
|
600 |
if len(whiteCharList) is 0: |
|
603 | 601 |
boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF, lang=oCRLang) |
604 | 602 |
else: |
605 |
boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF[:40] + whiteCharList.charList[0].value, lang=oCRLang)
|
|
603 |
boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF[:40] + whiteCharList[0].value, lang=oCRLang) |
|
606 | 604 |
|
607 | 605 |
except Exception as ex: |
608 | 606 |
print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)) |
DTI_PID/DTI_PID/tesseract_ocr_module.py | ||
---|---|---|
66 | 66 |
2018.10.19 euisung OCR lang change depend on project name |
67 | 67 |
2018.10.22 euisung system environment variable 'TESSDATA_PREFIX' change to ProgramData |
68 | 68 |
2018.11.08 euisung add config for OCR white char list |
69 |
2018.11.21 euisung fix area moving bug |
|
69 | 70 |
""" |
70 | 71 |
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = None): |
71 | 72 |
try: |
... | ... | |
122 | 123 |
|
123 | 124 |
for rect in merged_boxes: |
124 | 125 |
if not rect.isValid() or rect.left() < 0 or rect.top() < 0 or rect.right() > imgWidth or rect.bottom() > imgHeight: continue |
126 |
#left = rect.left() - 2 if rect.left() - 2 >= 0 else rect.left() |
|
127 |
#top = rect.top() - 2 if rect.top() - 2 >= 0 else rect.top() |
|
128 |
#width = rect.width() + 4 if rect.width() + 4 <= im.size[0] else rect.width() |
|
129 |
#height = rect.height() + 4 if rect.height() + 4 <= im.size[1] else rect.height() |
|
130 |
#text_rect = QRect(left, top, width, height) |
|
125 | 131 |
cropped = im.crop((rect.left(), imgHeight - rect.bottom(), rect.right(), imgHeight - rect.top())) |
126 | 132 |
text = pytesseract.image_to_string(cropped, config=conf, lang=oCRLang) |
127 | 133 |
|
128 | 134 |
if rect.height() >= minSize and rect.height() <= maxSize: |
129 |
left = rect.left() - 2 if rect.left() - 2 >= 0 else rect.left() |
|
130 |
top = imgHeight - rect.bottom() - 2 if imgHeight - rect.bottom() - 2 >= 0 else imgHeight - rect.bottom() |
|
131 |
width = rect.width() + 4 if rect.width() + 4 <= im.size[0] else rect.width() |
|
132 |
height = rect.height() + 4 if rect.height() + 4 <= im.size[1] else rect.height() |
|
133 |
text_rect = QRect(left, top, width, height) |
|
135 |
text_rect = QRect(rect.left(), imgHeight - rect.bottom(), rect.width(), rect.height()) |
|
134 | 136 |
if angle == 90 or angle == 270: |
135 | 137 |
transform = QTransform() |
136 | 138 |
transform.translate(imgHeight*0.5, imgWidth*0.5) |
내보내기 Unified diff