프로젝트

일반

사용자정보

개정판 0a40c9a5

ID: 0a40c9a5b60260de058dc016f0dfe08f728aea15
상위 5ef4861f
하위 dd359647

함의성이 약 6년 전에 추가함

issue #655: ocr lang bug fix

차이점 보기:

DTI_PID/DTI_PID/OcrResultDialog.py
88 88
        @history 2018.04.26 Jeongwoo    Add Rectangle with modified Coords
89 89
                 2018.06.20 Jeongwoo    Remove test code
90 90
                 2018.11.08 euisung     add white char list check process on db
91
                 2018.11.22 euisung     OCR lang apply fixed
91 92
    '''
92 93
    def detectText(self):
93 94
        try:
......
140 141
            docData = AppDocData.instance()
141 142
            whiteCharList = docData.getConfigs('Text Recognition', 'White Character List')
142 143
            if len(whiteCharList) is 0:
143
                self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())))
144
                self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), language=docData.OCRData)
144 145
            else:
145
                self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), conf = whiteCharList[0].value)
146
                self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), language=docData.OCRData, conf = whiteCharList[0].value)
146 147

  
147 148
            #self.textInfoList = TOCR.getTextInfo(img, (int(self.boundingBox.x()), int(self.boundingBox.y())))
148 149
            if self.textInfoList is not None and len(self.textInfoList) > 0:
DTI_PID/DTI_PID/TextDetector.py
275 275
                if area.name == 'Unit':
276 276
                    img = imgSrc[round(area.y):round(area.y+area.height), round(area.x):round(area.x+area.width)]
277 277
                    if len(whiteCharList) is 0:
278
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng')
278
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData)
279 279
                    else:
280
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng', conf = whiteCharList[0].value)
280
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData, conf = whiteCharList[0].value)
281 281
                    if texts is not None and len(texts) > 0:
282 282
                        appDocData.activeDrawing.setAttr('Unit', texts[0].getText())
283 283
                        self.otherTextInfoList.append([area.name, texts])
284 284
                else:
285 285
                    if area is not None and hasattr(area, 'img') and area.img is not None:
286 286
                        if len(whiteCharList) is 0:
287
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), language='eng')
287
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), language=appDocData.OCRData)
288 288
                        else:
289
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), language='eng', conf=whiteCharList[0].value)
289
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), language=appDocData.OCRData, conf=whiteCharList[0].value)
290 290
                        self.otherTextInfoList.append([area.name, texts])
291 291

  
292 292
            titleBlockProps = appDocData.getTitleBlockProperties()
......
295 295
                area.parse(titleBlockProp[2])
296 296
                img = imgSrc[round(area.y):round(area.y+area.height), round(area.x):round(area.x+area.width)]
297 297
                if len(whiteCharList) is 0:
298
                    texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng')
298
                    texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData)
299 299
                else:
300
                    texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language='eng', conf=whiteCharList[0].value)
300
                    texts = TOCR.getTextInfo(img, (area.x, area.y), 0, language=appDocData.OCRData, conf=whiteCharList[0].value)
301 301
                self.titleBlockTextInfoList.append([area.name, texts])
302 302

  
303 303
            if worker is not None: worker.updateProgress.emit(maxProgressValue, None)
DTI_PID/DTI_PID/TrainingImageListDialog.py
600 600
                if len(whiteCharList) is 0:
601 601
                    boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF, lang=oCRLang)
602 602
                else:
603
                    boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF[:40] + whiteCharList[0].value, lang=oCRLang)
603
                    boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF_COMM + whiteCharList[0].value, lang=oCRLang)
604 604

  
605 605
        except Exception as ex:
606 606
            print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
DTI_PID/DTI_PID/tesseract_ocr_module.py
39 39
DEFAULT_CONF = """
40 40
    --psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"
41 41
"""
42
DEFAULT_CONF_COMM = "--psm 6 -c tessedit_char_whitelist="
42 43

  
43 44
def existTrainedData():
44 45
    '''
......
70 71
"""
71 72
def getTextInfo(img, startPoint, angle = 0, language='eng', flag = FLAG_IMAGE_TO_BOXES, conf = None):
72 73
    try:
74
        if img.shape is (0,0):
75
            return
73 76
        os.environ['TESSDATA_PREFIX'] = os.path.join(tesseract_path, 'tessdata')
74 77
        textInfoList = []
75 78

  
76 79
        if conf == None:
77 80
            conf = DEFAULT_CONF
78 81
        else:
79
            conf = '--psm 6 -c tessedit_char_whitelist=' + conf
82
            conf = DEFAULT_CONF_COMM + conf
80 83

  
81 84
        docData = AppDocData.instance()
82 85
        oCRLang = language

내보내기 Unified diff

클립보드 이미지 추가 (최대 크기: 500 MB)