프로젝트

일반

사용자정보

개정판 e92b19bf

IDe92b19bf7262fe69c45be437ddd21cb08341c73a
상위 51a72882
하위 0c282cc5

함의성이(가) 약 6년 전에 추가함

issue #655: white char

차이점 보기:

DTI_PID/DTI_PID/ConfigurationDialog.py
13 13
from AppDocData import Config
14 14
from AppDocData import Color
15 15
import Configuration_UI
16
import tesseract_ocr_module as TOCR
16 17

  
17 18
class ListView(QListView):
18 19
    def __init__(self, *args, **kwargs):
......
60 61
        self.ui.spinBoxShrinkSize.setValue(int(configs[0].value)) if 1 == len(configs) else self.ui.spinBoxShrinkSize.setValue(0)
61 62
        configs = docData.getConfigs('Text Recognition', 'Merge Size')
62 63
        self.ui.spinBoxMergeSize.setValue(int(configs[0].value)) if 1 == len(configs) else self.ui.spinBoxMergeSize.setValue(10)
64
        configs = docData.getConfigs('Text Recognition', 'White Character List')
65
        self.ui.lineEditWhiteCharList.setText(configs[0].value) if 1 == len(configs) else self.ui.lineEditWhiteCharList.setText(TOCR.DEFAULT_CONF[40:])
63 66

  
64 67
        configs = docData.getConfigs('Text Size', 'Min Text Size')
65 68
        self.ui.minTextSizeSpinBox.setValue(int(configs[0].value)) if 1 == len(configs) else self.ui.minTextSizeSpinBox.setValue(30)
......
473 476
            configs.append(Config('Note No Tag Rule', 'Note No Expression', self.ui.lineEditNoteNoExpression.text()))
474 477
            configs.append(Config('OPC Tag No Rule', 'Description', self.ui.lineEditDescription.text()))
475 478
            configs.append(Config('OPC Tag No Rule', 'OPC Tag', self.ui.lineEditOPCTag.text()))
479
            configs.append(Config('Text Recognition', 'White Character List', self.ui.lineEditWhiteCharList.text()))
476 480
            
477 481
            # Add Line Color Option - 2018.07.06 by kyouho
478 482
            rbRandomValue = self.ui.radioButtonRandom.isChecked()
DTI_PID/DTI_PID/MainWindow.py
257 257
            files = appDocData.getDrawingFileList()
258 258
            for file in files:
259 259
                drawing = [drawing for drawing in drawings if drawing[1] == file]
260
                if not drawing[0]:
260
                if 0 == len(drawing) or not drawing[0]:
261 261
                    drawings.append([None, file, None])
262 262

  
263
                item = QTreeWidgetItem(self.treeWidgetDrawingList.root, [file, drawing[0][2] if drawing[0] else ''])
263
                item = QTreeWidgetItem(self.treeWidgetDrawingList.root, [file, drawing[0][2] if drawing and drawing[0] else ''])
264 264
                item.setIcon(0, QIcon(':newPrefix/image.png'))
265 265
            
266 266
            self.treeWidgetDrawingList.root.setText(0, 'P&ID Drawings({})'.format(self.treeWidgetDrawingList.root.childCount()))
DTI_PID/DTI_PID/OcrResultDialog.py
85 85
        self.graphicsView.setImage(self.image)
86 86

  
87 87
    '''
88
        @history 2018.04.26 Jeongwoo Add Rectangle with modified Coords
89
                 2018.06.20 Jeongwoo Remove test code
88
        @history 2018.04.26 Jeongwoo    Add Rectangle with modified Coords
89
                 2018.06.20 Jeongwoo    Remove test code
90
                 2018.11.08 euisung     add white char list check process on db
90 91
    '''
91 92
    def detectText(self):
92 93
        try:
......
136 137
            #cv2.destroyAllWindows()
137 138

  
138 139
            # up to here
139
            self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())))
140
            docData = AppDocData.instance()
141
            whiteCharList = docData.getConfigs('Text Recognition', 'White Character List')
142
            if len(whiteCharList) is 0:
143
                self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())))
144
            else:
145
                self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), conf = whiteCharList[0].value)
140 146

  
141 147
            #self.textInfoList = TOCR.getTextInfo(img, (int(self.boundingBox.x()), int(self.boundingBox.y())))
142 148
            if self.textInfoList is not None and len(self.textInfoList) > 0:
DTI_PID/DTI_PID/TextDetector.py
163 163
        @author     humkyung
164 164
        @date       2018.07.24
165 165
        @history    change parameter updateProgressSignal to worker
166
                    2018.11.08 euisung     add white char list check process on db
166 167
    '''
167 168
    @staticmethod
168 169
    def recognizeTextFromImage(tInfo, imgOCR, offset, searchedSymbolList, worker, listWidget, maxProgressValue):
......
185 186
                if 'Instrumentation' == category: tInfo.setAngle(0)
186 187
            # up to here
187 188

  
188
            resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle())
189
            whiteCharList = appDocData.getConfigs('Text Recognition', 'White Character List')
190
            if len(whiteCharList) is 0:
191
                resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle())
192
            else:
193
                resultTextInfo = TOCR.getTextInfo(img, (x, y), tInfo.getAngle(), conf = whiteCharList[0].value)
194

  
189 195
            if resultTextInfo is not None and len(resultTextInfo) > 0:
190 196
                for result in resultTextInfo:
191 197
                    result.setX(result.getX() + round(offset[0]))
......
243 249
            if onlyTextArea:
244 250
                return
245 251
            # parse texts in area except Drawing area
252
            whiteCharList = appDocData.getConfigs('Text Recognition', 'White Character List')
246 253
            for area in appDocData.getAreaList():
247 254
                if area.name == 'Drawing': continue
248 255

  
249 256
                if area.name == 'Unit':
250 257
                    img = imgSrc[round(area.y):round(area.y+area.height), round(area.x):round(area.x+area.width)]
251
                    texts = TOCR.getTextInfo(img, (area.x, area.y), 0)
258
                    if len(whiteCharList) is 0:
259
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0)
260
                    else:
261
                        texts = TOCR.getTextInfo(img, (area.x, area.y), 0, conf = whiteCharList[0].value)
252 262
                    if texts is not None and len(texts) > 0:
253 263
                        appDocData.activeDrawing.setAttr('Unit', texts[0].getText())
254 264
                        self.otherTextInfoList.append([area.name, texts])
255 265
                else:
256 266
                    if area is not None and hasattr(area, 'img') and area.img is not None:
257
                        texts = TOCR.getTextInfo(area.img, (area.x, area.y))
267
                        if len(whiteCharList) is 0:
268
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y))
269
                        else:
270
                            texts = TOCR.getTextInfo(area.img, (area.x, area.y), conf = whiteCharList[0].value)
258 271
                        self.otherTextInfoList.append([area.name, texts])
259 272

  
260 273
            if worker is not None: worker.updateProgress.emit(maxProgressValue, None)
......
292 305
    '''
293 306
    def recognizeTextInArea(self, imgSrc, area, angle=0):
294 307
        try:
308
            appDocData = AppDocData.instance()
309
            whiteCharList = appDocData.getConfigs('Text Recognition', 'White Character List')
295 310
            img = imgSrc[round(area[1]):round(area[1] + area[3]), round(area[0]):round(area[0] + area[2])]
296
            return TOCR.getTextInfo(img, (area[0], area[1]))
311
            if len(whiteCharList) is 0:
312
                return TOCR.getTextInfo(img, (area[0], area[1]))
313
            else:
314
                return TOCR.getTextInfo(img, (area[0], area[1]), conf = whiteCharList[0].value)
297 315
        except Exception as ex:
298 316
            print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
299 317

  
DTI_PID/DTI_PID/TrainingImageListDialog.py
11 11
import pytesseract
12 12
import TrainingImageList_UI
13 13
from TrainingEditorDialog import QTrainingEditorDialog
14
import tesseract_ocr_module as TOCR
14 15

  
15 16
dataPath = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID')
16 17
tesseractPath = os.path.join(dataPath, 'Tesseract-OCR', 'tessdata')
......
24 25
cntraining_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'cntraining.exe')
25 26
combine_tessdata_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'combine_tessdata.exe')
26 27

  
27
DEFAULT_CONF = """
28
    --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"\\'
29
"""
30

  
31 28
class QTrainingImageListDialog(QDialog):
32 29
    trainingDataNumber = 0
33 30

  
......
438 435
            trainingBoxPath = os.path.join(project.getTrainingFilePath(), boxName)
439 436
            boundaryOcrData = None
440 437
            if not isBoxFile:
441
                boundaryOcrData = pytesseract.image_to_boxes(drawing, config=DEFAULT_CONF, lang='seed+eng')
438
                docData = AppDocData.instance()
439
                oCRLang = docData.getCurrentProject().getName() if TOCR.isTrainedData() else 'eng'
440
                whiteCharList = docData.getConfigs('Text Recognition', 'White Character List')
441
                if len(whiteCharList) is 0:
442
                    boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF, lang=oCRLang)
443
                else:
444
                    boundaryOcrData = pytesseract.image_to_boxes(drawing, config=TOCR.DEFAULT_CONF[:40] + whiteCharList[0].value, lang=oCRLang)
442 445

  
443 446
        except Exception as ex:
444 447
            print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
DTI_PID/DTI_PID/tesseract_ocr_module.py
29 29
## Tesseract path
30 30
'''
31 31
    @history    Jeongwoo 2018.06.14 Tesseract path changed
32
                humkyung 2018.08.13 set tesseract executable path to relative of this file path 
32
                humkyung 2018.08.13 set tesseract executable path to relative of this file path
33
                euisung             set tesseract executable path to ProgramData
33 34
'''
34 35
#pytesseract.pytesseract.tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe')
35 36
pytesseract.pytesseract.tesseract_cmd = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR', 'tesseract.exe')
36 37

  
38
tesseract_path = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR')
37 39
DEFAULT_CONF = """
38 40
    --psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"
39 41
"""
40 42

  
43
def isTrainedData():
44
    '''
45
        @brief      check whether the project's trained data file exists
46
        @author     euisung
47
        @date       2018.11.08
48
    '''
49
    docData = AppDocData.instance()
50
    prj_trained_data = os.path.join(tesseract_path, 'tessdata', docData.getCurrentProject().getName()+'.traineddata')
51
    if os.path.isfile(prj_trained_data):
52
        return True
53
    else:
54
        return False
55

  
41 56
"""
42 57
    @history    2018.04.26  Jeongwoo    Make TextInfo object with Calculated Coords (with BoundBox Coords)
43 58
                2018.04.30  Jeongwoo    Add QRect.setHeight() in if-statement [(lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect)]
......
49 64
                2018.06.20  Jeongwoo    Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
50 65
                humkyung 2018.10.12 change logic to extract text: first get the bounding boxes, then extract the characters
51 66
                2018.10.19  euisung     OCR lang change depend on project name 
52
                2018.10.22  euisung     system environment variable 'TESSDATA_PREFIX' change depend on project path
67
                2018.10.22  euisung     system environment variable 'TESSDATA_PREFIX' change to ProgramData
68
                2018.11.08  euisung     add config for OCR white char list
53 69
"""
54
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
70
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = None):
55 71
    try:
56
        tesseract_path = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR')
57 72
        os.environ['TESSDATA_PREFIX'] = tesseract_path
58 73
        textInfoList = []
59 74

  
75
        if conf == None:
76
            conf = DEFAULT_CONF
77
        else:
78
            conf = DEFAULT_CONF[:40] + conf
79

  
60 80
        docData = AppDocData.instance()
61
        prj_trained_data = os.path.join(tesseract_path, 'tessdata', docData.getCurrentProject().getName()+'.traineddata')
62
        oCRLang = docData.getCurrentProject().getName() if os.path.isfile(prj_trained_data) else 'eng'
63
        #oCRLang = 'eng'
81
        oCRLang = docData.getCurrentProject().getName() if isTrainedData() else 'eng'
82

  
64 83
        configs = docData.getConfigs('Text Size', 'Min Text Size')
65 84
        minSize = int(configs[0].value) if 1 == len(configs) else 30
66 85
        configs = docData.getConfigs('Text Size', 'Max Text Size')

내보내기 Unified diff

클립보드 이미지 추가 (최대 크기: 500 MB)