프로젝트

일반

사용자정보

개정판 aec5980d

IDaec5980d5511e636cfe332e4ca1a714b589d3925
상위 567c7c1c
하위 d38e9e65

함의성이 6년 이상 전에 추가함

build issue #655: moved Tesseract to ProgramData

차이점 보기:

DTI_PID/DTI_PID/TrainingImageListDialog.py
12 12
import TrainingImageList_UI
13 13
from TrainingEditorDialog import QTrainingEditorDialog
14 14

  
15
runningPath = os.getcwd()
16
###tesseractPath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tessdata')
17
tesseractPath = os.path.join('C:\\ProgramData\\Digital PID', 'Tesseract-OCR', 'tessdata')###
18
###pytesseract.pytesseract.tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe')
19
pytesseract.pytesseract.tesseract_cmd = os.path.join('C:\\ProgramData\\Digital PID', 'Tesseract-OCR', 'tesseract.exe')###
20
###tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe')
21
tesseract_cmd = os.path.join('C:\\ProgramData\\Digital PID', 'Tesseract-OCR', 'tesseract.exe')###
22
###unicharset_extractor_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'unicharset_extractor.exe')
23
unicharset_extractor_cmd = os.path.join('C:\\ProgramData\\Digital PID', 'Tesseract-OCR', 'unicharset_extractor.exe')###
24
set_unicharset_properties_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'set_unicharset_properties.exe')
15
dataPath = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID')
16
tesseractPath = os.path.join(dataPath, 'Tesseract-OCR', 'tessdata')
17
pytesseract.pytesseract.tesseract_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'tesseract.exe')
18
tesseract_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'tesseract.exe')
19
unicharset_extractor_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'unicharset_extractor.exe')
20
set_unicharset_properties_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'set_unicharset_properties.exe')
25 21
#langDataPath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'set_unicharset_properties.exe')
26
shapeclustering_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'shapeclustering.exe')
27
mftraining_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'mftraining.exe')
28
cntraining_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'cntraining.exe')
29
combine_tessdata_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'combine_tessdata.exe')
22
shapeclustering_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'shapeclustering.exe')
23
mftraining_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'mftraining.exe')
24
cntraining_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'cntraining.exe')
25
combine_tessdata_cmd = os.path.join(dataPath, 'Tesseract-OCR', 'combine_tessdata.exe')
30 26

  
31 27
DEFAULT_CONF = """
32 28
    --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"\\'
......
171 167
        from PIL import Image
172 168
        import math
173 169
        try:
174
            ###os.environ['TESSDATA_PREFIX'] = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR')
175
            os.environ['TESSDATA_PREFIX'] = os.path.join('C:\ProgramData\Digital PID', 'Tesseract-OCR')###
170
            os.environ['TESSDATA_PREFIX'] = os.path.join(dataPath, 'Tesseract-OCR')
176 171
            appDocData = AppDocData.instance()
177 172
            project = appDocData.getCurrentProject()
178 173
            self.oCRLang = appDocData.getCurrentProject().getName()
......
294 289
            return None
295 290

  
296 291
        try:
292
            originPath = os.getcwd()
293
            os.chdir(dataPath)
297 294
            # 1
298 295
            trainingImgPath = self.get_short_path_name(trainingImgPath)
299
            trainingBoxPathT = self.get_short_path_name(trainingBoxPath.replace('.box', ''))
296
            trainingBoxPathT = trainingImgPath.replace('.tif', '')
300 297
            trainCmd = '\"' + tesseract_cmd + '\" ' + trainingImgPath + ' ' + trainingBoxPathT + ' nobatch box.train'# &timeout 15'
301 298
            subprocess.call(trainCmd, shell = True)
302 299
        
......
304 301
            trainingBoxPathU = self.get_short_path_name(trainingBoxPath)
305 302
            unicharsetExtractorCmd = '\"' + unicharset_extractor_cmd + '\"' + ' ' + trainingBoxPathU# + ' &timeout 15'
306 303
            subprocess.call(unicharsetExtractorCmd, shell = True)
307
            '''
304
            
308 305
            # 3
309
            inputUnicharset = os.path.join(runningPath, 'unicharset')
306
            inputUnicharset = os.path.join(dataPath, 'unicharset')
310 307
            inputUnicharset = self.get_short_path_name(inputUnicharset)
311 308
            scriptPath = ' --script_dir=//langdata-master'
312 309
            setUnicharsetPropertiesCmd = '\"' + set_unicharset_properties_cmd + '\" -U ' + inputUnicharset + ' -O ' + inputUnicharset + scriptPath# + ' &timeout 15'
......
319 316
            fw.close()
320 317

  
321 318
            # 5
322
            trPath = os.path.join(project.getTrainingFilePath(), self.oCRLang + '.' + self.oCRLang + 'F.exp0.tr')
323
            trPath = self.get_short_path_name(trPath)
319
            trPath = trainingImgPath.replace('.tif', '.tr')
324 320
            fontProperty = self.get_short_path_name(fontProperty)
325 321
            shapeclusteringCmd = '\"' + shapeclustering_cmd + '\" -F ' + fontProperty + ' -U ' + inputUnicharset + ' ' + trPath# + ' &timeout 15'
326 322
            subprocess.call(shapeclusteringCmd, shell = True)
......
335 331

  
336 332
            self.deleteMidProcessFile()
337 333

  
338
            os.rename(os.path.join(runningPath, 'inttemp'), os.path.join(runningPath, self.oCRLang + '.inttemp'))
339
            os.rename(os.path.join(runningPath, 'normproto'), os.path.join(runningPath, self.oCRLang + '.normproto'))
340
            os.rename(os.path.join(runningPath, 'pffmtable'), os.path.join(runningPath, self.oCRLang + '.pffmtable'))
341
            os.rename(os.path.join(runningPath, 'shapetable'), os.path.join(runningPath, self.oCRLang + '.shapetable'))
342
            os.rename(os.path.join(runningPath, 'unicharset'), os.path.join(runningPath, self.oCRLang + '.unicharset'))
334
            os.rename(os.path.join(dataPath, 'inttemp'), os.path.join(dataPath, self.oCRLang + '.inttemp'))
335
            os.rename(os.path.join(dataPath, 'normproto'), os.path.join(dataPath, self.oCRLang + '.normproto'))
336
            os.rename(os.path.join(dataPath, 'pffmtable'), os.path.join(dataPath, self.oCRLang + '.pffmtable'))
337
            os.rename(os.path.join(dataPath, 'shapetable'), os.path.join(dataPath, self.oCRLang + '.shapetable'))
338
            os.rename(os.path.join(dataPath, 'unicharset'), os.path.join(dataPath, self.oCRLang + '.unicharset'))
343 339
            # 8
344 340
            combineTessdataCmd = '\"' + combine_tessdata_cmd + '\" ' + self.oCRLang + '.'
345 341
            subprocess.call(combineTessdataCmd, shell = True)
346 342

  
347 343
            if os.path.isfile(os.path.join(tesseractPath, self.oCRLang + '.traineddata')):
348 344
                os.remove(os.path.join(tesseractPath, self.oCRLang + '.traineddata'))
349
            os.rename(os.path.join(runningPath, self.oCRLang + '.traineddata'), os.path.join(tesseractPath, self.oCRLang + '.traineddata'))
345
            os.rename(os.path.join(dataPath, self.oCRLang + '.traineddata'), os.path.join(tesseractPath, self.oCRLang + '.traineddata'))
350 346

  
351 347
            self.deleteMidProcessFile()
352
            '''
348
            
353 349
        except Exception as ex:
354 350
            print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
355 351
            from App import App
......
359 355
            App.mainWnd().addMessage.emit(MessageType.Error, message)
360 356
        finally:
361 357
            self.deleteMidProcessFile()
358
            os.chdir(originPath)
362 359

  
363 360
    '''
364 361
        @brief      delete Mid Process File
......
367 364
    '''
368 365
    def deleteMidProcessFile(self):
369 366
        try:
370
            if os.path.isfile(os.path.join(runningPath, self.oCRLang + '.inttemp')):
371
                os.remove(os.path.join(runningPath, self.oCRLang + '.inttemp'))
372
            if os.path.isfile(os.path.join(runningPath, self.oCRLang + '.normproto')):
373
                os.remove(os.path.join(runningPath, self.oCRLang + '.normproto'))
374
            if os.path.isfile(os.path.join(runningPath, self.oCRLang + '.pffmtable')):
375
                os.remove(os.path.join(runningPath, self.oCRLang + '.pffmtable'))
376
            if os.path.isfile(os.path.join(runningPath, self.oCRLang + '.shapetable')):
377
                os.remove(os.path.join(runningPath, self.oCRLang + '.shapetable'))
378
            if os.path.isfile(os.path.join(runningPath, self.oCRLang + '.unicharset')):
379
                os.remove(os.path.join(runningPath, self.oCRLang + '.unicharset'))        
367
            if os.path.isfile(os.path.join(dataPath, self.oCRLang + '.inttemp')):
368
                os.remove(os.path.join(dataPath, self.oCRLang + '.inttemp'))
369
            if os.path.isfile(os.path.join(dataPath, self.oCRLang + '.normproto')):
370
                os.remove(os.path.join(dataPath, self.oCRLang + '.normproto'))
371
            if os.path.isfile(os.path.join(dataPath, self.oCRLang + '.pffmtable')):
372
                os.remove(os.path.join(dataPath, self.oCRLang + '.pffmtable'))
373
            if os.path.isfile(os.path.join(dataPath, self.oCRLang + '.shapetable')):
374
                os.remove(os.path.join(dataPath, self.oCRLang + '.shapetable'))
375
            if os.path.isfile(os.path.join(dataPath, self.oCRLang + '.unicharset')):
376
                os.remove(os.path.join(dataPath, self.oCRLang + '.unicharset'))        
380 377
        except Exception as ex:
381 378
            print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
382 379
            from App import App
DTI_PID/DTI_PID/tesseract_ocr_module.py
32 32
                humkyung 2018.08.13 set tesseract executable path to relative of this file path 
33 33
'''
34 34
#pytesseract.pytesseract.tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe')
35
pytesseract.pytesseract.tesseract_cmd = os.path.join('C:\ProgramData\Digital PID', 'Tesseract-OCR', 'tesseract.exe')
36
#tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe')
35
pytesseract.pytesseract.tesseract_cmd = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR', 'tesseract.exe')
37 36

  
38 37
DEFAULT_CONF = """
39 38
    --psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"
......
189 188
"""
190 189
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
191 190
    try:
192
        ###os.environ['TESSDATA_PREFIX'] = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR')
193
        os.environ['TESSDATA_PREFIX'] = os.path.join('C:\ProgramData\Digital PID', 'Tesseract-OCR')###
191
        os.environ['TESSDATA_PREFIX'] = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR')
194 192
        textInfoList = []
195 193

  
196 194
        docData = AppDocData.instance()

내보내기 Unified diff

클립보드 이미지 추가 (최대 크기: 500 MB)