Revision 594dfaa7
build issue #655: cmd 15
DTI_PID/DTI_PID/TrainingImageListDialog.py | ||
---|---|---|
16 | 16 |
set_unicharset_properties_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'set_unicharset_properties.exe') |
17 | 17 |
#langDataPath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'set_unicharset_properties.exe') |
18 | 18 |
shapeclustering_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'shapeclustering.exe') |
19 |
mftraining_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'mftraining.exe ')
|
|
20 |
cntraining_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'cntraining.exe ')
|
|
21 |
combine_tessdata_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'combine_tessdata.exe ')
|
|
19 |
mftraining_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'mftraining.exe') |
|
20 |
cntraining_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'cntraining.exe') |
|
21 |
combine_tessdata_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'combine_tessdata.exe') |
|
22 | 22 |
|
23 | 23 |
DEFAULT_CONF = """ |
24 | 24 |
--psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"\\' |
... | ... | |
277 | 277 |
print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)) |
278 | 278 |
|
279 | 279 |
try: |
280 |
# 실제 적용 데이터 생성 3단계진행
|
|
280 |
# 1111111111111111111111111111111
|
|
281 | 281 |
#trainCmd = tesseract_cmd + ' ' + trainingImgPath + ' ' + trainingBoxPath.replace('.box', '') + ' nobatch box.train' + ' >> output.log' |
282 |
#trainCmd = '\"' + tesseract_cmd + ' ' + trainingImgPath + ' ' + trainingBoxPath.replace('.box', '') + ' nobatch box.train &timeout 15' + '\"'
|
|
283 |
trainCmd = '\"' + tesseract_cmd + '\"' + ' \"' + trainingImgPath + '\" \"' + trainingBoxPath.replace('.box', '') + '\" nobatch box.train' + ' &timeout 15' |
|
282 |
trainCmd = '\"' + tesseract_cmd + '\" ' + trainingImgPath + ' ' + trainingBoxPath.replace('.box', '') + ' nobatch box.train &timeout 15'
|
|
283 |
#trainCmd = '\"' + tesseract_cmd + '\"' + ' \"' + trainingImgPath + '\" \"' + trainingBoxPath.replace('.box', '') + '\" nobatch box.train' + ' &timeout 15'
|
|
284 | 284 |
print(trainCmd) |
285 | 285 |
#subprocess.call(trainCmd, shell = True) |
286 | 286 |
os.system(trainCmd) |
287 | 287 |
|
288 |
# 222222222222222222222222222222 |
|
288 | 289 |
#unicharsetExtractorCmd = unicharset_extractor_cmd + ' ' + trainingBoxPath + " >> output.log" |
289 |
unicharsetExtractorCmd = '\"' + unicharset_extractor_cmd + '\"' + ' \"' + trainingBoxPath + '\" &timeout 15' |
|
290 |
unicharsetExtractorCmd = '\"' + unicharset_extractor_cmd + '\"' + ' ' + trainingBoxPath + ' &timeout 15' |
|
291 |
#unicharsetExtractorCmd = '\"' + unicharset_extractor_cmd + '\"' + ' \"' + trainingBoxPath + '\" &timeout 15' |
|
290 | 292 |
print(unicharsetExtractorCmd) |
291 | 293 |
##subprocess.call(unicharsetExtractorCmd, shell = True) |
292 | 294 |
os.system(unicharsetExtractorCmd) |
293 | 295 |
#QMessageBox.question(self, 'tesseract', trainCmd, QMessageBox.Yes, QMessageBox.Cancel) |
294 | 296 |
|
297 |
# 3333333333333333333333333333333 |
|
295 | 298 |
inputUnicharset = os.path.join(runningPath, 'unicharset') |
296 | 299 |
#outputUnicharset = os.path.join(project.getTrainingFilePath(), 'unicharset') |
297 |
scriptPath = '\" --script_dir=//langdata-master\"' |
|
298 |
setUnicharsetPropertiesCmd = '\"' + set_unicharset_properties_cmd + '\" -U \"' + inputUnicharset + '\" -O \"' + inputUnicharset + '\"' + scriptPath + ' &timeout 15' |
|
300 |
scriptPath = ' --script_dir=//langdata-master' |
|
301 |
setUnicharsetPropertiesCmd = '\"' + set_unicharset_properties_cmd + '\" -U ' + inputUnicharset + ' -O ' + inputUnicharset + scriptPath + ' &timeout 15' |
|
302 |
#setUnicharsetPropertiesCmd = '\"' + set_unicharset_properties_cmd + '\" -U \"' + inputUnicharset + '\" -O \"' + inputUnicharset + '\"' + scriptPath + ' &timeout 15' |
|
299 | 303 |
print(setUnicharsetPropertiesCmd) |
300 | 304 |
##subprocess.call(setUnicharsetPropertiesCmd, shell = True) |
301 | 305 |
os.system(setUnicharsetPropertiesCmd) |
302 | 306 |
|
307 |
# 44444444444444444444444444444444 |
|
303 | 308 |
fontProperty = os.path.join(project.getTrainingFilePath(), 'font_properties') |
304 | 309 |
fw = open(fontProperty, 'w', encoding='utf8') |
305 | 310 |
fw.write('seed 0 0 0 0 0') |
306 | 311 |
fw.close() |
307 | 312 |
|
313 |
# 5555555555555555555555555555555 |
|
308 | 314 |
trPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.tr') |
309 |
shapeclusteringCmd = '\"' + shapeclustering_cmd + '\" -F \"' + fontProperty + '\" -U \"' + inputUnicharset + '\" \"' + trPath + '\" &timeout 15' |
|
315 |
shapeclusteringCmd = '\"' + shapeclustering_cmd + '\" -F ' + fontProperty + ' -U ' + inputUnicharset + ' ' + trPath + ' &timeout 15' |
|
316 |
#shapeclusteringCmd = '\"' + shapeclustering_cmd + '\" -F \"' + fontProperty + '\" -U \"' + inputUnicharset + '\" \"' + trPath + '\" &timeout 15' |
|
310 | 317 |
##subprocess.call(shapeclusteringCmd, shell = True) |
311 | 318 |
print(shapeclusteringCmd) |
312 | 319 |
os.system(shapeclusteringCmd) |
313 | 320 |
|
321 |
# 66666666666666666666666666666 |
|
314 | 322 |
#outputLangUnicharset = os.path.join(project.getTrainingFilePath(), 'seed.unicharset') |
315 |
mftrainingCmd = '\"' + mftraining_cmd + '\" -F \"' + fontProperty + '\" -U \"' + inputUnicharset + '\" -O \"' + inputUnicharset + '\" \"' + trPath + '\" &timeout 15' |
|
323 |
mftrainingCmd = '\"' + mftraining_cmd + '\" -F ' + fontProperty + ' -U ' + inputUnicharset + ' -O ' + inputUnicharset + ' ' + trPath + ' &timeout 15' |
|
324 |
#mftrainingCmd = '\"' + mftraining_cmd + '\" -F \"' + fontProperty + '\" -U \"' + inputUnicharset + '\" -O \"' + inputUnicharset + '\" \"' + trPath + '\" &timeout 15' |
|
316 | 325 |
##subprocess.call(mftrainingCmd, shell = True) |
317 | 326 |
print(mftrainingCmd) |
318 | 327 |
os.system(mftrainingCmd) |
319 | 328 |
|
320 |
cntrainingCmd = '\"' + cntraining_cmd + '\" \"' + trPath + '\" &timeout 15' |
|
329 |
# 77777777777777777777777777777 |
|
330 |
cntrainingCmd = '\"' + cntraining_cmd + '\" ' + trPath + ' &timeout 15' |
|
331 |
#cntrainingCmd = '\"' + cntraining_cmd + '\" \"' + trPath + '\" &timeout 15' |
|
321 | 332 |
##subprocess.call(cntrainingCmd, shell = True) |
322 | 333 |
print(cntrainingCmd) |
323 | 334 |
os.system(cntrainingCmd) |
... | ... | |
338 | 349 |
os.rename(os.path.join(runningPath, 'pffmtable'), os.path.join(runningPath, oCRLang + '.pffmtable')) |
339 | 350 |
os.rename(os.path.join(runningPath, 'shapetable'), os.path.join(runningPath, oCRLang + '.shapetable')) |
340 | 351 |
os.rename(os.path.join(runningPath, 'unicharset'), os.path.join(runningPath, oCRLang + '.unicharset')) |
352 |
# 88888888888888888888888888 |
|
341 | 353 |
combineTessdataCmd = '\"' + combine_tessdata_cmd + '\" ' + oCRLang + '.' |
342 | 354 |
subprocess.call(combineTessdataCmd, shell = True) |
343 | 355 |
|
Export: Unified diff