프로젝트

일반

사용자정보

개정판 69908833

ID69908833eb0443d46a42e31ce2e4295cab8770a3
상위 e1c48a1b
하위 053e0f35

함의성이(가) 6년 이상 전에 추가함

issue #655: OCR language now depends on the project name; intermediate training files are deleted after processing

차이점 보기:

DTI_PID/DTI_PID/TrainingImageListDialog.py
38 38
        self.ui.tableWidgetList.setColumnCount(3)
39 39

  
40 40
        ## column header 명 설정
41
        #headerLabel = docData.getCurrentProject().getName()
42 41
        self.ui.tableWidgetList.setHorizontalHeaderLabels(['No.', '이미지 목록', '박스 작업 상태'])
43 42
        self.ui.tableWidgetList.horizontalHeaderItem(1).setToolTip('도면 이름') # header tooltip
44 43
        self.ui.tableWidgetList.horizontalHeaderItem(2).setToolTip('작업 상태') # header tooltip
......
166 165
        try:
167 166
            appDocData = AppDocData.instance()
168 167
            project = appDocData.getCurrentProject()
168
            oCRLang = appDocData.getCurrentProject().getName()
169 169
            dataList = appDocData.getTrainingFileList()
170 170
            listHasBox = []
171 171
            listHasBoxImage = []
......
266 266
                        outBox += boxComponent[0] + " " + str(boxComponent[1]) + " " + str(boxComponent[2]) + " " + str(boxComponent[3]) + " " + str(boxComponent[4]) + ' 0\n'
267 267
                    currentX = areaW
268 268

  
269
            trainingImgPath = os.path.join(project.getTrainingFilePath(), 'seed.seedF.exp0.tif')
270
            trainingBoxPath = os.path.join(project.getTrainingFilePath(), 'seed.seedF.exp0.box')
269
            trainingImgPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.tif')
270
            trainingBoxPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.box')
271 271
            trainingTextImg.save(trainingImgPath, compression='tiff_lzw')
272 272
            fw = open(trainingBoxPath, 'w', encoding='utf8')
273 273
            fw.write(outBox)
......
303 303
        fw.write('seed 0 0 0 0 0')
304 304
        fw.close()
305 305

  
306
        trPath = os.path.join(project.getTrainingFilePath(), 'seed.seedF.exp0.tr')
306
        trPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.tr')
307 307
        shapeclusteringCmd = '\"' + shapeclustering_cmd + '\" -F ' + fontProperty + ' -U ' + inputUnicharset + ' ' + trPath
308 308
        subprocess.call(shapeclusteringCmd, shell = True)
309 309

  
......
314 314
        cntrainingCmd = '\"' + cntraining_cmd + '\" ' + trPath
315 315
        subprocess.call(cntrainingCmd, shell = True)
316 316

  
317
        if os.path.isfile(os.path.join(runningPath, 'seed.inttemp')):
318
            os.remove(os.path.join(runningPath, 'seed.inttemp'))
319
        if os.path.isfile(os.path.join(runningPath, 'seed.normproto')):
320
            os.remove(os.path.join(runningPath, 'seed.normproto'))
321
        if os.path.isfile(os.path.join(runningPath, 'seed.pffmtable')):
322
            os.remove(os.path.join(runningPath, 'seed.pffmtable'))
323
        if os.path.isfile(os.path.join(runningPath, 'seed.shapetable')):
324
            os.remove(os.path.join(runningPath, 'seed.shapetable'))
325
        if os.path.isfile(os.path.join(runningPath, 'seed.unicharset')):
326
            os.remove(os.path.join(runningPath, 'seed.unicharset'))
327

  
328
        os.rename(os.path.join(runningPath, 'inttemp'), os.path.join(runningPath, 'seed.inttemp'))
329
        os.rename(os.path.join(runningPath, 'normproto'), os.path.join(runningPath, 'seed.normproto'))
330
        os.rename(os.path.join(runningPath, 'pffmtable'), os.path.join(runningPath, 'seed.pffmtable'))
331
        os.rename(os.path.join(runningPath, 'shapetable'), os.path.join(runningPath, 'seed.shapetable'))
332
        os.rename(os.path.join(runningPath, 'unicharset'), os.path.join(runningPath, 'seed.unicharset'))
333
        combineTessdataCmd = '\"' + combine_tessdata_cmd + '\" seed.'
317
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.inttemp')):
318
            os.remove(os.path.join(runningPath, oCRLang + '.inttemp'))
319
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.normproto')):
320
            os.remove(os.path.join(runningPath, oCRLang + '.normproto'))
321
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.pffmtable')):
322
            os.remove(os.path.join(runningPath, oCRLang + '.pffmtable'))
323
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.shapetable')):
324
            os.remove(os.path.join(runningPath, oCRLang + '.shapetable'))
325
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.unicharset')):
326
            os.remove(os.path.join(runningPath, oCRLang + '.unicharset'))
327

  
328
        os.rename(os.path.join(runningPath, 'inttemp'), os.path.join(runningPath, oCRLang + '.inttemp'))
329
        os.rename(os.path.join(runningPath, 'normproto'), os.path.join(runningPath, oCRLang + '.normproto'))
330
        os.rename(os.path.join(runningPath, 'pffmtable'), os.path.join(runningPath, oCRLang + '.pffmtable'))
331
        os.rename(os.path.join(runningPath, 'shapetable'), os.path.join(runningPath, oCRLang + '.shapetable'))
332
        os.rename(os.path.join(runningPath, 'unicharset'), os.path.join(runningPath, oCRLang + '.unicharset'))
333
        combineTessdataCmd = '\"' + combine_tessdata_cmd + '\" ' + oCRLang + '.'
334 334
        subprocess.call(combineTessdataCmd, shell = True)
335 335

  
336
        if os.path.isfile(os.path.join(tesseractPath, 'seed.traineddata')):
337
            os.remove(os.path.join(tesseractPath, 'seed.traineddata'))
338
        os.rename(os.path.join(runningPath, 'seed.traineddata'), os.path.join(tesseractPath, 'seed.traineddata'))
336
        if os.path.isfile(os.path.join(tesseractPath, oCRLang + '.traineddata')):
337
            os.remove(os.path.join(tesseractPath, oCRLang + '.traineddata'))
338
        os.rename(os.path.join(runningPath, oCRLang + '.traineddata'), os.path.join(tesseractPath, oCRLang + '.traineddata'))
339

  
340
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.inttemp')):
341
            os.remove(os.path.join(runningPath, oCRLang + '.inttemp'))
342
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.normproto')):
343
            os.remove(os.path.join(runningPath, oCRLang + '.normproto'))
344
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.pffmtable')):
345
            os.remove(os.path.join(runningPath, oCRLang + '.pffmtable'))
346
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.shapetable')):
347
            os.remove(os.path.join(runningPath, oCRLang + '.shapetable'))
348
        if os.path.isfile(os.path.join(runningPath, oCRLang + '.unicharset')):
349
            os.remove(os.path.join(runningPath, oCRLang + '.unicharset'))
339 350

  
340 351
        #except Exception as ex:
341 352
            #print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno))
DTI_PID/DTI_PID/tesseract_ocr_module.py
35 35
tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe')
36 36

  
37 37
DEFAULT_CONF = """
38
    --psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"\\'
38
    --psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"
39 39
"""
40 40

  
41 41
'''
......
183 183
                2018.06.14  Jeongwoo    Add try-except. If an exception occurred, return None
184 184
                2018.06.20  Jeongwoo    Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
185 185
                humkyung 2018.10.12 change logic to extract text: first get the bounding boxes, then extract the characters
186
                2018.10.19  euisung     OCR language now depends on the project name
186 187
"""
187 188
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
188 189
    #if form is not None:
......
193 194
        textInfoList = []
194 195

  
195 196
        docData = AppDocData.instance()
197
        oCRLang = 'eng+' + docData.getCurrentProject().getName()
196 198
        configs = docData.getConfigs('Text Size', 'Min Text Size')
197 199
        minSize = int(configs[0].value) if 1 == len(configs) else 30
198 200
        configs = docData.getConfigs('Text Size', 'Max Text Size')
......
202 204
        im = im.rotate(-angle, expand=True)
203 205
        imgWidth = im.width
204 206
        imgHeight = im.height
205
        boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng+seed')
207
        boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang=oCRLang)
206 208
        bounding_boxes = boundaryOcrData.split('\n')
207 209
        merged_boxes = []
208 210
        for box in bounding_boxes:
......
235 237
        
236 238
        for rect in merged_boxes:
237 239
            cropped = im.crop((rect.left(), imgHeight - rect.bottom(), rect.right(), imgHeight - rect.top()))
238
            text = pytesseract.image_to_string(cropped, config=conf, lang='eng+seed')
240
            text = pytesseract.image_to_string(cropped, config=conf, lang=oCRLang)
239 241

  
240 242
            if rect.height() >= minSize and rect.height() <= maxSize:
241 243
                text_rect = QRect(rect.left(), imgHeight - rect.bottom(), rect.width(), rect.height())
seed.normproto
1
4
2
linear   essential      -0.250000   0.750000
3
linear   non-essential   0.000000   1.000000
4
linear   essential       0.000000   1.000000
5
linear   essential       0.000000   1.000000
6

  
7
1 1
8
significant   elliptical     8
9
	  0.261230  0.122217  0.153809  0.039063
10
	  0.000400  0.000400  0.000400  0.000400
11

  
12
/ 1
13
significant   elliptical     6
14
	  0.225260  0.162435  0.211589  0.102865
15
	  0.000400  0.000400  0.000400  0.000400
16

  
17
2 1
18
significant   elliptical    13
19
	  0.240685  0.219261  0.176983  0.089543
20
	  0.000403  0.000499  0.000400  0.000400
21

  
22
' 1
23
significant   elliptical     1
24
	  0.484375  0.093750  0.058594  0.062500
25
	  0.000400  0.000400  0.000400  0.000400
26

  
27
F 1
28
significant   elliptical     6
29
	  0.314453  0.193490  0.154297  0.108073
30
	  0.000400  0.000400  0.000400  0.000400
31

  
32
T 1
33
significant   elliptical     6
34
	  0.296224  0.141016  0.165365  0.057292
35
	  0.000400  0.000400  0.000400  0.000400
36

  
37
, 1
38
significant   elliptical     1
39
	  0.531250  0.079297  0.082031  0.035156
40
	  0.000400  0.000400  0.000400  0.000400
41

  
42
" 1
43
significant   elliptical     8
44
	  0.490723  0.098291  0.059570  0.062988
45
	  0.000400  0.000400  0.000400  0.000400
46

  
47
V 1
48
significant   elliptical     2
49
	  0.277344  0.195117  0.142578  0.093750
50
	  0.000400  0.000400  0.000400  0.000400
51

  
52
B 1
53
significant   elliptical     2
54
	  0.251953  0.280273  0.160156  0.105469
55
	  0.000400  0.000400  0.000400  0.000400
56

  
57
- 1
58
significant   elliptical     4
59
	  0.244141  0.099414  0.025391  0.139648
60
	  0.000860  0.000400  0.000400  0.000462
61

  
62
8 1
63
significant   elliptical     2
64
	  0.248047  0.261328  0.158203  0.101563
65
	  0.000400  0.000400  0.000400  0.000400
66

  
67
0 1
68
significant   elliptical     3
69
	  0.273438  0.264323  0.174479  0.117188
70
	  0.001419  0.002254  0.000799  0.000412
71

  
72
4 1
73
significant   elliptical     2
74
	  0.236328  0.182617  0.128906  0.078125
75
	  0.000400  0.000400  0.000400  0.000400
76

  
77
X 1
78
significant   elliptical     1
79
	  0.171875  0.156250  0.117188  0.082031
80
	  0.000400  0.000400  0.000400  0.000400
81

  
82
l 1
83
significant   elliptical     1
84
	  0.320313  0.138672  0.199219  0.023438
85
	  0.000400  0.000400  0.000400  0.000400
86

  
87
A 1
88
significant   elliptical     1
89
	  0.281250  0.288281  0.175781  0.125000
90
	  0.000400  0.000400  0.000400  0.000400
91

  
92
M 1
93
significant   elliptical     1
94
	  0.289063  0.442187  0.179688  0.171875
95
	  0.000400  0.000400  0.000400  0.000400
96

  
97
O 1
98
significant   elliptical     1
99
	  0.316406  0.318750  0.207031  0.140625
100
	  0.000400  0.000400  0.000400  0.000400
101

  
102
6 1
103
significant   elliptical     1
104
	  0.320313  0.335156  0.199219  0.121094
105
	  0.000400  0.000400  0.000400  0.000400
seed.unicharset
1
23
2
NULL 0 Common 0
3
Joined 7 0,255,0,255,0,0,0,0,0,0 Latin 1 0 1 Joined	# Joined [4a 6f 69 6e 65 64 ]a
4
|Broken|0|1 f 0,255,0,255,0,0,0,0,0,0 Common 2 10 2 |Broken|0|1	# Broken
5
2 8 0,255,0,255,0,0,0,0,0,0 Common 3 2 3 2	# 2 [32 ]0
6
T 5 0,255,0,255,0,0,0,0,0,0 Latin 4 0 4 T	# T [54 ]A
7
F 5 0,255,0,255,0,0,0,0,0,0 Latin 5 0 5 F	# F [46 ]A
8
' 10 0,255,0,255,0,0,0,0,0,0 Common 6 10 6 '	# ' [27 ]p
9
/ 10 0,255,0,255,0,0,0,0,0,0 Common 7 6 7 /	# / [2f ]p
10
1 8 0,255,0,255,0,0,0,0,0,0 Common 8 2 8 1	# 1 [31 ]0
11
, 10 0,255,0,255,0,0,0,0,0,0 Common 9 6 9 ,	# , [2c ]p
12
4 8 0,255,0,255,0,0,0,0,0,0 Common 10 2 10 4	# 4 [34 ]0
13
0 8 0,255,0,255,0,0,0,0,0,0 Common 11 2 11 0	# 0 [30 ]0
14
8 8 0,255,0,255,0,0,0,0,0,0 Common 12 2 12 8	# 8 [38 ]0
15
- 10 0,255,0,255,0,0,0,0,0,0 Common 13 3 13 -	# - [2d ]p
16
B 5 0,255,0,255,0,0,0,0,0,0 Latin 14 0 14 B	# B [42 ]A
17
V 5 0,255,0,255,0,0,0,0,0,0 Latin 15 0 15 V	# V [56 ]A
18
" 10 0,255,0,255,0,0,0,0,0,0 Common 16 10 16 "	# " [22 ]p
19
X 5 0,255,0,255,0,0,0,0,0,0 Latin 17 0 17 X	# X [58 ]A
20
6 8 0,255,0,255,0,0,0,0,0,0 Common 18 2 18 6	# 6 [36 ]0
21
O 5 0,255,0,255,0,0,0,0,0,0 Latin 19 0 19 O	# O [4f ]A
22
M 5 0,255,0,255,0,0,0,0,0,0 Latin 20 0 20 M	# M [4d ]A
23
A 5 0,255,0,255,0,0,0,0,0,0 Latin 21 0 21 A	# A [41 ]A
24
l 3 0,255,0,255,0,0,0,0,0,0 Latin 22 0 22 l	# l [6c ]a

내보내기 Unified diff