개정판 69908833
issue #655: OCR language now depends on the project name, and intermediate training files are deleted after processing
DTI_PID/DTI_PID/TrainingImageListDialog.py | ||
---|---|---|
38 | 38 |
self.ui.tableWidgetList.setColumnCount(3) |
39 | 39 | |
40 | 40 |
## column header 명 설정 |
41 |
#headerLabel = docData.getCurrentProject().getName() |
|
42 | 41 |
self.ui.tableWidgetList.setHorizontalHeaderLabels(['No.', '이미지 목록', '박스 작업 상태']) |
43 | 42 |
self.ui.tableWidgetList.horizontalHeaderItem(1).setToolTip('도면 이름') # header tooltip |
44 | 43 |
self.ui.tableWidgetList.horizontalHeaderItem(2).setToolTip('작업 상태') # header tooltip |
... | ... | |
166 | 165 |
try: |
167 | 166 |
appDocData = AppDocData.instance() |
168 | 167 |
project = appDocData.getCurrentProject() |
168 |
oCRLang = appDocData.getCurrentProject().getName() |
|
169 | 169 |
dataList = appDocData.getTrainingFileList() |
170 | 170 |
listHasBox = [] |
171 | 171 |
listHasBoxImage = [] |
... | ... | |
266 | 266 |
outBox += boxComponent[0] + " " + str(boxComponent[1]) + " " + str(boxComponent[2]) + " " + str(boxComponent[3]) + " " + str(boxComponent[4]) + ' 0\n' |
267 | 267 |
currentX = areaW |
268 | 268 | |
269 |
trainingImgPath = os.path.join(project.getTrainingFilePath(), 'seed.seedF.exp0.tif')
|
|
270 |
trainingBoxPath = os.path.join(project.getTrainingFilePath(), 'seed.seedF.exp0.box')
|
|
269 |
trainingImgPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.tif')
|
|
270 |
trainingBoxPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.box')
|
|
271 | 271 |
trainingTextImg.save(trainingImgPath, compression='tiff_lzw') |
272 | 272 |
fw = open(trainingBoxPath, 'w', encoding='utf8') |
273 | 273 |
fw.write(outBox) |
... | ... | |
303 | 303 |
fw.write('seed 0 0 0 0 0') |
304 | 304 |
fw.close() |
305 | 305 | |
306 |
trPath = os.path.join(project.getTrainingFilePath(), 'seed.seedF.exp0.tr')
|
|
306 |
trPath = os.path.join(project.getTrainingFilePath(), oCRLang + '.' + oCRLang + 'F.exp0.tr')
|
|
307 | 307 |
shapeclusteringCmd = '\"' + shapeclustering_cmd + '\" -F ' + fontProperty + ' -U ' + inputUnicharset + ' ' + trPath |
308 | 308 |
subprocess.call(shapeclusteringCmd, shell = True) |
309 | 309 | |
... | ... | |
314 | 314 |
cntrainingCmd = '\"' + cntraining_cmd + '\" ' + trPath |
315 | 315 |
subprocess.call(cntrainingCmd, shell = True) |
316 | 316 | |
317 |
if os.path.isfile(os.path.join(runningPath, 'seed.inttemp')):
|
|
318 |
os.remove(os.path.join(runningPath, 'seed.inttemp'))
|
|
319 |
if os.path.isfile(os.path.join(runningPath, 'seed.normproto')):
|
|
320 |
os.remove(os.path.join(runningPath, 'seed.normproto'))
|
|
321 |
if os.path.isfile(os.path.join(runningPath, 'seed.pffmtable')):
|
|
322 |
os.remove(os.path.join(runningPath, 'seed.pffmtable'))
|
|
323 |
if os.path.isfile(os.path.join(runningPath, 'seed.shapetable')):
|
|
324 |
os.remove(os.path.join(runningPath, 'seed.shapetable'))
|
|
325 |
if os.path.isfile(os.path.join(runningPath, 'seed.unicharset')):
|
|
326 |
os.remove(os.path.join(runningPath, 'seed.unicharset'))
|
|
327 | ||
328 |
os.rename(os.path.join(runningPath, 'inttemp'), os.path.join(runningPath, 'seed.inttemp'))
|
|
329 |
os.rename(os.path.join(runningPath, 'normproto'), os.path.join(runningPath, 'seed.normproto'))
|
|
330 |
os.rename(os.path.join(runningPath, 'pffmtable'), os.path.join(runningPath, 'seed.pffmtable'))
|
|
331 |
os.rename(os.path.join(runningPath, 'shapetable'), os.path.join(runningPath, 'seed.shapetable'))
|
|
332 |
os.rename(os.path.join(runningPath, 'unicharset'), os.path.join(runningPath, 'seed.unicharset'))
|
|
333 |
combineTessdataCmd = '\"' + combine_tessdata_cmd + '\" seed.'
|
|
317 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.inttemp')):
|
|
318 |
os.remove(os.path.join(runningPath, oCRLang + '.inttemp'))
|
|
319 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.normproto')):
|
|
320 |
os.remove(os.path.join(runningPath, oCRLang + '.normproto'))
|
|
321 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.pffmtable')):
|
|
322 |
os.remove(os.path.join(runningPath, oCRLang + '.pffmtable'))
|
|
323 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.shapetable')):
|
|
324 |
os.remove(os.path.join(runningPath, oCRLang + '.shapetable'))
|
|
325 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.unicharset')):
|
|
326 |
os.remove(os.path.join(runningPath, oCRLang + '.unicharset'))
|
|
327 | ||
328 |
os.rename(os.path.join(runningPath, 'inttemp'), os.path.join(runningPath, oCRLang + '.inttemp'))
|
|
329 |
os.rename(os.path.join(runningPath, 'normproto'), os.path.join(runningPath, oCRLang + '.normproto'))
|
|
330 |
os.rename(os.path.join(runningPath, 'pffmtable'), os.path.join(runningPath, oCRLang + '.pffmtable'))
|
|
331 |
os.rename(os.path.join(runningPath, 'shapetable'), os.path.join(runningPath, oCRLang + '.shapetable'))
|
|
332 |
os.rename(os.path.join(runningPath, 'unicharset'), os.path.join(runningPath, oCRLang + '.unicharset'))
|
|
333 |
combineTessdataCmd = '\"' + combine_tessdata_cmd + '\" ' + oCRLang + '.'
|
|
334 | 334 |
subprocess.call(combineTessdataCmd, shell = True) |
335 | 335 | |
336 |
if os.path.isfile(os.path.join(tesseractPath, 'seed.traineddata')): |
|
337 |
os.remove(os.path.join(tesseractPath, 'seed.traineddata')) |
|
338 |
os.rename(os.path.join(runningPath, 'seed.traineddata'), os.path.join(tesseractPath, 'seed.traineddata')) |
|
336 |
if os.path.isfile(os.path.join(tesseractPath, oCRLang + '.traineddata')): |
|
337 |
os.remove(os.path.join(tesseractPath, oCRLang + '.traineddata')) |
|
338 |
os.rename(os.path.join(runningPath, oCRLang + '.traineddata'), os.path.join(tesseractPath, oCRLang + '.traineddata')) |
|
339 | ||
340 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.inttemp')): |
|
341 |
os.remove(os.path.join(runningPath, oCRLang + '.inttemp')) |
|
342 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.normproto')): |
|
343 |
os.remove(os.path.join(runningPath, oCRLang + '.normproto')) |
|
344 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.pffmtable')): |
|
345 |
os.remove(os.path.join(runningPath, oCRLang + '.pffmtable')) |
|
346 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.shapetable')): |
|
347 |
os.remove(os.path.join(runningPath, oCRLang + '.shapetable')) |
|
348 |
if os.path.isfile(os.path.join(runningPath, oCRLang + '.unicharset')): |
|
349 |
os.remove(os.path.join(runningPath, oCRLang + '.unicharset')) |
|
339 | 350 | |
340 | 351 |
#except Exception as ex: |
341 | 352 |
#print('error occured({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno)) |
DTI_PID/DTI_PID/tesseract_ocr_module.py | ||
---|---|---|
35 | 35 |
tesseract_cmd = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe') |
36 | 36 | |
37 | 37 |
DEFAULT_CONF = """ |
38 |
--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\"\\'
|
|
38 |
--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-~.,/!@#$%&*(){}[]<>:;+=?\\" |
|
39 | 39 |
""" |
40 | 40 | |
41 | 41 |
''' |
... | ... | |
183 | 183 |
2018.06.14 Jeongwoo Add try-except. If exception occured, return None |
184 | 184 |
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height |
185 | 185 |
humkyung 2018.10.12 change logic to extract text which first get bounding box and then extract character |
186 |
2018.10.19 euisung OCR lang change depend on project name |
|
186 | 187 |
""" |
187 | 188 |
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF): |
188 | 189 |
#if form is not None: |
... | ... | |
193 | 194 |
textInfoList = [] |
194 | 195 | |
195 | 196 |
docData = AppDocData.instance() |
197 |
oCRLang = 'eng+' + docData.getCurrentProject().getName() |
|
196 | 198 |
configs = docData.getConfigs('Text Size', 'Min Text Size') |
197 | 199 |
minSize = int(configs[0].value) if 1 == len(configs) else 30 |
198 | 200 |
configs = docData.getConfigs('Text Size', 'Max Text Size') |
... | ... | |
202 | 204 |
im = im.rotate(-angle, expand=True) |
203 | 205 |
imgWidth = im.width |
204 | 206 |
imgHeight = im.height |
205 |
boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng+seed')
|
|
207 |
boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang=oCRLang)
|
|
206 | 208 |
bounding_boxes = boundaryOcrData.split('\n') |
207 | 209 |
merged_boxes = [] |
208 | 210 |
for box in bounding_boxes: |
... | ... | |
235 | 237 |
|
236 | 238 |
for rect in merged_boxes: |
237 | 239 |
cropped = im.crop((rect.left(), imgHeight - rect.bottom(), rect.right(), imgHeight - rect.top())) |
238 |
text = pytesseract.image_to_string(cropped, config=conf, lang='eng+seed')
|
|
240 |
text = pytesseract.image_to_string(cropped, config=conf, lang=oCRLang)
|
|
239 | 241 | |
240 | 242 |
if rect.height() >= minSize and rect.height() <= maxSize: |
241 | 243 |
text_rect = QRect(rect.left(), imgHeight - rect.bottom(), rect.width(), rect.height()) |
seed.normproto | ||
---|---|---|
1 |
4 |
|
2 |
linear essential -0.250000 0.750000 |
|
3 |
linear non-essential 0.000000 1.000000 |
|
4 |
linear essential 0.000000 1.000000 |
|
5 |
linear essential 0.000000 1.000000 |
|
6 | ||
7 |
1 1 |
|
8 |
significant elliptical 8 |
|
9 |
0.261230 0.122217 0.153809 0.039063 |
|
10 |
0.000400 0.000400 0.000400 0.000400 |
|
11 | ||
12 |
/ 1 |
|
13 |
significant elliptical 6 |
|
14 |
0.225260 0.162435 0.211589 0.102865 |
|
15 |
0.000400 0.000400 0.000400 0.000400 |
|
16 | ||
17 |
2 1 |
|
18 |
significant elliptical 13 |
|
19 |
0.240685 0.219261 0.176983 0.089543 |
|
20 |
0.000403 0.000499 0.000400 0.000400 |
|
21 | ||
22 |
' 1 |
|
23 |
significant elliptical 1 |
|
24 |
0.484375 0.093750 0.058594 0.062500 |
|
25 |
0.000400 0.000400 0.000400 0.000400 |
|
26 | ||
27 |
F 1 |
|
28 |
significant elliptical 6 |
|
29 |
0.314453 0.193490 0.154297 0.108073 |
|
30 |
0.000400 0.000400 0.000400 0.000400 |
|
31 | ||
32 |
T 1 |
|
33 |
significant elliptical 6 |
|
34 |
0.296224 0.141016 0.165365 0.057292 |
|
35 |
0.000400 0.000400 0.000400 0.000400 |
|
36 | ||
37 |
, 1 |
|
38 |
significant elliptical 1 |
|
39 |
0.531250 0.079297 0.082031 0.035156 |
|
40 |
0.000400 0.000400 0.000400 0.000400 |
|
41 | ||
42 |
" 1 |
|
43 |
significant elliptical 8 |
|
44 |
0.490723 0.098291 0.059570 0.062988 |
|
45 |
0.000400 0.000400 0.000400 0.000400 |
|
46 | ||
47 |
V 1 |
|
48 |
significant elliptical 2 |
|
49 |
0.277344 0.195117 0.142578 0.093750 |
|
50 |
0.000400 0.000400 0.000400 0.000400 |
|
51 | ||
52 |
B 1 |
|
53 |
significant elliptical 2 |
|
54 |
0.251953 0.280273 0.160156 0.105469 |
|
55 |
0.000400 0.000400 0.000400 0.000400 |
|
56 | ||
57 |
- 1 |
|
58 |
significant elliptical 4 |
|
59 |
0.244141 0.099414 0.025391 0.139648 |
|
60 |
0.000860 0.000400 0.000400 0.000462 |
|
61 | ||
62 |
8 1 |
|
63 |
significant elliptical 2 |
|
64 |
0.248047 0.261328 0.158203 0.101563 |
|
65 |
0.000400 0.000400 0.000400 0.000400 |
|
66 | ||
67 |
0 1 |
|
68 |
significant elliptical 3 |
|
69 |
0.273438 0.264323 0.174479 0.117188 |
|
70 |
0.001419 0.002254 0.000799 0.000412 |
|
71 | ||
72 |
4 1 |
|
73 |
significant elliptical 2 |
|
74 |
0.236328 0.182617 0.128906 0.078125 |
|
75 |
0.000400 0.000400 0.000400 0.000400 |
|
76 | ||
77 |
X 1 |
|
78 |
significant elliptical 1 |
|
79 |
0.171875 0.156250 0.117188 0.082031 |
|
80 |
0.000400 0.000400 0.000400 0.000400 |
|
81 | ||
82 |
l 1 |
|
83 |
significant elliptical 1 |
|
84 |
0.320313 0.138672 0.199219 0.023438 |
|
85 |
0.000400 0.000400 0.000400 0.000400 |
|
86 | ||
87 |
A 1 |
|
88 |
significant elliptical 1 |
|
89 |
0.281250 0.288281 0.175781 0.125000 |
|
90 |
0.000400 0.000400 0.000400 0.000400 |
|
91 | ||
92 |
M 1 |
|
93 |
significant elliptical 1 |
|
94 |
0.289063 0.442187 0.179688 0.171875 |
|
95 |
0.000400 0.000400 0.000400 0.000400 |
|
96 | ||
97 |
O 1 |
|
98 |
significant elliptical 1 |
|
99 |
0.316406 0.318750 0.207031 0.140625 |
|
100 |
0.000400 0.000400 0.000400 0.000400 |
|
101 | ||
102 |
6 1 |
|
103 |
significant elliptical 1 |
|
104 |
0.320313 0.335156 0.199219 0.121094 |
|
105 |
0.000400 0.000400 0.000400 0.000400 |
seed.unicharset | ||
---|---|---|
1 |
23 |
|
2 |
NULL 0 Common 0 |
|
3 |
Joined 7 0,255,0,255,0,0,0,0,0,0 Latin 1 0 1 Joined # Joined [4a 6f 69 6e 65 64 ]a |
|
4 |
|Broken|0|1 f 0,255,0,255,0,0,0,0,0,0 Common 2 10 2 |Broken|0|1 # Broken |
|
5 |
2 8 0,255,0,255,0,0,0,0,0,0 Common 3 2 3 2 # 2 [32 ]0 |
|
6 |
T 5 0,255,0,255,0,0,0,0,0,0 Latin 4 0 4 T # T [54 ]A |
|
7 |
F 5 0,255,0,255,0,0,0,0,0,0 Latin 5 0 5 F # F [46 ]A |
|
8 |
' 10 0,255,0,255,0,0,0,0,0,0 Common 6 10 6 ' # ' [27 ]p |
|
9 |
/ 10 0,255,0,255,0,0,0,0,0,0 Common 7 6 7 / # / [2f ]p |
|
10 |
1 8 0,255,0,255,0,0,0,0,0,0 Common 8 2 8 1 # 1 [31 ]0 |
|
11 |
, 10 0,255,0,255,0,0,0,0,0,0 Common 9 6 9 , # , [2c ]p |
|
12 |
4 8 0,255,0,255,0,0,0,0,0,0 Common 10 2 10 4 # 4 [34 ]0 |
|
13 |
0 8 0,255,0,255,0,0,0,0,0,0 Common 11 2 11 0 # 0 [30 ]0 |
|
14 |
8 8 0,255,0,255,0,0,0,0,0,0 Common 12 2 12 8 # 8 [38 ]0 |
|
15 |
- 10 0,255,0,255,0,0,0,0,0,0 Common 13 3 13 - # - [2d ]p |
|
16 |
B 5 0,255,0,255,0,0,0,0,0,0 Latin 14 0 14 B # B [42 ]A |
|
17 |
V 5 0,255,0,255,0,0,0,0,0,0 Latin 15 0 15 V # V [56 ]A |
|
18 |
" 10 0,255,0,255,0,0,0,0,0,0 Common 16 10 16 " # " [22 ]p |
|
19 |
X 5 0,255,0,255,0,0,0,0,0,0 Latin 17 0 17 X # X [58 ]A |
|
20 |
6 8 0,255,0,255,0,0,0,0,0,0 Common 18 2 18 6 # 6 [36 ]0 |
|
21 |
O 5 0,255,0,255,0,0,0,0,0,0 Latin 19 0 19 O # O [4f ]A |
|
22 |
M 5 0,255,0,255,0,0,0,0,0,0 Latin 20 0 20 M # M [4d ]A |
|
23 |
A 5 0,255,0,255,0,0,0,0,0,0 Latin 21 0 21 A # A [41 ]A |
|
24 |
l 3 0,255,0,255,0,0,0,0,0,0 Latin 22 0 22 l # l [6c ]a |
내보내기 Unified diff