개정판 03a5d186
dev issue #507 : fix format
Change-Id: I43eb8c556bcd79c7e30a3e1923599d621320c334
DTI_PID/DTI_PID/tesseract_ocr_module.py | ||
---|---|---|
10 | 10 |
import re |
11 | 11 |
import sys, os |
12 | 12 |
from PyQt5.QtWidgets import * |
13 |
|
|
13 | 14 |
try: |
14 | 15 |
from PyQt5.QtCore import QRect |
15 | 16 |
from PyQt5.QtGui import QTransform |
... | ... | |
32 | 33 |
humkyung 2018.08.13 set tesseract executable path to relative of this file path |
33 | 34 |
euisung set tesseract executable path to ProgramData |
34 | 35 |
''' |
35 |
pytesseract.pytesseract.tesseract_cmd = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR', 'tesseract.exe') |
|
36 |
pytesseract.pytesseract.tesseract_cmd = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR', |
|
37 |
'tesseract.exe') |
|
36 | 38 |
|
37 | 39 |
tesseract_path = os.path.join(os.getenv('ALLUSERSPROFILE'), 'Digital PID', 'Tesseract-OCR') |
38 | 40 |
DEFAULT_CONF = """ |
... | ... | |
40 | 42 |
""" |
41 | 43 |
DEFAULT_CONF_COMM = "--psm 6 -c preserve_interword_spaces=1" |
42 | 44 |
|
45 |
|
|
43 | 46 |
def exist_trained_data(): |
44 | 47 |
"""check trained data is exist""" |
45 | 48 |
|
46 | 49 |
docData = AppDocData.instance() |
47 |
prj_trained_data = os.path.join(tesseract_path, 'tessdata', docData.getCurrentProject().getName()+'.traineddata')
|
|
50 |
prj_trained_data = os.path.join(tesseract_path, 'tessdata', docData.getCurrentProject().getName() + '.traineddata')
|
|
48 | 51 |
if os.path.isfile(prj_trained_data): |
49 | 52 |
return True |
50 | 53 |
else: |
51 | 54 |
return False |
52 | 55 |
|
56 |
|
|
53 | 57 |
""" |
54 | 58 |
@history 2018.04.26 Jeongwoo Make TextInfo object with Calculated Coords (with BoundBox Coords) |
55 | 59 |
2018.04.30 Jeongwoo Add QRect.setHeight() in if-statement [(lineRect is not None and currentRect is not None) and lineRect.intersects(currentRect)] |
... | ... | |
65 | 69 |
2018.11.08 euisung add config for OCR white char list |
66 | 70 |
2018.11.21 euisung fix area moving bug |
67 | 71 |
""" |
68 |
def getTextInfo(img, startPoint, angle, language, flag = FLAG_IMAGE_TO_BOXES, conf = None): |
|
72 |
|
|
73 |
|
|
74 |
def getTextInfo(img, startPoint, angle, language, flag=FLAG_IMAGE_TO_BOXES, conf=None): |
|
69 | 75 |
from PIL import ImageOps |
70 | 76 |
|
71 | 77 |
try: |
... | ... | |
144 | 150 |
maxx = int(tokens[3]) |
145 | 151 |
maxy = int(tokens[4]) |
146 | 152 |
|
147 |
findBox = None
|
|
153 |
find_box = None
|
|
148 | 154 |
for merged_box in merged_boxes: |
149 | 155 |
bottom = merged_box.bottom() |
150 | 156 |
top = merged_box.top() |
151 |
if (abs(miny - top) <= 1 or abs(maxy - bottom) <= 1) or (miny <= top and maxy >= bottom) or (miny >= top and maxy <= bottom): |
|
152 |
findBox = merged_box |
|
153 |
|
|
154 |
if findBox: |
|
155 |
findBox.setLeft(min(findBox.left(), minx)) |
|
156 |
findBox.setTop(min(findBox.top(), miny)) |
|
157 |
findBox.setRight(max(findBox.right(), maxx)) |
|
158 |
findBox.setBottom(max(findBox.bottom(), maxy)) |
|
157 |
if (abs(miny - top) <= 1 or abs(maxy - bottom) <= 1) or (miny <= top and maxy >= bottom) or ( |
|
158 |
miny >= top and maxy <= bottom): |
|
159 |
find_box = merged_box |
|
160 |
|
|
161 |
if find_box: |
|
162 |
find_box.setLeft(min(find_box.left(), minx)) |
|
163 |
find_box.setTop(min(find_box.top(), miny)) |
|
164 |
find_box.setRight(max(find_box.right(), maxx)) |
|
165 |
find_box.setBottom(max(find_box.bottom(), maxy)) |
|
159 | 166 |
else: |
160 | 167 |
merged_boxes.append(QRect(minx, miny, maxx - minx, maxy - miny)) |
161 | 168 |
|
... | ... | |
189 | 196 |
text_rect = QRect(rect.left(), imgHeight - rect.bottom(), rect.width(), rect.height()) |
190 | 197 |
if angle == 90 or angle == 270: |
191 | 198 |
transform = QTransform() |
192 |
transform.translate(imgHeight*0.5, imgWidth*0.5)
|
|
199 |
transform.translate(imgHeight * 0.5, imgWidth * 0.5)
|
|
193 | 200 |
transform.rotate(-angle) |
194 |
transform.translate(-imgWidth*0.5, -imgHeight*0.5)
|
|
201 |
transform.translate(-imgWidth * 0.5, -imgHeight * 0.5)
|
|
195 | 202 |
text_rect = transform.mapRect(text_rect) |
196 |
|
|
203 |
|
|
197 | 204 |
text_info = ti.TextInfo(text, startPoint[0] - thickness + text_rect.left(), |
198 | 205 |
startPoint[1] - thickness + text_rect.top(), |
199 | 206 |
text_rect.width(), text_rect.height(), angle) |
... | ... | |
204 | 211 |
from App import App |
205 | 212 |
from AppDocData import MessageType |
206 | 213 |
|
207 |
message = 'error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, sys.exc_info()[-1].tb_lineno) |
|
214 |
message = 'error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename, |
|
215 |
sys.exc_info()[-1].tb_lineno) |
|
208 | 216 |
App.mainWnd().addMessage.emit(MessageType.Error, message) |
209 | 217 |
return None |
210 | 218 |
|
211 |
def removeTextFromNpArray(img, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF): |
|
219 |
|
|
220 |
def removeTextFromNpArray(img, flag=FLAG_IMAGE_TO_BOXES, conf=DEFAULT_CONF): |
|
212 | 221 |
retImg = img.copy() |
213 | 222 |
|
214 | 223 |
for i in range(4): |
... | ... | |
222 | 231 |
|
223 | 232 |
return retImg |
224 | 233 |
|
234 |
|
|
225 | 235 |
''' |
226 | 236 |
@history 2018.06.14 Jeongwoo Add try-except. If exception occurred, return None |
227 | 237 |
''' |
238 |
|
|
239 |
|
|
228 | 240 |
def imageToBoxes(img, conf): |
229 | 241 |
docData = AppDocData.instance() |
230 | 242 |
configs = docData.getConfigs('Text Size', 'Min Text Size') |
... | ... | |
250 | 262 |
th = tey - tsy |
251 | 263 |
|
252 | 264 |
if th >= minSize and th <= maxSize: |
253 |
roi = img[tsy:tsy+th, tsx:tsx+tw]
|
|
265 |
roi = img[tsy:tsy + th, tsx:tsx + tw]
|
|
254 | 266 |
temp = roi.copy() |
255 | 267 |
tempBin = cv2.bitwise_not(temp) |
256 | 268 |
roi = cv2.bitwise_xor(roi, tempBin, roi) |
... | ... | |
261 | 273 |
|
262 | 274 |
return img |
263 | 275 |
|
276 |
|
|
264 | 277 |
''' |
265 | 278 |
@history 2018.06.14 Jeongwoo Add try-except. If exception occurred, return None |
266 | 279 |
''' |
280 |
|
|
281 |
|
|
267 | 282 |
def imageToData(img, conf): |
268 | 283 |
docData = AppDocData.instance() |
269 | 284 |
configs = docData.getConfigs('Text Size', 'Min Text Size') |
... | ... | |
272 | 287 |
maxSize = int(configs[0].value) if 1 == len(configs) else self.ui.maxTextSizeSpinBox.setValue(60) |
273 | 288 |
|
274 | 289 |
im = Image.fromarray(img) |
275 |
ocrData = pytesseract.image_to_data(im, config = conf)
|
|
290 |
ocrData = pytesseract.image_to_data(im, config=conf)
|
|
276 | 291 |
|
277 | 292 |
if ocrData: |
278 | 293 |
splitOcrData = ocrData.split('\n') |
... | ... | |
290 | 305 |
print('TOCR RESULT : ' + text + ' , (' + str(tx) + ',' + str(ty) + '), ' + str(tw) + ' ' + str(th)) |
291 | 306 |
|
292 | 307 |
if th >= minSize and th <= maxSize: |
293 |
roi = img[ty:ty+th, tx:tx+tw]
|
|
308 |
roi = img[ty:ty + th, tx:tx + tw]
|
|
294 | 309 |
temp = roi.copy() |
295 | 310 |
tempBin = cv2.bitwise_not(temp) |
296 | 311 |
roi = cv2.bitwise_xor(roi, tempBin, roi) |
... | ... | |
298 | 313 |
cv2.imshow('tempBin', tempBin) |
299 | 314 |
cv2.waitKey(0) |
300 | 315 |
cv2.destroyAllWindows() |
301 |
i = i+1
|
|
316 |
i = i + 1
|
|
302 | 317 |
|
303 | 318 |
cv2.imshow('image_To_Data', img) |
304 | 319 |
cv2.waitKey(0) |
305 | 320 |
return img |
306 | 321 |
|
322 |
|
|
307 | 323 |
def imageToString(img, conf): |
308 | 324 |
im = Image.fromarray(img) |
309 | 325 |
conf = 'hocr' |
... | ... | |
312 | 328 |
if ocrData: |
313 | 329 |
print(ocrData) |
314 | 330 |
|
315 |
return img |
|
331 |
return img |
내보내기 Unified diff