프로젝트

일반

사용자정보

통계
| 개정판:

hytos / DTI_PID / DTI_PID / OcrResultDialog.py @ 4d4b604e

이력 | 보기 | 이력해설 | 다운로드 (17.1 KB)

1
# coding: utf-8
2
"""
3
    This is ocr result dialog module
4
"""
5
from PIL import Image
6
import io
7
import numpy as np
8
import math
9
import enum
10

    
11
from PyQt5.QtCore import *
12
from PyQt5.QtGui import *
13
from PyQt5.QtWidgets import *
14
import OcrResultDialog_UI
15
import QtImageViewer
16
import tesseract_ocr_module as TOCR
17
from App import App
18
from AppDocData import *
19
from TextInfo import TextInfo
20

    
21

    
22
class SpellTextEdit(QTextEdit):
    """Text edit whose document is decorated by a Highlighter fed with the configured white character list."""

    def __init__(self, *args):
        """Create the editor and attach a Highlighter to its document.

        :param args: positional arguments forwarded unchanged to QTextEdit
        """
        super().__init__(*args)

        # The 'White Character List' config (if any) tells the highlighter which characters are acceptable.
        configs = AppDocData.instance().getConfigs('Text Recognition', 'White Character List')
        self.highlighter = Highlighter(self.document())
        self.highlighter.white_char_list = configs[0].value if configs else None
31

    
32

    
33
class Highlighter(QSyntaxHighlighter):
    """Syntax highlighter that underlines every word containing a character outside ``white_char_list``."""

    # shared format: red spell-check style underline
    err_format = QTextCharFormat()
    err_format.setUnderlineColor(Qt.red)
    err_format.setUnderlineStyle(QTextCharFormat.SpellCheckUnderline)

    def __init__(self, *args):
        """Create the highlighter.

        :param args: positional arguments forwarded unchanged to QSyntaxHighlighter
        """
        QSyntaxHighlighter.__init__(self, *args)

        # collection of allowed characters; nothing is highlighted while this is None or empty
        self.white_char_list = None

    def highlightBlock(self, text):
        """Underline each whitespace-delimited word of ``text`` that has a character not in the white list.

        :param text: content of the block being highlighted
        """
        import re

        if not self.white_char_list:
            return

        # Take the offset from the match itself: counting "len(word) + 1" drifts as soon as the
        # block starts with whitespace or words are separated by more than one whitespace character.
        for match in re.finditer(r'\S+', text):
            word = match.group()
            if any((c not in self.white_char_list) for c in word):
                self.setFormat(match.start(), len(word), self.err_format)
49

    
50

    
51
class QOcrResultDialog(QDialog):
52
    class Format(enum.Enum):
        """Kind of content in the image; detect_text() picks its recognition path from this value."""

        # recognise the whole image in one pass
        Normal = 0
        # locate text areas by contour first, then recognise each area
        Table = 1
55

    
56
    def __init__(self, parent, qimage, boundingBox, format=Format.Normal, text_item=None):
        """Build the dialog, wire up its controls and show either a fresh OCR result or an existing text item.

        :param parent: parent widget handed to QDialog
        :param qimage: image to recognise; it is also displayed in the embedded image viewer
        :param boundingBox: rectangle of the image; its x()/y() are added to the text coordinates in accept()
                            and a box taller than wide triggers an initial clockwise rotation (Normal format)
        :param format: QOcrResultDialog.Format member selecting the recognition path of detect_text()
        :param text_item: existing text item to edit; when given, no recognition is run at start-up
        """
        QDialog.__init__(self, parent)
        self.textInfoList = []

        self._text_item = text_item
        self.image = qimage
        self.boundingBox = boundingBox
        self._format = format

        self.angle = 0  # angle in degree

        self.ui = OcrResultDialog_UI.Ui_Dialog()
        self.ui.setupUi(self)
        self.ui.detectResultTextEdit = SpellTextEdit()
        self.ui.detectResultTextEdit.setFont(QFont('Consolas', 15, QFont.Bold))
        self.ui.horizontalLayoutTextEdit.addWidget(self.ui.detectResultTextEdit)

        # the training-image button is only offered when the app runs in 'advanced' mode
        app_doc_data = AppDocData.instance()
        configs = app_doc_data.getAppConfigs('app', 'mode')
        is_advanced = bool(configs) and 1 == len(configs) and 'advanced' == configs[0].value
        if not is_advanced:
            self.ui.pushButtonMakeTrainingImage.setVisible(False)

        self.graphicsView = QtImageViewer.QtImageViewer(App.mainWnd())
        self.graphicsView.useDefaultCommand()  # USE DEFAULT COMMAND
        self.graphicsView.setImage(self.image)
        self.ui.horizontalLayoutGraphicsView.addWidget(self.graphicsView)

        self.ui.counterClockPushButton_2.clicked.connect(lambda: self.rotateImage(True))
        self.ui.clockPushButton_2.clicked.connect(lambda: self.rotateImage(False))
        self.ui.pushButtonCopyHori.clicked.connect(self.copy_horizontal)
        # add shortcut for detecting text with 't'
        self.ui.redetectPushButton.clicked.connect(self.detect_text)
        shortcut = QShortcut(QKeySequence('t'), self.ui.redetectPushButton)
        shortcut.activated.connect(self.detect_text)
        # up to here
        self.ui.pushButtonMakeTrainingImage.clicked.connect(self.pushButtonMakeTrainingImageClicked)

        # 'eng' is always available; the project specific data only if its .traineddata file exists
        self.ui.comboBoxOCRData.addItem('eng')
        project_name = app_doc_data.getCurrentProject().name
        # fall back to '' so a missing ALLUSERSPROFILE yields "file not found" instead of a TypeError
        tessdata_path = os.path.join(os.getenv('ALLUSERSPROFILE') or '', 'Digital PID', 'Tesseract-OCR',
                                     'tessdata')
        if os.path.isfile(os.path.join(tessdata_path, project_name + '.traineddata')):
            self.ui.comboBoxOCRData.addItem(project_name)

        # select the configured OCR data; use the first entry when nothing is configured or the
        # configured name is not in the combo box (findText returns -1 in that case)
        configs = app_doc_data.getConfigs('Text Recognition', 'OCR Data')
        value = configs[0].value if 1 == len(configs) else ''
        at = self.ui.comboBoxOCRData.findText(value) if value else -1
        self.ui.comboBoxOCRData.setCurrentIndex(at if at >= 0 else 0)

        if not self._text_item:
            if format == QOcrResultDialog.Format.Normal and self.boundingBox.height() > self.boundingBox.width():
                self.rotateImage(False)

            self.detect_text()
        else:
            # show the existing text with a rectangle covering the whole item
            rect = self._text_item.sceneBoundingRect()
            text_info = TextInfo(self._text_item.text(), 0, 0, rect.width(), rect.height(), 0)
            self.textInfoList.append(text_info)
            self.display_text_rect()

            # the item angle is compared in radians: 1.57 ~ 90 degrees, 4.71 ~ 270 degrees
            allowed_error = 0.001
            if abs(self._text_item.angle - 1.57) < allowed_error or abs(self._text_item.angle - 4.71) < allowed_error:
                self.rotateImage(False)

            self.ui.detectResultTextEdit.setPlainText(self._text_item.text())
            self.ui.checkBoxSeperate.setChecked(False)

        self.isAccepted = False
127

    
128
    def showEvent(self, QShowEvent):
        """Call the viewer's zoomImageInit() every time the dialog is shown.

        :param QShowEvent: show event delivered by Qt (not used)
        """
        viewer = self.graphicsView
        viewer.zoomImageInit()
131

    
132
    def display_text_rect(self):
        """Redraw one red rectangle per entry of self.textInfoList on the viewer scene."""
        # remove every scene item whose exact type is not QGraphicsPixmapItem (i.e. keep the image)
        stale_items = [item for item in self.graphicsView.scene.items() if type(item) is not QGraphicsPixmapItem]
        for stale in stale_items:
            self.graphicsView.scene.removeItem(stale)

        for info in self.textInfoList:
            outline = QPen(Qt.red, 1, Qt.SolidLine)
            self.graphicsView.scene.addRect(info.getX(), info.getY(), info.getW(), info.getH(), outline)
141

    
142
    '''
143
        @brief      Make OCR Training Image
144
        @author     euisung
145
        @date       2018.10.16
146
        @history    euisung     2018.11.02       add notice push
147
    '''
148

    
149
    def pushButtonMakeTrainingImageClicked(self):
        """Save the current image as an OCR training image and close the dialog.

        The file is written to the current project's training file path under a random
        '<uuid4>.png' name. On success the user is notified and the dialog is closed through
        QDialog.reject (the reject() override of this class is not invoked). When the image
        cannot be saved a warning is shown and the dialog stays open.
        """
        import uuid

        project = AppDocData.instance().getCurrentProject()
        training_img_path = os.path.join(project.getTrainingFilePath(), str(uuid.uuid4()) + '.png')

        # save() signals failure through its boolean return value instead of raising
        if not self.image.save(training_img_path):
            QMessageBox.warning(self, self.tr('Notice'), self.tr('Failed to save the training image.'))
            return

        QMessageBox.about(self, self.tr("INFO"), self.tr('Successfully saved.'))
        QDialog.reject(self)
159

    
160
    def rotateImage(self, isCounterClock):
        """Rotate the image by a quarter turn and move the text rectangles along with it.

        :param isCounterClock: True rotates by -90 degrees, False by +90 degrees

        self.angle is kept in [0, 360): a counter-clockwise turn subtracts 90, a clockwise turn
        subtracts 270.
        """
        if isCounterClock:
            degrees = -90
            self.angle = (self.angle - 90) % 360
        else:
            degrees = 90
            self.angle = (self.angle - 270) % 360

        rotation = QTransform()
        rotation.rotate(degrees)

        self.graphicsView.clearImage()
        self.image = self.image.transformed(rotation)
        self.graphicsView.setImage(self.image)

        outline = QPen(Qt.red, 1, Qt.SolidLine)
        for info in self.textInfoList:
            mapped = rotation.mapRect(QRectF(info.getX(), info.getY(), info.getW(), info.getH()))
            left, top = mapped.left(), mapped.top()

            # the rotation is about the origin, so a negative coordinate is shifted back
            # into the rotated image using the new image size
            if left < 0:
                info.setX(self.image.width() + left)
            else:
                info.setX(left)
            if top < 0:
                info.setY(self.image.height() - max(abs(top), abs(mapped.bottom())))
            else:
                info.setY(top)
            info.setW(mapped.width())
            info.setH(mapped.height())

            self.graphicsView.scene.addRect(QRectF(info.getX(), info.getY(), info.getW(), info.getH()), outline)
187

    
188
    '''
189
        @history 2018.04.26 Jeongwoo    Add Rectangle with modified Coords
190
                 2018.06.20 Jeongwoo    Remove test code
191
                 2018.11.08 euisung     add white char list check process on db
192
                 2018.11.22 euisung     OCR lang apply fixed
193
    '''
194
    def detect_text(self):
        """Recognise the text of the current image and show the result in the dialog.

        For Format.Normal the whole image is passed to TOCR.getTextInfo using the OCR data selected
        in the combo box and the configured white character list. For any other format, text areas
        are located from the external contours of the thresholded image and recognised by
        TextDetector; the result is sorted by its y coordinate.

        The outcome is stored in self.textInfoList. When something was found, the text edit, the
        rectangles on the scene and the clipboard (copy_horizontal) are updated; otherwise the text
        edit shows the translated "Not Found". Any exception is reported through the main window's
        message signal instead of being raised.
        """
        import cv2
        from TextDetector import TextDetector
        from TextInfo import TextInfo

        try:
            # round-trip through an in-memory PNG to turn the Qt image into a numpy array
            buffer = QBuffer()
            buffer.open(QBuffer.ReadWrite)
            self.image.save(buffer, "PNG")
            img = np.array(Image.open(io.BytesIO(buffer.data())))

            app_doc_data = AppDocData.instance()

            ocr_data = self.ui.comboBoxOCRData.currentText()
            white_char_list = app_doc_data.getConfigs('Text Recognition', 'White Character List')
            if self._format == QOcrResultDialog.Format.Normal:
                # normalise a falsy result to a list so later len()/iteration cannot fail
                # NOTE(review): whether getTextInfo can return None is not visible here - confirm
                self.textInfoList = TOCR.getTextInfo(img, (0, 0), 0, language=ocr_data,
                                                     conf=white_char_list[0].value if white_char_list else '') or []
            else:
                cv_image = cv2.threshold(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 200, 255, cv2.THRESH_BINARY)[1]
                # extract contours
                contours, _ = cv2.findContours(cv_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                text_info_list = []
                for contour in contours:
                    [x, y, w, h] = cv2.boundingRect(contour)
                    text_info_list.append(TextInfo('', x, y, w, h, 0))

                # drop every area that contains the center of another area
                text_info_list = [text_info for text_info in text_info_list if
                                  not any([_text_info for _text_info in text_info_list if _text_info is not text_info
                                           and text_info.contains(_text_info.center)])]

                detector = TextDetector()
                detector.recognizeText(img, (0, 0), text_info_list, None, None, None, None, onlyTextArea=True)
                self.textInfoList = detector.textInfoList.copy()
                self.textInfoList.sort(key=lambda x: x.getY())

            # presentation of the result is the same for both formats
            if self.textInfoList:
                self.ui.detectResultTextEdit.setText(self.getPlainText(self.textInfoList))
                self.display_text_rect()

                self.copy_horizontal()
            else:
                self.ui.detectResultTextEdit.setText(self.tr("Not Found"))

        except Exception as ex:
            from App import App
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)
273

    
274
    def getPlainText(self, textInfoList):
        """Return the texts of the given text infos joined by newlines.

        :param textInfoList: iterable of objects providing getText()
        :return: one line per text info, without a trailing newline; '' for an empty input
        """
        return '\n'.join(text_info.getText() for text_info in textInfoList)
282

    
283
    '''
284
        @brief      OK Button Clicked. Remake TextInfo object
285
        @author     Jeongwoo
286
        @date       18.04.19
287
        @history    18.04.20    Jeongwoo    Calculate Start Point Coordinates by rotated angle
288
                    18.04.26    Jeongwoo    Scene.itemAt(textX - boundBox.x(), textY - boundBox.y())
289
    '''
290

    
291
    def accept(self):
        """Validate the edited text, rebuild self.textInfoList from it and close the dialog as accepted.

        - An empty text or the "Not Found" placeholder (raw or translated) only shows a notice;
          the dialog stays open and isAccepted is left untouched.
        - With the separate check box set, the text is split by line. If there is no recognition
          result, detection is run again; if the number of results still differs from the number
          of lines, a single merged text info holding the whole text is used.
        - Without the check box, anything but exactly one text info is replaced by a single merged one.
        - Finally the user text is written to the text infos and their coordinates are moved into
          the coordinate system of the bounding box (undoing a 90/270 degree rotation of the image).

        isAccepted is set only when this succeeds. Exceptions are reported through the main
        window's message signal.
        """
        try:
            text = self.ui.detectResultTextEdit.toPlainText()
            # detect_text() displays the translated placeholder, so both spellings are checked
            if text in ('', 'Not Found', self.tr('Not Found')):
                QMessageBox.about(self.ui.ocrDialogButtonBox, self.tr('Notice'),
                                  self.tr('Please try again after recognition or type.'))
                return

            isSplit = self.ui.checkBoxSeperate.isChecked()
            splitText = text.split('\n') if isSplit else [text]

            # try to detect text if there is no result of detection or
            # count of text info list not match with count of split text
            if isSplit:
                if not self.textInfoList:
                    self.detect_text()
                    if not self.textInfoList or len(self.textInfoList) != len(splitText):
                        self.textInfoList = self.getMergedTextInfo(text)
                        # the merged info stands for the whole text, not only its first line
                        splitText = [text]
            elif not self.textInfoList or len(self.textInfoList) != 1:
                self.textInfoList = self.getMergedTextInfo(text)

            # self.angle is one of 0/90/180/270, so the rounded radian is 1.57 or 4.71 for a vertical image
            radian = round(math.radians(abs(self.angle)), 2)
            is_vertical = radian == 1.57 or radian == 4.71
            for idx, text_info in enumerate(self.textInfoList):
                # update text using user input text
                if idx < len(splitText):
                    text_info.setText(splitText[idx])
                # up to here

                if is_vertical:
                    text_info.setAngle(radian)  # 360 degree == 6.28319 radian

                    # rotate text information back by the angle the image was turned
                    trans = QTransform()
                    trans.rotate(self.angle*-1)
                    rect = QRectF(text_info.getX(), text_info.getY(), text_info.getW(), text_info.getH())
                    rect = trans.mapRect(rect)
                    # size of the unrotated image: width and height of the rotated one swapped
                    width, height = self.image.height(), self.image.width()
                    x = width + rect.left() if rect.left() < 0 else rect.left()
                    y = height - max(abs(rect.top()), abs(rect.bottom())) if rect.top() < 0 else rect.top()
                    text_info.setX(self.boundingBox.x() + x)
                    text_info.setY(self.boundingBox.y() + y)
                    text_info.setW(rect.width())
                    text_info.setH(rect.height())
                    # up to here
                else:
                    text_info.setX(int(self.boundingBox.x()) + text_info.getX())
                    text_info.setY(int(self.boundingBox.y()) + text_info.getY())

            # flag acceptance only once everything above succeeded
            self.isAccepted = True
            QDialog.accept(self)

        except Exception as ex:
            from App import App
            message = 'error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)
359

    
360
    def getMergedTextInfo(self, text):
        """Create a single text info for ``text`` covering all dark content of the current image.

        The image is converted to gray scale, inverted and dilated with an 8x8 kernel; the union
        of the bounding rectangles of all external contours becomes the area of the text info.
        When no contour is found the whole image is used.

        :param text: text to store in the returned text info
        :return: list holding exactly one TextInfo (angle 0)
        """
        import cv2
        from TextInfo import TextInfo

        # round-trip through an in-memory PNG to turn the Qt image into a numpy array
        buffer = QBuffer()
        buffer.open(QBuffer.ReadWrite)
        self.image.save(buffer, "PNG")
        img = np.array(Image.open(io.BytesIO(buffer.data())))

        # an image that is already single channel needs no conversion (cvtColor would reject it)
        if img.ndim == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        imgNot = cv2.dilate(cv2.bitwise_not(img), np.ones((8, 8), np.uint8))

        contours, _ = cv2.findContours(imgNot, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return [TextInfo(text, 0, 0, self.image.width(), self.image.height(), 0)]

        boxes = [cv2.boundingRect(cnt) for cnt in contours]
        minX = min(x for x, _y, _w, _h in boxes)
        minY = min(y for _x, y, _w, _h in boxes)
        maxX = max(x + w for x, _y, w, _h in boxes)
        maxY = max(y + h for _x, y, _w, h in boxes)

        return [TextInfo(text, minX, minY, maxX - minX, maxY - minY, 0)]
390

    
391
    def reject(self):
        """Drop the recognition result, mark the dialog as not accepted and close it as rejected."""
        self.textInfoList = None
        self.isAccepted = False
        QDialog.reject(self)
395

    
396
    def copy_horizontal(self):
        """Copy the lines of the result text to the clipboard as one tab separated row.

        Exceptions are reported through the main window's message signal instead of being raised.
        """
        import io
        import csv

        try:
            cells = self.ui.detectResultTextEdit.toPlainText().split('\n')
            stream = io.StringIO()
            # each line of the text becomes one cell of a single row
            csv.writer(stream, delimiter='\t').writerow(cells)
            QApplication.clipboard().setText(stream.getvalue())

        except Exception as ex:
            from App import App
            from AppDocData import MessageType

            trace = sys.exc_info()[-1]
            message = 'error occurred({}) in {}:{}'.format(ex, trace.tb_frame.f_code.co_filename, trace.tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)
411

    
412
    '''
413
        @brief  Display this QDialog
414
    '''
415

    
416
    def showDialog(self):
        """Run the dialog modally.

        :return: tuple of (result of exec_(), self.textInfoList as it is after the dialog closed)
        """
        # self.setWindowFlags(self.windowFlags() & ~Qt.WindowContextHelpButtonHint)
        return self.exec_(), self.textInfoList
클립보드 이미지 추가 (최대 크기: 500 MB)