프로젝트

일반

사용자정보

통계
| 개정판:

hytos / DTI_PID / DTI_PID / OcrResultDialog.py @ 08207d04

이력 | 보기 | 이력해설 | 다운로드 (16.7 KB)

1
# coding: utf-8
2
"""
3
    This is ocr result dialog module
4
"""
5
from PIL import Image
6
import io
7
import numpy as np
8
import math
9
import enum
10

    
11
from PyQt5.QtCore import *
12
from PyQt5.QtGui import *
13
from PyQt5.QtWidgets import *
14
import OcrResultDialog_UI
15
import QtImageViewer
16
import tesseract_ocr_module as TOCR
17
from App import App
18
from AppDocData import *
19
from TextInfo import TextInfo
20

    
21

    
22
class SpellTextEdit(QTextEdit):
    """Text edit whose document is checked by a Highlighter against the OCR white character list."""

    def __init__(self, *args):
        QTextEdit.__init__(self, *args)

        # The accepted characters come from the 'Text Recognition' / 'White Character List' configuration;
        # None is handed to the highlighter when nothing is configured.
        configs = AppDocData.instance().getConfigs('Text Recognition', 'White Character List')
        allowed_chars = configs[0].value if configs else None

        self.highlighter = Highlighter(self.document())
        self.highlighter.white_char_list = allowed_chars
31

    
32

    
33
class Highlighter(QSyntaxHighlighter):
    """Syntax highlighter underlining every word that contains a character outside ``white_char_list``."""

    # format shared by all instances: red underline in spell-check style
    err_format = QTextCharFormat()
    err_format.setUnderlineColor(Qt.red)
    err_format.setUnderlineStyle(QTextCharFormat.SpellCheckUnderline)

    def __init__(self, *args):
        QSyntaxHighlighter.__init__(self, *args)

        # characters accepted as valid; nothing is highlighted while this is None or empty
        self.white_char_list = None

    def highlightBlock(self, text):
        """Underline the words of ``text`` that contain a character missing from the white list.

        :param text: content of the text block being highlighted
        """
        import re

        if not self.white_char_list:
            return

        # Use the real offset of every whitespace-delimited word: consecutive or leading
        # whitespace would otherwise shift the underline away from the word it belongs to.
        for match in re.finditer(r'\S+', text):
            word = match.group()
            if any(c not in self.white_char_list for c in word):
                self.setFormat(match.start(), len(word), self.err_format)
49

    
50

    
51
class QOcrResultDialog(QDialog):
52
    class Format(enum.Enum):
        """Kind of content shown in the dialog; selects the recognition path used by detect_text."""

        # whole image is passed to TOCR.getTextInfo
        Normal = 0
        # image is split into contour areas which are recognised by TextDetector
        Table = 1
55

    
56
    def __init__(self, parent, qimage, boundingBox, format=Format.Normal, text_item=None):
        """Build the dialog, wire its widgets and show the initial recognition result.

        :param parent: parent widget handed to QDialog
        :param qimage: image to recognise; it is displayed in the embedded image viewer
        :param boundingBox: rectangle whose x()/y() are added to the text positions in accept()
        :param format: QOcrResultDialog.Format member selecting the recognition path of detect_text()
        :param text_item: existing text item to edit; when given its text is shown instead of running OCR
        """
        QDialog.__init__(self, parent)
        self.textInfoList = []

        self._text_item = text_item
        self.image = qimage
        self.boundingBox = boundingBox
        self._format = format

        self.angle = 0  # angle in degree

        self.ui = OcrResultDialog_UI.Ui_Dialog()
        self.ui.setupUi(self)
        self.ui.detectResultTextEdit = SpellTextEdit()
        self.ui.detectResultTextEdit.setFont(QFont('Consolas', 15, QFont.Bold))
        self.ui.horizontalLayoutTextEdit.addWidget(self.ui.detectResultTextEdit)

        # the training image button is only offered when the application runs in 'advanced' mode
        app_doc_data = AppDocData.instance()
        configs = app_doc_data.getAppConfigs('app', 'mode')
        is_advanced = bool(configs) and 1 == len(configs) and 'advanced' == configs[0].value
        if not is_advanced:
            self.ui.pushButtonMakeTrainingImage.setVisible(False)

        self.graphicsView = QtImageViewer.QtImageViewer(App.mainWnd())
        self.graphicsView.useDefaultCommand()  # USE DEFAULT COMMAND
        self.graphicsView.setImage(self.image)
        self.ui.horizontalLayoutGraphicsView.addWidget(self.graphicsView)

        self.ui.counterClockPushButton_2.clicked.connect(lambda: self.rotateImage(True))
        self.ui.clockPushButton_2.clicked.connect(lambda: self.rotateImage(False))
        self.ui.pushButtonCopyHori.clicked.connect(self.copy_horizontal)
        # add shortcut for detecting text with 't'
        self.ui.redetectPushButton.clicked.connect(self.detect_text)
        shortcut = QShortcut(QKeySequence('t'), self.ui.redetectPushButton)
        shortcut.activated.connect(self.detect_text)
        # up to here
        self.ui.pushButtonMakeTrainingImage.clicked.connect(self.pushButtonMakeTrainingImageClicked)

        # 'eng' is always available; the project specific data is offered only if its traineddata file exists
        self.ui.comboBoxOCRData.addItem('eng')
        # fall back to '' so a missing ALLUSERSPROFILE variable does not make os.path.join raise
        tessdata_path = os.path.join(os.getenv('ALLUSERSPROFILE', ''), 'Digital PID', 'Tesseract-OCR', 'tessdata')
        project_name = app_doc_data.getCurrentProject().name
        if os.path.isfile(os.path.join(tessdata_path, project_name + '.traineddata')):
            self.ui.comboBoxOCRData.addItem(project_name)

        configs = app_doc_data.getConfigs('Text Recognition', 'OCR Data')
        value = configs[0].value if configs and 1 == len(configs) else ''
        # findText() returns -1 for an unknown entry, which would leave the combo box without
        # a selection (and detect_text without a language); select the first entry instead
        at = self.ui.comboBoxOCRData.findText(value) if value else 0
        self.ui.comboBoxOCRData.setCurrentIndex(at if at >= 0 else 0)

        if not self._text_item:
            # a region taller than wide is turned a quarter turn before recognition
            if format == QOcrResultDialog.Format.Normal and self.boundingBox.height() > self.boundingBox.width():
                self.rotateImage(False)

            self.detect_text()
        else:
            rect = self._text_item.sceneBoundingRect()
            text_info = TextInfo(self._text_item.text(), 0, 0, rect.width(), rect.height(), 0)
            self.textInfoList.append(text_info)
            self.display_text_rect()

            # item angles are compared against the radian values 1.57 and 4.71 with a small tolerance
            allowed_error = 0.001
            item_angle = self._text_item.angle
            if abs(item_angle - 1.57) < allowed_error or abs(item_angle - 4.71) < allowed_error:
                self.rotateImage(False)

            self.ui.detectResultTextEdit.setPlainText(self._text_item.text())
            self.ui.checkBoxSeperate.setChecked(False)

        self.isAccepted = False
127

    
128
    def showEvent(self, QShowEvent):
        """Call the image viewer's zoomImageInit() each time the dialog is shown."""
        viewer = self.graphicsView
        viewer.zoomImageInit()
131

    
132
    def display_text_rect(self):
        """Replace the scene decorations by a red rectangle for every entry of textInfoList."""
        scene = self.graphicsView.scene

        # everything that is not exactly a QGraphicsPixmapItem is taken out of the scene
        stale_items = [item for item in scene.items() if type(item) is not QGraphicsPixmapItem]
        for stale in stale_items:
            scene.removeItem(stale)

        for info in self.textInfoList:
            outline = QPen(Qt.red, 1, Qt.SolidLine)
            scene.addRect(info.getX(), info.getY(), info.getW(), info.getH(), outline)
141

    
142
    '''
143
        @brief      Make OCR Training Image
144
        @author     euisung
145
        @date       2018.10.16
146
        @history    euisung     2018.11.02       add notice push
147
    '''
148

    
149
    def pushButtonMakeTrainingImageClicked(self):
        """Save the current image as an OCR training image and close the dialog.

        The image is written into the current project's training file path under a random
        ``<uuid4>.png`` name. The dialog is closed only after the image has been written; when
        saving fails a warning is shown and the dialog stays open.
        """
        import uuid

        project = AppDocData.instance().getCurrentProject()
        training_img_path = os.path.join(project.getTrainingFilePath(), str(uuid.uuid4()) + '.png')

        # save() reports failure through its boolean result instead of raising
        if not self.image.save(training_img_path):
            QMessageBox.warning(self, self.tr('Notice'), self.tr('Failed to save training image.'))
            return

        QMessageBox.about(self, self.tr("INFO"), self.tr('Successfully saved.'))
        # QDialog.reject is called directly, so this class's reject() override is not executed
        QDialog.reject(self)
159

    
160
    def rotateImage(self, isCounterClock):
        """Turn the image by a quarter turn and move the text rectangles along with it.

        :param isCounterClock: True rotates the image by -90 degrees, False by +90 degrees
        """
        # the accumulated angle is kept inside [0, 360)
        if isCounterClock:
            self.angle = (self.angle - 90) % 360
            degrees = -90
        else:
            self.angle = (self.angle - 270) % 360
            degrees = 90

        transform = QTransform()
        transform.rotate(degrees)

        self.graphicsView.clearImage()
        self.image = self.image.transformed(transform)
        self.graphicsView.setImage(self.image)

        for info in self.textInfoList:
            mapped = transform.mapRect(QRectF(info.getX(), info.getY(), info.getW(), info.getH()))

            # the rotation is about the origin, so one axis ends up negative;
            # shift it back by the size of the rotated image
            if mapped.left() < 0:
                new_x = self.image.width() + mapped.left()
            else:
                new_x = mapped.left()

            if mapped.top() < 0:
                new_y = self.image.height() - max(abs(mapped.top()), abs(mapped.bottom()))
            else:
                new_y = mapped.top()

            info.setX(new_x)
            info.setY(new_y)
            info.setW(mapped.width())
            info.setH(mapped.height())

            outline = QRectF(info.getX(), info.getY(), info.getW(), info.getH())
            self.graphicsView.scene.addRect(outline, QPen(Qt.red, 1, Qt.SolidLine))
187

    
188
    '''
189
        @history 2018.04.26 Jeongwoo    Add Rectangle with modified Coords
190
                 2018.06.20 Jeongwoo    Remove test code
191
                 2018.11.08 euisung     add white char list check process on db
192
                 2018.11.22 euisung     OCR lang apply fixed
193
    '''
194
    def detect_text(self):
        """Recognise the text of the current image and show the result.

        Normal format: the whole image is passed to TOCR.getTextInfo together with the OCR data
        selected in the combo box and the configured white character list.
        Table format: the image is thresholded, the bounding rectangles of its external contours
        become candidate text areas and TextDetector.recognizeText reads them; the result is
        sorted by Y.

        The result is stored in self.textInfoList, written to the text edit, outlined in the
        viewer and copied through copy_horizontal(). "Not Found" is shown when nothing was
        recognised. Any exception is reported through the main window's addMessage signal.
        """
        import cv2
        from TextDetector import TextDetector
        from TextInfo import TextInfo

        try:
            # hand the Qt image over to numpy through an in-memory PNG
            buffer = QBuffer()
            buffer.open(QBuffer.ReadWrite)
            self.image.save(buffer, "PNG")
            img = np.array(Image.open(io.BytesIO(buffer.data())))

            if self._format == QOcrResultDialog.Format.Normal:
                ocr_data = self.ui.comboBoxOCRData.currentText()
                white_char_list = AppDocData.instance().getConfigs('Text Recognition', 'White Character List')
                self.textInfoList = TOCR.getTextInfo(img, (0, 0), 0, language=ocr_data,
                                                     conf=white_char_list[0].value if white_char_list else '')
            else:
                # the decoded PNG may already be single channel or carry an alpha channel;
                # COLOR_BGR2GRAY only accepts three channel input
                if img.ndim == 2:
                    gray = img
                elif img.shape[2] == 4:
                    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
                else:
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                cv_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]

                # extract contours
                contours, _ = cv2.findContours(cv_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                candidates = []
                for contour in contours:
                    x, y, w, h = cv2.boundingRect(contour)
                    candidates.append(TextInfo('', x, y, w, h, 0))

                # keep only the areas that do not contain the center of another area
                text_info_list = [info for info in candidates
                                  if not any(other is not info and info.contains(other.center)
                                             for other in candidates)]

                detector = TextDetector()
                detector.recognizeText(img, (0, 0), text_info_list, None, None, None, None, onlyTextArea=True)
                self.textInfoList = detector.textInfoList.copy()
                self.textInfoList.sort(key=lambda info: info.getY())

            if self.textInfoList:
                self.ui.detectResultTextEdit.setText(self.getPlainText(self.textInfoList))
                self.display_text_rect()

                self.copy_horizontal()
            else:
                self.ui.detectResultTextEdit.setText(self.tr("Not Found"))

        except Exception as ex:
            from App import App
            message = 'error occurred({}) in {}:{}'.format(repr(ex), sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)
263

    
264
    def getPlainText(self, textInfoList):
        """Return the getText() values of the given text infos joined by newlines.

        :param textInfoList: sequence of objects providing getText()
        :return: one line per text info; an empty string for an empty sequence
        """
        return '\n'.join(info.getText() for info in textInfoList)
272

    
273
    '''
274
        @brief      OK Button Clicked. Remake TextInfo object
275
        @author     Jeongwoo
276
        @date       18.04.19
277
        @history    18.04.20    Jeongwoo    Calculate Start Point Coordinates by rotated angle
278
                    18.04.26    Jeongwoo    Scene.itemAt(textX - boundBox.x(), textY - boundBox.y())
279
    '''
280

    
281
    def accept(self):
        """Apply the edited text to the text infos, convert them to drawing coordinates and close.

        The text of the edit box is split by lines when the separate check box is checked,
        otherwise it is used as a single text. When no text info exists (or, in single text
        mode, not exactly one exists) a merged text info covering the image content is used.
        For an image turned by 90 or 270 degrees the rectangles are rotated back before the
        bounding box offset is added.

        isAccepted becomes True only when the dialog really is accepted; an empty or
        "Not Found" text shows a notice and leaves the dialog open. Any exception is reported
        through the main window's addMessage signal.
        """
        self.isAccepted = False

        try:
            text = self.ui.detectResultTextEdit.toPlainText()
            # detect_text() writes the translated marker, so compare against both spellings
            if text in ('', 'Not Found', self.tr('Not Found')):
                QMessageBox.about(self.ui.ocrDialogButtonBox, self.tr('Notice'),
                                  self.tr('Please try again after recognition or type.'))
                return

            isSplit = self.ui.checkBoxSeperate.isChecked()
            splitText = text.split('\n') if isSplit else [text]

            if isSplit:
                # try to detect text if there is no result of detection
                if not self.textInfoList:
                    self.detect_text()
                    if not self.textInfoList or len(self.textInfoList) != len(splitText):
                        # fall back to one text info holding the complete text
                        self.textInfoList = self.getMergedTextInfo(text)
                        splitText = [text]
                # NOTE(review): an existing list whose count differs from the number of lines is
                # kept as is; only the first min(count, lines) texts are updated below - confirm
            elif not self.textInfoList or len(self.textInfoList) != 1:
                self.textInfoList = self.getMergedTextInfo(text)

            radian = round(math.radians(abs(self.angle)), 2)
            is_quarter_turn = radian == 1.57 or radian == 4.71
            if is_quarter_turn:
                # transformation turning the displayed image back to its original orientation
                trans = QTransform()
                trans.rotate(self.angle*-1)
                # size of the image after turning it back
                width, height = self.image.height(), self.image.width()

            for idx, text_info in enumerate(self.textInfoList):
                # update text using user input text
                if idx < len(splitText):
                    text_info.setText(splitText[idx])
                # up to here

                if is_quarter_turn:
                    text_info.setAngle(radian)  # 360 degree == 6.28319 radian

                    # rotate text information
                    rect = QRectF(text_info.getX(), text_info.getY(), text_info.getW(), text_info.getH())
                    rect = trans.mapRect(rect)
                    x = width + rect.left() if rect.left() < 0 else rect.left()
                    y = height - max(abs(rect.top()), abs(rect.bottom())) if rect.top() < 0 else rect.top()
                    text_info.setX(self.boundingBox.x() + x)
                    text_info.setY(self.boundingBox.y() + y)
                    text_info.setW(rect.width())
                    text_info.setH(rect.height())
                    # up to here
                else:
                    text_info.setX(int(self.boundingBox.x()) + text_info.getX())
                    text_info.setY(int(self.boundingBox.y()) + text_info.getY())

            self.isAccepted = True
            QDialog.accept(self)

        except Exception as ex:
            from App import App
            message = 'error occurred({}) in {}:{}'.format(ex, sys.exc_info()[-1].tb_frame.f_code.co_filename,
                                                           sys.exc_info()[-1].tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)
349

    
350
    def getMergedTextInfo(self, text):
        """Return one TextInfo spanning everything found in the current image.

        :param text: text assigned to the returned TextInfo
        :return: list with a single TextInfo whose rectangle is the union of the bounding
                 rectangles of all external contours of the inverted, dilated image; the
                 whole image is used when no contour is found
        """
        import cv2
        from TextInfo import TextInfo

        # hand the Qt image over to numpy through an in-memory PNG
        buffer = QBuffer()
        buffer.open(QBuffer.ReadWrite)
        self.image.save(buffer, "PNG")
        img = np.array(Image.open(io.BytesIO(buffer.data())))

        # the decoded PNG may already be single channel or carry an alpha channel;
        # COLOR_BGR2GRAY only accepts three channel input
        if img.ndim == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY if img.shape[2] == 4 else cv2.COLOR_BGR2GRAY)

        # invert so that every non-white pixel becomes non-zero, then grow the regions with an 8x8 kernel
        imgNot = cv2.dilate(cv2.bitwise_not(img), np.ones((8, 8), np.uint8))

        contours, _ = cv2.findContours(imgNot, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) == 0:
            return [TextInfo(text, 0, 0, self.image.width(), self.image.height(), 0)]

        rects = [cv2.boundingRect(cnt) for cnt in contours]
        minX = min(x for x, _y, _w, _h in rects)
        minY = min(y for _x, y, _w, _h in rects)
        maxX = max(x + w for x, _y, w, _h in rects)
        maxY = max(y + h for _x, y, _w, h in rects)

        return [TextInfo(text, minX, minY, maxX - minX, maxY - minY, 0)]
380

    
381
    def reject(self):
        """Cancel the dialog: clear textInfoList, mark the dialog as not accepted and close it."""
        self.textInfoList = None
        self.isAccepted = False
        QDialog.reject(self)
385

    
386
    def copy_horizontal(self):
        """Put the lines of the result text on the clipboard as one tab separated row.

        Any exception is reported through the main window's addMessage signal.
        """
        import csv
        import io

        try:
            cells = self.ui.detectResultTextEdit.toPlainText().split('\n')

            # let the csv writer take care of quoting while building the single row
            out = io.StringIO()
            csv.writer(out, delimiter='\t').writerow(cells)
            QApplication.clipboard().setText(out.getvalue())

        except Exception as ex:
            from App import App
            from AppDocData import MessageType

            trace = sys.exc_info()[-1]
            message = 'error occurred({}) in {}:{}'.format(ex, trace.tb_frame.f_code.co_filename, trace.tb_lineno)
            App.mainWnd().addMessage.emit(MessageType.Error, message)
401

    
402
    '''
403
        @brief  Display this QDialog
404
    '''
405

    
406
    def showDialog(self):
        """Run the dialog through exec_() and hand back its outcome.

        :return: tuple of the exec_() result and textInfoList as it is after the dialog closed
        """
        result_code = self.exec_()
        return result_code, self.textInfoList
클립보드 이미지 추가 (최대 크기: 500 MB)