프로젝트

일반

사용자정보

개정판 227e6ded

ID 227e6dedb0f40a13b86b597348b01bbe3e766cdc
상위 f6e96725
하위 80d6cd66

함의성이 6년 이상 전에 추가함

build issue #655: tesseract path check

차이점 보기:

DTI_PID/DTI_PID/OcrResultDialog.py
134 134
            #cv2.destroyAllWindows()
135 135

  
136 136
            # up to here
137
            self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())))
137
            self.textInfoList = TOCR.getTextInfo(img, (round(self.boundingBox.x()), round(self.boundingBox.y())), self)
138 138

  
139 139
            #self.textInfoList = TOCR.getTextInfo(img, (int(self.boundingBox.x()), int(self.boundingBox.y())))
140 140
            if self.textInfoList is not None and len(self.textInfoList) > 0:
DTI_PID/DTI_PID/TrainingImageListDialog.py
277 277
        
278 278
        #unicharsetExtractorCmd = unicharset_extractor_cmd + ' ' + trainingBoxPath + " >> output.log"
279 279
        #subprocess.call(unicharsetExtractorCmd, shell = True)
280

  
280
        QMessageBox.question(self, 'tesseract', trainCmd, QMessageBox.Yes, QMessageBox.Cancel)
281
        
281 282
        os.system(trainCmd)
282 283
        #os.system(unicharsetExtractorCmd)
283 284

  
DTI_PID/DTI_PID/tesseract_ocr_module.py
9 9
import TextInfo as ti
10 10
import re
11 11
import sys, os
12
from PyQt5.QtWidgets import *
12 13
try:
13 14
    from PyQt5.QtCore import QRect
14 15
    from PyQt5.QtGui import QTransform
......
45 46
                2018.06.14  Jeongwoo    Add try-except. If exception occured, return None
46 47
                2018.06.19  Jeongwoo    Move text size check if-statement
47 48
                2018.06.20  Jeongwoo    Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
49
                2018.10.19 euisung verifing doesn't used
48 50
'''
49 51
def getTextInfoInSymbol(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
50 52
    try:
......
136 138
    @date       2018.05.03
137 139
    @history    2018.05.09  Jeongwoo    Check split text' length
138 140
                2018.06.20  Jeongwoo    Remove variable [lastCharHeight] / Change variable [cey], [ch]
141
                2018.10.19 euisung verifing doesn't used
139 142
'''
140 143
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
141 144
    docData = AppDocData.instance()
......
181 184
                2018.06.20  Jeongwoo    Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
182 185
                humkyung 2018.10.12 change logic to extract text which first get bounding box and then extract character
183 186
"""
184
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
187
def getTextInfo(img, startPoint, form = None, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
188
    if form is not None:
189
        docData = AppDocData.instance()
190
        QMessageBox.question(form, 'tesseract', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe'), QMessageBox.Yes, QMessageBox.Cancel)
191
        QMessageBox.question(form, 'tesseract', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe'), QMessageBox.Yes, QMessageBox.Cancel)
185 192
    try:
186 193
        textInfoList = []
187 194

  
......
195 202
        im = im.rotate(-angle, expand=True)
196 203
        imgWidth = im.width
197 204
        imgHeight = im.height
198
        boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')
205
        boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng+seed')
199 206
        bounding_boxes = boundaryOcrData.split('\n')
200 207
        merged_boxes = []
201 208
        for box in bounding_boxes:
......
228 235
        
229 236
        for rect in merged_boxes:
230 237
            cropped = im.crop((rect.left(), imgHeight - rect.bottom(), rect.right(), imgHeight - rect.top()))
231
            text = pytesseract.image_to_string(cropped, config=conf, lang='eng')
238
            text = pytesseract.image_to_string(cropped, config=conf, lang='eng+seed')
232 239

  
233 240
            if rect.height() >= minSize and rect.height() <= maxSize:
234 241
                text_rect = QRect(rect.left(), imgHeight - rect.bottom(), rect.width(), rect.height())

내보내기 Unified diff

클립보드 이미지 추가 (최대 크기: 500 MB)