9 |
9 |
import TextInfo as ti
|
10 |
10 |
import re
|
11 |
11 |
import sys, os
|
|
12 |
from PyQt5.QtWidgets import *
|
12 |
13 |
try:
|
13 |
14 |
from PyQt5.QtCore import QRect
|
14 |
15 |
from PyQt5.QtGui import QTransform
|
... | ... | |
45 |
46 |
2018.06.14 Jeongwoo Add try-except. If an exception occurred, return None
|
46 |
47 |
2018.06.19 Jeongwoo Move text size check if-statement
|
47 |
48 |
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
|
|
49 |
2018.10.19 euisung verifying is not used
|
48 |
50 |
'''
|
49 |
51 |
def getTextInfoInSymbol(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
50 |
52 |
try:
|
... | ... | |
136 |
138 |
@date 2018.05.03
|
137 |
139 |
@history 2018.05.09 Jeongwoo Check split text's length
|
138 |
140 |
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch]
|
|
141 |
2018.10.19 euisung verifying is not used
|
139 |
142 |
'''
|
140 |
143 |
def getCharactersInfo(img, startPoint, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
141 |
144 |
docData = AppDocData.instance()
|
... | ... | |
181 |
184 |
2018.06.20 Jeongwoo Remove variable [lastCharHeight] / Change variable [cey], [ch] / Change method to calculate text line height
|
182 |
185 |
humkyung 2018.10.12 change text extraction logic: first get the bounding boxes, then extract the characters
|
183 |
186 |
"""
|
184 |
|
def getTextInfo(img, startPoint, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
|
187 |
def getTextInfo(img, startPoint, form = None, angle = 0, flag = FLAG_IMAGE_TO_BOXES, conf = DEFAULT_CONF):
|
|
188 |
if form is not None:
|
|
189 |
docData = AppDocData.instance()
|
|
190 |
QMessageBox.question(form, 'tesseract', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe'), QMessageBox.Yes, QMessageBox.Cancel)
|
|
191 |
QMessageBox.question(form, 'tesseract', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Tesseract-OCR', 'tesseract.exe'), QMessageBox.Yes, QMessageBox.Cancel)
|
185 |
192 |
try:
|
186 |
193 |
textInfoList = []
|
187 |
194 |
|
... | ... | |
195 |
202 |
im = im.rotate(-angle, expand=True)
|
196 |
203 |
imgWidth = im.width
|
197 |
204 |
imgHeight = im.height
|
198 |
|
boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng')
|
|
205 |
boundaryOcrData = pytesseract.image_to_boxes(im, config=conf, lang='eng+seed')
|
199 |
206 |
bounding_boxes = boundaryOcrData.split('\n')
|
200 |
207 |
merged_boxes = []
|
201 |
208 |
for box in bounding_boxes:
|
... | ... | |
228 |
235 |
|
229 |
236 |
for rect in merged_boxes:
|
230 |
237 |
cropped = im.crop((rect.left(), imgHeight - rect.bottom(), rect.right(), imgHeight - rect.top()))
|
231 |
|
text = pytesseract.image_to_string(cropped, config=conf, lang='eng')
|
|
238 |
text = pytesseract.image_to_string(cropped, config=conf, lang='eng+seed')
|
232 |
239 |
|
233 |
240 |
if rect.height() >= minSize and rect.height() <= maxSize:
|
234 |
241 |
text_rect = QRect(rect.left(), imgHeight - rect.bottom(), rect.width(), rect.height())
|