개정판 3ed85f79
dev issue #507 : fix text
Change-Id: If3c96639dbc60f9ae3ad11da2525749e8449b407
DTI_PID/DTI_PID/tesseract_ocr_module.py | ||
---|---|---|
103 | 103 |
boundaryOcrData = pytesseract.image_to_boxes(im, config=_conf, lang=oCRLang) |
104 | 104 |
bounding_boxes = boundaryOcrData.split('\n') |
105 | 105 |
merged_boxes = [] |
106 |
|
|
107 |
# before |
|
108 |
""" |
|
106 | 109 |
for box in bounding_boxes: |
107 | 110 |
if merged_boxes: |
108 | 111 |
tokens = box.split(' ') |
... | ... | |
130 | 133 |
maxx = int(tokens[3]) |
131 | 134 |
maxy = int(tokens[4]) |
132 | 135 |
merged_boxes.append(QRect(minx, miny, maxx - minx, maxy - miny)) |
133 |
|
|
136 |
""" |
|
137 |
|
|
138 |
for box in bounding_boxes: |
|
139 |
if merged_boxes: |
|
140 |
tokens = box.split(' ') |
|
141 |
if len(tokens) >= 5: |
|
142 |
minx = int(tokens[1]) |
|
143 |
miny = int(tokens[2]) |
|
144 |
maxx = int(tokens[3]) |
|
145 |
maxy = int(tokens[4]) |
|
146 |
|
|
147 |
findBox = None |
|
148 |
for merged_box in merged_boxes: |
|
149 |
bottom = merged_box.bottom() |
|
150 |
top = merged_box.top() |
|
151 |
if abs(miny - top) <= 1 or abs(maxy - bottom) <= 1: |
|
152 |
findBox = merged_box |
|
153 |
|
|
154 |
if findBox: |
|
155 |
findBox.setLeft(min(findBox.left(), minx)) |
|
156 |
findBox.setTop(min(findBox.top(), miny)) |
|
157 |
findBox.setRight(max(findBox.right(), maxx)) |
|
158 |
findBox.setBottom(max(findBox.bottom(), maxy)) |
|
159 |
else: |
|
160 |
merged_boxes.append(QRect(minx, miny, maxx - minx, maxy - miny)) |
|
161 |
|
|
162 |
else: |
|
163 |
tokens = box.split(' ') |
|
164 |
if len(tokens) >= 5: |
|
165 |
minx = int(tokens[1]) |
|
166 |
miny = int(tokens[2]) |
|
167 |
maxx = int(tokens[3]) |
|
168 |
maxy = int(tokens[4]) |
|
169 |
merged_boxes.append(QRect(minx, miny, maxx - minx, maxy - miny)) |
|
170 |
|
|
134 | 171 |
for rect in merged_boxes: |
135 | 172 |
if not rect.isValid() or \ |
136 | 173 |
rect.left() < 0 or rect.top() < 0 or rect.right() > imgWidth or rect.bottom() > imgHeight: continue |
내보내기 Unified diff