개정판 2cc84fc6
Azure OCR로 텍스트 영역 추출 후 Tesseract OCR로 해당 영역 재검출 코드 적용(해당 코드 삭제 희망 시 주석처리된 부분 주석 해제)
DTI_PID/DTI_PID/DTI_PID.py | ||
---|---|---|
695 | 695 |
@author jwkim |
696 | 696 |
@date |
697 | 697 |
@history humkyung 2018.04.06 check if file exists |
698 |
Jeongwoo 2018.05.09 Use Tesseract OCR after Azure OCR (Azure OCR only locates the text areas; Tesseract re-reads each area) |
|
698 | 699 |
''' |
699 | 700 |
def initMainSrc(mainRes): |
700 | 701 |
global src |
... | ... | |
721 | 722 |
area = AppDocData.instance().getArea('Drawing') |
722 | 723 |
if area is not None: |
723 | 724 |
area.img = srcGray[int(area.y):int(area.y+area.height), int(area.x):int(area.x+area.width)] |
724 |
|
|
725 |
(_tempOcrSrc, textInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray, area.x if area is not None else 0, area.y if area is not None else 0) |
|
726 |
|
|
725 |
|
|
726 |
#(_tempOcrSrc, textInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray, area.x if area is not None else 0, area.y if area is not None else 0) |
|
727 |
(_tempOcrSrc, tInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray, area.x if area is not None else 0, area.y if area is not None else 0) |
|
728 |
|
|
727 | 729 |
global MIN_TEXT_SIZE |
728 |
for textInfo in textInfoList: |
|
729 |
if textInfo.getW() >= MIN_TEXT_SIZE or textInfo.getH() >= MIN_TEXT_SIZE: |
|
730 |
ocrCompletedSrc = removeText(ocrCompletedSrc, textInfo.getText(), textInfo.getX(), textInfo.getY(), textInfo.getW(), textInfo.getH()) |
|
730 |
for tInfo in tInfoList: |
|
731 |
if tInfo.getW() >= MIN_TEXT_SIZE or tInfo.getH() >= MIN_TEXT_SIZE: |
|
732 |
resultTextInfo = TOCR.getTextInfo(ocrCompletedSrc[tInfo.getY():tInfo.getY()+tInfo.getH(),tInfo.getX():tInfo.getX()+tInfo.getW()], (tInfo.getX(), tInfo.getY())) |
|
733 |
if resultTextInfo is not None: |
|
734 |
textInfoList.extend(resultTextInfo) |
|
735 |
ocrCompletedSrc = removeText(ocrCompletedSrc, resultTextInfo[0].getText(), resultTextInfo[0].getX(), resultTextInfo[0].getY(), resultTextInfo[0].getW(), resultTextInfo[0].getH()) |
|
736 |
else: |
|
737 |
print(tInfo.getText()) |
|
738 |
#global MIN_TEXT_SIZE |
|
739 |
#for textInfo in textInfoList: |
|
740 |
# if textInfo.getW() >= MIN_TEXT_SIZE or textInfo.getH() >= MIN_TEXT_SIZE: |
|
741 |
# ocrCompletedSrc = removeText(ocrCompletedSrc, textInfo.getText(), textInfo.getX(), textInfo.getY(), textInfo.getW(), textInfo.getH()) |
|
731 | 742 |
|
732 | 743 |
noteArea = AppDocData.instance().getArea('Note') |
733 | 744 |
if noteArea is not None: |
... | ... | |
743 | 754 |
@date |
744 | 755 |
@history humkyung 2018.04.06 change error display from message box to print |
745 | 756 |
Jeongwoo 2018.04.25 Remove 'Current Symbol : ' QListItem |
757 |
Jeongwoo 2018.05.09 Comment out the OCR.removeTextFromNpArray block |
|
746 | 758 |
''' |
747 | 759 |
def executeRecognition(path, listWidget): |
748 | 760 |
global src |
... | ... | |
803 | 815 |
#threadLock.release() |
804 | 816 |
pool.shutdown(wait = True) |
805 | 817 |
|
806 |
area = AppDocData.instance().getArea('Drawing') |
|
807 |
(_tempOcrSrc, tInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray, area.x if area is not None else 0, area.y if area is not None else 0) |
|
808 |
#(srcGray, tInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray) |
|
809 |
if area is not None: |
|
810 |
srcGray[int(area.y):int(area.y+area.height), int(area.x):int(area.x+area.width)] = _tempOcrSrc |
|
811 |
else: |
|
812 |
srcGray = _tempOcrSrc |
|
813 |
#srcGray = TOCR.removeTextFromNpArray(srcGray, TOCR.FLAG_IMAGE_TO_DATA) |
|
818 |
####area = AppDocData.instance().getArea('Drawing')
|
|
819 |
####(_tempOcrSrc, tInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray, area.x if area is not None else 0, area.y if area is not None else 0)
|
|
820 |
#####(srcGray, tInfoList) = OCR.removeTextFromNpArray(area.img if area is not None else srcGray)
|
|
821 |
####if area is not None:
|
|
822 |
#### srcGray[int(area.y):int(area.y+area.height), int(area.x):int(area.x+area.width)] = _tempOcrSrc
|
|
823 |
####else:
|
|
824 |
#### srcGray = _tempOcrSrc
|
|
825 |
####srcGray = TOCR.removeTextFromNpArray(srcGray, TOCR.FLAG_IMAGE_TO_DATA)
|
|
814 | 826 |
global MIN_TEXT_SIZE |
815 | 827 |
for textInfo in textInfoList: |
816 | 828 |
#if not checkTextInSymbol((textInfo.getX(), textInfo.getY())): |
내보내기 Unified diff