Sunday, 17 February 2019

OpenCV Python - Fixing Broken Text

I am attempting to repair broken text (the images below) so that I can perform OCR on the images. How do I go about repairing the text below? I have already tried dilation, erosion, morphology closing, and using the distance between contours. None of these seem to work. I would appreciate any help, thanks.

Broken Text:

enter image description here

enter image description here

enter image description here

Attempted Solutions (none work):

def OCR(img):
    """Run Tesseract on *img* and return the recognised text.

    The image is passed straight to ``pytesseract.image_to_string`` with a
    fixed option string: language ``eng``, OCR engine mode 1 and page
    segmentation mode 3.
    """
    tesseract_options = '-l eng --oem 1 --psm 3'
    return pytesseract.image_to_string(img, config=tesseract_options)

def find_if_close(cnt1, cnt2, threshold):
    """Return True if any point of *cnt1* is closer than *threshold* to any point of *cnt2*.

    Args:
        cnt1: Array whose first axis indexes points (e.g. an OpenCV contour).
        cnt2: Array laid out like *cnt1*.
        threshold: Distance limit; a pair matches only when its Euclidean
            distance is strictly less than this value.

    Returns:
        ``True`` as soon as one sufficiently close pair is found, otherwise
        ``False`` -- including when either input contains no points.
    """
    row1, row2 = cnt1.shape[0], cnt2.shape[0]
    # No points means no pair can be close; answer explicitly instead of
    # falling off the end of the function and returning None.
    if row1 == 0 or row2 == 0:
        return False

    # Flatten each point to a plain coordinate vector and work in float so
    # integer/unsigned coordinate dtypes cannot wrap around on subtraction.
    pts1 = np.asarray(cnt1, dtype=np.float64).reshape(row1, -1)
    pts2 = np.asarray(cnt2, dtype=np.float64).reshape(row2, -1)

    for point in pts1:
        # One vectorised call gives the distance from this point to every
        # point of cnt2; looping over cnt1 keeps memory use linear.
        distances = np.linalg.norm(pts2 - point, axis=1)
        if (distances < threshold).any():
            return True
    return False

def get_countour(img, threshold=5):
    """Outline the contours that lie close to the first detected contour.

    The image is converted to grayscale, binarised at a fixed level of 127,
    and its contours are extracted. Every contour that ``find_if_close``
    reports as near the first contour is drawn onto *img* in green with a
    3-pixel line.

    NOTE(review): only the first contour is used as the reference, matching
    the original loop, which broke out after its first iteration -- confirm
    that this is the intended behaviour.

    Args:
        img: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``; it is
            modified in place by ``cv2.drawContours``.
        threshold: Distance limit forwarded to ``find_if_close``.

    Returns:
        The list of contours that were drawn (empty when none were found).
    """
    imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on other versions; indexing from the end picks
    # the contour list in both cases.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return []

    reference = contours[0]
    # Each neighbour is collected once; the reference is included whenever
    # find_if_close considers it close to itself.
    letter = [
        candidate
        for candidate in contours
        if find_if_close(reference, candidate, threshold)
    ]
    cv2.drawContours(img, letter, -1, (0, 255, 0), 3)
    return letter


def strengthen(img, kernel_size=(1, 20)):
    """Dilate and then erode *img* with the same rectangular kernel.

    Dilation followed by erosion with one kernel is a morphological closing;
    with the default 1x20 kernel (one row, twenty columns) the operation acts
    along the horizontal direction.

    Args:
        img: Image to process; it is passed to ``cv2.dilate`` unchanged.
            NOTE(review): the disabled grayscale/Otsu step in the original
            suggests a binarised input may work better -- confirm.
        kernel_size: ``(rows, cols)`` shape of the all-ones structuring
            element.

    Returns:
        The image after one dilation pass followed by one erosion pass.
    """
    k = np.ones(kernel_size, np.uint8)
    # The third positional parameter of cv2.dilate / cv2.erode is `dst`, not
    # `iterations`, so the pass count has to be given by keyword.
    dilated = cv2.dilate(img, k, iterations=1)
    eroded = cv2.erode(dilated, k, iterations=1)
    return eroded


# Driver: apply the morphological repair, run the contour grouping, then OCR
# and display the results.
# NOTE(review): `img` is not defined in the visible code -- presumably it is
# loaded earlier (e.g. with cv2.imread); confirm.
# strengthen() runs first, so `closed` is computed before get_countour()
# draws on `img`.
closed = strengthen(img)
# get_countour() draws onto `img` in place; `contours` is not used below.
contours = get_countour(img)
# NOTE(review): OCR is run on `img` (after contours were drawn on it), not on
# the repaired image `closed` -- confirm this is intended.
print(OCR(img))
cv2.imshow('img', img)
cv2.imshow('strengthened', closed)


# Wait for a key press, then close the display windows.
cv2.waitKey(0)
cv2.destroyAllWindows()



from OpenCV Python - Fixing Broken Text

No comments:

Post a Comment