I am trying to repair broken text (the images below) so that I can run OCR on the images. How do I go about repairing the text below? I have already tried dilation, erosion, morphological closing, and grouping contours by the distance between them. None of these seems to work. I would appreciate any help, thanks.
Broken Text:
Attempted Solutions (none work):
def OCR(img):
    """Run Tesseract on ``img`` and return the recognised text.

    The image is handed unchanged to ``pytesseract.image_to_string`` with
    the options ``-l eng --oem 1 --psm 3`` (English language data, OCR
    engine mode 1, page segmentation mode 3).
    """
    tesseract_options = '-l eng --oem 1 --psm 3'
    return pytesseract.image_to_string(img, config=tesseract_options)
def find_if_close(cnt1, cnt2, threshold):
    """Return True if any point of ``cnt1`` is closer than ``threshold`` to any point of ``cnt2``.

    Args:
        cnt1: NumPy array whose first axis indexes the points of the first
            contour; each entry is flattened to a coordinate vector.
        cnt2: NumPy array laid out like ``cnt1`` for the second contour.
        threshold: Distance limit; the comparison is strict (``<``).

    Returns:
        ``True`` as soon as one pair of points is closer than ``threshold``,
        otherwise ``False``. An empty contour is never close to anything.
    """
    count1, count2 = cnt1.shape[0], cnt2.shape[0]
    # With no points on either side there is nothing to compare; answer
    # explicitly instead of falling off the end of the function.
    if count1 == 0 or count2 == 0:
        return False

    points2 = cnt2.reshape(count2, -1)
    for point in cnt1.reshape(count1, -1):
        # Distances from this point to every point of cnt2 in one call;
        # stop at the first hit so large contours exit early.
        if (np.linalg.norm(points2 - point, axis=1) < threshold).any():
            return True
    return False
def get_countour(img, max_gap=5):
    """Pair up nearby contours of ``img`` and outline each pair in green.

    The image is converted to grayscale, thresholded at 127 and its contours
    are extracted. Each contour is paired with the first *other* contour that
    ``find_if_close`` reports as closer than ``max_gap``.

    Args:
        img: BGR image. It is modified in place: every pair found is drawn
            onto it with ``cv2.drawContours``.
        max_gap: Distance threshold passed to ``find_if_close``.

    Returns:
        A list of ``[contour1, contour2]`` pairs, one per contour that has a
        close neighbour. The list is empty when no contours are close.
    """
    imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY)
    # findContours returns (contours, hierarchy) on OpenCV 4.x and
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list on both.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    letters = []
    for index1, contour1 in enumerate(contours):
        for index2, contour2 in enumerate(contours):
            # A contour is at distance 0 from itself, so it would always
            # "match"; only a different contour counts as a neighbour.
            if index1 == index2:
                continue
            if find_if_close(contour1, contour2, max_gap):
                letter = [contour1, contour2]
                letters.append(letter)
                cv2.drawContours(img, letter, -1, (0, 255, 0), 3)
                break
    return letters
def strengthen(img, kernel_size=(1, 20)):
    """Bridge horizontal gaps in ``img`` by dilating and then eroding it.

    Both operations use the same all-ones kernel and run for one iteration,
    i.e. the result is a morphological closing of ``img``.

    Args:
        img: Image to process; it is passed straight to ``cv2.dilate``.
        kernel_size: ``(rows, cols)`` of the structuring element. The default
            ``(1, 20)`` is a 1-pixel-high, 20-pixel-wide horizontal bar.

    Returns:
        The dilated-then-eroded image.
    """
    # NOTE(review): dilation grows bright pixels, so this only joins strokes
    # when the text is lighter than the background; dark-on-light input
    # would need inverting first -- confirm against the actual images.
    kernel = np.ones(kernel_size, np.uint8)
    # ``iterations`` must be passed by keyword: the third positional
    # parameter of cv2.dilate / cv2.erode is ``dst``, not the iteration count.
    dilated = cv2.dilate(img, kernel, iterations=1)
    return cv2.erode(dilated, kernel, iterations=1)
# NOTE(review): ``img`` is not defined in this snippet; presumably it is
# loaded earlier (e.g. with cv2.imread) -- confirm.
closed = strengthen(img)
# get_countour() draws contour outlines onto ``img`` in place, so ``img`` is
# only suitable for display from here on.
contours = get_countour(img)
# OCR the repaired image, not the original that now has outlines drawn on it.
print(OCR(closed))
cv2.imshow('img', img)
cv2.imshow('strengthened', closed)
cv2.waitKey(0)
cv2.destroyAllWindows()
from OpenCV Python - Fixing Broken Text



No comments:
Post a Comment