The objective is to remove uneven vertical and horizontal lines from an image using cv2 in Python.
Currently, I am using these two code blocks to remove the horizontal and vertical lines.
## Remove vertical and horizontal lines
# Vertical pass: a morphological opening (erode, then dilate) keeps only the
# bright regions large enough to contain the 10-wide x 40-tall rectangle;
# whatever survives is treated as "line" and blanked out in a copy of `gray`.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 40))
detLines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel, iterations=2)
nimg_v = gray.copy()
nimg_v[detLines.astype(bool)] = 0

# Horizontal pass: same idea on the result of the vertical pass, this time
# with a 40-wide x 30-tall rectangle and a single iteration.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 30))
detLines = cv2.morphologyEx(nimg_v, cv2.MORPH_OPEN, kernel, iterations=1)
nimg_h = nimg_v.copy()
nimg_h[detLines.astype(bool)] = 0
Despite adjusting the size of the structuring element, I am still unable to remove most of the lines while preserving the text `N1`, `DG`, and `BJ`.
The full code is
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Load the input image, keep only the rough text region on a white canvas,
# convert to grayscale, and zero every pixel that lies close to pure white.
dpath = 'so_images/dummy_image.jpg'
im = cv2.imread(dpath)
if im is None:
    # cv2.imread signals failure by returning None instead of raising;
    # fail here with a clear message rather than on a later indexing error.
    raise FileNotFoundError(f"cv2.imread could not read an image from {dpath!r}")

# Rough estimate of where the text region starts (top-left corner)
y, x = 50, 700
# Rough estimate of where the text region ends (bottom-right corner)
y_end, x_end = 1500, 1350

# White canvas of the same shape as the input, with only the text region
# copied over from the original image.
white_bg = 255 * np.ones_like(im)
white_bg[y:y_end, x:x_end] = im[y:y_end, x:x_end]
gray = cv2.cvtColor(white_bg, cv2.COLOR_BGR2GRAY)

# Dilation is a local maximum, so `dilate == 255` marks every pixel that has
# a pure-white pixel inside its (twice-applied) 5x5 elliptical neighbourhood.
# Those pixels are set to 0.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
dilate = cv2.dilate(gray, kernel, iterations=2)
idx = (dilate == 255)
gray[idx] = 0
## Remove vertical and horizontal lines
# Vertical pass: a morphological opening (erode, then dilate) keeps only the
# bright regions large enough to contain the 10-wide x 40-tall rectangle;
# whatever survives is treated as "line" and blanked out in a copy of `gray`.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 40))
detLines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel, iterations=2)
nimg_v = gray.copy()
nimg_v[detLines.astype(bool)] = 0

# Horizontal pass: same idea on the result of the vertical pass, this time
# with a 40-wide x 30-tall rectangle and a single iteration.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 30))
detLines = cv2.morphologyEx(nimg_v, cv2.MORPH_OPEN, kernel, iterations=1)
nimg_h = nimg_v.copy()
nimg_h[detLines.astype(bool)] = 0
# Thicken what is left after line removal: dilate four times with a 2x2
# elliptical element, then set every pixel that is non-zero in the dilated
# image to 255 in a copy of `nimg_h`.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
dilate = cv2.dilate(nimg_h, kernel, iterations=4)
img_sm = nimg_h.copy()
img_sm[dilate.astype(bool)] = 255
# Drop connected blobs whose contour area is outside [schunk_small, schunk_big]
# by filling them with 0, then display the result.
img_cont = img_sm.copy()
schunk_small = 800    # contours with a smaller area are erased
schunk_big = 50000    # contours with a larger area are erased

cnts = cv2.findContours(img_cont, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns (contours, hierarchy) or (image, contours, hierarchy)
# depending on the OpenCV version; pick the contour list in either case.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    area = cv2.contourArea(c)
    # Plain boolean `or`: both operands are scalar comparisons.
    if area < schunk_small or area > schunk_big:
        # thickness=-1 fills the contour interior
        cv2.drawContours(img_cont, [c], -1, (0, 0, 0), -1)

plt.imshow(img_cont)
plt.show()
The original input image
Which produces
But notice that there are still vertical and horizontal lines in the image. Most of these lines are not perfectly straight.
I intend to clear all the lines and obtain an output ideally like the one below.
In addition, I also tried using `HoughLinesP` and filtering on slope < 1, as below. However, I am still unable to remove the lines.
# Detect line segments on the edge map of `img_cont` and erase the
# near-horizontal ones from `img_cont`.
edges = cv2.Laplacian(img_cont, cv2.CV_8UC1)  # Laplacian edge detection
lines = cv2.HoughLinesP(
    edges,             # Input edge image
    1,                 # Distance resolution in pixels
    np.pi / 180,       # Angle resolution in radians
    threshold=100,     # Min number of votes for a valid line
    minLineLength=5,   # Min allowed length of a line
    maxLineGap=10,     # Max allowed gap between segments for joining them
)
lines_list = []

# HoughLinesP returns None (not an empty array) when no segment is found.
for points in (lines if lines is not None else []):
    x1, y1, x2, y2 = points[0]
    slope = ((y2 - y1) / (x2 - x1)) if (x2 - x1) != 0 else np.inf
    # Use the magnitude: a bare `slope <= 1` would also accept steep segments
    # that happen to have a negative slope.
    if abs(slope) <= 1:
        # Paint with 0, not 255: earlier in the script the kept pixels of this
        # mask were set to 255 and removed blobs were filled with 0, so
        # drawing in 255 would add a line instead of erasing one.
        cv2.line(img_cont, (x1, y1), (x2, y2), 0, 1)
Remark:
Since this is for an OCR application, I am particularly interested in removing the vertical and horizontal lines below the text `BJ`. This is because the OCR system is unable to recognize the text if there are horizontal lines below `BJ`.
The Google Colab notebook to run the cleaning can be found from this link
from Remove uneven vertical and horizontal lines from an image using cv2 Python
No comments:
Post a Comment