the image:
the image is handwritten line of text this is extracting the text some what but not the expect same in the image
and the code is
import cv2
img = cv2.imread("a.jpg")
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
img = cv2.resize(img,(6000,100))
_, result1 = cv2.threshold(img,100,255,cv2.THRESH_BINARY)
_, result2 = cv2.threshold(img,100,255,cv2.THRESH_BINARY_INV)
adaptive_result1 = cv2.adaptiveThreshold(result1,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,199,5)
adaptive_result2 = cv2.adaptiveThreshold(result1,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,199,5)
adaptive_result3 = cv2.adaptiveThreshold(result2,100,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,199,5)
adaptive_result4 = cv2.adaptiveThreshold(result2,256,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,999,1)
# cv2.imshow("title",result)
# cv2.waitKey(0)
import pytesseract
from PIL import Image
from pytesseract import Output
configs = r'--psm 6 --oem 3'
pytesseract.pytesseract.tesseract_cmd =r'C:/Users/ASUS/OneDrive/Desktop/ubuntu file/tesseract.exe'
data = pytesseract.image_to_data(img, config=configs, output_type=Output.DICT)
print(*data['text'])
data = pytesseract.image_to_data(result2, config=configs, output_type=Output.DICT)
print(*data['text'])
data = pytesseract.image_to_data(adaptive_result3, config=configs, output_type=Output.DICT)
print(*data['text'])
data = pytesseract.image_to_data(adaptive_result4, config=configs, output_type=Output.DICT)
print(*data['text'])
# data = pytesseract.image_to_data(img, config=configs, output_type=Output.DICT)
# print(*data['text'])
# print(data.keys())
The Output
buttered off before. there, twinkling like new Yar sytemy, hung cuter of tempting Uwitationx they beaged hi to contre the extera
from how to get text from handwriting image in python

No comments:
Post a Comment