I've been searching through posts for ways to improve the performance of the script below, but nothing I've tried seems to make much difference. Through testing I've concluded that pytesseract.image_to_data
takes ~0.2 seconds each time it's called. Does anyone see an obvious way of improving the speed? I would like to retrieve all of the text information without cropping the image into 3 separate boxes, but when I don't crop, the recognition always gets it wrong. I also tried a psm (page segmentation mode) setting that recognizes different-sized characters, but it yields poor results. I'm not at all experienced in this area, so it's possible I'm just doing something very wrong. Any thoughts would be greatly appreciated.
def preprocess(frame, xy):
    """Crop one region out of ``frame`` and binarize it for OCR.

    Args:
        frame: Image array; it is converted with ``COLOR_BGR2GRAY``, so it
            is treated as BGR.
        xy: Mapping with the keys ``'x1'``, ``'x2'``, ``'y1'`` and ``'y2'``
            giving the crop bounds (rows ``y1:y2``, columns ``x1:x2``).

    Returns:
        The cleaned-up single-channel image stacked three times along the
        third axis, so the result has three identical channels.
    """
    # Crop first so every later step only touches the region of interest.
    region = frame[xy['y1']:xy['y2'], xy['x1']:xy['x2']]
    upscaled = cv.pyrUp(region)
    gray = cv.cvtColor(upscaled, cv.COLOR_BGR2GRAY)
    blurred = cv.GaussianBlur(gray, (3, 3), 0)

    # Otsu picks the threshold itself; the explicit 0 is ignored by OpenCV.
    _, binary = cv.threshold(
        blurred, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU
    )

    # A 3x3 morphological opening removes small specks left by thresholding.
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (3, 3))
    opened = cv.morphologyEx(binary, cv.MORPH_OPEN, kernel, iterations=1)

    # Flip the polarity back after the inverted threshold.
    inverted = 255 - opened
    return np.dstack([inverted] * 3)
def to_data(frame):
    """OCR ``frame`` as a single text line and return the last text entry.

    Tesseract is invoked through ``pytesseract.image_to_data`` with
    ``--oem 3 --psm 7`` and dictionary output.

    Returns:
        The final element of the result's ``'text'`` list, or ``-1`` when
        the result has no ``'text'`` key.
    """
    ocr_result = pytesseract.image_to_data(
        frame,
        config='--oem 3 --psm 7',
        output_type=Output.DICT,
    )
    if 'text' in ocr_result:
        entries = ocr_result['text']
    else:
        # Sentinel list so the indexing below yields -1.
        entries = [-1]
    return entries[-1]
def run(file_name, network, interval, show):
    """OCR the configured regions of a video at a fixed sampling interval.

    The video path is built by formatting ``PLAYBACK_PATH`` with
    ``file_name``. Every ``fps * interval`` frames, each region listed in
    ``NETWORKS[network]`` is preprocessed and passed to ``to_data``.

    Args:
        file_name: Value substituted into ``PLAYBACK_PATH`` (via ``str``).
        network: Key into ``NETWORKS`` selecting the set of crop regions.
        interval: Multiplier applied to the video's FPS to get the number
            of frames between samples (i.e. presumably seconds between
            samples -- confirm against callers).
        show: When truthy, display each preprocessed region in a window
            named ``'frame'``.

    Returns:
        A list of rows, one per sampled frame. Each row holds the OCR
        result for every region, in the iteration order of
        ``NETWORKS[network]``, followed by the frame index.
    """
    file = pathlib.Path(PLAYBACK_PATH.format(str(file_name)))
    cap = cv.VideoCapture(str(file), 0)
    dataset = []
    try:
        fps = int(cap.get(cv.CAP_PROP_FPS))
        # Hoisted out of the loop. Some files report 0 FPS (and interval may
        # be 0); a zero step would raise ZeroDivisionError in the modulo
        # below, so fall back to sampling every frame.
        step = fps * interval
        if not step:
            step = 1

        ix = 0
        while True:
            # grab() only advances the stream; the frame is materialized by
            # retrieve() for sampled frames only, so skipped frames avoid
            # the extra decode/copy work that read() does every time.
            if not cap.grab():
                break
            if ix % step == 0:
                ret, frame = cap.retrieve()
                if not ret or frame is None:
                    break
                row = []
                for xy in NETWORKS[network].values():
                    _frame = preprocess(frame, xy)
                    row.append(to_data(_frame))
                    if show:
                        cv.imshow('frame', _frame)
                        cv.waitKey(1)
                row.append(ix)
                dataset.append(row)
            ix += 1
    finally:
        # Always free the capture and close windows, even if OCR or
        # preprocessing raised part-way through the video.
        cap.release()
        cv.destroyAllWindows()
    return dataset
from Retrieve text from video frames via pytesseract
No comments:
Post a Comment