Hemant Vishwakarma: Retrieve text from video frames via pytesseract

Sunday, 2 July 2023

Retrieve text from video frames via pytesseract

I've been searching through posts for ways to improve the performance of the script below, but nothing I've tried seems to make much difference. Through testing I've concluded that pytesseract.image_to_data takes ~0.2 seconds each time it's called. Does anyone see an obvious way of improving the speed? I would like to be able to retrieve all of the text information without cropping the image into 3 separate boxes, but without cropping, the OCR always gets it wrong. I tried a psm (page segmentation mode) that recognizes different-sized characters, but it yields poor results. I'm not at all experienced in this area, so it's possible I'm just doing something very wrong. Any thoughts would be greatly appreciated.

def preprocess(frame, xy):
    """Crop one region out of ``frame`` and clean it up for OCR.

    ``xy`` supplies the crop bounds under the keys ``'y1'``, ``'y2'``,
    ``'x1'`` and ``'x2'``. The crop is upsampled, converted from BGR to
    grayscale, blurred, Otsu-thresholded (inverted), morphologically
    opened, inverted back, and returned as three identical stacked channels.
    """
    top, bottom = xy['y1'], xy['y2']
    left, right = xy['x1'], xy['x2']

    # Upsample the crop before any filtering.
    enlarged = cv.pyrUp(frame[top:bottom, left:right])
    gray = cv.cvtColor(enlarged, cv.COLOR_BGR2GRAY)
    smoothed = cv.GaussianBlur(gray, (3, 3), 0)

    # Threshold value 0 is a placeholder: THRESH_OTSU picks the real one.
    _, binary = cv.threshold(
        smoothed, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU
    )

    # A single 3x3 opening pass removes isolated specks from the mask.
    rect_kernel = cv.getStructuringElement(cv.MORPH_RECT, (3, 3))
    opened = cv.morphologyEx(binary, cv.MORPH_OPEN, rect_kernel, iterations=1)

    # Undo the inversion applied by THRESH_BINARY_INV.
    restored = 255 - opened
    return np.dstack([restored, restored, restored])

def to_data(frame):
    """Run Tesseract on ``frame`` and return the last recognised text entry.

    The image is OCR'd with ``--oem 3 --psm 7`` and the result is requested
    as a dict. The final element of its ``'text'`` list is returned; if the
    result has no ``'text'`` key, ``-1`` is returned instead.
    """
    ocr_result = pytesseract.image_to_data(
        frame,
        config='--oem 3 --psm 7',
        output_type=Output.DICT,
    )
    # Fall back to a one-element placeholder so the [-1] lookup yields -1.
    entries = ocr_result['text'] if 'text' in ocr_result else [-1]
    return entries[-1]

def run(file_name, network, interval, show):
    """OCR the configured regions of a video at a fixed sampling interval.

    Args:
        file_name: Value formatted into ``PLAYBACK_PATH`` to locate the video.
        network: Key into ``NETWORKS`` selecting the crop regions to read.
        interval: Multiplier applied to the (integer-truncated) frame rate
            to get the sampling step in frames.
        show: When truthy, each preprocessed region is displayed in a
            window named ``'frame'``.

    Returns:
        A list with one row per sampled frame. Each row holds the
        ``to_data`` result for every region, in the iteration order of
        ``NETWORKS[network]``, followed by the frame index.
    """
    file = pathlib.Path(PLAYBACK_PATH.format(str(file_name)))
    cap = cv.VideoCapture(str(file), 0)

    dataset = []
    try:
        fps = int(cap.get(cv.CAP_PROP_FPS))
        # A reported frame rate of 0 (or interval == 0) would make the
        # modulo below divide by zero; sample every frame in that case.
        step = fps * interval or 1

        ix = 0
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            if ix % step == 0:
                row = []
                for xy in NETWORKS[network].values():
                    _frame = preprocess(frame, xy)
                    row.append(to_data(_frame))
                    if show:
                        cv.imshow('frame', _frame)
                        cv.waitKey(1)

                row.append(ix)
                dataset.append(row)

            ix += 1
    finally:
        # Release the capture and close windows even if OCR raised mid-video.
        cap.release()
        cv.destroyAllWindows()

    return dataset

from Retrieve text from video frames via pytesseract

Hemant Vishwakarma

Sunday, 2 July 2023

Retrieve text from video frames via pytesseract

No comments:

Post a Comment