Friday, 9 April 2021

Using TimeseriesGenerator for large videos to extract frames

I want to train a many-to-many LSTM model to predict dance moves. I am using a relatively large video, and my PC cannot handle extracting all of its frames at once. I created a custom class with moviepy that extracts a frame by its frame number.

from moviepy.video.io.VideoFileClip import VideoFileClip
from matplotlib import pyplot as plt
from pathlib import Path
from math import ceil
import numpy as np
import time

class Video:
    """Index-addressable view over the frames of a video file.

    Frames are fetched on demand through the wrapped clip's ``get_frame``,
    one frame per lookup. Indexing accepts either a single frame number or
    a slice of frame numbers, so the object can be used where a sliceable
    sequence is expected.
    """

    def __init__(self, path, **kwargs):
        """Open the clip at *path*.

        Args:
            path: Location of the video file. ``__repr__`` reads
                ``path.name``, so a ``pathlib.Path`` is expected.
            **kwargs: Forwarded unchanged to ``VideoFileClip``.
        """
        self.path = path
        self.video = VideoFileClip(str(path), **kwargs)

    def __repr__(self):
        """Return ``<HH:MM:SS - file name>`` for the wrapped clip."""
        duration = time.strftime('%H:%M:%S', time.gmtime(self.video.duration))
        return f"<{duration} - {self.path.name}>"

    def __len__(self):
        """Return the frame count, i.e. ``duration * fps`` rounded up."""
        return ceil(self.video.duration * self.video.fps)

    def __getitem__(self, frame_num):
        """Return one frame, or a stack of frames for a slice.

        Args:
            frame_num: A frame number, or a ``slice`` of frame numbers
                (start/stop/step follow normal sequence semantics and are
                clamped to ``len(self)``).

        Returns:
            For a frame number, whatever ``get_frame`` returns for the
            timestamp ``frame_num / fps``. For a slice, ``np.array`` over
            the selected frames in order (an empty array for an empty
            slice).
        """
        if isinstance(frame_num, slice):
            # Resolve the slice against the frame count, then fetch each
            # selected frame individually so only the requested window is
            # ever decoded.
            selected = range(*frame_num.indices(len(self)))
            return np.array([self[index] for index in selected])
        # Convert the frame number into a timestamp in seconds.
        return self.video.get_frame(frame_num / self.video.fps)

    def __iter__(self):
        """Yield every frame in order, from frame 0 to ``len(self) - 1``."""
        for frame_num in range(len(self)):
            yield self[frame_num]

This custom class successfully extracts single frames by their frame numbers.

# Location of the source video, relative to the working directory.
PATH  = Path("data") / "HenryStickmin.mp4"
# audio=False is forwarded to VideoFileClip by Video.__init__.
HENRY = Video(PATH, audio=False)
<00:59:54 - HenryStickmin.mp4>

# Draw four random frame numbers in [0, len(HENRY)) and show each frame
# in its own cell of a 2x2 grid.
frame_nums = np.random.randint(0, len(HENRY), 4)
plt.figure(figsize=(21,13))
for position, frame_num in enumerate(frame_nums, start=1):
    # subplot(2, 2, position) selects the same cell as the 3-digit form 22<position>.
    plt.subplot(2, 2, position)
    plt.imshow(HENRY[frame_num])
    plt.axis('off')
    plt.title(f'Frame No: {frame_num}', fontweight='bold')
plt.show()

enter image description here

My next goal was to create a time-series dataset, but I got this error:

import tensorflow as tf
# Presumably the frame rate of the video - TODO confirm against HENRY.video.fps.
fps  = 30
# HENRY is passed as both data and targets; the window length is fps * 2 = 60
# frame numbers, every 2nd one is sampled, and consecutive windows start fps
# frames apart.
# NOTE: per the traceback below, each sample is built with
# data[row - length:row:sampling_rate] (a slice) and each target with
# targets[row] (a single index), so a target is one frame, not a sequence.
gen  = tf.keras.preprocessing.sequence.TimeseriesGenerator(HENRY, HENRY, fps * 2, sampling_rate=2, stride=fps)
# With a Video.__getitem__ that only accepts a single frame number, this line
# raises the TypeError shown below: the slice is divided by the fps.
X, y = gen[0]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-37-a7b22e584018> in <module>
----> 1 X, y = gen[0]

~\.conda\envs\ml\lib\site-packages\keras_preprocessing\sequence.py in __getitem__(self, index)
    370                                     self.stride, self.end_index + 1), self.stride)
    371 
--> 372         samples = np.array([self.data[row - self.length:row:self.sampling_rate]
    373                             for row in rows])
    374         targets = np.array([self.targets[row] for row in rows])

~\.conda\envs\ml\lib\site-packages\keras_preprocessing\sequence.py in <listcomp>(.0)
    370                                     self.stride, self.end_index + 1), self.stride)
    371 
--> 372         samples = np.array([self.data[row - self.length:row:self.sampling_rate]
    373                             for row in rows])
    374         targets = np.array([self.targets[row] for row in rows])

<ipython-input-2-40570a429d12> in __getitem__(self, frame_num)
     13 
     14     def __getitem__(self,frame_num):
---> 15         frame   = self.video.get_frame(frame_num/self.video.fps)
     16         return frame
     17 

TypeError: unsupported operand type(s) for /: 'slice' and 'float'

I wanted to train my model with 1 * FPS frames (1 second) to predict the next 1 * FPS frames (1 second), and expected to get something like this:

X[0] = array(['frame[000]', 'frame[002]', 'frame[004]', 'frame[006]',
       'frame[008]', 'frame[010]', 'frame[012]', 'frame[014]',
       'frame[016]', 'frame[018]', 'frame[020]', 'frame[022]',
       'frame[024]', 'frame[026]', 'frame[028]', 'frame[030]',
       'frame[032]', 'frame[034]', 'frame[036]', 'frame[038]',
       'frame[040]', 'frame[042]', 'frame[044]', 'frame[046]',
       'frame[048]', 'frame[050]', 'frame[052]', 'frame[054]',
       'frame[056]', 'frame[058]'])

y[0] = array(['frame[060]', 'frame[062]', 'frame[064]', 'frame[066]',
       'frame[068]', 'frame[070]', 'frame[072]', 'frame[074]',
       'frame[076]', 'frame[078]', 'frame[080]', 'frame[082]',
       'frame[084]', 'frame[086]', 'frame[088]', 'frame[090]',
       'frame[092]', 'frame[094]', 'frame[096]', 'frame[098]',
       'frame[100]', 'frame[102]', 'frame[104]', 'frame[106]',
       'frame[108]', 'frame[110]', 'frame[112]', 'frame[114]',
       'frame[116]', 'frame[118]'])

How can I create a generator that extracts (data, target) = (1 second, 1 second) frames from my video?



from Using TimeseriesGenerator for large videos to extract frames

No comments:

Post a Comment