My task is to detect videos that contain violence. My dataset consists of 2000 videos in two categories: violent and nonviolent. This is the code that creates the model and loads the weights:
# Main repo: https://github.com/eazydammy/violence-detection-with-C3D
import skvideo.io
import tensorflow.keras.backend as K
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv3D, MaxPooling3D, ZeroPadding3D
from sports1M_utils import preprocess_input, decode_predictions
import cv2
import matplotlib.pyplot as plt
WEIGHTS_PATH = 'https://github.com/adamcasson/c3d/releases/download/v0.1/sports1M_weights_tf.h5'
def C3D(weights='sports1M'):
    """Build a C3D network with a binary classification head.

    The C3D backbone (conv1 ... fc8) is created first so that pre-trained
    weights can be loaded into it. A new head (Dense 512 -> Dropout 0.3 ->
    Dense 1 with sigmoid) is then attached to the output of ``fc7`` and every
    layer except the three head layers is frozen.

    Keyword arguments:
    weights -- which weights to load into the backbone:
        'sports1M' (default): download the file at WEIGHTS_PATH (cached by
            Keras) and load it;
        None: do not load anything, the backbone keeps its initial weights;
        any other value: treated as a path and passed to ``load_weights``.

    Returns:
    A Keras model whose input is a 16-frame 112x112 clip with 3 channels and
    whose output is a single sigmoid unit.
    """
    # Clip layout depends on the backend's image data format.
    if K.image_data_format() == 'channels_last':
        shape = (16, 112, 112, 3)
    else:
        shape = (3, 16, 112, 112)

    model = Sequential()
    model.add(Conv3D(64, 3, activation='relu', padding='same', name='conv1', input_shape=shape))
    model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), padding='same', name='pool1'))

    model.add(Conv3D(128, 3, activation='relu', padding='same', name='conv2'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='pool2'))

    model.add(Conv3D(256, 3, activation='relu', padding='same', name='conv3a'))
    model.add(Conv3D(256, 3, activation='relu', padding='same', name='conv3b'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='pool3'))

    model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv4a'))
    model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv4b'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='pool4'))

    model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv5a'))
    model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv5b'))
    model.add(ZeroPadding3D(padding=(0, 1, 1)))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='pool5'))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu', name='fc6'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(0.5))
    # fc8 only exists so the layer stack matches the weights file; it is not
    # part of the model that is returned.
    model.add(Dense(487, activation='softmax', name='fc8'))

    if weights == 'sports1M':
        # The keyword is not a file name: resolve it to a local copy of the
        # published weights before handing it to load_weights.
        from tensorflow.keras.utils import get_file
        weights = get_file(fname='sports1M_weights_tf.h5',
                           origin=WEIGHTS_PATH,
                           cache_subdir='models')
    if weights is not None:
        model.load_weights(weights)

    # New head on top of fc7; the Dropout after fc7 and fc8 are left out
    # because the graph is rebuilt from the fc7 output.
    fc7 = model.get_layer('fc7').output
    classifier = Dense(512, activation='relu', name='classifier_512')(fc7)
    classifier = Dropout(0.3)(classifier)
    classifier = Dense(1, activation='sigmoid', name='binary_classifier')(classifier)
    model = Model(inputs=model.input, outputs=classifier)

    # Freeze everything except the three head layers added above.
    for layer in model.layers[:-3]:
        layer.trainable = False
    return model
if __name__ == '__main__':
    # Smoke test: build the model, run one 16-frame clip through it.
    model = C3D(weights='weights/sports1M_weights_tf.h5')
    model.summary()  # summary() prints the table itself and returns None

    vid_path = 'video/1.mp4'
    vid = skvideo.io.vread(vid_path)
    vid = vid[40:56]  # frames 40..55 -> one 16-frame clip
    # NOTE(review): preprocess_input comes from sports1M_utils; presumably it
    # resizes/crops the clip and adds the batch axis -- confirm.
    vid = preprocess_input(vid)
    preds = model.predict(vid)

    # C3D() returns a model whose last layer is a single sigmoid unit, so the
    # output is one score per clip. decode_predictions (sports1M_utils) is
    # presumably written for the 487-class Sports-1M output and does not apply.
    # The head is freshly initialised here, so the score is meaningless until
    # the model has been trained.
    score = float(preds[0][0])
    print(f'binary_classifier output: {score:.4f}')
This is the structure of my dataset directories (training directory):
/ fight
/ folder 1
frame_1.png
frame_2.png
....
frame_16.png
/ folder 2
....
/ nonfight
/ folder 1
frame_1.png
frame_2.png
....
frame_16.png
/ folder 2
....
The same thing is applied to the validation dataset.
Now I use this code as the data generator for training. The model expects input of shape (16, 16, 112, 112, 3), which stands for (batch_size, number_of_frames_in_seq, height, width, channels). I followed this instruction and adapted the same idea for loading and generating batches of 3D-shaped arrays:
# Main repo: https://github.com/eazydammy/violence-detection-with-C3D
import cv2
from glob import glob
import random as rnd
from tensorflow import keras
from tensorflow.keras.utils import Sequence
import os
from pathlib import Path
import numpy as np
from tqdm import tqdm
from natsort import natsorted
class Meta:
    """Static settings used by DataGeneratorV1."""

    # Location of the mean array that is subtracted from every clip.
    # Download: https://github.com/adamcasson/c3d/releases/download/v0.1/c3d_mean.npy
    npy_file = "../weights/c3d_mean.npy"
class DataGeneratorV1(Sequence):
    """Keras ``Sequence`` that serves batches of frame clips read from disk.

    Expected layout below ``path``::

        fight/<clip dir>/<frame images>
        nonfight/<clip dir>/<frame images>

    Every directory found (recursively) below ``fight`` that contains frame
    images is one sample with label 1; the same below ``nonfight`` gives
    label 0. Frames are read with OpenCV, resized to ``width`` x ``height``
    (171 x 128), the mean array from ``Meta.npy_file`` is subtracted and the
    result is cropped to 112 x 112.

    Parameters:
        path: parent directory of the ``fight`` / ``nonfight`` folders.
        batch_size: number of clips per batch.
        shuffle: reshuffle the sample order at the end of every epoch.
        dim: (frames, height, width) of one clip as delivered to the model.
        channels: number of colour channels per frame.
        extensions: image file extensions that count as frames.

    Each item is a tuple ``(X, Y)`` where ``X`` has shape
    ``(batch_size, *dim, channels)`` and ``Y`` is one-hot with ``n_classes``
    columns.

    NOTE(review): one-hot labels have 2 columns; a model that ends in a
    single sigmoid unit needs labels of shape (batch, 1) instead -- confirm
    against the model this generator feeds.
    NOTE(review): cv2.imread delivers BGR channel order -- confirm this is
    the order the mean array and the pre-trained weights expect.
    """

    def __init__(self, path="train", batch_size=16, shuffle=False, dim=(16, 112, 112),
                 channels=3, extensions=("jpeg", "jpg", "png")):
        assert os.path.isdir(path), "directory does not exist!"
        super().__init__()  # let the Keras base class set up its own state
        self.main_path = Path(path)  # parent directory to fight/nonfight directories
        self.dim = dim
        self.channels = channels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.extensions = tuple(ext.lstrip('.') for ext in extensions)
        self.labels = {}  # sample id -> label (1 = fight, 0 = nonfight)
        self.data = {}  # sample id -> naturally sorted list of frame paths
        self.n_classes = 2
        # Resize based on repo inference code
        self.width = 171
        self.height = 128
        self._mean = None  # loaded lazily on the first batch, then reused

        fight_path = self.main_path / 'fight'
        non_fight_path = self.main_path / 'nonfight'
        labelled_dirs = [(clip_dir, 1) for clip_dir in self._sequence_dirs(fight_path)]
        labelled_dirs += [(clip_dir, 0) for clip_dir in self._sequence_dirs(non_fight_path)]

        for sub_dir, label in tqdm(labelled_dirs, desc="Initializing V/NV"):
            images = self._list_frames(sub_dir)
            if not images:
                # A directory without frames (e.g. an intermediate folder)
                # would otherwise become an all-zero clip with a real label.
                continue
            id_ = len(self.data)
            self.data[id_] = images
            self.labels[id_] = label
        self.on_epoch_end()

    @staticmethod
    def _sequence_dirs(class_dir):
        """Return all directories below *class_dir*, excluding itself, sorted."""
        # rglob('*') never yields class_dir itself, so nothing has to be
        # deleted by position, and files are filtered out explicitly.
        return sorted(p for p in class_dir.rglob('*') if p.is_dir())

    def _list_frames(self, sub_dir):
        """Return the naturally sorted frame paths directly inside *sub_dir*."""
        found = set()
        for ext in self.extensions:
            found.update(glob(f"{sub_dir.as_posix()}/*.{ext}"))
        return natsorted(found)

    def _get_mean(self):
        """Load the mean array once and reuse it for every later batch."""
        if self._mean is None:
            self._mean = np.load(str(Meta.npy_file))
        return self._mean

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.keys = list(self.data.keys())
        if self.shuffle:
            rnd.shuffle(self.keys)

    def __data_generation(self, list_IDs_temp):
        """Build one batch ``(X, one-hot Y)`` for the given sample ids."""
        # Size the buffers from the ids actually requested so no row is left
        # as uninitialised memory and batch_size is not assumed to be 16.
        n_items = len(list_IDs_temp)
        X = np.empty((n_items, *self.dim, self.channels))
        Y = np.empty((n_items, 1))
        mean = self._get_mean()
        for i, item in enumerate(list_IDs_temp):
            frames = self.__read_images(self.data[item])
            frames -= mean  # preprocess (subtract mean)
            frames = frames[:, 8: 120, 30: 142, :]  # crop 128x171 -> 112x112
            X[i] = frames
            Y[i] = self.labels[item]
        return X, keras.utils.to_categorical(Y, num_classes=self.n_classes)

    def __read_images(self, pack_16_frames):
        """Read and resize the frames of one clip into a float array.

        Returns an array of shape ``(dim[0], height, width, channels)``;
        positions for which no frame path was given stay zero.

        Raises:
            ValueError: if OpenCV cannot read one of the image files.
        """
        frames = np.zeros((self.dim[0], self.height, self.width, self.channels), dtype='float')
        for i, image_path in enumerate(pack_16_frames):
            img = cv2.imread(image_path)
            if img is None:
                # imread signals failure by returning None instead of raising.
                raise ValueError(f"could not read image: {image_path}")
            # cv2.resize takes the target size as (width, height)
            img = cv2.resize(img, (self.width, self.height), interpolation=cv2.INTER_CUBIC)
            frames[i, :, :, :] = img
        return frames

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return int(np.floor(len(self.data) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number *index* as ``(X, Y)``."""
        current = index * self.batch_size
        after = (index + 1) * self.batch_size
        keys = self.keys[current:after]
        batch_x, batch_y = self.__data_generation(keys)
        return batch_x, batch_y
if __name__ == '__main__':
    # Quick manual check: build the training generator and show the labels
    # of the sixth batch.
    data_gen = DataGeneratorV1(path='../data/train', batch_size=16, shuffle=True)
    _, batch_labels = data_gen[5]
    print(batch_labels)
About the code above: I subclassed `tensorflow.keras.utils.Sequence`. In `__init__` it builds a dictionary that keeps a sequence of 16 frame paths for each key. Then, on each call for a batch of data (`__getitem__`), it reads the images from disk (`__data_generation` and `__read_images`) and, after preprocessing, crops each clip to (16, 112, 112, 3) (`__data_generation`). At that point a batch of data is ready (`__data_generation`).
The problem is that both training and validation starve while waiting for the data to get ready. This makes the training and validation process extremely slow.
Now I intend to rewrite the same pipeline with `tf.data`, which can do the same job more efficiently, but I am not experienced with it.
I need a data loader that can overlap training and I/O using parallel processing: while the model is training, another thread should load the next batches of data into a queue and pause once m batches are ready in the queue.
Would you help me with your ideas?
from How to use tensorflow.data to generate sequences of frames for C3D networks?
No comments:
Post a Comment