Wednesday, 23 June 2021

Use TensorFlow Profiler with custom training loops and TensorBoard

I have a custom training loop that I'm trying to profile in order to debug some memory issues. I'm also using a summary file writer to record training information so that it can be displayed in TensorBoard:

@tf.function
def train_step(self, optimizer, data, train_loss_object):
    """Run one optimisation step on a single batch and record its loss.

    Args:
        optimizer: Object whose ``apply_gradients`` is called with
            (gradient, variable) pairs.
        data: A 4-element batch unpacked as
            ``(audio_data, transcrs, audio_lengths, label_lengths)``.
        train_loss_object: Callable metric that is fed the batch loss.

    Returns:
        None. The model variables and the metric are updated in place.
    """
    audio_data, transcrs, audio_lengths, label_lengths = data

    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        predictions = self.model(audio_data, training=True)
        ctc_loss = self.loss_function(
            predictions, transcrs, audio_lengths, label_lengths
        )

    # Read the variable list after the forward pass, as the original does.
    variables = self.model.trainable_variables
    gradients = tape.gradient(ctc_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    train_loss_object(ctc_loss)

def train_full(self, train_dataset, num_epochs = 5, batch_size = 8):
    """Train for ``num_epochs`` epochs, logging the loss and profiling the run.

    A timestamped summary writer under ``logs/<timestamp>/train`` receives one
    ``ctc_loss`` scalar per epoch, and the whole loop runs inside a
    ``tf.profiler.experimental.Profile('logs/')`` session. Each call to
    ``train_step`` is wrapped in a ``tf.profiler.experimental.Trace`` carrying
    a ``step_num`` so the profiler can attribute work to individual steps.

    Args:
        train_dataset: Iterable yielding the batches passed to ``train_step``.
        num_epochs: Number of passes over ``train_dataset``.
        batch_size: Accepted for interface compatibility; not used here.

    Returns:
        None.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    train_log_dir = 'logs/' + current_time + '/train'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)

    optimizer = tf.optimizers.Adam()

    train_loss_object = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)

    # Step counter that keeps increasing across epochs, so every traced step
    # gets a distinct step number in the profile.
    global_step = 0

    start = time.time()
    with tf.profiler.experimental.Profile('logs/'):
        for epoch in range(num_epochs):
            for data in train_dataset:
                # Mark the step boundary for the profiler; without a
                # step_num-tagged Trace, a custom loop gives it no step
                # information to analyse.
                # NOTE(review): the TF Profiler guide also recommends pulling
                # the next batch inside the Trace for input-pipeline analysis;
                # the `for` loop fetches it outside. Confirm whether that
                # matters for this investigation.
                with tf.profiler.experimental.Trace(
                        'train', step_num=global_step, _r=1):
                    self.train_step(optimizer, data, train_loss_object)
                global_step += 1

            with train_summary_writer.as_default():
                tf.summary.scalar('ctc_loss', train_loss_object.result(), step=epoch)

            # Elapsed time is measured from the start of training, not per epoch.
            stop = time.time()
            print(f'Epoch {epoch}, Train Loss: {train_loss_object.result()}, Time elapsed: {stop-start} seconds')

            # Start the next epoch's running mean from scratch.
            train_loss_object.reset_states()

This gives me a folder called logs that contains two folders, one folder with the training losses and another folder plugins/profiler containing the .pb profiling files.

When I point TensorBoard at the logs folder using `tensorboard --logdir logs`, it displays the scalars fine, but when I navigate to the dropdown menu and click on PROFILE, nothing is displayed. What am I missing?



from Use Tensorflow Profiler with custom training loops and Tensorboard

No comments:

Post a Comment