I have a custom training loop that I'm trying to profile to debug some memory issues. I'm also using a file writer to capture training information to then display in Tensorboard:
@tf.function
def train_step(self, optimizer, data, train_loss_object):
with tf.GradientTape() as tape:
audio_data, transcrs, audio_lengths, label_lengths = data
logits = self.model(audio_data, training=True)
ctc_loss = self.loss_function(logits, transcrs, audio_lengths, label_lengths)
grads = tape.gradient(ctc_loss, self.model.trainable_variables)
optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
train_loss_object(ctc_loss)
def train_full(self, train_dataset, num_epochs = 5, batch_size = 8):
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/' + current_time + '/train'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
optimizer = tf.optimizers.Adam()
train_loss_object = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
start = time.time()
with tf.profiler.experimental.Profile('logs/'):
for epoch in range(num_epochs):
for data in train_dataset:
self.train_step(optimizer, data, train_loss_object)
with train_summary_writer.as_default():
tf.summary.scalar('ctc_loss', train_loss_object.result(), step=epoch)
stop = time.time()
print(f'Epoch {epoch}, Train Loss: {train_loss_object.result()}, Time elapsed: {stop-start} seconds')
train_loss_object.reset_states()
This gives me a folder called logs that contains two folders, one folder with the training losses and another folder plugins/profiler containing the .pb profiling files.
When I point TensorBoard at the logs folder using tensorboard --logdir logs it displays the scalars fine, but when I navigate to the dropdown menu and click on PROFILE nothing is displayed. What am I missing?
from Use Tensorflow Profiler with custom training loops and Tensorboard
No comments:
Post a Comment