I am trying to implement a custom modified ReLU in TensorFlow 1, in which I use two learnable parameters. The parameters are not being learnt even after running 1000 training steps, as shown by printing their values before and after training. I have observed that inside the function, when I do not split x (i.e. when I run the commented-out lines instead), the coefficients are learnt. Could anyone suggest why splitting the input stops the trainable coefficients from being learnt, and how this can be resolved?
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
def weight_variable(shape, vari_name):
    initial = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32)
    return tf.Variable(initial, name=vari_name)
def init_Prelu_coefficient(var1, var2):
    coeff = tf.truncated_normal([1], stddev=0.1, dtype=tf.float32)
    coeff1 = tf.truncated_normal([1], stddev=0.1, dtype=tf.float32)
    return (tf.Variable(coeff, trainable=True, name=var1),
            tf.Variable(coeff1, trainable=True, name=var2))
def Prelu(x, coeff, coeff1):
    # Split the channels in half and combine the halves with the two
    # learnable coefficients.
    s = int(x.shape[-1])
    sop = x[:, :, :, :s // 2] * coeff + x[:, :, :, s // 2:] * coeff1
    sop1 = x[:, :, :, :s // 2] * coeff - x[:, :, :, s // 2:] * coeff1
    copied_variable = tf.concat([sop, sop1], axis=-1)
    # 0/1 mask: 1 where the combination is positive, 0 where it is negative.
    copied_variable = tf.math.maximum(copied_variable, 0) / copied_variable
    # With these lines instead of the split above, the coefficients are learnt:
    # copied_variable = tf.identity(x)
    # copied_variable = tf.math.maximum(copied_variable * coeff + copied_variable * coeff1, 0) / copied_variable
    # copied_variable = tf.multiply(copied_variable, x)
    return copied_variable
def conv2d_dilate(x, W, dilate_rate):
    return tf.nn.convolution(x, W, padding='VALID', dilation_rate=[1, dilate_rate])
matr = np.random.rand(1, 60, 40, 8)
target = np.random.rand(1, 58, 36, 8)
def learning(sess):
    # Placeholders for the network input and its target.
    Input = tf.placeholder(tf.float32, [1, 60, 40, 8])
    input_Target = tf.placeholder(tf.float32, [1, 58, 36, 8])
    kernel = weight_variable([3, 3, 8, 8], 'G1')
    coeff, coeff1 = init_Prelu_coefficient('alpha', 'alpha1')
    conv = Prelu(conv2d_dilate(Input, kernel, 2), coeff, coeff1)
    error_norm = 1 * tf.norm(input_Target - conv)
    print("MOMENTUM LEARNING")
    train_step = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9,
                                            use_nesterov=False).minimize(error_norm)
    if int(tf.__version__.split('.')[1]) < 12 and int(tf.__version__.split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    print("INIT coefficient ", sess.run(coeff), sess.run(coeff1))
    init_var = tf.trainable_variables()
    error_prev = 1  # initial error placeholder (not used below)
    for i in range(1000):
        sess.run(train_step, feed_dict={Input: matr, input_Target: target})
        if i % 100 == 0:
            error_now = sess.run(error_norm, feed_dict={Input: matr, input_Target: target})
            print('The', i, 'th iteration gives an error', error_now)
    error = sess.run(error_norm, feed_dict={Input: matr, input_Target: target})
    print(sess.run(kernel))
    print("LEARNT coefficient ", sess.run(coeff), sess.run(coeff1))
sess = tf.Session()
learning(sess)
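To narrow this down, is inspecting the gradient with tf.gradients a valid check? Below is a minimal standalone reduction (my own sketch, not the training graph above; the names x, c, z are hypothetical) of what I believe is the relevant piece: the mask tf.math.maximum(z, 0) / z evaluates to 1 where z > 0 and 0 where z < 0, so its gradient with respect to the coefficient should be zero almost everywhere.

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

# Minimal reduction of the mask used in Prelu: y = maximum(x*c, 0) / (x*c).
x = tf.constant(np.random.rand(4).astype(np.float32) - 0.5)
c = tf.Variable(0.5)
z = x * c
y = tf.math.maximum(z, 0.0) / z              # 1 where z > 0, 0 where z < 0
grad = tf.gradients(tf.reduce_sum(y), c)[0]  # gradient of the mask w.r.t. c

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grad))  # numerically zero: the optimizer gets no signal for c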
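If that zero gradient is indeed the cause, would multiplying the 0/1 mask back onto the pre-activation (as the commented-out path effectively does via tf.multiply) be the right fix? A sketch of what I mean (Prelu_masked is a hypothetical name; untested):

def Prelu_masked(x, coeff, coeff1):
    # Same split as above, but return the masked pre-activation itself
    # instead of only the 0/1 mask, so gradients can reach the coefficients.
    s = int(x.shape[-1])
    sop = x[:, :, :, :s // 2] * coeff + x[:, :, :, s // 2:] * coeff1
    sop1 = x[:, :, :, :s // 2] * coeff - x[:, :, :, s // 2:] * coeff1
    combined = tf.concat([sop, sop1], axis=-1)
    return tf.math.maximum(combined, 0)  # equals mask * combined, written directly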
Source: "Tensorflow : Trainable variable not getting learnt"