Wednesday, 11 October 2023

A customized Lasagne layer whose output is a matrix of the first layer indexed according to a second layer: layer1[np.arange(64),np.argmax(layer2)]

I need a Lasagne layer that takes two layers as inputs and returns a tensor of the first layer indexed by the result of an argmax over the second layer. Here's an example: the first layer (layer0) has shape (64, 9, 19, 21), while the second layer (layer1) has shape (64, 8). The operation I want to perform is: Argmax_layer: perform an argmax over dimension 1 of layer1, which returns a 1-D array; then use that argmax result to index the second dimension of layer0, i.e. layer0[np.arange(64), Argmax_layer, :, :], so the output shape is (64, 19, 21).
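For concreteness, here is the intended operation on dummy NumPy data, using the shapes and the batch size of 64 from the description above:

import numpy as np

layer0 = np.random.randn(64, 9, 19, 21)   # first input
layer1 = np.random.randn(64, 8)           # second input

idx = np.argmax(layer1, axis=1)           # shape (64,), integer indices into dim 1
out = layer0[np.arange(64), idx, :, :]    # one (19, 21) slice per sample
print(out.shape)                          # (64, 19, 21)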

Here's the implementation of the custom layer:

import numpy as np
import lasagne


class ArgmaxLayer(lasagne.layers.MergeLayer):

    def __init__(self, incomings, **kwargs):
        super(ArgmaxLayer, self).__init__(incomings=incomings, **kwargs)
        if len(incomings) != 2:
            raise ValueError("ArgmaxLayer requires two inputs.")

    def get_output_for(self, inputs, **kwargs):
        # inputs holds the expressions of the two incoming layers
        input0_layer = inputs[0]
        input1_layer = inputs[1]
        # index of the maximum along the last axis of the second input
        layer_argmax = np.argmax(input1_layer, axis=-1)
        # pick one (19, 21) slice per sample; 64 is the batch size
        result = input0_layer[np.arange(64), layer_argmax, :, :]
        return result

    def get_output_shape_for(self, input_shapes):
        # drop the indexed dimension: (batch, 9, 19, 21) -> (batch, 19, 21)
        inshape1 = input_shapes[0]
        return (inshape1[0], inshape1[2], inshape1[3])
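Note that inside get_output_for the inputs are symbolic Theano expressions, not NumPy arrays, so the idiomatic way to write the body is with Theano's own ops. A minimal sketch of the same body, assuming the batch size should be read from the symbolic shape rather than hardcoded as 64:

import theano.tensor as T

def get_output_for(self, inputs, **kwargs):
    input0, input1 = inputs
    idx = T.argmax(input1, axis=-1)       # symbolic argmax, shape (batch,)
    rows = T.arange(input0.shape[0])      # symbolic batch indices
    return input0[rows, idx, :, :]        # advanced indexing -> (batch, 19, 21)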

Here's the NN:

def build_model():
    l_in_1 = lasagne.layers.InputLayer(shape=(None, 19, 21))
    
    l_in_2 = lasagne.layers.InputLayer(shape=(None, 9, 19, 21))
    l_reshape_a2 = lasagne.layers.ReshapeLayer(
        l_in_2, (batch_size, 9*19*21))
    l_12 = lasagne.layers.DenseLayer(
        l_reshape_a2, num_units=8, nonlinearity=lasagne.nonlinearities.softmax)

    
    l_ArgmaxLayer = ArgmaxLayer(incomings=[l_in_2,l_12])
    
    l_in_sal1 = lasagne.layers.ConcatLayer([l_in_1, l_ArgmaxLayer], axis=-1) 
    l_reshape_a = lasagne.layers.ReshapeLayer(
        l_in_sal1, (batch_size, 19*42))
    l_1 = lasagne.layers.DenseLayer(
        l_reshape_a, num_units=N_L1, nonlinearity=lasagne.nonlinearities.rectify)
    l_1_b = lasagne.layers.batch_norm(l_1)


    l_out = lasagne.layers.DenseLayer(
        l_1_b, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax)
        
    return l_in_1, l_in_2, l_out
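For reference, here is a sketch of how the output of this model could be compiled into a callable function (the variable names x1 and x2 are illustrative):

import theano
import theano.tensor as T
import lasagne

l_in_1, l_in_2, l_out = build_model()
x1 = T.tensor3('x1')   # (batch, 19, 21)
x2 = T.tensor4('x2')   # (batch, 9, 19, 21)
prediction = lasagne.layers.get_output(l_out, {l_in_1: x1, l_in_2: x2})
predict_fn = theano.function([x1, x2], prediction)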

Here's the NN summary:

| Layer | Layer_name        | output_shape      | # parameters (cumulative) |
|-------|-------------------|-------------------|---------------------------|
|   0   | InputLayer        | (None, 19, 21)    |          0                |
|   1   | InputLayer        | (None, 9, 19, 21) |          0                |
|   2   | ReshapeLayer      | (64, 3591)        |          0                |
|   3   | DenseLayer        | (64, 8)           |      28736                |
|   4   | ArgmaxLayer       | (None, 19, 21)    |      28736                |
|   5   | ConcatLayer       | (None, 19, 42)    |      28736                |
|   6   | ReshapeLayer      | (64, 798)         |      28736                |
|   7   | DenseLayer        | (64, 200)         |     188336                |
|   8   | BatchNormLayer    | (64, 200)         |     189136                |
|   9   | NonlinearityLayer | (64, 200)         |     189136                |
|  10   | DenseLayer        | (64, 8)           |     190744                |
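A summary like this can be printed with lasagne.layers.get_all_layers; note that lasagne.layers.count_params counts all parameters of a layer and everything below it, which is why the parameter column is cumulative. A rough sketch:

import lasagne

for i, layer in enumerate(lasagne.layers.get_all_layers(l_out)):
    print(i, layer.__class__.__name__, layer.output_shape,
          lasagne.layers.count_params(layer))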

When I try to build the NN, I receive the following error:

DisconnectedInputError                    Traceback (most recent call last)
Cell In[1], line 2018
   2014     print()
   2017 if __name__ == '__main__':
-> 2018     main()

Cell In[1], line 464, in main()
    460 momentum = theano.shared(np.float32(clr_momentum))
    461 # print(" on_train_begin setting learning rate to %.8f: " % learning_rate.get_value())
    462 ##############################################################################################
--> 464 all_grads = T.grad(cost_tr, all_params)
    466 cut_norm = config.cut_grad
    467 updates, norm_calc = nn.updates.total_norm_constraint(all_grads, max_norm=cut_norm, return_norm=True)

theano\gradient.py:589, in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
    586 for elem in wrt:
    587     if elem not in var_to_app_to_idx and elem is not cost \
    588             and elem not in grad_dict:
--> 589         handle_disconnected(elem)
    590         grad_dict[elem] = disconnected_type()
    592 cost_name = None

theano\gradient.py:576, in grad.<locals>.handle_disconnected(var)
    574 elif disconnected_inputs == 'raise':
    575     message = utils.get_variable_trace_string(var)
--> 576     raise DisconnectedInputError(message)
    577 else:
    578     raise ValueError("Invalid value for keyword "
    579                      "'disconnected_inputs', valid values are "
    580                      "'ignore', 'warn' and 'raise'.")

DisconnectedInputError:  
Backtrace when that variable is created:

  File "C:\*\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "C:\*\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\*\AppData\Local\Temp\ipykernel_13860\1447824330.py", line 2018, in <module>
    main()
  File "C:\*\AppData\Local\Temp\ipykernel_13860\1447824330.py", line 257, in main
    l_in_1, l_in_2, l_out = config.build_model()
  File "E:\*.py", line 122, in build_model
    l_12 = lasagne.layers.DenseLayer(
  File "C:\*\lib\site-packages\lasagne\layers\dense.py", line 107, in __init__
    self.b = self.add_param(b, (num_units,), name="b",
  File "C:\*\lib\site-packages\lasagne\layers\base.py", line 234, in add_param
    param = utils.create_param(spec, shape, name)
  File "C:\*\lib\site-packages\lasagne\utils.py", line 393, in create_param
    spec = theano.shared(spec, broadcastable=bcast)

What is wrong with this implementation?

When all inputs to the ArgmaxLayer come from lasagne.layers.InputLayer, it works without any errors:

Here's an example:

class ArgmaxLayer(lasagne.layers.MergeLayer):

    def __init__(self, incomings, **kwargs):
        super(ArgmaxLayer, self).__init__(incomings, **kwargs)
        if len(incomings) != 2:
            raise ValueError("ArgmaxLayer requires two inputs.")

    def get_output_for(self, inputs, **kwargs):
        input0_layer = inputs[0]
        input1_layer = inputs[1]
        # argmax over the first 9 columns of the (flattened) second input
        layer_argmax = np.argmax(input1_layer[:, 0:9], axis=-1)
        result = input0_layer[np.arange(64), layer_argmax, :, :]
        return result

    def get_output_shape_for(self, input_shapes):
        inshape1 = input_shapes[0]
        return (inshape1[0], inshape1[2], inshape1[3])

    
def build_model():
    l_in_1 = lasagne.layers.InputLayer(shape=(None, 19, 21))
    l_in_2 = lasagne.layers.InputLayer(shape=(None, 9, 19, 21))
    l_reshape_a2 = lasagne.layers.ReshapeLayer(
        l_in_1, (batch_size, 19*21))
    l_ArgmaxLayer = ArgmaxLayer(incomings=[l_in_2,l_reshape_a2])
    l_reshape_a = lasagne.layers.ReshapeLayer(
        l_ArgmaxLayer, (batch_size, 19*21))
    l_1 = lasagne.layers.DenseLayer(
        l_reshape_a, num_units=N_L1, nonlinearity=lasagne.nonlinearities.rectify)
    l_1_b = lasagne.layers.batch_norm(l_1)
    l_out = lasagne.layers.DenseLayer(
        l_1_b, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax)
        
    return l_in_1, l_in_2, l_out
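This variant compiles and evaluates without complaint; roughly as follows (dummy float32 data, names illustrative), though note that this only exercises the forward pass, whereas the error above is raised later, inside T.grad:

import numpy as np
import theano
import theano.tensor as T
import lasagne

l_in_1, l_in_2, l_out = build_model()
x1, x2 = T.tensor3('x1'), T.tensor4('x2')
out = lasagne.layers.get_output(l_out, {l_in_1: x1, l_in_2: x2})
fn = theano.function([x1, x2], out)
fn(np.random.randn(64, 19, 21).astype('float32'),
   np.random.randn(64, 9, 19, 21).astype('float32'))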

But in the real scenario, my second layer can be any intermediate layer, not only an InputLayer.

How can this be fixed?


