Monday, 23 August 2021

Why does my 2-layer neural network achieve just 50% accuracy for binary classification?

Following a review post, I constructed a dataset for binary classification from Fashion MNIST: T-shirt/top (label 0) vs. Shirt (label 6).

import tensorflow as tf
import numpy as np

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# indices of the two classes: 0 (T-shirt/top) and 6 (Shirt)
idx_digit_train = np.argwhere((y_train == 0) | (y_train == 6)).flatten()
idx_digit_test = np.argwhere((y_test == 0) | (y_test == 6)).flatten()

# construct the training set
y_train_mnist = y_train[idx_digit_train].reshape(-1,1)
x_train_mnist = x_train[idx_digit_train].reshape(-1,28*28)

# construct the test set
y_test_mnist = y_test[idx_digit_test].reshape(-1,1)
x_test_mnist = x_test[idx_digit_test].reshape(-1,28*28)

# scale pixel values to [0, 1]
x_train_mnist = x_train_mnist/255.
x_test_mnist = x_test_mnist/255.

# relabel class 6 (Shirt) as 1 so the targets are 0/1
y_train_mnist[y_train_mnist==6]=1
y_test_mnist[y_test_mnist==6]=1

y_train_mnist = np.array(y_train_mnist, dtype=np.float32)
y_test_mnist = np.array(y_test_mnist, dtype=np.float32)
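For what it's worth, a quick sanity check (my own addition, not part of the original posts) confirms the two classes are balanced, so 0.5 accuracy is exactly what constant predictions would score:

# sanity check: shapes and class balance (0 = T-shirt/top, 1 = Shirt)
print(x_train_mnist.shape, y_train_mnist.shape)       # (12000, 784) (12000, 1)
print(np.unique(y_train_mnist, return_counts=True))   # 6000 samples of each class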

Adapted from another review post, I built a 2-layer neural network from scratch with NumPy.

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

class MLPClassifier:
    def __init__(self, eta=.05, n_epoch=10, n_0=2, n_1=2, n_2=1,
          model_w1=[], model_b1=[], model_w2=[], model_b2=[]):
        self.eta = eta
        self.n_epoch = n_epoch
        self.model_w1 = model_w1
        self.model_b1 = model_b1
        self.model_w2 = model_w2
        self.model_b2 = model_b2
        self.n_1 = n_1
        self.n_2 = n_2
        
    def initialize_params(self, n_0, n_1, n_2):
        # any parameter not supplied by the caller is drawn uniformly from [0, 1)
        if len(self.model_w1) == 0:
            self.model_w1 = np.random.random(size=(n_0, n_1))
        if len(self.model_w2) == 0:
            self.model_w2 = np.random.random(size=(n_1, n_2))
        if len(self.model_b1) == 0:
            self.model_b1 = np.random.random(size=(1, n_1))
        if len(self.model_b2) == 0:
            self.model_b2 = np.random.random(size=(1, n_2))
        
    def predict(self, x):
        # threshold the output sigmoid at 0.5 and return 0/1 labels
        _, a2 = self.forward_propagation(x)
        yhat = a2 >= 0.5
        return 1*yhat

    def forward_propagation(self, x):
        z1 = np.dot(x, self.model_w1) + self.model_b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1, self.model_w2) + self.model_b2
        a2 = sigmoid(z2)
        return a1, a2
    
    def backward_propagation(self, x, y, a1, a2):
        m = len(x)
        n_1 = self.n_1
        n_2 = self.n_2
        a1 = a1.reshape(m, -1)
        dz2 = a2 - y  # gradient of binary cross-entropy w.r.t. z2, through the output sigmoid
        dw2 = np.dot(a1.T, dz2)/m
        dw2 = dw2.reshape(n_1, n_2)
        db2 = np.mean(dz2, keepdims=True)
        dz1 = np.dot(dz2, self.model_w2.T) * (a1*(1-a1))  # a*(1-a) is the sigmoid derivative
        dw1 = np.dot(x.T, dz1)/m
        db1 = np.mean(dz1, axis=0)
        return dw2, db2, dw1, db1

    def update_params(self, dw2, db2, dw1, db1):
        self.model_w2 -= self.eta * dw2
        self.model_b2 -= self.eta * db2
        self.model_w1 -= self.eta * dw1
        self.model_b1 -= self.eta * db1

    def fit(self, x, y, verbose=False):
        n_0 = x.shape[-1]
        n_1 = self.n_1
        n_2 = self.n_2
        self.initialize_params(n_0, n_1, n_2)        
        for i in range(self.n_epoch):
            a1, a2 = self.forward_propagation(x)
            dw2, db2, dw1, db1 = self.backward_propagation(x, y, a1, a2)
            self.update_params(dw2, db2, dw1, db1)
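The class optimizes binary cross-entropy implicitly (dz2 = a2 - y is that loss's gradient through the output sigmoid) but never reports it. A minimal way to watch the loss during training, assuming the class as defined above (bce_loss and monitor are my own names):

def bce_loss(y, a2, eps=1e-12):
    # binary cross-entropy, the loss whose gradient is dz2 = a2 - y above
    return -np.mean(y*np.log(a2 + eps) + (1 - y)*np.log(1 - a2 + eps))

monitor = MLPClassifier(.1, n_epoch=1)
for epoch in range(10):
    monitor.fit(x_train_mnist, y_train_mnist)  # one epoch per call; weights persist between calls
    _, a2 = monitor.forward_propagation(x_train_mnist)
    print(epoch, bce_loss(y_train_mnist, a2))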

Per the first post, a logistic regression model can achieve

train accuracy: 0.828583

However, when I trained my handcrafted model for 1, 2, through 9 epochs, and then even for 9999 epochs, it achieves only 0.5 accuracy on both the training and test sets.

classifier = MLPClassifier(.1, 9999)
classifier.fit(x_train_mnist, y_train_mnist, verbose=False)
# fraction of correct predictions (np.count_nonzero alone would give a raw count, not a rate)
acc = np.mean(np.squeeze(classifier.predict(x_train_mnist)) == np.squeeze(y_train_mnist))
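Chance-level accuracy on a balanced dataset often means the network outputs the same class for every sample. That is easy to check directly (my own diagnostic, not from the original post):

preds = classifier.predict(x_train_mnist)
# if only one label shows up here, the output sigmoid is stuck on one side of 0.5
print(np.unique(preds, return_counts=True))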

I also tried a range of hidden-unit counts; none of them worked any better.

acc_list=[]
for i in np.arange(2, 99):
  for j in range(1, 10):  # 1 through 9 epochs
    classifier = MLPClassifier(.1, j, n_1=i)
    classifier.fit(x_train_mnist, y_train_mnist, verbose=False)
    acc = np.mean(np.squeeze(classifier.predict(x_train_mnist)) == np.squeeze(y_train_mnist))
    acc_list.append(acc)
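Summarizing the sweep (my addition) makes the outcome explicit:

print(max(acc_list))  # best accuracy over all (n_1, n_epoch) pairs; reportedly stays near 0.5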

In contrast, the TensorFlow version

model = tf.keras.Sequential([
  tf.keras.layers.Dense(2, activation=tf.nn.sigmoid),
  tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

model.compile(loss=tf.keras.losses.binary_crossentropy,
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(x_train_mnist, y_train_mnist, epochs=50, verbose=1)

hits 0.85 accuracy after 30 epochs:

Epoch 30/50
375/375 [==============================] - 1s 1ms/step - loss: 0.3326 - accuracy: 0.8525
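For completeness, the Keras model can also be scored on the held-out split; evaluate returns the loss and the accuracy metric (my addition, not in the original post):

model.evaluate(x_test_mnist, y_test_mnist)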

What am I missing?

How do I improve my 2-layer handcrafted model without adding more layers?
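One thing worth probing, though the post itself does not establish the cause: initialize_params draws every weight uniformly from [0, 1), so with 784 non-negative inputs the pre-activations are large and both sigmoid layers can saturate near 1, flattening the gradients. A quick check, followed by a comparison run with small zero-centered weights (a common remedy, sketched here with my own variable names):

# probe: inspect the activations of a freshly initialized network (0 training epochs)
probe = MLPClassifier(.1, 0)
probe.fit(x_train_mnist, y_train_mnist)  # only initializes the parameters
a1, a2 = probe.forward_propagation(x_train_mnist)
print(a1.min(), a1.max(), a2.mean())     # values pinned near 1.0 would indicate saturation

# comparison: pass in small zero-centered weights instead of the uniform [0, 1) default
rng = np.random.default_rng(0)
n_0, n_1, n_2 = 28*28, 2, 1
classifier = MLPClassifier(.1, 100,
                           model_w1=rng.normal(0, .01, size=(n_0, n_1)),
                           model_b1=np.zeros((1, n_1)),
                           model_w2=rng.normal(0, .01, size=(n_1, n_2)),
                           model_b2=np.zeros((1, n_2)))
classifier.fit(x_train_mnist, y_train_mnist)
print(np.mean(np.squeeze(classifier.predict(x_train_mnist)) == np.squeeze(y_train_mnist)))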



