Following a review post, I constructed this dataset for binary classification that contains Fashion MNIST T-shirt vs. Shirt.
import tensorflow as tf
import numpy as np
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
# generate the indices
idx_digit_train = np.argwhere((y_train == 0) | (y_train == 6)).flatten()
idx_digit_test = np.argwhere((y_test == 0) | (y_test == 6)).flatten()
# construct the training set
y_train_mnist = y_train[idx_digit_train].reshape(-1,1)
x_train_mnist = x_train[idx_digit_train].reshape(-1,28*28)
# construct the test set
y_test_mnist = y_test[idx_digit_test].reshape(-1,1)
x_test_mnist = x_test[idx_digit_test].reshape(-1,28*28)
x_train_mnist = x_train_mnist/255.
x_test_mnist = x_test_mnist/255.
y_train_mnist[y_train_mnist==6]=1
y_test_mnist[y_test_mnist==6]=1
y_train_mnist = np.array(y_train_mnist, dtype=np.float32)
y_test_mnist = np.array(y_test_mnist, dtype=np.float32)
Adapted from another review post, I built a 2 layer neural network from scratch with NumPy.
import numpy as np
def sigmoid(z):
return 1 / (1 + np.exp(-z))
class MLPClassifier:
def __init__(self, eta=.05, n_epoch=10, n_0=2, n_1=2, n_2=1,
model_w1=[], model_b1=[], model_w2=[], model_b2=[]):
self.eta = eta
self.n_epoch = n_epoch
self.model_w1 = model_w1
self.model_b1 = model_b1
self.model_w2 = model_w2
self.model_b2 = model_b2
self.n_1 = n_1
self.n_2 = n_2
def initialize_params(self, n_0, n_1, n_2):
if len(self.model_w1) == 0:
self.model_w1 = np.random.random(size=(n_0, n_1))
if len(self.model_w2) == 0:
self.model_w2 = np.random.random(size=(n_1, n_2))
if len(self.model_b1) == 0:
self.model_b1 = np.random.random(size=(1, n_1))
if len(self.model_b2) == 0:
self.model_b2 = np.random.random(size=(1, n_2))
def predict(self, x):
_, a2 = self.forward_propagation(x)
yhat = a2 >= 0.5
return 1*yhat
def forward_propagation(self, x):
z1 = np.dot(x, self.model_w1) + self.model_b1
a1 = sigmoid(z1)
z2 = np.dot(a1, self.model_w2) + self.model_b2
a2 = sigmoid(z2)
return a1, a2
def backward_propagation(self, x, y, a1, a2):
m = len(x)
n_1 = self.n_1
n_2 = self.n_2
a1 = a1.reshape(m, -1)
dz2 = a2 - y
dw2 = np.dot(a1.T, dz2)/m
dw2 = dw2.reshape(n_1, n_2)
db2 = np.mean(dz2, keepdims = True)
dz1 = np.dot(dz2, self.model_w2.T) * (a1*(1-a1))
dw1 = np.dot(x.T, dz1)/m
db1 = np.mean(dz1, axis=0)
return dw2, db2, dw1, db1
def update_params(self, dw2, db2, dw1, db1):
self.model_w2 -= self.eta * dw2
self.model_b2 -= self.eta * db2
self.model_w1 -= self.eta * dw1
self.model_b1 -= self.eta * db1
def fit(self, x, y, verbose=False):
n_0 = x.shape[-1]
n_1 = self.n_1
n_2 = self.n_2
self.initialize_params(n_0, n_1, n_2)
for i in range(self.n_epoch):
a1, a2 = self.forward_propagation(x)
dw2, db2, dw1, db1 = self.backward_propagation(x, y, a1, a2)
self.update_params(dw2, db2, dw1, db1)
Per the first post, a Logistic Regression model can achieve
train accuracy: 0.828583
However, when I trained my handcraft model 1, 2, though 9 epochs and then even 9999 epochs, it just achieve 0.5 accuracy on both train and test sets.
classifier = MLPClassifier(.1, 9999)
classifier.fit(x_train_mnist, y_train_mnist, verbose=False)
acc = np.count_nonzero(np.squeeze(classifier.predict(x_train_mnist)) == np.squeeze(y_train_mnist))
I also tried a range of numbers for hidden units, none of them worked better.
acc_list=[]
for i in np.arange(2, 99):
for j in range(9):
classifier = MLPClassifier(.1, j, n_1=i)
classifier.fit(x_train_mnist, y_train_mnist, verbose=False)
acc = np.count_nonzero(np.squeeze(classifier.predict(x_train_mnist)) == np.squeeze(y_train_mnist))
acc_list.append(acc)
In contrast to that, the tensorflow version
model = tf.keras.Sequential([
tf.keras.layers.Dense(2, activation=tf.nn.sigmoid),
tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
model.compile(loss=tf.keras.losses.binary_crossentropy,
optimizer='sgd',
metrics=['accuracy'])
model.fit(x_train_mnist, y_train_mnist, epochs=20, verbose=1)
hit 0.85 after 30 Epochs
Epoch 30/50 375/375 [==============================] - 1s 1ms/step - loss: 0.3326 - accuracy: 0.8525
What am I missing?
How do I improve my 2-layer handcraft model without adding more layers?
from Why does my 2-layer neural network achieve just 50% accuracy for binary classification?
No comments:
Post a Comment