Wednesday 6 December 2023

How can I fix my perceptron to recognize numbers?

My exercise is to train 10 perceptrons to recognize numbers (0 - 9). Each perceptron should learn a single digit. As training data, I've created 30 images (5x7 bmp). 3 variants per digit.

I've got a perceptron class:

import numpy as np


def unit_step_func(x):
    return np.where(x > 0, 1, 0)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


class Perceptron:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.activation_func = unit_step_func
        self.weights = None
        self.bias = None
        #self.best_weights = None
        #self.best_bias = None
        #self.best_error = float('inf')

    def fit(self, X, y):
        n_samples, n_features = X.shape

        self.weights = np.zeros(n_features)
        self.bias = 0

        #self.best_weights = self.weights.copy()
        #self.best_bias = self.bias

        for _ in range(self.n_iters):
            for x_i, y_i in zip(X, y):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_func(linear_output)

                update = self.lr * (y_i - y_predicted)
                self.weights += update * x_i
                self.bias += update

            #current_error = np.mean(np.abs(y - self.predict(X)))
            #if current_error < self.best_error:
            #    self.best_weights = self.weights.copy()
            #    self.best_bias = self.bias
            #    self.best_error = current_error

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self.activation_func(linear_output)
        return y_predicted

I've tried both, unit_step_func and sigmoid, activation functions, and pocketing algorithm to see if there's any difference. I'm a noob, so I'm not sure if this is even implemented correctly.

This is how I train these perceptrons:

import numpy as np
from PIL import Image
from Perceptron import Perceptron
import os

def load_images_from_folder(folder, digit):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = Image.open(os.path.join(folder, filename))
        if img is not None:
            images.append(np.array(img).flatten())
            label = 1 if filename.startswith(f"{digit}_") else 0
            labels.append(label)
    return np.array(images), np.array(labels)


digits_to_recognize = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

perceptrons = []
for digit_to_recognize in digits_to_recognize:
    X, y = load_images_from_folder("data", digit_to_recognize)
    p = Perceptron()
    p.fit(X, y)
    perceptrons.append(p)

in short:

training data filename is in the format digit_variant. As I said before, each digit has 3 variants,

so for digit 0 it is 0_0, 0_1, 0_2,

for digit 1 it's: 1_0, 1_1, 1_2,

and so on...

load_images_from_folder function loads 30 images and checks the name. If digit part of the name is the same as digit input then it appends 1 in labels, so that the perceptron knows that it's the desired digit.

I know that it'd be better to load these images once and save them in some array of tuples, for example, but I don't care about the performance right now (I won't care later either).

for digit 0 labels array is [1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]

for digit 1 labels array is [0,0,0, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]

and so on...

then I train 10 perceptrons using this data.

This exercise also requires to have some kind of GUI that allows me to draw a number. I've choosen pygame, I could use pyQT, it actually does not matter.

This is the code, you can skip it, it's not that important (except for on_rec_button function, but I'll address on it):

import pygame
import sys

pygame.init()

cols, rows = 5, 7
square_size = 50
width, height = cols * square_size, (rows + 2) * square_size
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Zad1")

rec_button_color = (0, 255, 0)
rec_button_rect = pygame.Rect(0, rows * square_size, width, square_size)

clear_button_color = (255, 255, 0)
clear_button_rect = pygame.Rect(0, (rows + 1) * square_size + 1, width, square_size)

mouse_pressed = False

drawing_matrix = np.zeros((rows, cols), dtype=int)


def color_square(x, y):
    col = x // square_size
    row = y // square_size

    if 0 <= row < rows and 0 <= col < cols:
        drawing_matrix[row, col] = 1


def draw_button(color, rect):
    pygame.draw.rect(screen, color, rect)


def on_rec_button():
    np_array_representation = drawing_matrix.flatten()

    for digit_to_recognize in digits_to_recognize:
        p = perceptrons[digit_to_recognize]
        predicted_number = p.predict(np_array_representation)
        if predicted_number == digit_to_recognize:
            print(f"Image has been recognized as number {digit_to_recognize}")


def on_clear_button():
    drawing_matrix.fill(0)


while True:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

        elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 3:
            mouse_pressed = True

        elif event.type == pygame.MOUSEBUTTONUP and event.button == 3:
            mouse_pressed = False

        elif event.type == pygame.MOUSEMOTION:
            mouse_x, mouse_y = event.pos
            if mouse_pressed:
                color_square(mouse_x, mouse_y)

        elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:
            if rec_button_rect.collidepoint(event.pos):
                on_rec_button()
            if clear_button_rect.collidepoint(event.pos):
                on_clear_button()

    for i in range(rows):
        for j in range(cols):
            if drawing_matrix[i, j] == 1:
                pygame.draw.rect(screen, (255, 0, 0), (j * square_size, i * square_size, square_size, square_size))
            else:
                pygame.draw.rect(screen, (0, 0, 0), (j * square_size, i * square_size, square_size, square_size))

    draw_button(rec_button_color, rec_button_rect)
    draw_button(clear_button_color, clear_button_rect)

    pygame.display.flip()

so, now that I run the app, draw the digit 3, and click the green button that runs on_rec_button function, I expected to see Image has been recognized as number 3, but I get Image has been recognized as number 0.

This is what I draw:

enter image description here

These are training data:

enter image description here enter image description here enter image description here

These are very small because of the resolution 5x7 that was required in the exercise.

When I draw the digit 1 then I get 2 results: Image has been recognized as number 0 Image has been recognized as number 1

enter image description here

What should I do to make it work the way I want? I don't expect this to work 100% accurate but I guess it could be better.



from How can I fix my perceptron to recognize numbers?

No comments:

Post a Comment