My exercise is to train 10 perceptrons to recognize numbers (0 - 9). Each perceptron should learn a single digit. As training data, I've created 30 images (5x7 bmp). 3 variants per digit.
I've got a perceptron class:
import numpy as np
def unit_step_func(x):
    """Step activation: 1 where ``x`` is strictly positive, otherwise 0.

    Works element-wise on arrays; an input of exactly zero maps to 0.
    """
    fires = np.greater(x, 0)
    return np.where(fires, 1, 0)
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp(0, -x)`` for the inner logarithm.  Unlike the naive
    formula, this never evaluates ``exp`` on a large positive argument, so
    very negative inputs yield 0.0 without an overflow RuntimeWarning.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Value(s) in the closed interval [0, 1], same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, np.negative(x)))
class Perceptron:
    """Single perceptron trained with the classic error-correction rule.

    The model computes ``activation_func(dot(x, weights) + bias)``.  With the
    default step activation the output is a binary label (0 or 1); if
    ``activation_func`` is swapped for a continuous function such as a
    sigmoid, ``predict`` returns that function's raw output instead of a
    label.

    Attributes are plain and public:
        lr: Learning rate applied to every weight/bias correction.
        n_iters: Maximum number of passes over the training set.
        activation_func: Callable applied to the linear output.
        weights: 1-D array of per-feature weights (``None`` until ``fit``).
        bias: Scalar offset (``None`` until ``fit``).
    """

    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.activation_func = unit_step_func
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Train on samples ``X`` (n_samples x n_features) with targets ``y``.

        Weights and bias are reset to zero on every call.  Samples are
        visited in order, one correction per sample, for at most
        ``n_iters`` epochs.

        Args:
            X: 2-D array-like of numeric features, one row per sample.
            y: 1-D array-like of target outputs, one per row of ``X``.
        """
        # Accept lists / integer / boolean image arrays alike.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        _, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            changed = False
            for x_i, y_i in zip(X, y):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_func(linear_output)

                # Error-correction rule: zero when the sample is already
                # classified correctly, so nothing moves in that case.
                update = self.lr * (y_i - y_predicted)
                self.weights += update * x_i
                self.bias += update
                if update != 0:
                    changed = True

            # An epoch without any correction means every later epoch
            # would be identical, so stopping here yields the same model.
            if not changed:
                break

    def predict(self, X):
        """Return ``activation_func(dot(X, weights) + bias)``.

        Args:
            X: A single feature vector or a 2-D array-like of samples.

        Returns:
            The activation output: one value for a single vector, or one
            value per row for a 2-D input.
        """
        linear_output = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return self.activation_func(linear_output)
I've tried both the `unit_step_func` and `sigmoid` activation functions, as well as the pocket algorithm (the commented-out lines), to see if it makes any difference. I'm a beginner, so I'm not sure whether this is even implemented correctly.
This is how I train these perceptrons:
import numpy as np
from PIL import Image
from Perceptron import Perceptron
import os
def load_images_from_folder(folder, digit):
    """Load every image in ``folder`` as a flat vector with a binary label.

    A file is labelled 1 when its name starts with ``"<digit>_"`` and 0
    otherwise, producing a one-vs-rest training set for ``digit``.

    Args:
        folder: Directory containing only image files readable by PIL.
        digit: The digit the resulting labels should mark as positive.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per
        file, in sorted filename order.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sort so that the
    # sample order is reproducible between runs and platforms.
    for filename in sorted(os.listdir(folder)):
        # Image.open raises on failure (it never returns None) and keeps
        # the file open until closed, hence the context manager.
        with Image.open(os.path.join(folder, filename)) as img:
            # NOTE(review): pixel values are used exactly as decoded. The
            # drawing grid elsewhere in this file encodes a painted cell as
            # 1 and an empty one as 0; confirm the training bitmaps use
            # the same encoding.
            images.append(np.array(img).flatten())
        labels.append(1 if filename.startswith(f"{digit}_") else 0)
    return np.array(images), np.array(labels)
# Train one "is it this digit?" perceptron per digit.  Models are appended
# in digit order, so perceptrons[d] is the detector for digit d.
digits_to_recognize = list(range(10))
perceptrons = []
for digit_to_recognize in digits_to_recognize:
    features, targets = load_images_from_folder("data", digit_to_recognize)
    model = Perceptron()
    model.fit(features, targets)
    perceptrons.append(model)
In short: training data filenames have the format `digit_variant`. As I said before, each digit has 3 variants,
so for digit 0 the files are `0_0`, `0_1`, `0_2`;
for digit 1 they are `1_0`, `1_1`, `1_2`;
and so on...
The `load_images_from_folder` function loads all 30 images and checks each filename. If the `digit` part of the name is the same as the `digit` argument, it appends 1 to `labels` (otherwise 0), so that the perceptron knows which images show the desired digit.
I know that it would be better to load these images once and store them in, for example, an array of tuples, but I don't care about the performance right now (I won't care later either).
for digit 0
labels array is [1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
for digit 1
labels array is [0,0,0, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
and so on...
then I train 10 perceptrons using this data.
This exercise also requires some kind of GUI that allows me to draw a number. I've chosen pygame; I could have used PyQt, it doesn't really matter.
This is the code. You can skip it, it's not that important (except for the `on_rec_button` function, which I'll get to below):
import pygame
import sys
# --- Window, grid and button setup ---------------------------------------
pygame.init()

# The drawing grid is `cols` x `rows` square cells of `square_size` pixels;
# two extra rows below it hold the "recognize" and "clear" buttons.
cols, rows = 5, 7
square_size = 50
width = cols * square_size
height = (rows + 2) * square_size

screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Zad1")

# Green "recognize" button directly under the grid.
rec_button_color = (0, 255, 0)
rec_button_rect = pygame.Rect((0, rows * square_size), (width, square_size))

# Yellow "clear" button under the recognize button (offset by one pixel).
clear_button_color = (255, 255, 0)
clear_button_rect = pygame.Rect(
    (0, (rows + 1) * square_size + 1), (width, square_size)
)

# Drawing state: whether the drawing mouse button is held, and which grid
# cells are painted (1) or empty (0).
mouse_pressed = False
drawing_matrix = np.zeros((rows, cols), dtype=int)
def color_square(x, y):
    """Mark the grid cell under window pixel ``(x, y)`` as painted.

    Positions outside the drawing grid (for example over the buttons) are
    ignored.
    """
    row, col = y // square_size, x // square_size
    if row < 0 or row >= rows or col < 0 or col >= cols:
        return
    drawing_matrix[row, col] = 1
def draw_button(color, rect):
    """Fill ``rect`` with ``color`` on the main window surface."""
    target_surface = screen
    pygame.draw.rect(target_surface, color, rect)
def on_rec_button():
    """Run every digit perceptron on the current drawing and print matches.

    Each perceptron is a binary detector whose prediction is 1 for "this is
    my digit" and 0 otherwise, so a digit is reported when its detector
    outputs 1.  (The prediction must not be compared with the digit value
    itself: that reports digit 0 whenever its detector says "no" and can
    never report digits 2-9.)  Several detectors may fire for one drawing;
    every one of them is reported.

    NOTE: the ``== 1`` test assumes the step activation; with a continuous
    activation the prediction is not an exact 0/1 label.
    """
    np_array_representation = drawing_matrix.flatten()
    recognized = False
    # Pair each digit with its own detector rather than indexing by value.
    for digit_to_recognize, p in zip(digits_to_recognize, perceptrons):
        if p.predict(np_array_representation) == 1:
            print(f"Image has been recognized as number {digit_to_recognize}")
            recognized = True
    if not recognized:
        # Give feedback instead of staying silent when no detector fires.
        print("Image has not been recognized as any number")
def on_clear_button():
    """Erase the drawing by resetting every grid cell to 0 in place."""
    drawing_matrix[...] = 0
# Main loop: handle input events, then redraw the grid and both buttons.
# The clock caps the loop at 60 iterations per second so it does not spin
# a CPU core at 100% while idle.
clock = pygame.time.Clock()

while True:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()
        elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 3:
            # Right button starts a stroke; paint the pressed cell right
            # away so a click without any mouse movement still draws.
            mouse_pressed = True
            color_square(*event.pos)
        elif event.type == pygame.MOUSEBUTTONUP and event.button == 3:
            mouse_pressed = False
        elif event.type == pygame.MOUSEMOTION:
            mouse_x, mouse_y = event.pos
            if mouse_pressed:
                color_square(mouse_x, mouse_y)
        elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:
            # Left button operates the two buttons below the grid.
            if rec_button_rect.collidepoint(event.pos):
                on_rec_button()
            if clear_button_rect.collidepoint(event.pos):
                on_clear_button()

    # Painted cells are red, empty cells black.
    for i in range(rows):
        for j in range(cols):
            cell_color = (255, 0, 0) if drawing_matrix[i, j] == 1 else (0, 0, 0)
            pygame.draw.rect(
                screen,
                cell_color,
                (j * square_size, i * square_size, square_size, square_size),
            )

    draw_button(rec_button_color, rec_button_rect)
    draw_button(clear_button_color, clear_button_rect)
    pygame.display.flip()
    clock.tick(60)
So now, when I run the app, draw the digit 3, and click the green button that runs the `on_rec_button` function, I expect to see `Image has been recognized as number 3`, but I get `Image has been recognized as number 0`.
This is what I draw:
These are training data:
These are very small because of the resolution 5x7
that was required in the exercise.
When I draw the digit 1, I get 2 results: `Image has been recognized as number 0` and `Image has been recognized as number 1`.
What should I do to make it work the way I want? I don't expect this to work 100% accurate but I guess it could be better.
from How can I fix my perceptron to recognize numbers?
No comments:
Post a Comment