Tuesday, 24 November 2020

Create a confusion matrix

First note: I am creating a recommender system. This should later suggest articles to the user that they might also like.


I'm in the process of creating a confusion matrix. Unfortunately I am not able to make it. I'm getting an error. I have attached an example below, unfortunately I don't know how to rebuild my code.

  • How do I create a confusion matrix based on my existing data?

How do I have to rebuild it to get such a "nice" confusion matrix like in the example?

Dataframe:

d = {'purchaseid': [0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9],
     'itemid': [ 3, 8, 2, 10, 3, 10, 4, 12, 3, 12, 3, 4, 8, 6, 3, 0, 5, 12, 9, 9, 13, 1, 7, 11, 11]}
df = pd.DataFrame(data=d)

   purchaseid  itemid
0           0       3
1           0       8
2           0       2
3           1      10
4           2       3
...         ...    ...

Code:

PERCENTAGE_SPLIT = 20
    NUM_NEGATIVES = 4
    def splitter(df):
      df_ = pd.DataFrame()
      sum_purchase = df['purchaseid'].nunique()
      amount = round((sum_purchase / 100) * PERCENTAGE_SPLIT)
    
      random_list = random.sample(df['purchaseid'].unique().tolist(), amount)
      df_ = df.loc[df['purchaseid'].isin(random_list)]
      df_reduced = df.loc[~df['purchaseid'].isin(random_list)]
      return [df_reduced, df_]
    
    def generate_matrix(df_main, dataframe, name):
      
      mat = sp.dok_matrix((df_main.shape[0], len(df_main['itemid'].unique())), dtype=np.float32)
      for purchaseid, itemid in zip(dataframe['purchaseid'], dataframe['itemid']):
        mat[purchaseid, itemid] = 1.0
    
      return mat
    
dfs = splitter(df)
df_tr = dfs[0].copy(deep=True)
df_val = dfs[1].copy(deep=True)
    
train_mat = generate_matrix(df, df_tr, 'train')
val_mat = generate_matrix(df, df_val, 'val')
num_users, num_items = train_mat.shape


def get_train_samples(train_mat, num_negatives):
    user_input, item_input, labels = [], [], []
    num_user, num_item = train_mat.shape
    for (u, i) in train_mat.keys():
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # negative instances
        for t in range(num_negatives):
            j = np.random.randint(num_item)
            while (u, j) in train_mat.keys():
                j = np.random.randint(num_item)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

user_input, item_input, labels = get_train_samples(train_mat, NUM_NEGATIVES)
val_user_input, val_item_input, val_labels = get_train_samples(val_mat, NUM_NEGATIVES)

hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                 validation_data=([np.array(val_user_input), np.array(val_item_input)], np.array(val_labels)))

from sklearn.metrics import classification_report
x_train = user_input, item_input
y_train = labels
x_test = val_user_input, val_item_input
y_test = val_labels


y_pred = model.predict(([np.array(val_user_input), np.array(val_item_input)], np.array(val_labels)), batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)

print(classification_report(y_test, y_pred_bool))

Example:

import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix

# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
class_names = iris.target_names

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01).fit(X_train, y_train)

np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
title = ("Confusion matrix, without normalization")


disp = plot_confusion_matrix(classifier, X_test, y_test, display_labels=class_names, cmap=plt.cm.Blues,)
disp.ax_.set_title(title)
print(title)
print(disp.confusion_matrix)

plt.show()

enter image description here

Try:

import seaborn as sns
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, roc_curve, auc
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True)

[OUT] ValueError: Classification metrics can't handle a mix of binary and continuous targets

EDIT:

I'm using the NCF model

enter image description here

The architecture of a Neural Collaborative Filtering model. Taken from the Neural Collaborative Filtering paper.

# full NCF model
def get_model(num_users, num_items, latent_dim=8, dense_layers=[64, 32, 16, 8],
              reg_layers=[0, 0, 0, 0], reg_mf=0):

    # input layer
    input_user = Input(shape=(1,), dtype='int32', name='user_input')
    input_item = Input(shape=(1,), dtype='int32', name='item_input')
    
    # embedding layer
    mf_user_embedding = Embedding(input_dim=num_users, output_dim=latent_dim,
                        name='mf_user_embedding',
                        embeddings_initializer='RandomNormal',
                        embeddings_regularizer=l2(reg_mf), input_length=1)
    mf_item_embedding = Embedding(input_dim=num_items, output_dim=latent_dim,
                        name='mf_item_embedding',
                        embeddings_initializer='RandomNormal',
                        embeddings_regularizer=l2(reg_mf), input_length=1)
    mlp_user_embedding = Embedding(input_dim=num_users, output_dim=int(dense_layers[0]/2),
                         name='mlp_user_embedding',
                         embeddings_initializer='RandomNormal',
                         embeddings_regularizer=l2(reg_layers[0]), 
                         input_length=1)
    mlp_item_embedding = Embedding(input_dim=num_items, output_dim=int(dense_layers[0]/2),
                         name='mlp_item_embedding',
                         embeddings_initializer='RandomNormal',
                         embeddings_regularizer=l2(reg_layers[0]), 
                         input_length=1)

    # MF latent vector
    mf_user_latent = Flatten()(mf_user_embedding(input_user))
    mf_item_latent = Flatten()(mf_item_embedding(input_item))
    mf_cat_latent = Multiply()([mf_user_latent, mf_item_latent])

    # MLP latent vector
    mlp_user_latent = Flatten()(mlp_user_embedding(input_user))
    mlp_item_latent = Flatten()(mlp_item_embedding(input_item))
    mlp_cat_latent = Concatenate()([mlp_user_latent, mlp_item_latent])
    
    mlp_vector = mlp_cat_latent
    
    # build dense layer for model
    for i in range(1,len(dense_layers)):
        layer = Dense(dense_layers[i],
                      activity_regularizer=l2(reg_layers[i]),
                      activation='relu',
                      name='layer%d' % i)
        mlp_vector = layer(mlp_vector)

    predict_layer = Concatenate()([mf_cat_latent, mlp_vector])
    result = Dense(1, activation='sigmoid', 
                   kernel_initializer='lecun_uniform',name='result')

    model = Model(inputs=[input_user,input_item], outputs=result(predict_layer))

    return model


from Create a confusion matrix

No comments:

Post a Comment