First note: I am creating a recommender system. This should later suggest articles to the user that they might also like.
I'm in the process of creating a confusion matrix. Unfortunately I am not able to make it. I'm getting an error. I have attached an example below, unfortunately I don't know how to rebuild my code.
- How do I create a confusion matrix based on my existing data?
How do I have to rebuild it to get such a "nice" confusion matrix like in the example?
Dataframe:
d = {'purchaseid': [0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9],
'itemid': [ 3, 8, 2, 10, 3, 10, 4, 12, 3, 12, 3, 4, 8, 6, 3, 0, 5, 12, 9, 9, 13, 1, 7, 11, 11]}
df = pd.DataFrame(data=d)
purchaseid itemid
0 0 3
1 0 8
2 0 2
3 1 10
4 2 3
... ... ...
Code:
PERCENTAGE_SPLIT = 20
NUM_NEGATIVES = 4
def splitter(df):
df_ = pd.DataFrame()
sum_purchase = df['purchaseid'].nunique()
amount = round((sum_purchase / 100) * PERCENTAGE_SPLIT)
random_list = random.sample(df['purchaseid'].unique().tolist(), amount)
df_ = df.loc[df['purchaseid'].isin(random_list)]
df_reduced = df.loc[~df['purchaseid'].isin(random_list)]
return [df_reduced, df_]
def generate_matrix(df_main, dataframe, name):
mat = sp.dok_matrix((df_main.shape[0], len(df_main['itemid'].unique())), dtype=np.float32)
for purchaseid, itemid in zip(dataframe['purchaseid'], dataframe['itemid']):
mat[purchaseid, itemid] = 1.0
return mat
dfs = splitter(df)
df_tr = dfs[0].copy(deep=True)
df_val = dfs[1].copy(deep=True)
train_mat = generate_matrix(df, df_tr, 'train')
val_mat = generate_matrix(df, df_val, 'val')
num_users, num_items = train_mat.shape
def get_train_samples(train_mat, num_negatives):
user_input, item_input, labels = [], [], []
num_user, num_item = train_mat.shape
for (u, i) in train_mat.keys():
user_input.append(u)
item_input.append(i)
labels.append(1)
# negative instances
for t in range(num_negatives):
j = np.random.randint(num_item)
while (u, j) in train_mat.keys():
j = np.random.randint(num_item)
user_input.append(u)
item_input.append(j)
labels.append(0)
return user_input, item_input, labels
user_input, item_input, labels = get_train_samples(train_mat, NUM_NEGATIVES)
val_user_input, val_item_input, val_labels = get_train_samples(val_mat, NUM_NEGATIVES)
hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
validation_data=([np.array(val_user_input), np.array(val_item_input)], np.array(val_labels)))
from sklearn.metrics import classification_report
x_train = user_input, item_input
y_train = labels
x_test = val_user_input, val_item_input
y_test = val_labels
y_pred = model.predict(([np.array(val_user_input), np.array(val_item_input)], np.array(val_labels)), batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_bool))
Example:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
class_names = iris.target_names
# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01).fit(X_train, y_train)
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
title = ("Confusion matrix, without normalization")
disp = plot_confusion_matrix(classifier, X_test, y_test, display_labels=class_names, cmap=plt.cm.Blues,)
disp.ax_.set_title(title)
print(title)
print(disp.confusion_matrix)
plt.show()
Try:
import seaborn as sns
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, roc_curve, auc
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True)
[OUT] ValueError: Classification metrics can't handle a mix of binary and continuous targets
EDIT:
I'm using the NCF model
The architecture of a Neural Collaborative Filtering model. Taken from the Neural Collaborative Filtering paper.
# full NCF model
def get_model(num_users, num_items, latent_dim=8, dense_layers=[64, 32, 16, 8],
reg_layers=[0, 0, 0, 0], reg_mf=0):
# input layer
input_user = Input(shape=(1,), dtype='int32', name='user_input')
input_item = Input(shape=(1,), dtype='int32', name='item_input')
# embedding layer
mf_user_embedding = Embedding(input_dim=num_users, output_dim=latent_dim,
name='mf_user_embedding',
embeddings_initializer='RandomNormal',
embeddings_regularizer=l2(reg_mf), input_length=1)
mf_item_embedding = Embedding(input_dim=num_items, output_dim=latent_dim,
name='mf_item_embedding',
embeddings_initializer='RandomNormal',
embeddings_regularizer=l2(reg_mf), input_length=1)
mlp_user_embedding = Embedding(input_dim=num_users, output_dim=int(dense_layers[0]/2),
name='mlp_user_embedding',
embeddings_initializer='RandomNormal',
embeddings_regularizer=l2(reg_layers[0]),
input_length=1)
mlp_item_embedding = Embedding(input_dim=num_items, output_dim=int(dense_layers[0]/2),
name='mlp_item_embedding',
embeddings_initializer='RandomNormal',
embeddings_regularizer=l2(reg_layers[0]),
input_length=1)
# MF latent vector
mf_user_latent = Flatten()(mf_user_embedding(input_user))
mf_item_latent = Flatten()(mf_item_embedding(input_item))
mf_cat_latent = Multiply()([mf_user_latent, mf_item_latent])
# MLP latent vector
mlp_user_latent = Flatten()(mlp_user_embedding(input_user))
mlp_item_latent = Flatten()(mlp_item_embedding(input_item))
mlp_cat_latent = Concatenate()([mlp_user_latent, mlp_item_latent])
mlp_vector = mlp_cat_latent
# build dense layer for model
for i in range(1,len(dense_layers)):
layer = Dense(dense_layers[i],
activity_regularizer=l2(reg_layers[i]),
activation='relu',
name='layer%d' % i)
mlp_vector = layer(mlp_vector)
predict_layer = Concatenate()([mf_cat_latent, mlp_vector])
result = Dense(1, activation='sigmoid',
kernel_initializer='lecun_uniform',name='result')
model = Model(inputs=[input_user,input_item], outputs=result(predict_layer))
return model
from Create a confusion matrix
No comments:
Post a Comment