Monday, 5 October 2020

How to use cross validation in keras classifier

I was practicing Keras classification on imbalanced data. I followed the official example:

https://keras.io/examples/structured_data/imbalanced_classification/

and used the scikit-learn API to do cross-validation. I have tried the model with different parameters. However, every time, one of the 3 folds has a value of 0.

e.g.

results [0.99242424 0.99236641 0.        ]

What am I doing wrong? How can I get ALL THREE validation recall values to be of the order of "0.8"?

MWE

%%time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

import os
import random
# Pin every random-number source to one seed so repeated runs are comparable.
SEED = 100
os.environ['PYTHONHASHSEED'] = str(SEED)
for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
    seed_fn(SEED)

# Read the zipped credit-card CSV directly from its GitHub URL.
ifile = "https://github.com/bhishanpdl/Datasets/blob/master/Projects/Fraud_detection/raw/creditcard.csv.zip?raw=true"
df = pd.read_csv(ifile, compression='zip')

# Stratified 80/20 hold-out split: every column except the label is a feature.
target = 'Class'
features = df.drop(columns=[target])
labels = df[target]
Xtrain, Xtest, ytrain, ytest = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=SEED,
)

print(f"Xtrain shape: {Xtrain.shape}")
print(f"ytrain shape: {ytrain.shape}")


# build the model
def build_fn(n_feats):
    """Build and compile the binary classification network.

    Three 256-unit ReLU dense layers, with dropout of 0.3 after the second
    and third, feed a single sigmoid output unit. The model is compiled with
    binary cross-entropy loss, Adam(1e-2) and the 'Recall' metric.

    Args:
        n_feats: Number of input features; the input shape is ``(n_feats,)``.

    Returns:
        The compiled Sequential model.
    """
    hidden_units = 256
    dropout_rate = 0.3
    layers = keras.layers

    model = keras.models.Sequential([
        layers.Dense(hidden_units, activation="relu", input_shape=(n_feats,)),
        layers.Dense(hidden_units, activation="relu"),
        layers.Dropout(dropout_rate),
        layers.Dense(hidden_units, activation="relu"),
        layers.Dropout(dropout_rate),
        # output layer: one sigmoid unit for the binary target
        layers.Dense(1, activation="sigmoid"),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(1e-2),
        loss='binary_crossentropy',
        metrics=['Recall'],
    )
    return model

# --- cross-validated fit ---
# Width of the feature matrix; handed to build_fn through the wrapper.
n_feats = Xtrain.shape[-1]

# Inverse-frequency class weights: each class is weighted by 1 / its count
# in ytrain.
counts = np.bincount(ytrain)
weight_for_0, weight_for_1 = 1.0 / counts[0], 1.0 / counts[1]
class_weight = {0: weight_for_0, 1: weight_for_1}
FIT_PARAMS = dict(class_weight=class_weight)

keras_options = dict(
    build_fn=build_fn,
    n_feats=n_feats,  # custom argument consumed by build_fn
    epochs=30,
    batch_size=2048,
    verbose=2,
)
clf_keras = KerasClassifier(**keras_options)

# 3-fold stratified CV scored on recall; any fold error is raised, not masked.
# NOTE(review): with n_jobs=-1 the folds may run in separate worker processes,
# where the seeds set earlier in this script might not apply -- confirm.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)
cv_options = dict(
    cv=skf,
    scoring='recall',
    fit_params=FIT_PARAMS,
    n_jobs=-1,
    error_score='raise',
)
results = cross_val_score(clf_keras, Xtrain, ytrain, **cv_options)

print('results', results)

Result

Xtrain shape: (227845, 30)
ytrain shape: (227845,)
results [0.99242424 0.99236641 0.        ]
CPU times: user 3.62 s, sys: 117 ms, total: 3.74 s
Wall time: 5min 15s

Problem

I am getting the third recall as 0. I expect it to be of the order of 0.8. How can I make sure all three values are around 0.8 or more?



from How to use cross validation in keras classifier

No comments:

Post a Comment