Saturday, 19 September 2020

Keras custom loss function per tensor group

I am writing a custom loss function that requires calculating ratios of predicted values per group. As a simplified example, here is what my data and model code look like:

def main():
    """Build the toy dataset, then train and evaluate the model on it.

    The two condition columns are kept alongside the label in the target
    frame that is handed to ``fit``/``evaluate``, rather than in the
    feature frame.
    """
    feature_cols = ["feature_1", "feature_2"]
    target_cols = ["condition_1", "condition_2", "label"]

    # Each row: feature_1, feature_2, condition_1, condition_2, label.
    rows = [
        [5, 10, "a", "1", 0],
        [30, 20, "a", "1", 1],
        [50, 40, "a", "1", 0],
        [15, 20, "a", "2", 0],
        [25, 30, "b", "2", 1],
        [35, 40, "b", "1", 0],
        [10, 80, "b", "1", 1],
    ]
    frame = pd.DataFrame(columns=feature_cols + target_cols, data=rows)

    inputs = frame[feature_cols]
    targets = frame[target_cols]

    model = my_model(input_shape=len(feature_cols))
    model.fit(inputs, targets, epochs=10, batch_size=128)
    model.evaluate(inputs, targets)


def custom_loss(conditions, y_pred):  # this is what I need help with
    """Mean absolute error between predicted and reference per-group ratios.

    This is the pandas formulation of the loss: ``conditions`` is treated as
    a DataFrame, not as a Keras/TF tensor.

    Args:
        conditions: DataFrame with the grouping columns ``condition_1`` and
            ``condition_2``. It is not modified.
        y_pred: Predicted values, one per row of ``conditions``.

    Returns:
        ``K.mean(K.abs(...))`` of ``pred_ratio - true_ratio`` over the groups
        that appear both in the batch and in the reference table.
    """
    conds = ["condition_1", "condition_2"]

    # Work on a copy so the caller's frame does not silently gain a
    # "label_pred" column every time the loss is evaluated.
    scored = conditions.assign(label_pred=y_pred)

    # Per group: sum of predictions divided by the number of rows. Doing this
    # on the selected Series yields a Series keyed by the group columns, so
    # ``reset_index(name=...)`` is valid (``name`` is a Series-only keyword;
    # the previous ``as_index=False`` + ``apply`` form can return a DataFrame,
    # depending on the pandas version, on which that call fails).
    per_group = scored.groupby(by=conds)["label_pred"]
    g = (per_group.sum() / per_group.size()).reset_index(name="pred_ratio")

    # true_ratios will be a constant, external DataFrame. Simplified example here:
    true_ratios = pd.DataFrame(columns=["condition_1", "condition_2", "true_ratio"],
                               data=[["a", "1", 0.1],
                                     ["a", "2", 0.2],
                                     ["b", "1", 0.8],
                                     ["b", "2", 0.9]])

    # Default (inner) merge: groups without a reference ratio are dropped.
    merged = pd.merge(g, true_ratios, on=conds)
    merged["diff"] = merged["pred_ratio"] - merged["true_ratio"]
    return K.mean(K.abs(merged["diff"]))


def joint_loss(conds_and_label, y_pred):
    """Combine the standard loss and the group-ratio loss with equal weight.

    ``conds_and_label`` is indexed as a 2-D tensor: columns 0 and 1 are
    forwarded to ``custom_loss`` as the conditions, and column 2 is used as
    the true label for ``standard_loss``.
    """
    group_conditions = tf.gather(conds_and_label, [0, 1], axis=1)
    labels = conds_and_label[:, 2]

    label_term = standard_loss(y_true=labels, y_pred=y_pred)  # not shown
    ratio_term = custom_loss(conditions=group_conditions, y_pred=y_pred)
    return 0.5 * label_term + 0.5 * ratio_term


def my_model(input_shape=None):
    """Build and compile the small dense network trained with ``joint_loss``.

    Args:
        input_shape: Number of input features; used as the single dimension
            of the first layer's input shape.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # ``input_shape`` is an argument of the layer, not of ``Sequential.add``;
    # passing it to ``add`` raises a TypeError.
    model.add(Dense(units=2, activation="relu", input_shape=(input_shape,)))
    model.add(Dense(units=1, activation="sigmoid"))
    model.add(Flatten())
    model.compile(loss=joint_loss, optimizer="Adam",
                  metrics=[joint_loss, custom_loss, "accuracy"])
    return model

What I need help with is the custom_loss function. As you can see, it is currently written as if the inputs were pandas DataFrames. However, the inputs will be Keras tensors (with the TensorFlow backend), so I am trying to figure out how to convert the current code in custom_loss to use Keras/TF backend functions. For example, I searched online and couldn't find a way to do a groupby in Keras/TF to get the ratios I need...

Some context/explanation that might be helpful to you:

  1. My main loss function is joint_loss, which consists of standard_loss (not shown) and custom_loss. But I only need help converting custom_loss.
  2. What custom_loss does is:
    1. Groupby on two condition columns (these two columns represent the groups of the data).
    2. Get the ratio of predicted 1s to total number of batch samples per each group.
    3. Compare the "pred_ratio" to a set of "true_ratio" and get the difference.
    4. Calculate mean absolute error from the differences.


from Keras custom loss function per tensor group

No comments:

Post a Comment