Given input features like these (just raw numbers):
tensor([0.2153, 0.2190, 0.0685, 0.2127, 0.2145, 0.1260, 0.1480, 0.1483, 0.1489,
        0.1400, 0.1906, 0.1876, 0.1900, 0.1925, 0.0149, 0.1857, 0.1871, 0.2715,
        0.1887, 0.1804, 0.1656, 0.1665, 0.1137, 0.1668, 0.1168, 0.0278, 0.1170,
        0.1189, 0.1163, 0.2337, 0.2319, 0.2315, 0.2325, 0.0519, 0.0594, 0.0603,
        0.0586, 0.0067, 0.0624, 0.2691, 0.0617, 0.2790, 0.2805, 0.2848, 0.2454,
        0.1268, 0.2483, 0.2454, 0.2475], device='cuda:0')
and the expected output is a single real number, e.g.
tensor(-34.8500, device='cuda:0')
Full code on https://www.kaggle.com/alvations/pytorch-mlp-regression
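(For reference, the dataloader used below yields dict batches with 'x' and 'y' keys, i.e. a Dataset along these lines; this is just a sketch, the class name is made up and the real one is in the notebook:)

import torch
from torch.utils.data import Dataset

class RegressionDataset(Dataset):  # hypothetical name, for illustration only
    """Wraps (features, target) pairs as {'x': ..., 'y': ...} dicts."""
    def __init__(self, x, y):
        self.x = torch.as_tensor(x, dtype=torch.float)  # [n_samples, n_features]
        self.y = torch.as_tensor(y, dtype=torch.float)  # [n_samples]

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return {'x': self.x[idx], 'y': self.y[idx]}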
I've tried creating a simple 2-layer network with:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_size, output_size, hidden_size):
        super(MLP, self).__init__()
        self.linear = nn.Linear(input_size, hidden_size)
        self.classifier = nn.Linear(hidden_size, output_size)

    def forward(self, inputs, hidden=None, dropout=0.5):
        inputs = F.dropout(inputs, dropout)  # Drop-in.
        # First layer.
        output = F.relu(self.linear(inputs))
        # Matrix manipulation magic: a linear layer takes a 2-D matrix as
        # input, so flatten [batch_size, sequence_len, hidden_size] first...
        batch_size, sequence_len, hidden_size = output.shape
        output = output.contiguous().view(batch_size * sequence_len, hidden_size)
        # Apply dropout.
        output = F.dropout(output, dropout)
        # Put it through the classifier and reshape
        # to [batch_size x sequence_len x output_size].
        output = self.classifier(output).view(batch_size, sequence_len, -1)
        return output
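For what it's worth, a dummy batch goes through with the expected shapes (a quick sanity check; 49 features as in the example above, batch and hidden sizes from the hyperparameters below):

import torch
model = MLP(input_size=49, output_size=1, hidden_size=150)
dummy = torch.randn(500, 1, 49)  # [batch_size, sequence_len, input_size]
print(model(dummy).shape)        # torch.Size([500, 1, 1])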
And the training routine:
import matplotlib.pyplot as plt
import torch
from IPython.display import clear_output
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training routine.
def train(num_epochs, dataloader, valid_dataset, model, criterion, optimizer):
    losses = []
    valid_losses = []
    learning_rates = []
    plt.ion()
    x_valid, y_valid = valid_dataset
    for _e in range(num_epochs):
        for batch in tqdm(dataloader):
            # Zero gradients.
            optimizer.zero_grad()
            this_x = torch.tensor(batch['x'].view(len(batch['x']), 1, -1)).to(device)
            this_y = torch.tensor(batch['y'].view(len(batch['y']), 1, 1)).to(device)
            # Feed forward.
            output = model(this_x)
            prediction, _ = torch.max(output, dim=1)
            loss = criterion(prediction, this_y.view(len(batch['y']), -1))
            loss.backward()
            optimizer.step()
            # Track RMSE per batch.
            losses.append(torch.sqrt(loss.float()).data)
        # Validate once per epoch.
        with torch.no_grad():
            optimizer.zero_grad()
            output = model(x_valid.view(len(x_valid), 1, -1))
            prediction, _ = torch.max(output, dim=1)
            loss = criterion(prediction, y_valid.view(len(y_valid), -1))
            valid_losses.append(torch.sqrt(loss.float()).data)
        # Live-plot both curves.
        clear_output(wait=True)
        plt.plot(losses, label='Train')
        plt.plot(valid_losses, label='Valid')
        plt.legend()
        plt.pause(0.05)
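One debugging step that might be relevant: after a loss.backward() call, the per-parameter gradient magnitudes can be inspected to see whether anything flows at all (a sketch, not part of the notebook):

for name, param in model.named_parameters():
    if param.grad is None:
        print(name, 'no gradient yet')
    else:
        print(name, f'{param.grad.abs().mean().item():.3e}')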
After tuning several hyperparameters, it looks like the model doesn't train well: the validation loss doesn't move at all, e.g. with
hyperparams = Hyperparams(input_size=train_dataset.x.shape[1],
                          output_size=1,
                          hidden_size=150,
                          loss_func=nn.MSELoss,
                          learning_rate=1e-8,
                          optimizer=optim.Adam,
                          batch_size=500)
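Here Hyperparams is just a namedtuple, and the pieces are wired up roughly like this (a sketch; train_dataset, x_valid, y_valid and device are defined elsewhere, and the exact setup is in the notebook):

from collections import namedtuple
from torch.utils.data import DataLoader
import torch.optim as optim

Hyperparams = namedtuple('Hyperparams', ['input_size', 'output_size', 'hidden_size',
                                         'loss_func', 'learning_rate', 'optimizer',
                                         'batch_size'])

dataloader = DataLoader(train_dataset, batch_size=hyperparams.batch_size, shuffle=True)
model = MLP(hyperparams.input_size, hyperparams.output_size,
            hyperparams.hidden_size).to(device)
criterion = hyperparams.loss_func()
optimizer = hyperparams.optimizer(model.parameters(), lr=hyperparams.learning_rate)
train(num_epochs=100,  # epoch count here is a guess
      dataloader=dataloader, valid_dataset=(x_valid, y_valid),
      model=model, criterion=criterion, optimizer=optimizer)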
And its loss curve: [plot omitted: 'Train' and 'Valid' RMSE curves; the validation curve stays completely flat]
Any idea what's wrong with the network?
Am I training the regression model with the wrong loss? Or have I just not found the right hyperparameters yet?
Or am I validating the model wrongly?
From: Validation loss not moving with MLP in Regression