There are quite a few questions about using nn.Linear with batched data; if I understand the answers correctly, the first dimension of the input tensor indexes the items of the current batch.
Taking this as a base, I tried to model a parabola, like this:
import torch
from torch.utils.data import DataLoader, TensorDataset
# Problem dimensions: number of samples, input width, hidden width, output width.
# (A larger configuration tried earlier was 10000, 1000, 100, 10.)
N_SAMPLES, D_in, H, D_out = 1000, 1, 10, 1

# Inputs are the integers 0 .. N_SAMPLES - 1 stored as a float32 column of
# shape (N_SAMPLES, 1); targets are their element-wise squares (a parabola).
# (Random inputs/targets via torch.randn were an earlier alternative.)
# NOTE(review): inputs span [0, 999] and targets reach 998001 with no scaling;
# this probably makes optimisation harder - consider normalising both.
x = torch.arange(N_SAMPLES, dtype=torch.float32)[:, None]
y = x * x

# Mini-batch size and number of passes over the data.
BATCH_SIZE = 1
N_EPOCHS = 100

# Pair each input with its target and let DataLoader produce shuffled
# mini-batches of BATCH_SIZE samples.
dataset = TensorDataset(x, y)
dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)
# Network, loss and optimiser.
# The model is a fully connected stack: D_in -> H, three H -> H hidden layers,
# then H -> D_out, with an ELU activation after every layer except the last.
# Layers are created in network order, so parameter initialisation consumes
# the random number generator in the same sequence as an inline definition.
layers = [torch.nn.Linear(D_in, H), torch.nn.ELU()]
for _ in range(3):
    layers.extend((torch.nn.Linear(H, H), torch.nn.ELU()))
layers.append(torch.nn.Linear(H, D_out))
model = torch.nn.Sequential(*layers)

# Huber loss with its default settings; Adam with a learning rate of 0.001.
loss_fn = torch.nn.HuberLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Total number of samples, used only in the progress output
# (it is equal to N_SAMPLES).
dataset_size = len(dataloader.dataset)

# Training: N_EPOCHS full passes over the data, one optimiser step per batch.
for epoch in range(N_EPOCHS):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        # Forward pass and loss for the current mini-batch.
        loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Progress is reported only on every 100th batch.
        if batch_idx % 100 != 0:
            continue
        # Sample count so far in this epoch, computed as
        # (batch index + 1) * length of the current batch.
        current = (batch_idx + 1) * len(inputs)
        loss = loss.item()
        print(f"loss: {loss:>7f} [{current:>5d}/{dataset_size:>5d}]")
# Evaluation: run the trained model over the dataset in its stored order and
# collect the predictions for plotting.
model.eval()
predicted_y = []

# A separate, unshuffled loader keeps predictions aligned with sample order.
# It has its own name so the shuffled training `dataloader` is not replaced,
# and the loop variables below do not rebind the module-level target tensor
# `y` (the original loop unpacked each batch into `X, y`).
eval_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)

# Gradients are not needed for inference.
with torch.no_grad():
    for eval_inputs, _ in eval_loader:
        # Flatten everything but the batch dimension before the forward pass;
        # extend() then appends one prediction tensor per sample in the batch.
        flat_inputs = eval_inputs.view(eval_inputs.size(0), -1)
        predicted_y.extend(model(flat_inputs))

# plot_array is defined outside this snippet; it receives a list holding one
# prediction tensor per sample.
plot_array(predicted_y)
When BATCH_SIZE is set to 1 it works fine:
but when BATCH_SIZE increases the fit gets worse, and the prediction turns into a straight line when it's more than 10.
Why is it so? Is there any way to make it work?