Deep Learning Model Life-Cycle in PyTorch
A model has a life-cycle, and understanding it provides the backbone both for modeling a dataset and for making sense of the PyTorch API.
The five steps in the life-cycle are as follows:
- Prepare the Data.
- Define the Model.
- Train the Model.
- Evaluate the Model.
- Make Predictions.
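Mapped onto PyTorch, the five steps boil down to a handful of calls. Here is a minimal sketch using the functions and classes developed later in this tutorial:

# the five steps, using the functions and classes defined below
path = './Pytorch Workflow with MNIST'
train_dl, test_dl = prepare_data(path)    # 1. prepare the data
model = CNN(1)                            # 2. define the model
train_model(train_dl, model)              # 3. train the model
acc = evaluate_model(test_dl, model)      # 4. evaluate the model
yhat = model(inputs)                      # 5. make predictions on new inputs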
Let's work through these five steps with the MNIST dataset.
Convolutional Neural Networks, or CNNs for short, are a type of network designed for image input.
They are composed of convolutional layers that extract features (called feature maps) and pooling layers that distill those features down to their most salient elements.
CNNs are best suited to image classification tasks, although they can be used on a wide array of tasks that take images as input.
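To make this concrete, here is a minimal sketch (the layer sizes are illustrative, not prescriptive) showing how a convolutional layer produces feature maps and how pooling shrinks them:

import torch
from torch.nn import Conv2d, MaxPool2d

x = torch.randn(1, 1, 28, 28)             # a batch of one 28x28 grayscale image
conv = Conv2d(1, 32, (3, 3))              # 32 filters, each producing a feature map
pool = MaxPool2d((2, 2), stride=(2, 2))   # keep the strongest activation per 2x2 patch
features = conv(x)
print(features.shape)                     # torch.Size([1, 32, 26, 26])
print(pool(features).shape)               # torch.Size([1, 32, 13, 13])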
A popular image classification task is the MNIST handwritten digit classification. It involves tens of thousands of handwritten digits that must be classified as a number between 0 and 9.
The torchvision API provides a convenience function to download and load this dataset directly.
The example below loads the dataset and plots the first few images.
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import Compose
from torchvision.transforms import ToTensor
from matplotlib import pyplot
# define location to save or load the dataset
path = './Pytorch Workflow with MNIST'
# define the transforms to apply to the data
trans = Compose([ToTensor()])
# download and define the datasets
train = MNIST(path, train=True, download=True, transform=trans)
test = MNIST(path, train=False, download=True, transform=trans)
# define how to enumerate the datasets
train_dl = DataLoader(train, batch_size=32, shuffle=True)
test_dl = DataLoader(test, batch_size=32, shuffle=True)
# get one batch of images
inputs, targets = next(iter(train_dl))
# plot the first 25 images in the batch
for i in range(25):
    # define subplot
    pyplot.subplot(5, 5, i+1)
    # plot raw pixel data
    pyplot.imshow(inputs[i][0], cmap='gray')
# show the figure
pyplot.show()
We can train a CNN model to classify the images in the MNIST dataset.
Note that the images are arrays of grayscale pixel data; the ToTensor transform converts each image into a tensor with the channel dimension the model expects, giving a shape of [1, 28, 28]. It is also a good idea to scale the pixel values from the default range of 0-255 so that they have a mean close to zero and a standard deviation close to one: ToTensor scales pixels into [0, 1], and Normalize then standardizes them with the MNIST mean (0.1307) and standard deviation (0.3081).
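As a quick check of what these transforms produce (a sketch; 0.1307 and 0.3081 are the widely used MNIST dataset statistics, matching the code below):

from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize

# ToTensor yields a [channels, height, width] tensor with pixels scaled to [0, 1]
raw = MNIST('./Pytorch Workflow with MNIST', train=True, download=True, transform=ToTensor())
img, _ = raw[0]
print(img.shape, img.min().item(), img.max().item())  # torch.Size([1, 28, 28]) 0.0 1.0

# Normalize then standardizes each pixel with the dataset-level statistics
trans = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
img, _ = MNIST('./Pytorch Workflow with MNIST', train=True, download=True, transform=trans)[0]
print(img.mean().item())  # varies per image; the dataset overall has mean ~0, std ~1

The complete example below ties these pieces together.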
from numpy import vstack
from numpy import argmax
from sklearn.metrics import accuracy_score
from torchvision.datasets import MNIST
from torchvision.transforms import Compose
from torchvision.transforms import ToTensor
from torchvision.transforms import Normalize
from torch.utils.data import DataLoader
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
# prepare the dataset
def prepare_data(path):
    # define standardization
    trans = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
    # load dataset
    train = MNIST(path, train=True, download=True, transform=trans)
    test = MNIST(path, train=False, download=True, transform=trans)
    # prepare data loaders
    train_dl = DataLoader(train, batch_size=64, shuffle=True)
    test_dl = DataLoader(test, batch_size=1024, shuffle=False)
    return train_dl, test_dl
class CNN(Module):
    # define model elements
    def __init__(self, n_channels):
        super(CNN, self).__init__()
        # input to first hidden layer
        self.hidden1 = Conv2d(n_channels, 32, (3,3))
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()
        # first pooling layer
        self.pool1 = MaxPool2d((2,2), stride=(2,2))
        # second hidden layer
        self.hidden2 = Conv2d(32, 32, (3,3))
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = ReLU()
        # second pooling layer
        self.pool2 = MaxPool2d((2,2), stride=(2,2))
        # fully connected layer
        self.hidden3 = Linear(5*5*32, 100)
        kaiming_uniform_(self.hidden3.weight, nonlinearity='relu')
        self.act3 = ReLU()
        # output layer
        self.hidden4 = Linear(100, 10)
        xavier_uniform_(self.hidden4.weight)
        # note: CrossEntropyLoss applies log-softmax internally, so this final
        # Softmax is redundant for training and could be dropped
        self.act4 = Softmax(dim=1)

    # forward propagate input
    def forward(self, X):
        # input to first hidden layer
        X = self.hidden1(X)
        X = self.act1(X)
        X = self.pool1(X)
        # second hidden layer
        X = self.hidden2(X)
        X = self.act2(X)
        X = self.pool2(X)
        # flatten: 32 feature maps of 5x5 = 800 features
        X = X.view(-1, 5*5*32)
        # third hidden layer
        X = self.hidden3(X)
        X = self.act3(X)
        # output layer
        X = self.hidden4(X)
        X = self.act4(X)
        return X
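# aside: the 5*5*32 flatten size follows from the shape trace for 28x28 inputs:
# 28 -> 26 after the first 3x3 convolution, 13 after 2x2 max pooling,
# 11 after the second convolution, 5 after the second pooling,
# leaving 32 feature maps of 5x5, i.e. 800 features into hidden3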
def train_model(train_dl, model):
    # define the optimization
    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # enumerate epochs
    for epoch in range(10):
        # enumerate mini batches
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
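# aside (not part of the original script): train_model runs silently; to monitor
# progress, the inner loop could accumulate the scalar loss with
#   epoch_loss += loss.item()
# and print epoch_loss / len(train_dl) once per epoch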
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        # evaluate the model on the test set
        yhat = model(inputs)
        # retrieve numpy array
        yhat = yhat.detach().numpy()
        actual = targets.numpy()
        # convert to class labels
        yhat = argmax(yhat, axis=1)
        # reshape for stacking
        actual = actual.reshape((len(actual), 1))
        yhat = yhat.reshape((len(yhat), 1))
        # store
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate accuracy
    acc = accuracy_score(actuals, predictions)
    return acc
path = './Pytorch Workflow with MNIST'
train_dl, test_dl = prepare_data(path)
print(len(train_dl.dataset), len(test_dl.dataset))
# define the network
model = CNN(1)
# train the model
train_model(train_dl, model)
# evaluate the model
acc = evaluate_model(test_dl, model)
print('Accuracy: %.3f' % acc)
## 60000 10000
## Accuracy: 0.902
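The script above stops at evaluation. The fifth step of the life-cycle, making predictions, can reuse the trained model directly; a minimal sketch continuing from the variables above:

# make a prediction for a single test image
image, label = test_dl.dataset[0]
yhat = model(image.unsqueeze(0))    # add a batch dimension: [1, 1, 28, 28]
digit = argmax(yhat.detach().numpy())
print('Predicted: %d, Actual: %d' % (digit, label))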