In [1]:
import torch
import torchvision
from torchvision.datasets import MNIST
In [2]:
dataset = MNIST(root='data/', download=True)
In [3]:
len(dataset)
Out[3]:
60000
In [4]:
test_dataset = MNIST(root='data/', train=False)
len(test_dataset)
Out[4]:
10000
In [5]:
dataset[0]
Out[5]:
(<PIL.Image.Image image mode=L size=28x28 at 0x27CF16AC7B8>, tensor(5))
In [6]:
import matplotlib.pyplot as plt
%matplotlib inline
In [7]:
image, label = dataset[0]
plt.imshow(image)
print('Label:', label.item())
Label: 5
Notebook Image
In [8]:
import torchvision.transforms as transforms
In [9]:
dataset = MNIST(root='data/', train=True, transform=transforms.ToTensor())
In [10]:
img_tensor, label = dataset[0]
print(img_tensor.shape, label)
torch.Size([1, 28, 28]) tensor(5)
In [11]:
plt.imshow(img_tensor[0], cmap='gray')
Out[11]:
<matplotlib.image.AxesImage at 0x27cfa40c0b8>
Notebook Image
In [12]:
import numpy as np
In [13]:
def split_indices(n, val_pct):
    # Size of the validation set
    n_val = int(val_pct * n)
    # Random permutation of 0..n-1
    idxs = np.random.permutation(n)
    # First n_val indices go to validation, the rest to training
    return idxs[n_val:], idxs[:n_val]
In [14]:
train_indices, val_indices = split_indices(len(dataset), 0.2)
print(len(train_indices), len(val_indices))
print('Sample val indices: ', val_indices[:20])
48000 12000
Sample val indices:  [39075  2155  9511 14072 23196 33362  5232 13933 54589 56494
 15372 33441 26504 51249 38961  8062 10187 50960 53390 32481]
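Note that np.random.permutation produces a different split on every run. A minimal sketch for making the split reproducible (the seed value 42 is an arbitrary choice, not part of the original notebook):
In [ ]:
# Hypothetical addition: seeding NumPy's RNG before splitting makes the
# train/validation split identical across runs.
np.random.seed(42)
train_indices, val_indices = split_indices(len(dataset), 0.2)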
In [15]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
In [16]:
batch_size = 100

train_sampler = SubsetRandomSampler(train_indices)
train_loader = DataLoader(dataset, batch_size, sampler=train_sampler)

val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(dataset, batch_size, sampler=val_sampler)
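As an aside, torch.utils.data also provides random_split, which partitions a dataset into new Dataset objects directly. A sketch of an equivalent split (assuming the same 48,000/12,000 sizes; the train_ds, val_ds and *_loader2 names are illustrative):
In [ ]:
from torch.utils.data import random_split

# Alternative sketch: random_split returns two datasets, so plain DataLoaders
# (with shuffle=True for training) can replace the index-based samplers.
train_ds, val_ds = random_split(dataset, [48000, 12000])
train_loader2 = DataLoader(train_ds, batch_size, shuffle=True)
val_loader2 = DataLoader(val_ds, batch_size)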
In [17]:
import torch.nn as nn
In [18]:
input_size = 28*28
num_classes = 10
model = nn.Linear(input_size, num_classes)
In [19]:
print(model.weight.shape)
model.weight
torch.Size([10, 784])
Out[19]:
Parameter containing:
tensor([[ 0.0039, -0.0308, -0.0300,  ..., -0.0228, -0.0313, -0.0093],
        [ 0.0018,  0.0322, -0.0062,  ..., -0.0177, -0.0239,  0.0003],
        [-0.0025,  0.0306, -0.0282,  ..., -0.0342, -0.0260, -0.0345],
        ...,
        [ 0.0038,  0.0263, -0.0039,  ...,  0.0338,  0.0254,  0.0317],
        [-0.0122,  0.0066, -0.0200,  ..., -0.0217, -0.0346, -0.0328],
        [-0.0181,  0.0356, -0.0042,  ...,  0.0203,  0.0280, -0.0322]],
       requires_grad=True)
In [20]:
print(model.bias.shape)
model.bias
torch.Size([10])
Out[20]:
Parameter containing:
tensor([ 0.0169,  0.0166,  0.0081,  0.0209,  0.0170, -0.0031,  0.0026, -0.0053,
        -0.0246,  0.0029], requires_grad=True)
In [21]:
class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)
        
    def forward(self, xb):
        # Flatten each 1x28x28 image into a 784-dimensional vector
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        return out
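The reshape(-1, 784) call in forward flattens each image, with -1 telling PyTorch to infer the batch dimension. A quick shape check (illustrative; the random tensor stands in for a real batch):
In [ ]:
# A fake batch of 100 single-channel 28x28 images
sample_batch = torch.randn(100, 1, 28, 28)
print(sample_batch.reshape(-1, 784).shape)  # torch.Size([100, 784])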
In [22]:
model = MnistModel()
In [23]:
list(model.parameters())
Out[23]:
[Parameter containing:
 tensor([[-1.0169e-02, -8.9463e-05, -2.0416e-02,  ..., -3.2464e-03,
          -6.9234e-03, -1.4503e-02],
         [-9.2438e-04,  1.5892e-02, -2.0388e-02,  ..., -2.6330e-02,
          -3.4826e-02,  2.8030e-02],
         [-1.3538e-03, -3.0174e-02, -3.3158e-02,  ..., -1.3178e-02,
           3.5687e-02, -2.3300e-02],
         ...,
         [-1.7840e-02, -2.9727e-02, -1.5809e-02,  ..., -5.3660e-03,
           2.8140e-02, -3.5568e-02],
         [-2.6750e-02, -4.2255e-03, -9.5854e-03,  ..., -3.2289e-02,
           3.0917e-02, -2.5180e-02],
         [-2.8535e-02,  1.7256e-02,  3.0004e-02,  ..., -1.4042e-02,
           3.3186e-02,  2.2145e-02]], requires_grad=True),
 Parameter containing:
 tensor([-0.0319,  0.0107, -0.0314,  0.0192, -0.0147, -0.0041,  0.0312,  0.0161,
         -0.0326, -0.0057], requires_grad=True)]
In [29]:
for images, labels in train_loader:
    outputs = model(images)
    print(images.shape)
    print(labels.shape)
    break

print('outputs.shape: ', outputs.shape)
print('Sample outputs: \n', outputs[:2].data)
torch.Size([100, 1, 28, 28])
torch.Size([100])
outputs.shape:  torch.Size([100, 10])
Sample outputs: 
 tensor([[-0.0852,  0.2988,  0.0138,  0.0319,  0.0129, -0.2011, -0.1205, -0.0572,
         -0.0851, -0.1740],
        [-0.0172,  0.1607, -0.0269, -0.4496,  0.3015,  0.1205,  0.2388,  0.2582,
          0.0296,  0.0812]])
In [27]:
import torch.nn.functional as F
In [34]:
probs = F.softmax(outputs, dim=1)
print('Sample probabilities:\n', probs[:2])
print('Sum: ', torch.sum(probs[0]).item())
Sample probabilities:
 tensor([[0.0943, 0.1385, 0.1042, 0.1061, 0.1041, 0.0840, 0.0911, 0.0970, 0.0944,
         0.0863],
        [0.0900, 0.1075, 0.0891, 0.0584, 0.1237, 0.1032, 0.1162, 0.1185, 0.0943,
         0.0992]], grad_fn=<SliceBackward>)
Sum:  0.9999998807907104
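For reference, softmax exponentiates each logit and normalizes by the sum, so each row becomes a probability distribution. A small worked example (the logits below are made up):
In [ ]:
logits = torch.tensor([1.0, 2.0, 3.0])
# Manual softmax: exp(x_i) / sum_j exp(x_j)
manual = torch.exp(logits) / torch.exp(logits).sum()
print(manual)                    # tensor([0.0900, 0.2447, 0.6652])
print(F.softmax(logits, dim=0))  # same values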
In [38]:
max_probs, preds = torch.max(probs, dim=1)
print(preds)
tensor([1, 4, 1, 1, 1, 7, 1, 2, 7, 5, 1, 2, 3, 2, 1, 7, 0, 0, 5, 1, 5, 7, 7, 1,
        1, 1, 6, 1, 1, 1, 5, 1, 5, 7, 0, 0, 1, 2, 5, 1, 0, 1, 5, 1, 1, 2, 0, 4,
        0, 1, 5, 5, 1, 1, 5, 0, 0, 9, 1, 2, 1, 1, 5, 1, 7, 1, 5, 0, 1, 0, 1, 4,
        6, 0, 5, 1, 1, 1, 7, 6, 0, 0, 1, 1, 1, 1, 1, 2, 6, 1, 1, 5, 1, 2, 1, 5,
        0, 1, 1, 1])
In [39]:
labels
Out[39]:
tensor([7, 0, 3, 6, 4, 5, 9, 5, 5, 8, 9, 1, 2, 1, 2, 2, 1, 4, 1, 6, 8, 6, 3, 3,
        4, 6, 5, 9, 7, 5, 4, 0, 6, 4, 1, 4, 2, 0, 0, 3, 9, 6, 2, 6, 4, 8, 7, 5,
        1, 1, 2, 7, 8, 6, 8, 7, 5, 0, 6, 6, 6, 7, 8, 6, 5, 1, 7, 3, 2, 5, 0, 1,
        7, 4, 2, 4, 4, 1, 5, 0, 4, 5, 9, 3, 9, 2, 8, 1, 0, 4, 8, 9, 1, 9, 4, 4,
        7, 9, 4, 2])
In [40]:
def accuracy(l1, l2):
    return torch.sum(l1 == l2).item() / len(l1)
In [41]:
accuracy(preds, labels)
Out[41]:
0.04
In [45]:
max_probs
Out[45]:
tensor([0.1385, 0.1237, 0.1288, 0.1419, 0.1163, 0.1367, 0.1283, 0.1330, 0.1315,
        0.1240, 0.1453, 0.1174, 0.1281, 0.1210, 0.1749, 0.1127, 0.1254, 0.1337,
        0.1259, 0.1324, 0.1358, 0.1258, 0.1175, 0.1303, 0.1697, 0.1512, 0.1307,
        0.1566, 0.1273, 0.1209, 0.1297, 0.1406, 0.1425, 0.1258, 0.1133, 0.1348,
        0.1618, 0.1274, 0.1328, 0.1291, 0.1432, 0.1768, 0.1570, 0.1447, 0.1377,
        0.1389, 0.1318, 0.1226, 0.1086, 0.1187, 0.1418, 0.1225, 0.1246, 0.1464,
        0.1402, 0.1245, 0.1402, 0.1099, 0.1375, 0.1181, 0.1264, 0.1354, 0.1568,
        0.1516, 0.1467, 0.1123, 0.1253, 0.1206, 0.1449, 0.1325, 0.1164, 0.1165,
        0.1236, 0.1604, 0.1454, 0.1312, 0.1375, 0.1104, 0.1328, 0.1375, 0.1325,
        0.1302, 0.1367, 0.1225, 0.1494, 0.1500, 0.1349, 0.1260, 0.1207, 0.1201,
        0.1425, 0.1322, 0.1128, 0.1307, 0.1487, 0.1273, 0.1320, 0.1319, 0.1551,
        0.1301], grad_fn=<MaxBackward0>)
In [46]:
loss_fn = F.cross_entropy
In [47]:
loss = loss_fn(outputs, labels)
loss
Out[47]:
tensor(2.3415, grad_fn=<NllLossBackward>)
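Note that F.cross_entropy is applied to the raw model outputs (logits), not to probs: it combines log_softmax and negative log-likelihood internally. A quick sanity check of that equivalence, reusing outputs and labels from the batch above:
In [ ]:
# Should print the same value as the loss above (up to floating-point noise)
manual_loss = F.nll_loss(F.log_softmax(outputs, dim=1), labels)
print(manual_loss)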
In [48]:
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
In [49]:
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    # Generate predictions and compute the loss
    preds = model(xb)
    loss = loss_func(preds, yb)
    # If an optimizer is provided, perform one gradient-descent step
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Optionally compute a metric (e.g. accuracy)
    metric_result = None
    if metric is not None:
        metric_result = metric(preds, yb)
    return loss.item(), len(xb), metric_result
In [50]:
def evaluate(model, loss_fn, valid_dl, metric=None):
    # No gradients are needed during evaluation
    with torch.no_grad():
        # Compute the loss and metric for each batch
        results = [loss_batch(model, loss_fn, xb, yb, metric=metric) for xb, yb in valid_dl]
        losses, nums, metrics = zip(*results)
        total = np.sum(nums)
        # Weight each batch's loss by its size to get the overall average
        total_loss = np.sum(np.multiply(losses, nums))
        avg_loss = total_loss / total
        avg_metric = None
        if metric is not None:
            tot_metric = np.sum(np.multiply(metrics, nums))
            avg_metric = tot_metric / total
        return avg_loss, total, avg_metric
In [51]:
def accuracy(outputs, labels):
    # The predicted class is the index with the highest output score
    _, preds = torch.max(outputs, dim=1)
    return torch.sum(preds == labels).item() / len(preds)
In [52]:
val_loss, total, val_acc = evaluate(model, loss_fn, val_loader, metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(val_loss, val_acc))
Loss: 2.3646, Accuracy: 0.0755
In [53]:
def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    for epoch in range(epochs):
        # Training phase: one optimization step per batch
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt)
        # Evaluation phase: average loss and metric over the validation set
        val_loss, total, val_metric = evaluate(model, loss_fn, valid_dl, metric)
        
        if metric is None:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss: {:.4f}, {}: {:.4f}'.format(
                epoch+1, epochs, val_loss, metric.__name__, val_metric))
In [54]:
model = MnistModel()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
In [55]:
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
Epoch [1/5], Loss: 1.8698, accuracy: 0.6837
Epoch [2/5], Loss: 1.5724, accuracy: 0.7633
Epoch [3/5], Loss: 1.3634, accuracy: 0.7861
Epoch [4/5], Loss: 1.2135, accuracy: 0.7981
Epoch [5/5], Loss: 1.1025, accuracy: 0.8089
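The validation loss is still falling at epoch 5, so the model hasn't converged. Since model and optimizer persist across cells, training can simply be continued by calling fit again (not run in this transcript, so no outputs are shown; accuracy would typically keep improving for a while before plateauing):
In [ ]:
# Continue training from the current weights for another 5 epochs
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)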
In [56]:
test_dataset = MNIST(root='data/', train=False, transform=transforms.ToTensor())
In [58]:
img, label = test_dataset[0]
plt.imshow(img[0], cmap='gray')
Out[58]:
<matplotlib.image.AxesImage at 0x27cf451cb38>
Notebook Image
In [59]:
print('shape:', img.shape)
print('label:', label.item())
shape: torch.Size([1, 28, 28])
label: 7
In [60]:
def predict_image(img, model):
    # Add a batch dimension, then pick the class with the highest score
    xb = img.unsqueeze(0)
    yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()
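The unsqueeze(0) call is needed because the model expects a batch of images rather than a single image. A quick illustration of the shape change, using the img loaded above:
In [ ]:
print(img.shape)               # torch.Size([1, 28, 28])
print(img.unsqueeze(0).shape)  # torch.Size([1, 1, 28, 28]) - a batch of one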
In [61]:
plt.imshow(img[0], cmap='gray')
print('Label:', label.item(), '\nPredicted:', predict_image(img, model))
Label: 7 
Predicted: 7
Notebook Image
In [62]:
img, label = test_dataset[10]
plt.imshow(img[0], cmap='gray')
print('Label:', label.item(), '\nPredicted:', predict_image(img, model))
Label: 0 
Predicted: 0
Notebook Image
In [63]:
img, label = test_dataset[193]
plt.imshow(img[0], cmap='gray')
print('Label:', label.item(), '\nPredicted:', predict_image(img, model))
Label: 9 
Predicted: 3
Notebook Image
In [64]:
test_loader = DataLoader(test_dataset, batch_size=200)
test_loss, total, test_acc = evaluate(model, loss_fn, test_loader, metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(test_loss, test_acc))
Loss: 1.0794, Accuracy: 0.8229
In [65]:
torch.save(model.state_dict(), 'mnist-logistic.pth')
In [66]:
model.state_dict()
Out[66]:
OrderedDict([('linear.weight',
              tensor([[ 0.0073, -0.0248, -0.0119,  ..., -0.0166,  0.0080,  0.0151],
                      [ 0.0034,  0.0295,  0.0020,  ..., -0.0173,  0.0283, -0.0110],
                      [-0.0179,  0.0334,  0.0029,  ...,  0.0192, -0.0087,  0.0183],
                      ...,
                      [-0.0018, -0.0126,  0.0109,  ..., -0.0222,  0.0105, -0.0064],
                      [ 0.0161, -0.0242, -0.0271,  ..., -0.0315,  0.0214, -0.0260],
                      [-0.0137, -0.0156, -0.0203,  ..., -0.0351,  0.0050, -0.0101]])),
             ('linear.bias',
              tensor([-0.0444,  0.0762, -0.0051,  0.0078,  0.0011,  0.0332, -0.0266,  0.0334,
                      -0.0505, -0.0293]))])
In [67]:
model2 = MnistModel()
model2.load_state_dict(torch.load('mnist-logistic.pth'))
model2.state_dict()
Out[67]:
OrderedDict([('linear.weight',
              tensor([[ 0.0073, -0.0248, -0.0119,  ..., -0.0166,  0.0080,  0.0151],
                      [ 0.0034,  0.0295,  0.0020,  ..., -0.0173,  0.0283, -0.0110],
                      [-0.0179,  0.0334,  0.0029,  ...,  0.0192, -0.0087,  0.0183],
                      ...,
                      [-0.0018, -0.0126,  0.0109,  ..., -0.0222,  0.0105, -0.0064],
                      [ 0.0161, -0.0242, -0.0271,  ..., -0.0315,  0.0214, -0.0260],
                      [-0.0137, -0.0156, -0.0203,  ..., -0.0351,  0.0050, -0.0101]])),
             ('linear.bias',
              tensor([-0.0444,  0.0762, -0.0051,  0.0078,  0.0011,  0.0332, -0.0266,  0.0334,
                      -0.0505, -0.0293]))])
In [68]:
test_loss, total, test_acc = evaluate(model2, loss_fn, test_loader, metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(test_loss, test_acc))
Loss: 1.0794, Accuracy: 0.8229
In [ ]:
import jovian
jovian.commit()
[jovian] Saving notebook..