In [4]:
import torch
import numpy as np
import torchvision
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
In [6]:
dataset = MNIST(root='data/',
                download=True,
                transform=ToTensor())
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz
Extracting data/MNIST/raw/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing... Done!
In [7]:
def split_indices(n, val_pct):
    # Size of the validation set
    n_val = int(val_pct*n)
    # Random permutation of 0..n-1
    idxs = np.random.permutation(n)
    # First n_val indices go to validation, the rest to training
    return idxs[n_val:], idxs[:n_val]
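Note that np.random.permutation is unseeded here, so the split changes on every run. A minimal sketch of a reproducible split (the seed value is an assumption, not part of the original notebook):

np.random.seed(42)  # hypothetical seed; any fixed value makes the split repeatable
train_indices, val_indices = split_indices(len(dataset), val_pct=0.2)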
In [8]:
train_indices, val_indices = split_indices(len(dataset), val_pct=0.2)
In [9]:
print(len(train_indices), len(val_indices))
48000 12000
In [12]:
batch_size=100

train_sampler = SubsetRandomSampler(train_indices)
train_dl = DataLoader(dataset,
                      batch_size,
                      sampler=train_sampler)

valid_sampler = SubsetRandomSampler(val_indices)
valid_dl = DataLoader(dataset,
                      batch_size,
                      sampler=valid_sampler)
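A quick sanity check (a sketch, not an original cell) on what one batch looks like; for MNIST with batch_size=100 the images should come out as (100, 1, 28, 28):

for images, labels in train_dl:
    print(images.shape)  # torch.Size([100, 1, 28, 28])
    print(labels.shape)  # torch.Size([100])
    break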
In [13]:
import torch.nn.functional as F
import torch.nn as nn
In [15]:
class MnistModel(nn.Module):
    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        # Hidden layer
        self.linear1 = nn.Linear(in_size, hidden_size)
        # Output layer
        self.linear2 = nn.Linear(hidden_size, out_size)

    def forward(self, xb):
        # Flatten the image tensors from (batch, 1, 28, 28) to (batch, 784)
        xb = xb.view(xb.size(0), -1)
        # Intermediate outputs from the hidden layer
        out = self.linear1(xb)
        # Apply the ReLU activation
        out = F.relu(out)
        # Class scores from the output layer
        out = self.linear2(out)
        return out
In [16]:
input_size = 784
num_classes = 10

model = MnistModel(input_size, hidden_size=32, out_size=num_classes)
In [17]:
for t in model.parameters():
    print(t.shape)
torch.Size([32, 784])
torch.Size([32])
torch.Size([10, 32])
torch.Size([10])
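These shapes account for all trainable parameters; a quick check of the total (a sketch, not an original cell):

sum(p.numel() for p in model.parameters())  # 784*32 + 32 + 32*10 + 10 = 25450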
In [19]:
for images, labels in train_dl:
    outputs = model(images)
    loss = F.cross_entropy(outputs, labels)
    print('Loss:', loss.item())
    break
    
print(outputs.shape)
Loss: 2.3213088512420654
torch.Size([100, 10])
In [20]:
torch.cuda.is_available()
Out[20]:
True
In [21]:
def get_default_device():
    # Pick GPU if available, else CPU
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
In [22]:
device = get_default_device()
In [23]:
def to_device(data, device):
    # Move tensor(s) to the chosen device, recursing into lists/tuples
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)
In [24]:
class DeviceDataLoader():
    """Wrap a dataloader to move each batch to a device."""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        # Yield each batch after moving it to the device
        for b in self.dl:
            yield to_device(b, self.device)

    def __len__(self):
        # Number of batches
        return len(self.dl)
In [25]:
train_dl = DeviceDataLoader(train_dl, device)
valid_dl = DeviceDataLoader(valid_dl, device)
In [27]:
for xb, yb in valid_dl:
    print(xb.device)
    print(yb)
    print(xb)
    break
cuda:0
tensor([3, 2, 0, 3, 7, 8, 1, 7, 6, 3, 9, 5, 6, 8, 5, 6, 7, 1, 3, 2, 1, 4, 6, 1,
        7, 1, 5, 3, 9, 3, 8, 0, 4, 3, 1, 3, 0, 4, 1, 0, 6, 2, 0, 5, 2, 9, 5, 2,
        1, 1, 5, 3, 0, 9, 3, 7, 3, 3, 1, 7, 1, 2, 9, 6, 4, 8, 5, 9, 2, 2, 0, 8,
        3, 6, 9, 3, 3, 9, 3, 6, 9, 7, 4, 1, 6, 4, 7, 1, 9, 3, 3, 2, 9, 7, 1, 6,
        4, 0, 9, 1], device='cuda:0')
tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.]]]], device='cuda:0')
In [28]:
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    # Generate predictions and calculate the loss
    preds = model(xb)
    loss = loss_func(preds, yb)

    if opt is not None:
        # Compute gradients, update parameters, and reset gradients
        loss.backward()
        opt.step()
        opt.zero_grad()

    # Optionally compute a metric (e.g. accuracy)
    metric_result = None
    if metric is not None:
        metric_result = metric(preds, yb)

    return loss.item(), len(xb), metric_result
In [69]:
def evaluate(model, loss_fn, valid_dl, metric=None):
    # No gradients needed for evaluation
    with torch.no_grad():
        # Compute loss and metric for each batch of the validation set
        results = [loss_batch(model, loss_fn, xb, yb, metric=metric)
                   for xb, yb in valid_dl]
        losses, nums, metrics = zip(*results)
        # Weight each batch by its size when averaging
        total = np.sum(nums)
        avg_loss = np.sum(np.multiply(losses, nums)) / total
        avg_metric = None
        if metric is not None:
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
        return avg_loss, total, avg_metric
In [70]:
def fit(epochs, lr, model, loss_fn, train_dl, valid_dl, metric=None, opt_fn=None):
    losses, metrics = [], []

    # Instantiate the optimizer (default: SGD); the original cell ignored
    # opt_fn and always constructed SGD, which is fixed here
    if opt_fn is None:
        opt_fn = torch.optim.SGD
    opt = opt_fn(model.parameters(), lr=lr)

    for epoch in range(epochs):
        # Training phase
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt)

        # Evaluation phase
        result = evaluate(model, loss_fn, valid_dl, metric)
        val_loss, total, val_metric = result

        # Record the validation loss and metric
        losses.append(val_loss)
        metrics.append(val_metric)

        if metric is None:
            print('Epoch [{}/{}], Loss:{:.4f}'.format(epoch+1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss:{:.4f}, {}:{:.4f}'.format(
                epoch+1, epochs, val_loss, metric.__name__, val_metric))
    return losses, metrics
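With opt_fn wired through, any torch.optim optimizer can be swapped in. A hedged sketch (Adam and the learning rate here are illustrative, not from the original run):

losses_adam, metrics_adam = fit(5, 1e-3, model, F.cross_entropy,
                                train_dl, valid_dl, accuracy,
                                opt_fn=torch.optim.Adam)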
In [71]:
def accuracy(outputs, labels):
    # The index of the max logit in each row is the predicted class
    _, preds = torch.max(outputs, dim=1)
    return torch.sum(preds == labels).item() / len(preds)
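A tiny worked example (a sketch, not an original cell) of how accuracy behaves:

logits = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
targets = torch.tensor([1, 0, 0])
accuracy(logits, targets)  # argmax picks [1, 0, 1] -> 2 of 3 correct -> 0.6667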
In [76]:
model = MnistModel(input_size, hidden_size=32, out_size=num_classes)
to_device(model, device)
Out[76]:
MnistModel(
  (linear1): Linear(in_features=784, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=10, bias=True)
)
In [83]:
val_loss, total, val_acc = evaluate(model, F.cross_entropy, valid_dl, metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(val_loss, val_acc))
Loss: 0.1207, Accuracy: 0.9635
In [48]:
losses1, metric1 = fit(5, 0.5, model, F.cross_entropy, train_dl, valid_dl, accuracy)
Epoch [1/5], Loss:0.2242, accuracy:0.9329
Epoch [2/5], Loss:0.1817, accuracy:0.9445
Epoch [3/5], Loss:0.1530, accuracy:0.9552
Epoch [4/5], Loss:0.1459, accuracy:0.9551
Epoch [5/5], Loss:0.1448, accuracy:0.9552
In [78]:
losses2, metric2 = fit(20, 0.1, model, F.cross_entropy, train_dl, valid_dl, accuracy)
Epoch [1/20], Loss:0.3380, accuracy:0.9001
Epoch [2/20], Loss:0.3000, accuracy:0.9121
Epoch [3/20], Loss:0.2547, accuracy:0.9255
Epoch [4/20], Loss:0.2316, accuracy:0.9326
Epoch [5/20], Loss:0.2078, accuracy:0.9393
Epoch [6/20], Loss:0.1888, accuracy:0.9433
Epoch [7/20], Loss:0.1801, accuracy:0.9469
Epoch [8/20], Loss:0.1694, accuracy:0.9488
Epoch [9/20], Loss:0.1572, accuracy:0.9526
Epoch [10/20], Loss:0.1519, accuracy:0.9539
Epoch [11/20], Loss:0.1444, accuracy:0.9565
Epoch [12/20], Loss:0.1419, accuracy:0.9563
Epoch [13/20], Loss:0.1363, accuracy:0.9582
Epoch [14/20], Loss:0.1359, accuracy:0.9580
Epoch [15/20], Loss:0.1315, accuracy:0.9589
Epoch [16/20], Loss:0.1322, accuracy:0.9590
Epoch [17/20], Loss:0.1284, accuracy:0.9610
Epoch [18/20], Loss:0.1266, accuracy:0.9611
Epoch [19/20], Loss:0.1234, accuracy:0.9627
Epoch [20/20], Loss:0.1207, accuracy:0.9635
In [80]:
import matplotlib.pyplot as plt

accuracies = [val_acc] + metric2
plt.plot(accuracies, '-o')
Out[80]:
[<matplotlib.lines.Line2D at 0x7f323eccf630>]
[Notebook image: validation accuracy vs. epoch]
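The same idea applies to the recorded losses; a sketch (the axis labels are additions, not from the original notebook) for plotting the validation loss curve:

plt.plot(losses2, '-x')
plt.xlabel('epoch')
plt.ylabel('validation loss')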
In [84]:
import jovian
In [ ]:
jovian.commit()
[jovian] Saving notebook..