Learn data science and machine learning by building real-world projects on Jovian

Vadapav, Burger Classification

Datasets from Kaggle can be downloaded using the opendatsets

In [2]:
!pip install opendatasets --upgrade --quiet
In [3]:
import opendatasets as od
In [4]:
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds Your Kaggle username: avinashck Your Kaggle Key: ··········
2%|▏ | 5.00M/295M [00:00<00:07, 38.7MB/s]
Downloading vadapav.zip to ./vadapav
100%|██████████| 295M/295M [00:03<00:00, 83.2MB/s]
In [5]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, random_split, DataLoader
from PIL import ImageFile
import torchvision.models as models
import matplotlib.pyplot as plt
import torchvision.transforms as T
import torch.nn.functional as F
import torch.nn as nn
from torchvision.utils import make_grid
from torchvision import  datasets
%matplotlib inline

Exploring the Data

Data is jpg images of vadapav and burgers downloaded into label folders. The objective is to build a classifier which to classify between burgers and vadapavs

In [6]:
DATA_DIR = './vadapav/VadaPav'
In [7]:
In [84]:
def target_to_oh(target):
    NUM_CLASS = 2  # hard code here, can do partial
    one_hot = torch.eye(NUM_CLASS)[target]
    return one_hot

def decode_target(target, threshold=0.5):
    result = []
    for i, x in enumerate(target):
        if (x >= threshold):
            return i
In [76]:
transform =T.Compose([ T.Resize(image_size),T.CenterCrop(image_size),T.ToTensor()])

dataset=datasets.ImageFolder(DATA_DIR,target_transform = target_to_oh ,transform= transform)

Let's check how many samples the dataset contains

In [77]:
print("Dataset size :",len(dataset) )
Dataset size : 1161
In [78]:
print(" Classes :",dataset.classes)
Classes : ['burger', 'vada pav']

Let's take a look at a sample image from the dataset.

In [87]:
def show_sample(img, target):
    plt.imshow(img.permute(1, 2, 0))
    print('Labels:', dataset.classes[decode_target(target)])
In [89]:
Labels: burger
Notebook Image

Training & Validation sets

As a good practice, we should split the data into training,validation and testing datasets. Let's fix a seed for PyTorch (to ensure we always get the same validation set), and create the datasets using random_split.

In [120]:
<torch._C.Generator at 0x7fa57d109708>

We'll be using 19% of data for validation set and 1% for Test set

In [123]:
val_pct , test_pct = 0.19, 0.01
val_size = int(val_pct * len(dataset))
test_size = int(test_pct*len(dataset))
train_size = len(dataset) - val_size - test_size
In [124]:
train_ds, val_ds ,test_ds= random_split(dataset, [train_size, val_size,test_size])
len(train_ds), len(val_ds),len(test_ds)
(930, 220, 11)

Data Loaders

In [125]:
batch_size = 32
In [126]:
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
val_dl = DataLoader(val_ds, batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size, shuffle=True)
In [127]:
def show_batch(dl):
    for images, labels in dl:
        fig, ax = plt.subplots(figsize=(12, 10))
        ax.set_xticks([]); ax.set_yticks([])
        data = images
        ax.imshow(make_grid(data, nrow=8).permute(1, 2, 0))
In [128]:
Notebook Image

About Data

The data seems to be very diverse. But since the dataset is small. It will be difficult for any model to learn the features


In [129]:
class BunClassificationBase(nn.Module):
    def training_step(self, batch):
        images, targets = batch
        out = self(images)         
        loss = F.binary_cross_entropy(out, targets.float())      
        return loss
    def validation_step(self, batch):
        images, targets = batch 
        out = self(images)                                   # Generate predictions
        loss = F.binary_cross_entropy(out, targets.float())  # Calculate loss
        score = F_score(out, targets)
        return {'val_loss': loss.detach(), 'val_score': score.detach() }
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_scores = [x['val_score'] for x in outputs]
        epoch_score = torch.stack(batch_scores).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_score': epoch_score.item()}
    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_score: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_score']))
In [130]:
def F_score(output, targets, threshold=0.5):    
    pred = output
    pred[output<threshold]= 0
    res= (pred==targets).float()

    return torch.mean(res)
In [131]:
class BunCnnModel(BunClassificationBase):
    def __init__(self):
        self.network = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),  

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),    

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),


            nn.Linear(256, 128),
            nn.Linear(128, 64),
            nn.Linear(64, 2),
    def forward(self, xb):
        return self.network(xb)
In [132]:
model = BunCnnModel()
In [133]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
        return torch.device('cpu')
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)
In [134]:
device = get_default_device()
In [150]:
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
test_dl = DeviceDataLoader(test_dl, device)
to_device(model, device);
In [136]:
def try_batch(dl):
    for images, labels in dl:
        print('images.shape:', images.shape)
        out = model(images)
        print('out.shape:', out.shape)
        print('out[0]:', out[0])

images.shape: torch.Size([32, 3, 224, 224]) out.shape: torch.Size([32, 2]) out[0]: tensor([0.5040, 0.4686], device='cuda:0', grad_fn=<SelectBackward>)

Training the model

In [137]:
from tqdm.notebook import tqdm
In [138]:
def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        train_losses = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
    return history
In [139]:
model = to_device(BunCnnModel(), device)
In [140]:
evaluate(model, val_dl)
{'val_loss': 0.7008935809135437, 'val_score': 0.5}
In [146]:
num_epochs = 10
opt_func = torch.optim.Adam
lr = 1e-3
In [147]:
history = fit(num_epochs, lr, model, train_dl, val_dl, opt_func)
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [0], train_loss: 0.1545, val_loss: 1.0399, val_score: 0.6282
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [1], train_loss: 0.1366, val_loss: 0.5010, val_score: 0.7774
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [2], train_loss: 0.0948, val_loss: 0.2343, val_score: 0.9318
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [3], train_loss: 0.1458, val_loss: 0.3846, val_score: 0.8412
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [4], train_loss: 0.0969, val_loss: 0.3239, val_score: 0.8986
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [5], train_loss: 0.0887, val_loss: 1.0106, val_score: 0.7800
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [6], train_loss: 0.1118, val_loss: 0.6927, val_score: 0.7519
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [7], train_loss: 0.2034, val_loss: 0.2550, val_score: 0.9152
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [8], train_loss: 0.1500, val_loss: 0.2261, val_score: 0.9168
HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))
Epoch [9], train_loss: 0.1009, val_loss: 0.2032, val_score: 0.9318 CPU times: user 4min 36s, sys: 4.64 s, total: 4min 41s Wall time: 4min 41s

Making predictions on individual images

To start with, let's create a helper function to make a prediction on a single image.

In [148]:
def predict_single(image):
    xb = image.unsqueeze(0)
    xb = to_device(xb, device)
    preds = model(xb)
    prediction = preds[0]
    print("Prediction: ", prediction)
    show_sample(image, prediction)
In [160]:
Prediction: tensor([0.0159, 0.9836], device='cuda:0', grad_fn=<SelectBackward>) Labels: vada pav