Learn data science and machine learning by building real-world projects on Jovian
Life expectancy prediction using linear regression (minimal)

Using the WHO Life Expectancy Dataset

In [2]:
# Uncomment and run the commands below if imports fail
# !conda install numpy pytorch torchvision cpuonly -c pytorch -y
# !pip install matplotlib --upgrade --quiet
!pip install jovian --upgrade --quiet
In [4]:
# Imports
import torch
import jovian
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
In [5]:
# Hyperparameters
batch_size = 64
learning_rate = 1e-1


# Other constants
DATASET_URL = "https://github.com/THC-QA/who-life-expectancy-dataset/raw/master/Life%20Expectancy%20Data.csv"
# Keep the '%20' escapes: download_url saves the file under the URL's basename.
DATA_FILENAME = "Life%20Expectancy%20Data.csv"
# NOTE: the trailing space matches the column header in the CSV exactly.
TARGET_COLUMN = 'Life expectancy '
input_size = 19
output_size = 1

Dataset & Data loaders

In [6]:
# Download the data into the current directory; the file is saved under the
# URL's basename, which is why DATA_FILENAME keeps its '%20' escapes.
download_url(DATASET_URL, '.')
HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))
In [7]:
# Load the WHO life-expectancy CSV and preview the first rows
dataframe = pd.read_csv(DATA_FILENAME)
dataframe.head()
Out[7]:
In [5]:
# Identify the categorical (object-dtype) columns so they can be
# integer-encoded before modelling. (`.tolist()` already returns a list,
# so no extra `list()` wrapper or intermediate frame is needed.)
categorical_cols = dataframe.select_dtypes(include='object').columns.tolist()
categorical_cols
Out[5]:
['Country', 'Status']
In [6]:
# Convert from Pandas dataframe to numpy arrays: drop rows with missing
# values, integer-encode categoricals, standardize inputs, split off target.
dataframe1 = dataframe.dropna().copy()  # dropna() already returns a new frame
for col in categorical_cols:
    # Integer-encode each categorical column (e.g. 'Country', 'Status')
    dataframe1[col] = dataframe1[col].astype('category').cat.codes
# 'Country' and 'Year' are identifiers, not features; use the declared
# TARGET_COLUMN constant instead of repeating the literal column name.
inputs = dataframe1.drop(['Country', 'Year', TARGET_COLUMN], axis=1)
mean = inputs.mean()
dev = inputs.std()
inputs = (inputs - mean) / dev  # z-score standardization per feature
inputs = inputs.values
targets = dataframe1[[TARGET_COLUMN]].values
inputs.shape, targets.shape
Out[6]:
((1649, 19), (1649, 1))
In [7]:
# Number of usable rows remaining after dropping missing values
num_rows = len(dataframe1)
print(num_rows)
1649
In [8]:
# Convert to a PyTorch dataset and split it into training/validation parts
dataset = TensorDataset(
    torch.tensor(inputs, dtype=torch.float32),
    torch.tensor(targets, dtype=torch.float32),
)
val_percent = 0.125 # between 0.1 and 0.2
val_size = int(num_rows * val_percent)
train_size = num_rows - val_size
print(val_size, train_size)


# random_split partitions the dataset into two disjoint random subsets
train_ds, val_ds = random_split(dataset, [train_size, val_size])
print(len(train_ds), len(val_ds))

train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size * 2)  # bigger batches are fine: no gradients kept

206 1443 1443 206

Model

In [9]:
class HousingModel(nn.Module):
    """Linear regression model for the life-expectancy prediction task.

    NOTE(review): the class name says 'Housing' but this notebook predicts
    life expectancy — the name is kept so existing cells keep working.
    """

    def __init__(self, in_features=19, out_features=1):
        # Defaults match the notebook-level input_size/output_size constants,
        # so HousingModel() behaves exactly as before; the sizes are now
        # parameters so the model generalizes to other feature counts.
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Forward pass: a single affine transformation of the inputs."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the mean-absolute-error loss for one training batch."""
        inputs, targets = batch
        out = self(inputs)               # generate predictions
        return F.l1_loss(out, targets)   # MAE loss

    def validation_step(self, batch):
        """Return the detached validation loss for one batch."""
        inputs, targets = batch
        out = self(inputs)
        loss = F.l1_loss(out, targets)
        # detach so collected losses do not keep computation graphs alive
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into one epoch metric."""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # combine losses
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result):
        """Print the epoch number and its validation loss."""
        print("Epoch [{}], val_loss: {:.4f}".format(epoch, result['val_loss']))
In [10]:
# Instantiate the (randomly initialized) linear regression model
model = HousingModel()
In [11]:
# Inspect the initial (untrained) weights and bias of the linear layer
list(model.parameters())
Out[11]:
[Parameter containing:
 tensor([[ 0.1372,  0.1546,  0.0894, -0.2060, -0.0333,  0.0300, -0.0682, -0.1110,
           0.0606, -0.0631, -0.0886,  0.0608, -0.1294,  0.0679,  0.0947, -0.1000,
           0.0027, -0.1260, -0.1222]], requires_grad=True),
 Parameter containing:
 tensor([-0.0796], requires_grad=True)]

Training

In [12]:
def evaluate(model, val_loader):
    """Run a full validation pass and return the averaged metrics.

    Gradients are never needed during evaluation, so autograd is disabled
    to avoid building computation graphs and wasting memory.
    """
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train `model` for `epochs` epochs with optimizer `opt_func`.

    Returns a list with one validation-metrics dict per epoch.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history
In [13]:
# Baseline validation loss before any training (random weights)
result = evaluate(model, val_loader)
result
Out[13]:
{'val_loss': 68.93002319335938}
In [14]:
# NOTE(review): learning_rate was already set to 1e-1 in the hyperparameters
# cell; re-assigning here is redundant but harmless.
learning_rate = 1e-1
epochs = 100
In [15]:
history = fit(epochs, learning_rate, model, train_loader, val_loader)
Epoch [0], val_loss: 66.6378 Epoch [1], val_loss: 64.3458 Epoch [2], val_loss: 62.0519 Epoch [3], val_loss: 59.7574 Epoch [4], val_loss: 57.4652 Epoch [5], val_loss: 55.1739 Epoch [6], val_loss: 52.8822 Epoch [7], val_loss: 50.5892 Epoch [8], val_loss: 48.2964 Epoch [9], val_loss: 46.0054 Epoch [10], val_loss: 43.7151 Epoch [11], val_loss: 41.4214 Epoch [12], val_loss: 39.1290 Epoch [13], val_loss: 36.8376 Epoch [14], val_loss: 34.5479 Epoch [15], val_loss: 32.2590 Epoch [16], val_loss: 29.9645 Epoch [17], val_loss: 27.6729 Epoch [18], val_loss: 25.3823 Epoch [19], val_loss: 23.0881 Epoch [20], val_loss: 20.8454 Epoch [21], val_loss: 18.6300 Epoch [22], val_loss: 16.4663 Epoch [23], val_loss: 14.3507 Epoch [24], val_loss: 12.2578 Epoch [25], val_loss: 10.1903 Epoch [26], val_loss: 8.1791 Epoch [27], val_loss: 6.3540 Epoch [28], val_loss: 4.8748 Epoch [29], val_loss: 3.8728 Epoch [30], val_loss: 3.3374 Epoch [31], val_loss: 3.0738 Epoch [32], val_loss: 2.9778 Epoch [33], val_loss: 2.9714 Epoch [34], val_loss: 2.9484 Epoch [35], val_loss: 2.9370 Epoch [36], val_loss: 2.9415 Epoch [37], val_loss: 2.9509 Epoch [38], val_loss: 2.9502 Epoch [39], val_loss: 2.9599 Epoch [40], val_loss: 2.9714 Epoch [41], val_loss: 2.9661 Epoch [42], val_loss: 2.9548 Epoch [43], val_loss: 2.9435 Epoch [44], val_loss: 2.9539 Epoch [45], val_loss: 2.9487 Epoch [46], val_loss: 2.9555 Epoch [47], val_loss: 2.9527 Epoch [48], val_loss: 2.9536 Epoch [49], val_loss: 2.9647 Epoch [50], val_loss: 2.9564 Epoch [51], val_loss: 2.9604 Epoch [52], val_loss: 2.9551 Epoch [53], val_loss: 2.9496 Epoch [54], val_loss: 2.9523 Epoch [55], val_loss: 2.9822 Epoch [56], val_loss: 2.9674 Epoch [57], val_loss: 2.9624 Epoch [58], val_loss: 2.9700 Epoch [59], val_loss: 2.9584 Epoch [60], val_loss: 2.9674 Epoch [61], val_loss: 2.9681 Epoch [62], val_loss: 2.9723 Epoch [63], val_loss: 2.9674 Epoch [64], val_loss: 2.9656 Epoch [65], val_loss: 2.9543 Epoch [66], val_loss: 2.9551 Epoch [67], val_loss: 2.9466 Epoch [68], 
val_loss: 2.9604 Epoch [69], val_loss: 2.9711 Epoch [70], val_loss: 2.9706 Epoch [71], val_loss: 2.9571 Epoch [72], val_loss: 2.9759 Epoch [73], val_loss: 2.9610 Epoch [74], val_loss: 2.9665 Epoch [75], val_loss: 2.9583 Epoch [76], val_loss: 2.9568 Epoch [77], val_loss: 2.9738 Epoch [78], val_loss: 2.9609 Epoch [79], val_loss: 2.9647 Epoch [80], val_loss: 2.9484 Epoch [81], val_loss: 2.9607 Epoch [82], val_loss: 2.9550 Epoch [83], val_loss: 2.9603 Epoch [84], val_loss: 2.9551 Epoch [85], val_loss: 2.9710 Epoch [86], val_loss: 2.9604 Epoch [87], val_loss: 2.9563 Epoch [88], val_loss: 2.9650 Epoch [89], val_loss: 2.9673 Epoch [90], val_loss: 2.9640 Epoch [91], val_loss: 2.9609 Epoch [92], val_loss: 2.9606 Epoch [93], val_loss: 2.9783 Epoch [94], val_loss: 2.9717 Epoch [95], val_loss: 2.9548 Epoch [96], val_loss: 2.9686 Epoch [97], val_loss: 2.9625 Epoch [98], val_loss: 2.9816 Epoch [99], val_loss: 2.9996
In [16]:
# Plot validation loss across epochs, prepending the pre-training baseline
losses = [entry['val_loss'] for entry in [result] + history]
fig, ax = plt.subplots()
ax.plot(losses, '-x')
ax.set_xlabel('epoch')
ax.set_ylabel('val_loss')
ax.set_title('val_loss vs. epochs');
Notebook Image

Prediction

In [17]:
def predict_single(x, model):
    """Predict the target for a single (unbatched) feature vector.

    Adds a batch dimension before calling the model, then unwraps the
    scalar prediction.
    """
    xb = x.unsqueeze(0)
    # BUG FIX: the original called model(x), ignoring the batched xb it
    # had just built; pass the batched tensor as intended.
    return model(xb).item()
In [18]:
# Sanity-check a single prediction against its true target value
x, target = val_ds[10]
pred = predict_single(x, model)
print("Input: ", x)
print("Target: ", target.item())
print("Prediction:", pred)
Input: tensor([-2.4105, -0.7439, -0.2611, 1.2402, 1.3445, 0.5773, -0.2066, 1.0566, -0.2592, 0.3312, 0.9672, 0.3635, -0.3123, 1.2157, -0.2077, -0.9025, -0.9042, 1.4608, 2.9978]) Target: 79.9000015258789 Prediction: 83.34243774414062

Save and upload

In [19]:
# Final validation loss (from the last training epoch)
loss_final = history[-1]['val_loss']
loss_final
Out[19]:
2.9996213912963867
In [20]:
# NOTE(review): despite the name, this is the normalized input-feature
# vector of the sample inspected above — not the model's weights. The name
# is kept because the jovian.log_metrics cell below references it.
model_list=x.tolist()
model_list
Out[20]:
[-2.4105029106140137,
 -0.7438749670982361,
 -0.26109886169433594,
 1.240151286125183,
 1.34452486038208,
 0.577328085899353,
 -0.20657695829868317,
 1.056551218032837,
 -0.259181410074234,
 0.33119067549705505,
 0.9672476649284363,
 0.36353325843811035,
 -0.3122938871383667,
 1.21574866771698,
 -0.2076941728591919,
 -0.9024637341499329,
 -0.9041646122932434,
 1.4607603549957275,
 2.9978344440460205]
In [21]:
# True life expectancy of the inspected validation sample, as a float
tar_end = target.item()
tar_end
Out[21]:
79.9000015258789
In [22]:
# Percentage "accuracy" of the single prediction relative to the target.
# BUG FIX: use the absolute error — the original signed formula reported
# more than 100% accuracy whenever the model under-predicted.
acc = 100 - ((abs(pred - tar_end) / tar_end) * 100)
acc
Out[22]:
95.69156927093832
In [23]:
# Save the trained weights and log hyperparameters/metrics to Jovian.
# NOTE(review): prompts for a Jovian API key when run interactively.
torch.save(model.state_dict(), 'extension2-linear-minimal.pth')
jovian.log_hyperparams(bs=batch_size, 
                       lrs=learning_rate, 
                       epochs=epochs)
jovian.log_metrics(test_loss=loss_final, model=model_list, pred_acc=acc)
[jovian] Please enter your API key ( from https://jovian.ml/ ): API KEY: ········ [jovian] Hyperparams logged. [jovian] Metrics logged.
In [ ]:
# Commit the notebook to Jovian, attaching the saved weights file.
# BUG FIX: the attached output must match the file torch.save actually
# wrote ('extension2-linear-minimal.pth'); the original omitted '.pth'.
jovian.commit(project='extension2-linear-minimal', environment=None, outputs=['extension2-linear-minimal.pth'])
jovian.commit(project='extension2-linear-minimal', environment=None, outputs=['extension2-linear-minimal.pth']) # Kaggle commit fails sometimes, so try again..
[jovian] Attempting to save notebook.. [jovian] Detected Kaggle notebook... [jovian] Please enter your API key ( from https://jovian.ml/ ): API KEY: ········ [jovian] Uploading notebook to https://jovian.ml/thc-qa/extension2-linear-minimal
[jovian] Attempting to save notebook.. [jovian] Detected Kaggle notebook... [jovian] Uploading notebook to https://jovian.ml/thc-qa/extension2-linear-minimal
In [ ]: