Jovian
Sign In

House price prediction using linear regression (minimal)

Using the Boston Housing dataset: https://www.kaggle.com/c/boston-housing/

In [1]:
# Uncomment and run the commands below if imports fail
# !conda install numpy pytorch torchvision cpuonly -c pytorch -y
# !pip install matplotlib --upgrade --quiet
!pip install jovian --upgrade --quiet
WARNING: You are using pip version 20.1; however, version 20.1.1 is available. You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.
In [122]:
# Imports
import torch
import jovian
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
In [123]:
# Hyperparameters
batch_size = 64          # samples per training batch
learning_rate = 5e-7     # step size handed to the optimizer in fit()

# Other constants
DATASET_URL = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
DATA_FILENAME = "BostonHousing.csv"
TARGET_COLUMN = 'medv'

# Dimensions of the single linear layer built by HousingModel
input_size = 13
output_size = 1

Dataset & Data loaders

In [124]:
# Fetch the CSV into the current directory, then read it into a dataframe
download_url(url=DATASET_URL, root='.')
dataframe = pd.read_csv(DATA_FILENAME)
dataframe.head()
Using downloaded and verified file: ./BostonHousing.csv
Out[124]:
In [125]:
# Convert from Pandas dataframe to numpy arrays.
# The target column is referenced through TARGET_COLUMN so the feature/target
# split stays in sync with the constants cell instead of repeating the name.
inputs = dataframe.drop(columns=[TARGET_COLUMN]).values
# Double brackets keep the targets 2-D (one column) rather than a flat vector
targets = dataframe[[TARGET_COLUMN]].values
inputs.shape, targets.shape
Out[125]:
((506, 13), (506, 1))
In [126]:
# Convert to PyTorch dataset
dataset = TensorDataset(torch.tensor(inputs, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32))

# Hold out a fixed number of rows for validation and derive the training size
# from the dataset itself: random_split requires the lengths to sum to
# len(dataset), so hard-coding both numbers breaks if the row count changes.
# NOTE: the split is not seeded, so its membership differs between runs.
val_size = 100
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles; validation uses batches twice as large
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size*2)

Model

In [127]:
class HousingModel(nn.Module):
    """Linear regression model bundled with its training/validation helpers."""

    def __init__(self):
        super().__init__()
        # One affine layer sized by the module-level input_size / output_size
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Run a batch of inputs through the linear layer."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the MSE loss for one (inputs, targets) batch."""
        features, labels = batch
        predictions = self(features)
        return F.mse_loss(predictions, labels)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one batch, detached from autograd."""
        features, labels = batch
        batch_loss = F.mse_loss(self(features), labels)
        return {'val_loss': batch_loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses into one Python float for the epoch."""
        stacked = torch.stack([entry['val_loss'] for entry in outputs])
        return {'val_loss': stacked.mean().item()}

    def epoch_end(self, epoch, result):
        """Print the validation loss recorded for ``epoch``."""
        message = "Epoch [{}], val_loss: {:.4f}".format(epoch, result['val_loss'])
        print(message)
    
# Model instance shared by the evaluation, training, prediction and save cells below
model = HousingModel()

Training

In [128]:
def evaluate(model, val_loader):
    """Compute the aggregated validation result for ``model``.

    Args:
        model: Object exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable yielding validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch results.
    """
    # Validation never calls backward(), so disable autograd to avoid building
    # a computation graph for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes and collect per-epoch validation results.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to ``opt_func``.
        model: Object providing ``parameters()``, ``training_step`` and ``epoch_end``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, forwarded to ``evaluate``.
        opt_func: Optimizer factory called as ``opt_func(model.parameters(), lr)``.

    Returns:
        List with one ``evaluate`` result per epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            # Clear gradients so they do not accumulate into the next batch
            optimizer.zero_grad()
        # Validation phase: measure, report, record
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result)
        history.append(epoch_result)
    return history
In [129]:
# Validation loss of the model before fit() is called; kept in `result` so the
# plotting cell can put it in front of the training history.
result = evaluate(model, val_loader)
result
Out[129]:
{'val_loss': 3850.06103515625}
In [130]:
# Train for 10 epochs with the default optimizer and keep the per-epoch results
history = fit(
    epochs=10,
    lr=learning_rate,
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
)
Epoch [0], val_loss: 147.9092 Epoch [1], val_loss: 121.6677 Epoch [2], val_loss: 112.0455 Epoch [3], val_loss: 106.7845 Epoch [4], val_loss: 100.6463 Epoch [5], val_loss: 97.4375 Epoch [6], val_loss: 98.7278 Epoch [7], val_loss: 95.8225 Epoch [8], val_loss: 89.9052 Epoch [9], val_loss: 88.4346
In [131]:
# Start the curve at the pre-training loss, followed by one point per epoch
losses = [result['val_loss'], *(entry['val_loss'] for entry in history)]
plt.plot(losses, '-x')
plt.title('val_loss vs. epochs')
plt.xlabel('epoch')
plt.ylabel('val_loss');

Prediction

In [132]:
def predict_single(x, model):
    """Return the model's prediction for one un-batched sample as a Python number.

    Args:
        x: Tensor holding a single sample, without a batch dimension.
        model: Callable model; it is invoked on a batch containing only ``x``.

    Returns:
        The scalar prediction, extracted with ``.item()``.
    """
    # Add a leading batch dimension and actually pass the batched tensor to the
    # model (previously the un-batched ``x`` was passed and ``xb`` was unused).
    xb = x.unsqueeze(0)
    return model(xb).item()
In [133]:
# Compare the model's prediction with the true target for one validation sample
x, target = val_ds[10]
pred = predict_single(x, model)
print("Input: ", x)
print("Target: ", target.item())
print("Prediction:", pred)
Input: tensor([4.6469e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.1400e-01, 6.9800e+00, 6.7600e+01, 2.5329e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.7468e+02, 1.1660e+01]) Target: 29.799999237060547 Prediction: 25.074195861816406

Save and upload

In [134]:
# Save only the model's state_dict (its parameters), not the model object itself
torch.save(model.state_dict(), 'housing-linear.pth')
In [ ]:
# Commit to the Jovian project, listing the saved weights file as an output.
# The identical second call is a deliberate retry (see the inline note).
jovian.commit(project='housing-linear-minimal', environment=None, outputs=['housing-linear.pth'])
jovian.commit(project='housing-linear-minimal', environment=None, outputs=['housing-linear.pth']) # Kaggle commit fails sometimes, so try again..
[jovian] Attempting to save notebook..
In [ ]: