# walid-gomaa/linear-regression

a year ago
In [2]:
import numpy as np
import torch
In [3]:
# Input (temperature, rainfall, humidity).
inputs = np.array([
[73, 67, 43],
[91, 88, 64],
[87, 134, 58],
[102, 43, 37],
[69, 96, 70]
], dtype = 'float32')
In [4]:
inputs
Out[4]:
array([[ 73.,  67.,  43.],
[ 91.,  88.,  64.],
[ 87., 134.,  58.],
[102.,  43.,  37.],
[ 69.,  96.,  70.]], dtype=float32)
In [5]:
# Targets (apples, oranges).
targets = np.array([
[56, 70],
[81, 101],
[119, 133],
[22, 37],
[103, 119]
], dtype = 'float32')
In [6]:
targets
Out[6]:
array([[ 56.,  70.],
[ 81., 101.],
[119., 133.],
[ 22.,  37.],
[103., 119.]], dtype=float32)

### Convert inputs and targets to tensors.

In [7]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73., 67., 43.], [ 91., 88., 64.], [ 87., 134., 58.], [102., 43., 37.], [ 69., 96., 70.]]) tensor([[ 56., 70.], [ 81., 101.], [119., 133.], [ 22., 37.], [103., 119.]]) 

## Creating a linear model.

A very simple model that is linear in both the parameters and the inputs.

In [8]:
w = torch.randn(2, 3, requires_grad = True)
In [9]:
w
Out[9]:
tensor([[-1.1090,  0.4660, -0.3295],
[ 0.8665,  1.2027,  0.0416]], requires_grad=True)
In [10]:
b = torch.randn(2, requires_grad = True)
In [11]:
b
Out[11]:
tensor([1.4969, 0.9979], requires_grad=True)

### Defining the model.

`@` represents matrix multiplication in PyTorch, and `.t()` returns the transpose of a tensor.

In [12]:
def model(x):
    """Linear model: multiply the inputs by the transposed weights, then add the bias.

    Uses the notebook-level parameters `w` (created with shape (2, 3)) and
    `b` (created with shape (2,)), so gradients flow back to both of them.
    """
    weighted = x.matmul(w.t())
    return weighted + b

#### Generate predictions.

In [13]:
preds = model(inputs)
In [14]:
print(preds)
tensor([[ -62.4059, 146.6199], [ -79.5011, 188.3463], [ -51.6510, 239.9558], [-103.7748, 142.6331], [ -53.3516, 179.1550]], grad_fn=<AddBackward0>) 
In [16]:
diff = targets - preds
In [17]:
diff
Out[17]:
tensor([[ 118.4059,  -76.6199],
[ 160.5011,  -87.3463],
[ 170.6510, -106.9558],
[ 125.7748, -105.6331],
[ 156.3516,  -60.1550]], grad_fn=<SubBackward0>)
In [18]:
diff_sqr = diff * diff
In [19]:
diff_sqr
Out[19]:
tensor([[14019.9492,  5870.6025],
[25760.5938,  7629.3813],
[29121.7637, 11439.5449],
[15819.2988, 11158.3525],
[24445.8164,  3618.6238]], grad_fn=<MulBackward0>)
In [20]:
torch.sum(diff_sqr)
Out[20]:
tensor(148883.9219, grad_fn=<SumBackward0>)
In [23]:
torch.sum(diff_sqr) / diff.numel()
Out[23]:
tensor(14888.3926, grad_fn=<DivBackward0>)
In [24]:
type(diff)
Out[24]:
torch.Tensor
In [26]:
rr = torch.tensor(3)
In [27]:
rr.numel()
Out[27]:
1
In [30]:
rr2 = torch.tensor([[1,2],[3,4]])
In [31]:
rr2.numel()
Out[31]:
4

#### Define the loss function.

In [32]:
def mse(t1, t2):
    """Mean squared error between two tensors.

    The element-wise difference is squared, summed over every element and
    divided by the number of elements in the difference.
    """
    delta = t1 - t2
    squared_total = (delta * delta).sum()
    return squared_total / delta.numel()
In [33]:
# Compute loss.
loss = mse(preds, targets)
loss

Out[33]:
tensor(14888.3926, grad_fn=<DivBackward0>)
In [34]:
# Compute gradients
loss.backward()

In [35]:
# Gradients for weights.
# After loss.backward(), d(loss)/d(w) is stored in w.grad (same shape as w).
print(w)
print(w.grad)

tensor([[-1.1090, 0.4660, -0.3295], [ 0.8665, 1.2027, 0.0416]], requires_grad=True) tensor([[-12342.6309, -13068.5186, -8171.9111], [ 7554.4390, 7493.8384, 4641.5063]]) 
In [36]:
# Gradients for biases.
# After loss.backward(), d(loss)/d(b) is stored in b.grad (same shape as b).
print(b)
print(b.grad)

tensor([1.4969, 0.9979], requires_grad=True) tensor([-146.3369, 87.3420]) 
In [37]:
# Reset the gradients of BOTH parameters to zero. backward() adds to the
# existing .grad values, so any gradient left over here would be summed into
# the result of the next backward pass.
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.], [0., 0., 0.]]) tensor([0., 0.]) 

## Train the model.

In [39]:
# Generate predictions.
preds = model(inputs)
print(preds)

tensor([[ -62.4059, 146.6199], [ -79.5011, 188.3463], [ -51.6510, 239.9558], [-103.7748, 142.6331], [ -53.3516, 179.1550]], grad_fn=<AddBackward0>) 
In [40]:
# Generate the loss.
loss = mse(preds, targets)
print(loss)

tensor(14888.3926, grad_fn=<DivBackward0>) 
In [41]:
# Compute the gradients.
loss.backward()
# Show the freshly computed gradients for the weights and the biases.
print(w.grad)
print(b.grad)

tensor([[-12342.6309, -13068.5186, -8171.9111], [ 7554.4390, 7493.8384, 4641.5063]]) tensor([-146.3369, 87.3420]) 
In [42]:
# Adjust weights and reset the gradient.
# The update runs under torch.no_grad() so that autograd does not track the
# in-place modification of the parameters themselves.
with torch.no_grad():
    # Take a small step against the gradient (learning rate 1e-5).
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    # Clear the gradients so the next backward() starts from zero.
    w.grad.zero_()
    b.grad.zero_()

In [43]:
print(w)
print(b)

tensor([[-0.9856, 0.5967, -0.2478], [ 0.7909, 1.1278, -0.0048]], requires_grad=True) tensor([1.4984, 0.9970], requires_grad=True) 
In [44]:
# Calculate loss.
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(10110.9971, grad_fn=<DivBackward0>) 

#### Train for multiple epochs.

In [112]:
# Train for 100 epochs.
for i in range(100):
    # Forward pass and loss on the full data set.
    preds = model(inputs)
    loss = mse(preds, targets)
    # Backward pass: populates w.grad and b.grad.
    loss.backward()
    # Gradient-descent step; no_grad() keeps the in-place update out of the
    # autograd graph.
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        # backward() accumulates, so clear the gradients before the next epoch.
        w.grad.zero_()
        b.grad.zero_()

In [ ]:
# Calculate loss.
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

In [114]:
preds
Out[114]:
tensor([[ 57.4484,  70.5337],
[ 81.7897, 100.0755],
[119.1913, 133.9527],
[ 21.2008,  37.2804],
[101.4223, 118.1445]], grad_fn=<AddBackward0>)
In [115]:
targets
Out[115]:
tensor([[ 56.,  70.],
[ 81., 101.],
[119., 133.],
[ 22.,  37.],
[103., 119.]])
In [116]:
!pip install jovian --upgrade -q
In [117]:
import jovian
In [118]:
jovian.commit()
[jovian] Saving notebook.. 
[jovian] Creating a new notebook on https://jovian.ml/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ml/walid-gomaa/linear-regression 

### Linear regression using PyTorch built-ins

In [119]:
import torch.nn as nn
In [123]:
# Input (temp, rainfall, humidity)
inputs = np.array([
[73, 67, 43], [91, 88, 64], [87, 134, 58],
[102, 43, 37], [69, 96, 70], [73, 67, 43],
[91, 88, 64], [87, 134, 58], [102, 43, 37],
[69, 96, 70], [73, 67, 43], [91, 88, 64],
[87, 134, 58], [102, 43, 37], [69, 96, 70]
], dtype = 'float32')
In [124]:
inputs.shape
Out[124]:
(15, 3)
In [125]:
# Targets (apples, oranges)
targets = np.array([
[56, 70], [81, 101], [119, 133],
[22, 37], [103, 119], [56, 70],
[81, 101], [119, 133], [22, 37],
[103, 119], [56, 70], [81, 101],
[119, 133], [22, 37], [103, 119]
], dtype = 'float32')
In [126]:
inputs = torch.tensor(inputs)
In [127]:
inputs
Out[127]:
tensor([[ 73.,  67.,  43.],
[ 91.,  88.,  64.],
[ 87., 134.,  58.],
[102.,  43.,  37.],
[ 69.,  96.,  70.],
[ 73.,  67.,  43.],
[ 91.,  88.,  64.],
[ 87., 134.,  58.],
[102.,  43.,  37.],
[ 69.,  96.,  70.],
[ 73.,  67.,  43.],
[ 91.,  88.,  64.],
[ 87., 134.,  58.],
[102.,  43.,  37.],
[ 69.,  96.,  70.]])
In [128]:
targets = torch.tensor(targets)

In [129]:
from torch.utils.data import TensorDataset
In [130]:
# Define dataset.
train_ds = TensorDataset(inputs, targets)

In [131]:
train_ds[0:3]
Out[131]:
(tensor([[ 73.,  67.,  43.],
[ 91.,  88.,  64.],
[ 87., 134.,  58.]]), tensor([[ 56.,  70.],
[ 81., 101.],
[119., 133.]]))
In [132]:
train_ds[[1,3,5,7]]
Out[132]:
(tensor([[ 91.,  88.,  64.],
[102.,  43.,  37.],
[ 73.,  67.,  43.],
[ 87., 134.,  58.]]), tensor([[ 81., 101.],
[ 22.,  37.],
[ 56.,  70.],
[119., 133.]]))
In [133]:
from torch.utils.data import DataLoader
In [134]:
# Define data loader.
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle = True)
In [135]:
for xb, yb in train_dl:
print(xb)
print(yb)
break
tensor([[ 91., 88., 64.], [ 87., 134., 58.], [102., 43., 37.], [ 73., 67., 43.], [102., 43., 37.]]) tensor([[ 81., 101.], [119., 133.], [ 22., 37.], [ 56., 70.], [ 22., 37.]]) 

#### Defining the model

In [136]:
model = nn.Linear(3, 2)
In [137]:
print(model.weight)
Parameter containing: tensor([[-0.1147, -0.2876, 0.4160], [-0.4951, -0.5615, 0.5414]], requires_grad=True) 
In [138]:
print(model.bias)
Parameter containing: tensor([0.0805, 0.1290], requires_grad=True) 
In [140]:
list(model.parameters())
Out[140]:
[Parameter containing:
tensor([[-0.1147, -0.2876,  0.4160],
[-0.4951, -0.5615,  0.5414]], requires_grad=True),
Parameter containing:
tensor([0.0805, 0.1290], requires_grad=True)]
In [141]:
# Generate predictions.
preds = model(inputs)
preds

Out[141]:
tensor([[ -9.6722, -50.3550],
[ -9.0402, -59.6895],
[-24.3048, -86.7879],
[ -8.5926, -54.4840],
[ -6.3217, -50.0417],
[ -9.6722, -50.3550],
[ -9.0402, -59.6895],
[-24.3048, -86.7879],
[ -8.5926, -54.4840],
[ -6.3217, -50.0417],
[ -9.6722, -50.3550],
[ -9.0402, -59.6895],
[-24.3048, -86.7879],
[ -8.5926, -54.4840],
[ -6.3217, -50.0417]], grad_fn=<AddmmBackward>)

#### Defining the loss function

In [142]:
# Import nn.functional.
import torch.nn.functional as F
In [143]:
loss_fn = F.mse_loss
In [144]:
? nn.Linear
Init signature:  nn.Linear(in_features, out_features, bias=True)
Docstring:
Applies a linear transformation to the incoming data: :math:y = xA^T + b

Args:
in_features: size of each input sample
out_features: size of each output sample
bias: If set to False, the layer will not learn an additive bias.
Default: True

Shape:
- Input: :math:(N, *, H_{in}) where :math:* means any number of
additional dimensions and :math:H_{in} = \text{in\_features}
- Output: :math:(N, *, H_{out}) where all but the last dimension
are the same shape as the input and :math:H_{out} = \text{out\_features}.

Attributes:
weight: the learnable weights of the module of shape
:math:(\text{out\_features}, \text{in\_features}). The values are
initialized from :math:\mathcal{U}(-\sqrt{k}, \sqrt{k}), where
:math:k = \frac{1}{\text{in\_features}}
bias:   the learnable bias of the module of shape :math:(\text{out\_features}).
If :attr:bias is True, the values are initialized from
:math:\mathcal{U}(-\sqrt{k}, \sqrt{k}) where
:math:k = \frac{1}{\text{in\_features}}

Examples::

>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.
File:           /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/nn/modules/linear.py
Type:           type
Subclasses:     Linear

In [145]:
? F
Type:        module
String form: <module 'torch.nn.functional' from '/anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/nn/functional.py'>
File:        /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/nn/functional.py
Docstring:   Functional interface

In [146]:
? print
Docstring:
print(value, ..., sep=' ', end='\n', file=sys.stdout, flush=False)

Prints the values to a stream, or to sys.stdout by default.
Optional keyword arguments:
file:  a file-like object (stream); defaults to the current sys.stdout.
sep:   string inserted between values, default a space.
end:   string appended after the last value, default a newline.
flush: whether to forcibly flush the stream.
Type:      builtin_function_or_method

In [149]:
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(17140.1035, grad_fn=<MseLossBackward>) 

#### Defining the optimizer.

In [152]:
? torch.optim.SGD
Init signature:
torch.optim.SGD(
params,
lr=<required parameter>,
momentum=0,
dampening=0,
weight_decay=0,
nesterov=False,
)
Docstring:
Implements stochastic gradient descent (optionally with momentum).

Nesterov momentum is based on the formula from
On the importance of initialization and momentum in deep learning__.

Args:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float): learning rate
momentum (float, optional): momentum factor (default: 0)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
dampening (float, optional): dampening for momentum (default: 0)
nesterov (bool, optional): enables Nesterov momentum (default: False)

Example:
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()

__ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf

.. note::
The implementation of SGD with Momentum/Nesterov subtly differs from
Sutskever et. al. and implementations in some other frameworks.

Considering the specific case of Momentum, the update can be written as

.. math::
v_{t+1} = \mu * v_{t} + g_{t+1} \\
p_{t+1} = p_{t} - lr * v_{t+1}

where p, g, v and :math:\mu denote the parameters, gradient,
velocity, and momentum respectively.

This is in contrast to Sutskever et. al. and
other frameworks which employ an update of the form

.. math::
v_{t+1} = \mu * v_{t} + lr * g_{t+1} \\
p_{t+1} = p_{t} - v_{t+1}

The Nesterov version is analogously modified.
File:           /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/optim/sgd.py
Type:           type
Subclasses:

In [153]:
opt = torch.optim.SGD(model.parameters(), lr = 1e-5)
In [154]:
opt
Out[154]:
SGD (
Parameter Group 0
dampening: 0
lr: 1e-05
momentum: 0
nesterov: False
weight_decay: 0
)
In [155]:
list(model.parameters())
Out[155]:
[Parameter containing:
tensor([[-0.1147, -0.2876,  0.4160],
[-0.4951, -0.5615,  0.5414]], requires_grad=True),
Parameter containing:
tensor([0.0805, 0.1290], requires_grad=True)]

#### Training the model.

In [156]:
? opt.step
Signature:  opt.step(closure=None)
Docstring:
Performs a single optimization step.

Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
File:      /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/optim/sgd.py
Type:      method

In [157]:
# Define a utility function to train the model.
def fit(num_epochs, model, loss_fn, opt, dl=None):
    """Train `model` in place with mini-batch gradient descent.

    Args:
        num_epochs: Number of full passes over the batches.
        model: Callable that maps an input batch to predictions.
        loss_fn: Callable `(preds, targets)` returning a scalar loss tensor.
        opt: Optimizer providing `step()` and `zero_grad()`; it is expected
            to have been built over the parameters of `model`.
        dl: Optional iterable of `(xb, yb)` batches. When omitted, the
            notebook-level `train_dl` is used, so existing calls keep working.

    Prints the loss of the last batch every 10th epoch.
    """
    batches = train_dl if dl is None else dl
    # Repeat for the given number of epochs.
    for epoch in range(num_epochs):
        # Train with batches of data.
        for xb, yb in batches:
            # 1. Generate predictions.
            preds = model(xb)
            # 2. Calculate loss.
            loss = loss_fn(preds, yb)
            # 3. Compute gradients.
            loss.backward()
            # 4. Update parameters using gradients.
            opt.step()
            # 5. Reset gradients to zero. backward() accumulates into .grad,
            #    so without this every step would reuse all earlier gradients.
            opt.zero_grad()
        # Print the progress.
        if (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
In [158]:
loss.item()
Out[158]:
17140.103515625
In [159]:
fit(100, model, loss_fn, opt)
Epoch [10/100], Loss: 274.4447 Epoch [20/100], Loss: 345.7473 Epoch [30/100], Loss: 252.0515 Epoch [40/100], Loss: 227.0331 Epoch [50/100], Loss: 72.4297 Epoch [60/100], Loss: 39.6769 Epoch [70/100], Loss: 7.5991 Epoch [80/100], Loss: 17.1577 Epoch [90/100], Loss: 17.7797 Epoch [100/100], Loss: 7.8511 
In [160]:
fit(200, model, loss_fn, opt)
Epoch [10/200], Loss: 8.5315 Epoch [20/200], Loss: 6.5630 Epoch [30/200], Loss: 5.3663 Epoch [40/200], Loss: 3.7242 Epoch [50/200], Loss: 3.1195 Epoch [60/200], Loss: 1.3423 Epoch [70/200], Loss: 1.7627 Epoch [80/200], Loss: 1.4846 Epoch [90/200], Loss: 1.0412 Epoch [100/200], Loss: 2.7521 Epoch [110/200], Loss: 1.4776 Epoch [120/200], Loss: 1.5228 Epoch [130/200], Loss: 1.5755 Epoch [140/200], Loss: 1.1981 Epoch [150/200], Loss: 0.9064 Epoch [160/200], Loss: 1.2216 Epoch [170/200], Loss: 0.9610 Epoch [180/200], Loss: 1.0257 Epoch [190/200], Loss: 1.1858 Epoch [200/200], Loss: 1.0127 
In [161]:
fit(1000, model, loss_fn, opt)
Epoch [10/1000], Loss: 0.8390 Epoch [20/1000], Loss: 0.8243 Epoch [30/1000], Loss: 0.9213 Epoch [40/1000], Loss: 0.8647 Epoch [50/1000], Loss: 0.7388 Epoch [60/1000], Loss: 0.8686 Epoch [70/1000], Loss: 0.7849 Epoch [80/1000], Loss: 0.8086 Epoch [90/1000], Loss: 0.7943 Epoch [100/1000], Loss: 0.6792 Epoch [110/1000], Loss: 0.7685 Epoch [120/1000], Loss: 0.8260 Epoch [130/1000], Loss: 0.7298 Epoch [140/1000], Loss: 0.7044 Epoch [150/1000], Loss: 0.6369 Epoch [160/1000], Loss: 1.0164 Epoch [170/1000], Loss: 0.6252 Epoch [180/1000], Loss: 0.6698 Epoch [190/1000], Loss: 0.5765 Epoch [200/1000], Loss: 0.6256 Epoch [210/1000], Loss: 0.6333 Epoch [220/1000], Loss: 0.6669 Epoch [230/1000], Loss: 0.6584 Epoch [240/1000], Loss: 0.7312 Epoch [250/1000], Loss: 0.6932 Epoch [260/1000], Loss: 0.7157 Epoch [270/1000], Loss: 0.7490 Epoch [280/1000], Loss: 0.8552 Epoch [290/1000], Loss: 0.5181 Epoch [300/1000], Loss: 0.5712 Epoch [310/1000], Loss: 0.7218 Epoch [320/1000], Loss: 0.6120 Epoch [330/1000], Loss: 0.5133 Epoch [340/1000], Loss: 0.5491 Epoch [350/1000], Loss: 0.5977 Epoch [360/1000], Loss: 0.7913 Epoch [370/1000], Loss: 0.5320 Epoch [380/1000], Loss: 0.5622 Epoch [390/1000], Loss: 0.5086 Epoch [400/1000], Loss: 0.5702 Epoch [410/1000], Loss: 0.5553 Epoch [420/1000], Loss: 0.7722 Epoch [430/1000], Loss: 0.5213 Epoch [440/1000], Loss: 0.5490 Epoch [450/1000], Loss: 0.5937 Epoch [460/1000], Loss: 0.6259 Epoch [470/1000], Loss: 0.5269 Epoch [480/1000], Loss: 0.5356 Epoch [490/1000], Loss: 0.6047 Epoch [500/1000], Loss: 0.7214 Epoch [510/1000], Loss: 0.6213 Epoch [520/1000], Loss: 0.6083 Epoch [530/1000], Loss: 0.5366 Epoch [540/1000], Loss: 0.4997 Epoch [550/1000], Loss: 0.5347 Epoch [560/1000], Loss: 0.6164 Epoch [570/1000], Loss: 0.4688 Epoch [580/1000], Loss: 0.6317 Epoch [590/1000], Loss: 0.5179 Epoch [600/1000], Loss: 0.6119 Epoch [610/1000], Loss: 0.4962 Epoch [620/1000], Loss: 0.4906 Epoch [630/1000], Loss: 0.5880 Epoch [640/1000], Loss: 0.6129 Epoch [650/1000], Loss: 
0.5069 Epoch [660/1000], Loss: 0.6276 Epoch [670/1000], Loss: 0.6865 Epoch [680/1000], Loss: 0.5891 Epoch [690/1000], Loss: 0.5254 Epoch [700/1000], Loss: 0.4983 Epoch [710/1000], Loss: 0.4166 Epoch [720/1000], Loss: 0.5242 Epoch [730/1000], Loss: 0.4065 Epoch [740/1000], Loss: 0.6223 Epoch [750/1000], Loss: 0.6136 Epoch [760/1000], Loss: 0.5263 Epoch [770/1000], Loss: 0.4307 Epoch [780/1000], Loss: 0.4745 Epoch [790/1000], Loss: 0.6394 Epoch [800/1000], Loss: 0.5219 Epoch [810/1000], Loss: 0.2932 Epoch [820/1000], Loss: 0.6071 Epoch [830/1000], Loss: 0.5987 Epoch [840/1000], Loss: 0.4504 Epoch [850/1000], Loss: 0.5209 Epoch [860/1000], Loss: 0.4173 Epoch [870/1000], Loss: 0.5004 Epoch [880/1000], Loss: 0.7521 Epoch [890/1000], Loss: 0.5490 Epoch [900/1000], Loss: 0.4022 Epoch [910/1000], Loss: 0.5318 Epoch [920/1000], Loss: 0.4129 Epoch [930/1000], Loss: 0.6227 Epoch [940/1000], Loss: 0.5948 Epoch [950/1000], Loss: 0.6896 Epoch [960/1000], Loss: 0.5198 Epoch [970/1000], Loss: 0.5326 Epoch [980/1000], Loss: 0.5125 Epoch [990/1000], Loss: 0.8017 Epoch [1000/1000], Loss: 0.4466 
In [162]:
# Generate predictions.
preds = model(inputs)
preds

Out[162]:
tensor([[ 57.1696,  70.3309],
[ 82.2062, 100.6933],
[118.6484, 132.8939],
[ 21.0618,  36.9971],
[101.9490, 119.2129],
[ 57.1696,  70.3309],
[ 82.2062, 100.6933],
[118.6484, 132.8939],
[ 21.0618,  36.9971],
[101.9490, 119.2129],
[ 57.1696,  70.3309],
[ 82.2062, 100.6933],
[118.6484, 132.8939],
[ 21.0618,  36.9971],
[101.9490, 119.2129]], grad_fn=<AddmmBackward>)
In [163]:
# Compare with targets
targets

Out[163]:
tensor([[ 56.,  70.],
[ 81., 101.],
[119., 133.],
[ 22.,  37.],
[103., 119.],
[ 56.,  70.],
[ 81., 101.],
[119., 133.],
[ 22.,  37.],
[103., 119.],
[ 56.,  70.],
[ 81., 101.],
[119., 133.],
[ 22.,  37.],
[103., 119.]])
In [164]:
import jovian
In [165]:
jovian.commit()
[jovian] Saving notebook.. 
[jovian] Updating notebook "35272fe05c0c46e484280c69962e8c98" on https://jovian.ml/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ml/walid-gomaa/linear-regression 
In [ ]: