import numpy as np
import torch
# Input (temperature, rainfall, humidity).
inputs = np.array([
[73, 67, 43],
[91, 88, 64],
[87, 134, 58],
[102, 43, 37],
[69, 96, 70]
], dtype='float32')
inputs
array([[ 73., 67., 43.],
[ 91., 88., 64.],
[ 87., 134., 58.],
[102., 43., 37.],
[ 69., 96., 70.]], dtype=float32)
# Targets (apples, oranges).
targets = np.array([
[56, 70],
[81, 101],
[119, 133],
[22, 37],
[103, 119]
], dtype='float32')
targets
array([[ 56., 70.],
[ 81., 101.],
[119., 133.],
[ 22., 37.],
[103., 119.]], dtype=float32)
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)
tensor([[ 73., 67., 43.],
[ 91., 88., 64.],
[ 87., 134., 58.],
[102., 43., 37.],
[ 69., 96., 70.]])
tensor([[ 56., 70.],
[ 81., 101.],
[119., 133.],
[ 22., 37.],
[103., 119.]])
A very simple model that is linear in both the parameters and the inputs.
w = torch.randn(2, 3, requires_grad=True)
w
tensor([[-1.1090, 0.4660, -0.3295],
[ 0.8665, 1.2027, 0.0416]], requires_grad=True)
b = torch.randn(2, requires_grad=True)
b
tensor([1.4969, 0.9979], requires_grad=True)
The @ operator performs matrix multiplication in PyTorch.
def model(x):
    return x @ w.t() + b
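As a quick sanity check (a sketch added here, not part of the original run; the names first_row and manual_pred are introduced only for illustration), the prediction for a single row can be formed by hand: a (1, 3) input times the (3, 2) transpose of w, plus the length-2 bias broadcast across rows.
# Sketch: manual prediction for the first input row; shapes are (1, 3) @ (3, 2) + (2,) -> (1, 2).
first_row = inputs[0:1]               # shape (1, 3)
manual_pred = first_row @ w.t() + b   # identical to model(inputs[0:1])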
preds = model(inputs)
print(preds)
tensor([[ -62.4059, 146.6199],
[ -79.5011, 188.3463],
[ -51.6510, 239.9558],
[-103.7748, 142.6331],
[ -53.3516, 179.1550]], grad_fn=<AddBackward0>)
diff = targets - preds
diff
tensor([[ 118.4059, -76.6199],
[ 160.5011, -87.3463],
[ 170.6510, -106.9558],
[ 125.7748, -105.6331],
[ 156.3516, -60.1550]], grad_fn=<SubBackward0>)
diff_sqr = diff * diff
diff_sqr
tensor([[14019.9492, 5870.6025],
[25760.5938, 7629.3813],
[29121.7637, 11439.5449],
[15819.2988, 11158.3525],
[24445.8164, 3618.6238]], grad_fn=<MulBackward0>)
torch.sum(diff_sqr)
tensor(148883.9219, grad_fn=<SumBackward0>)
torch.sum(diff_sqr) / diff.numel()
tensor(14888.3926, grad_fn=<DivBackward0>)
type(diff)
torch.Tensor
rr = torch.tensor(3)
rr.numel()
1
rr2 = torch.tensor([[1,2],[3,4]])
rr2.numel()
4
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()
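As a consistency check (a sketch, not part of the original run; manual_loss is a name introduced here), mse reproduces the step-by-step computation above: the mean of the squared element-wise differences over all 10 entries (5 rows times 2 targets).
# Sketch: the hand-written mse matches the sum-then-divide computation above.
manual_loss = torch.sum((preds - targets) ** 2) / (preds - targets).numel()
# torch.allclose(mse(preds, targets), manual_loss) should evaluate to True.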
# Compute loss.
loss = mse(preds, targets)
loss
tensor(14888.3926, grad_fn=<DivBackward0>)
# Compute gradients
loss.backward()
# Gradients for weights.
print(w)
print(w.grad)
tensor([[-1.1090, 0.4660, -0.3295],
[ 0.8665, 1.2027, 0.0416]], requires_grad=True)
tensor([[-12342.6309, -13068.5186, -8171.9111],
[ 7554.4390, 7493.8384, 4641.5063]])
# Gradients for biases.
print(b)
print(b.grad)
tensor([1.4969, 0.9979], requires_grad=True)
tensor([-146.3369, 87.3420])
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)
tensor([[0., 0., 0.],
[0., 0., 0.]])
tensor([0., 0.])
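PyTorch accumulates gradients: each call to .backward() adds the new gradients to whatever is already stored in .grad rather than overwriting it. Resetting w.grad and b.grad to zero here means the next backward pass starts from a clean slate, which is why the gradients recomputed below match the ones printed above.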
# Generate predictions.
preds = model(inputs)
print(preds)
tensor([[ -62.4059, 146.6199],
[ -79.5011, 188.3463],
[ -51.6510, 239.9558],
[-103.7748, 142.6331],
[ -53.3516, 179.1550]], grad_fn=<AddBackward0>)
# Compute the loss.
loss = mse(preds, targets)
print(loss)
tensor(14888.3926, grad_fn=<DivBackward0>)
# Compute the gradients.
loss.backward()
print(w.grad)
print(b.grad)
tensor([[-12342.6309, -13068.5186, -8171.9111],
[ 7554.4390, 7493.8384, 4641.5063]])
tensor([-146.3369, 87.3420])
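The parameters are then nudged a small step against their gradients, roughly w -= lr * w.grad and b -= lr * b.grad, with the learning rate lr = 1e-5 keeping each step small. torch.no_grad() is used below so that this bookkeeping update is not itself tracked by autograd.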
# Adjust the weights and biases, then reset the gradients.
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()
print(w)
print(b)
tensor([[-0.9856, 0.5967, -0.2478],
[ 0.7909, 1.1278, -0.0048]], requires_grad=True)
tensor([1.4984, 0.9970], requires_grad=True)
# Calculate loss.
preds = model(inputs)
loss = mse(preds, targets)
print(loss)
tensor(10110.9971, grad_fn=<DivBackward0>)
# Train for 100 epochs.
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()
# Calculate the loss.
preds = model(inputs)
loss = mse(preds, targets)
print(loss)
preds
tensor([[ 57.4484, 70.5337],
[ 81.7897, 100.0755],
[119.1913, 133.9527],
[ 21.2008, 37.2804],
[101.4223, 118.1445]], grad_fn=<AddBackward0>)
targets
tensor([[ 56., 70.],
[ 81., 101.],
[119., 133.],
[ 22., 37.],
[103., 119.]])
!pip install jovian --upgrade -q
import jovian
jovian.commit()
[jovian] Saving notebook..
[jovian] Creating a new notebook on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/walid-gomaa/linear-regression
import torch.nn as nn
# Input (temperature, rainfall, humidity).
inputs = np.array([
[73, 67, 43], [91, 88, 64], [87, 134, 58],
[102, 43, 37], [69, 96, 70], [73, 67, 43],
[91, 88, 64], [87, 134, 58], [102, 43, 37],
[69, 96, 70], [73, 67, 43], [91, 88, 64],
[87, 134, 58], [102, 43, 37], [69, 96, 70]
], dtype='float32')
inputs.shape
(15, 3)
# Targets (apples, oranges).
targets = np.array([
[56, 70], [81, 101], [119, 133],
[22, 37], [103, 119], [56, 70],
[81, 101], [119, 133], [22, 37],
[103, 119], [56, 70], [81, 101],
[119, 133], [22, 37], [103, 119]
], dtype='float32')
inputs = torch.tensor(inputs)
inputs
tensor([[ 73., 67., 43.],
[ 91., 88., 64.],
[ 87., 134., 58.],
[102., 43., 37.],
[ 69., 96., 70.],
[ 73., 67., 43.],
[ 91., 88., 64.],
[ 87., 134., 58.],
[102., 43., 37.],
[ 69., 96., 70.],
[ 73., 67., 43.],
[ 91., 88., 64.],
[ 87., 134., 58.],
[102., 43., 37.],
[ 69., 96., 70.]])
targets = torch.tensor(targets)
from torch.utils.data import TensorDataset
# Define dataset.
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]
(tensor([[ 73., 67., 43.],
[ 91., 88., 64.],
[ 87., 134., 58.]]), tensor([[ 56., 70.],
[ 81., 101.],
[119., 133.]]))
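TensorDataset simply pairs up corresponding rows of the two tensors; indexing with a single integer returns one (input, target) tuple. A quick check (added here as a sketch, not part of the original run; x0 and y0 are names introduced for illustration):
x0, y0 = train_ds[0]   # x0 is the first (3,) input row, y0 the matching (2,) target row
len(train_ds)          # 15 samples in total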
train_ds[[1,3,5,7]]
(tensor([[ 91., 88., 64.],
[102., 43., 37.],
[ 73., 67., 43.],
[ 87., 134., 58.]]), tensor([[ 81., 101.],
[ 22., 37.],
[ 56., 70.],
[119., 133.]]))
from torch.utils.data import DataLoader
# Define data loader.
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break
tensor([[ 91., 88., 64.],
[ 87., 134., 58.],
[102., 43., 37.],
[ 73., 67., 43.],
[102., 43., 37.]])
tensor([[ 81., 101.],
[119., 133.],
[ 22., 37.],
[ 56., 70.],
[ 22., 37.]])
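With 15 samples and a batch size of 5, every epoch yields three shuffled batches; this can be confirmed directly (a sketch, not in the original run):
len(train_dl)   # 3 batches per epoch (15 samples / batch_size of 5)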
model = nn.Linear(3, 2)
print(model.weight)
Parameter containing:
tensor([[-0.1147, -0.2876, 0.4160],
[-0.4951, -0.5615, 0.5414]], requires_grad=True)
print(model.bias)
Parameter containing:
tensor([0.0805, 0.1290], requires_grad=True)
list(model.parameters())
[Parameter containing:
tensor([[-0.1147, -0.2876, 0.4160],
[-0.4951, -0.5615, 0.5414]], requires_grad=True),
Parameter containing:
tensor([0.0805, 0.1290], requires_grad=True)]
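nn.Linear(3, 2) holds the same kind of parameters that were created by hand earlier, just initialized and registered automatically; their shapes can be confirmed directly (a sketch, not in the original run):
model.weight.shape   # torch.Size([2, 3]) -- one row of weights per output variable
model.bias.shape     # torch.Size([2])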
# Generate predictions.
preds = model(inputs)
preds
tensor([[ -9.6722, -50.3550],
[ -9.0402, -59.6895],
[-24.3048, -86.7879],
[ -8.5926, -54.4840],
[ -6.3217, -50.0417],
[ -9.6722, -50.3550],
[ -9.0402, -59.6895],
[-24.3048, -86.7879],
[ -8.5926, -54.4840],
[ -6.3217, -50.0417],
[ -9.6722, -50.3550],
[ -9.0402, -59.6895],
[-24.3048, -86.7879],
[ -8.5926, -54.4840],
[ -6.3217, -50.0417]], grad_fn=<AddmmBackward>)
# Import nn.functional.
import torch.nn.functional as F
loss_fn = F.mse_loss
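F.mse_loss is the built-in counterpart of the hand-written mse above; with its default reduction it also computes the mean of the squared element-wise differences. A quick equivalence check (a sketch, not in the original run; builtin_loss and handwritten_loss are names introduced here, and mse is the function defined earlier in this notebook):
builtin_loss = loss_fn(preds, targets)
handwritten_loss = mse(preds, targets)
# torch.allclose(builtin_loss, handwritten_loss) should evaluate to True.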
? nn.Linear
Init signature: nn.Linear(in_features, out_features, bias=True)
Docstring:
Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
Args:
in_features: size of each input sample
out_features: size of each output sample
bias: If set to ``False``, the layer will not learn an additive bias.
Default: ``True``
Shape:
- Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
additional dimensions and :math:`H_{in} = \text{in\_features}`
- Output: :math:`(N, *, H_{out})` where all but the last dimension
are the same shape as the input and :math:`H_{out} = \text{out\_features}`.
Attributes:
weight: the learnable weights of the module of shape
:math:`(\text{out\_features}, \text{in\_features})`. The values are
initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
:math:`k = \frac{1}{\text{in\_features}}`
bias: the learnable bias of the module of shape :math:`(\text{out\_features})`.
If :attr:`bias` is ``True``, the values are initialized from
:math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
:math:`k = \frac{1}{\text{in\_features}}`
Examples::
>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.
File: /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/nn/modules/linear.py
Type: type
Subclasses: Linear
? F
Type: module
String form: <module 'torch.nn.functional' from '/anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/nn/functional.py'>
File: /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/nn/functional.py
Docstring: Functional interface
? print
Docstring:
print(value, ..., sep=' ', end='\n', file=sys.stdout, flush=False)
Prints the values to a stream, or to sys.stdout by default.
Optional keyword arguments:
file: a file-like object (stream); defaults to the current sys.stdout.
sep: string inserted between values, default a space.
end: string appended after the last value, default a newline.
flush: whether to forcibly flush the stream.
Type: builtin_function_or_method
loss = loss_fn(model(inputs), targets)
print(loss)
tensor(17140.1035, grad_fn=<MseLossBackward>)
? torch.optim.SGD
Init signature:
torch.optim.SGD(
params,
lr=<required parameter>,
momentum=0,
dampening=0,
weight_decay=0,
nesterov=False,
)
Docstring:
Implements stochastic gradient descent (optionally with momentum).
Nesterov momentum is based on the formula from
`On the importance of initialization and momentum in deep learning`__.
Args:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float): learning rate
momentum (float, optional): momentum factor (default: 0)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
dampening (float, optional): dampening for momentum (default: 0)
nesterov (bool, optional): enables Nesterov momentum (default: False)
Example:
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> optimizer.zero_grad()
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()
__ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
.. note::
The implementation of SGD with Momentum/Nesterov subtly differs from
Sutskever et. al. and implementations in some other frameworks.
Considering the specific case of Momentum, the update can be written as
.. math::
v_{t+1} = \mu * v_{t} + g_{t+1} \\
p_{t+1} = p_{t} - lr * v_{t+1}
where p, g, v and :math:`\mu` denote the parameters, gradient,
velocity, and momentum respectively.
This is in contrast to Sutskever et. al. and
other frameworks which employ an update of the form
.. math::
v_{t+1} = \mu * v_{t} + lr * g_{t+1} \\
p_{t+1} = p_{t} - v_{t+1}
The Nesterov version is analogously modified.
File: /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/optim/sgd.py
Type: type
Subclasses:
opt = torch.optim.SGD(model.parameters(), lr=1e-5)
opt
SGD (
Parameter Group 0
dampening: 0
lr: 1e-05
momentum: 0
nesterov: False
weight_decay: 0
)
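With the defaults shown above (momentum 0, no weight decay), opt.step() applies the same rule that was written by hand earlier: each parameter p is updated in place as p -= lr * p.grad, and opt.zero_grad() replaces the manual .grad.zero_() calls.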
list(model.parameters())
[Parameter containing:
tensor([[-0.1147, -0.2876, 0.4160],
[-0.4951, -0.5615, 0.5414]], requires_grad=True),
Parameter containing:
tensor([0.0805, 0.1290], requires_grad=True)]
? opt.step
Signature: opt.step(closure=None)
Docstring:
Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
File: /anaconda3/envs/01-pytorch-basics/lib/python3.7/site-packages/torch/optim/sgd.py
Type: method
# Define a utility function to train the model.
def fit(num_epochs, model, loss_fn, opt):
    # Repeat for the given number of epochs.
    for epoch in range(num_epochs):
        # Train with batches of data.
        for xb, yb in train_dl:  # Use the DataLoader object.
            # 1. Generate predictions.
            preds = model(xb)
            # 2. Calculate loss.
            loss = loss_fn(preds, yb)
            # 3. Compute gradients.
            loss.backward()
            # 4. Update parameters using gradients.
            opt.step()
            # 5. Reset gradients to zero.
            opt.zero_grad()
        # Print the progress.
        if (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))
loss.item()
17140.103515625
fit(100, model, loss_fn, opt)
Epoch [10/100], Loss: 274.4447
Epoch [20/100], Loss: 345.7473
Epoch [30/100], Loss: 252.0515
Epoch [40/100], Loss: 227.0331
Epoch [50/100], Loss: 72.4297
Epoch [60/100], Loss: 39.6769
Epoch [70/100], Loss: 7.5991
Epoch [80/100], Loss: 17.1577
Epoch [90/100], Loss: 17.7797
Epoch [100/100], Loss: 7.8511
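Note that the loss fit prints each epoch is only the loss on that epoch's last batch, which is why the numbers bounce around; the loss over the whole training set can be checked separately (a sketch using names already defined above, not part of the original run; full_loss is introduced for illustration):
# Evaluate on the full training set, without tracking gradients.
with torch.no_grad():
    full_loss = loss_fn(model(inputs), targets)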
fit(200, model, loss_fn, opt)
Epoch [10/200], Loss: 8.5315
Epoch [20/200], Loss: 6.5630
Epoch [30/200], Loss: 5.3663
Epoch [40/200], Loss: 3.7242
Epoch [50/200], Loss: 3.1195
Epoch [60/200], Loss: 1.3423
Epoch [70/200], Loss: 1.7627
Epoch [80/200], Loss: 1.4846
Epoch [90/200], Loss: 1.0412
Epoch [100/200], Loss: 2.7521
Epoch [110/200], Loss: 1.4776
Epoch [120/200], Loss: 1.5228
Epoch [130/200], Loss: 1.5755
Epoch [140/200], Loss: 1.1981
Epoch [150/200], Loss: 0.9064
Epoch [160/200], Loss: 1.2216
Epoch [170/200], Loss: 0.9610
Epoch [180/200], Loss: 1.0257
Epoch [190/200], Loss: 1.1858
Epoch [200/200], Loss: 1.0127
fit(1000, model, loss_fn, opt)
Epoch [10/1000], Loss: 0.8390
Epoch [20/1000], Loss: 0.8243
Epoch [30/1000], Loss: 0.9213
Epoch [40/1000], Loss: 0.8647
Epoch [50/1000], Loss: 0.7388
Epoch [60/1000], Loss: 0.8686
Epoch [70/1000], Loss: 0.7849
Epoch [80/1000], Loss: 0.8086
Epoch [90/1000], Loss: 0.7943
Epoch [100/1000], Loss: 0.6792
Epoch [110/1000], Loss: 0.7685
Epoch [120/1000], Loss: 0.8260
Epoch [130/1000], Loss: 0.7298
Epoch [140/1000], Loss: 0.7044
Epoch [150/1000], Loss: 0.6369
Epoch [160/1000], Loss: 1.0164
Epoch [170/1000], Loss: 0.6252
Epoch [180/1000], Loss: 0.6698
Epoch [190/1000], Loss: 0.5765
Epoch [200/1000], Loss: 0.6256
Epoch [210/1000], Loss: 0.6333
Epoch [220/1000], Loss: 0.6669
Epoch [230/1000], Loss: 0.6584
Epoch [240/1000], Loss: 0.7312
Epoch [250/1000], Loss: 0.6932
Epoch [260/1000], Loss: 0.7157
Epoch [270/1000], Loss: 0.7490
Epoch [280/1000], Loss: 0.8552
Epoch [290/1000], Loss: 0.5181
Epoch [300/1000], Loss: 0.5712
Epoch [310/1000], Loss: 0.7218
Epoch [320/1000], Loss: 0.6120
Epoch [330/1000], Loss: 0.5133
Epoch [340/1000], Loss: 0.5491
Epoch [350/1000], Loss: 0.5977
Epoch [360/1000], Loss: 0.7913
Epoch [370/1000], Loss: 0.5320
Epoch [380/1000], Loss: 0.5622
Epoch [390/1000], Loss: 0.5086
Epoch [400/1000], Loss: 0.5702
Epoch [410/1000], Loss: 0.5553
Epoch [420/1000], Loss: 0.7722
Epoch [430/1000], Loss: 0.5213
Epoch [440/1000], Loss: 0.5490
Epoch [450/1000], Loss: 0.5937
Epoch [460/1000], Loss: 0.6259
Epoch [470/1000], Loss: 0.5269
Epoch [480/1000], Loss: 0.5356
Epoch [490/1000], Loss: 0.6047
Epoch [500/1000], Loss: 0.7214
Epoch [510/1000], Loss: 0.6213
Epoch [520/1000], Loss: 0.6083
Epoch [530/1000], Loss: 0.5366
Epoch [540/1000], Loss: 0.4997
Epoch [550/1000], Loss: 0.5347
Epoch [560/1000], Loss: 0.6164
Epoch [570/1000], Loss: 0.4688
Epoch [580/1000], Loss: 0.6317
Epoch [590/1000], Loss: 0.5179
Epoch [600/1000], Loss: 0.6119
Epoch [610/1000], Loss: 0.4962
Epoch [620/1000], Loss: 0.4906
Epoch [630/1000], Loss: 0.5880
Epoch [640/1000], Loss: 0.6129
Epoch [650/1000], Loss: 0.5069
Epoch [660/1000], Loss: 0.6276
Epoch [670/1000], Loss: 0.6865
Epoch [680/1000], Loss: 0.5891
Epoch [690/1000], Loss: 0.5254
Epoch [700/1000], Loss: 0.4983
Epoch [710/1000], Loss: 0.4166
Epoch [720/1000], Loss: 0.5242
Epoch [730/1000], Loss: 0.4065
Epoch [740/1000], Loss: 0.6223
Epoch [750/1000], Loss: 0.6136
Epoch [760/1000], Loss: 0.5263
Epoch [770/1000], Loss: 0.4307
Epoch [780/1000], Loss: 0.4745
Epoch [790/1000], Loss: 0.6394
Epoch [800/1000], Loss: 0.5219
Epoch [810/1000], Loss: 0.2932
Epoch [820/1000], Loss: 0.6071
Epoch [830/1000], Loss: 0.5987
Epoch [840/1000], Loss: 0.4504
Epoch [850/1000], Loss: 0.5209
Epoch [860/1000], Loss: 0.4173
Epoch [870/1000], Loss: 0.5004
Epoch [880/1000], Loss: 0.7521
Epoch [890/1000], Loss: 0.5490
Epoch [900/1000], Loss: 0.4022
Epoch [910/1000], Loss: 0.5318
Epoch [920/1000], Loss: 0.4129
Epoch [930/1000], Loss: 0.6227
Epoch [940/1000], Loss: 0.5948
Epoch [950/1000], Loss: 0.6896
Epoch [960/1000], Loss: 0.5198
Epoch [970/1000], Loss: 0.5326
Epoch [980/1000], Loss: 0.5125
Epoch [990/1000], Loss: 0.8017
Epoch [1000/1000], Loss: 0.4466
# Generate predictions.
preds = model(inputs)
preds
tensor([[ 57.1696, 70.3309],
[ 82.2062, 100.6933],
[118.6484, 132.8939],
[ 21.0618, 36.9971],
[101.9490, 119.2129],
[ 57.1696, 70.3309],
[ 82.2062, 100.6933],
[118.6484, 132.8939],
[ 21.0618, 36.9971],
[101.9490, 119.2129],
[ 57.1696, 70.3309],
[ 82.2062, 100.6933],
[118.6484, 132.8939],
[ 21.0618, 36.9971],
[101.9490, 119.2129]], grad_fn=<AddmmBackward>)
# Compare with targets
targets
tensor([[ 56., 70.],
[ 81., 101.],
[119., 133.],
[ 22., 37.],
[103., 119.],
[ 56., 70.],
[ 81., 101.],
[119., 133.],
[ 22., 37.],
[103., 119.],
[ 56., 70.],
[ 81., 101.],
[119., 133.],
[ 22., 37.],
[103., 119.]])
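Finally, the trained model can be applied to a reading it has not seen; the numbers below are made up purely for illustration and were not part of the original data.
# Hypothetical conditions: temperature 75, rainfall 63, humidity 44.
new_input = torch.tensor([[75., 63., 44.]])
model(new_input)   # predicted apple and orange yields for this input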
import jovian
jovian.commit()
[jovian] Saving notebook..
[jovian] Updating notebook "35272fe05c0c46e484280c69962e8c98" on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/walid-gomaa/linear-regression