During training, Batch Normalization operates on each mini-batch, but at test time inference is often done on a single image, so there is no mini-batch to compute statistics from. Since the network parameters are fixed once training finishes, the per-batch mean and variance no longer change, and the running mean and variance accumulated over the training batches are used directly. Batch Normalization therefore behaves differently at training time and at test time.
Dropout differs as well: during training each hidden unit is kept with probability p before the activation is applied, while at test time every unit is active and each hidden unit's output is multiplied by p to compensate.
This is exactly what model.train() and model.eval() control: the different training/evaluation modes of Batch Normalization and Dropout. When eval() is called, PyTorch fixes the BN and Dropout layers so that they use the values learned during training instead of batch statistics. Otherwise, if the test batch_size is too small, the BN layers can easily cause severe artifacts, e.g. strong color distortion in generated images.
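As a minimal sketch (using a throwaway module with one BatchNorm1d and one Dropout layer, not the model defined later), switching between train() and eval() is all it takes to toggle these behaviors:
import torch
from torch import nn

demo = nn.Sequential(nn.Linear(4, 4), nn.BatchNorm1d(4), nn.Dropout(p=0.5))
x = torch.randn(8, 4)

demo.train()            # BN uses batch statistics, Dropout randomly zeroes units
out_train = demo(x)

demo.eval()             # BN uses running statistics, Dropout is a no-op
with torch.no_grad():
    out_eval = demo(x)  # deterministic given fixed weights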
import os
import torch
import random
import numpy as np
import torchvision
import torchvision.transforms as transforms
from torch import nn
import torch.nn.functional as F
class TorchConfig(object):
    use_cuda = torch.cuda.is_available()

    def __init__(self, seed=2019):
        self.seed = seed
        self.device = torch.device('cuda' if self.use_cuda else 'cpu')  # Device configuration
        self.set_seed()
        # torch.set_default_tensor_type(torch.DoubleTensor)

    def set_seed(self):
        os.environ['PYTHONHASHSEED'] = str(self.seed)
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        if self.use_cuda:
            print('GPU: %s' % torch.cuda.get_device_name(0))
            torch.cuda.manual_seed(self.seed)
            torch.backends.cudnn.deterministic = True
tc = TorchConfig()
# Hyper-parameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001
device = tc.device
GPU: Tesla P4
# Note: this transform is written for 3-channel images; MNIST is single-channel,
# so the equivalent would be Normalize((0.5,), (0.5,)). It is not actually passed
# to the datasets below, which only apply ToTensor().
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.MNIST(root='./data',
                                            train=True,
                                            transform=transforms.ToTensor(),
                                            download=True)  # download if not already present
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)
# Data loader (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
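To sanity-check the pipeline, one batch can be pulled from the loader; the shapes below assume the batch_size of 100 set above:
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([100, 1, 28, 28]) -- flattened to (100, 784) before the FC net
print(labels.shape)  # torch.Size([100])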
# Fully connected neural network with one hidden layer
class NN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x
model = NN(input_size, hidden_size, num_classes).to(device)
model
NN(
(fc1): Linear(in_features=784, out_features=500, bias=True)
(fc2): Linear(in_features=500, out_features=10, bias=True)
)
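As a quick check (not part of the original run), the parameter count of this two-layer network can be verified directly; 784*500 + 500 + 500*10 + 10 = 397,510 parameters:
n_params = sum(p.numel() for p in model.parameters())
print(n_params)  # 397510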
def train_nn(model, train_loader, num_epochs, loss_function=nn.CrossEntropyLoss(), lr=0.001):
    # Loss and optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr)
    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (X, y) in enumerate(train_loader, 1):
            # Move tensors to the configured device
            X = X.reshape(-1, 28*28).to(device)  # flatten images; adapt this line for other architectures
            y = y.to(device)
            # Forward pass
            loss = loss_function(model(X), y)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch+1, num_epochs, i, total_step, loss.item()))
train_nn(model, train_loader, num_epochs)
Epoch [1/5], Step [100/600], Loss: 0.3390
Epoch [1/5], Step [200/600], Loss: 0.2043
Epoch [1/5], Step [300/600], Loss: 0.1815
Epoch [1/5], Step [400/600], Loss: 0.1459
Epoch [1/5], Step [500/600], Loss: 0.1740
Epoch [1/5], Step [600/600], Loss: 0.1345
Epoch [2/5], Step [100/600], Loss: 0.1230
Epoch [2/5], Step [200/600], Loss: 0.1135
Epoch [2/5], Step [300/600], Loss: 0.2023
Epoch [2/5], Step [400/600], Loss: 0.1820
Epoch [2/5], Step [500/600], Loss: 0.1531
Epoch [2/5], Step [600/600], Loss: 0.0898
Epoch [3/5], Step [100/600], Loss: 0.1526
Epoch [3/5], Step [200/600], Loss: 0.1949
Epoch [3/5], Step [300/600], Loss: 0.0775
Epoch [3/5], Step [400/600], Loss: 0.0645
Epoch [3/5], Step [500/600], Loss: 0.0601
Epoch [3/5], Step [600/600], Loss: 0.0226
Epoch [4/5], Step [100/600], Loss: 0.0476
Epoch [4/5], Step [200/600], Loss: 0.0438
Epoch [4/5], Step [300/600], Loss: 0.1033
Epoch [4/5], Step [400/600], Loss: 0.0399
Epoch [4/5], Step [500/600], Loss: 0.0497
Epoch [4/5], Step [600/600], Loss: 0.0488
Epoch [5/5], Step [100/600], Loss: 0.0310
Epoch [5/5], Step [200/600], Loss: 0.0234
Epoch [5/5], Step [300/600], Loss: 0.0093
Epoch [5/5], Step [400/600], Loss: 0.0477
Epoch [5/5], Step [500/600], Loss: 0.0391
Epoch [5/5], Step [600/600], Loss: 0.0615
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
model.eval()  # switch BN/Dropout layers (none in this model, but good practice) to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()  # https://blog.csdn.net/dss_dssssd/article/details/83818181
    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
Accuracy of the network on the 10000 test images: 97.93 %
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
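To restore this checkpoint later (a minimal sketch; 'model.ckpt' is the file saved above), rebuild the architecture, load the state_dict, and switch to eval mode before inference:
restored = NN(input_size, hidden_size, num_classes).to(device)
restored.load_state_dict(torch.load('model.ckpt', map_location=device))
restored.eval()  # fix BN/Dropout behavior for inference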
from yuan.utils.jupyter import commit
commit(nb_filename='./Pytorch.ipynb')
[jovian] Saving notebook..
[jovian] Updating notebook "55ea492741cd4b22b6035d68c3a81bf0" on https://jvn.io
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jvn.io/Jie-Yuan/55ea492741cd4b22b6035d68c3a81bf0
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input channels: 1; output channels: 6; 5x5 convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)  # output size: (28 - 5 + 2*0) / 1 + 1 = 24
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 4 * 4, 120)  # 16 channels x 4x4 feature map after the two conv/pool stages
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # 2x2 Max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, torch.numel(x) // x.shape[0])  # flatten; equivalent to a num_flat_features helper
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
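A quick shape check (a sketch, not part of the original notebook) confirms where 16 * 4 * 4 comes from for 28x28 MNIST inputs: conv1 gives 24x24, pooling 12x12, conv2 8x8, pooling 4x4:
net = Net()
dummy = torch.randn(1, 1, 28, 28)  # a single fake MNIST image
print(net(dummy).shape)            # torch.Size([1, 10])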
def train_nn(model, train_loader, num_epochs, loss_function=nn.CrossEntropyLoss(), optimizer=None):
    # Loss and optimizer
    if optimizer is None:
        lr = 0.001
        optimizer = torch.optim.Adam(model.parameters(), lr)
    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (X, y) in enumerate(train_loader, 1):
            # Move tensors to the configured device
            X = X.to(device)  # images keep their (N, 1, 28, 28) shape for the conv net
            y = y.to(device)
            # Forward pass
            loss = loss_function(model(X), y)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch+1, num_epochs, i, total_step, loss.item()))
model = Net().to(device)
train_nn(model, train_loader, num_epochs)
Epoch [1/5], Step [100/600], Loss: 0.3805
Epoch [1/5], Step [200/600], Loss: 0.3512
Epoch [1/5], Step [300/600], Loss: 0.3613
Epoch [1/5], Step [400/600], Loss: 0.2640
Epoch [1/5], Step [500/600], Loss: 0.1686
Epoch [1/5], Step [600/600], Loss: 0.0711
Epoch [2/5], Step [100/600], Loss: 0.0882
Epoch [2/5], Step [200/600], Loss: 0.0367
Epoch [2/5], Step [300/600], Loss: 0.0504
Epoch [2/5], Step [400/600], Loss: 0.1319
Epoch [2/5], Step [500/600], Loss: 0.0253
Epoch [2/5], Step [600/600], Loss: 0.0452
Epoch [3/5], Step [100/600], Loss: 0.2070
Epoch [3/5], Step [200/600], Loss: 0.0535
Epoch [3/5], Step [300/600], Loss: 0.0242
Epoch [3/5], Step [400/600], Loss: 0.0109
Epoch [3/5], Step [500/600], Loss: 0.0333
Epoch [3/5], Step [600/600], Loss: 0.0320
Epoch [4/5], Step [100/600], Loss: 0.1050
Epoch [4/5], Step [200/600], Loss: 0.0560
Epoch [4/5], Step [300/600], Loss: 0.0420
Epoch [4/5], Step [400/600], Loss: 0.0181
Epoch [4/5], Step [500/600], Loss: 0.0659
Epoch [4/5], Step [600/600], Loss: 0.0075
Epoch [5/5], Step [100/600], Loss: 0.0048
Epoch [5/5], Step [200/600], Loss: 0.0310
Epoch [5/5], Step [300/600], Loss: 0.0528
Epoch [5/5], Step [400/600], Loss: 0.0116
Epoch [5/5], Step [500/600], Loss: 0.0211
Epoch [5/5], Step [600/600], Loss: 0.0848
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
Model's state_dict:
conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([16, 6, 5, 5])
conv2.bias torch.Size([16])
fc1.weight torch.Size([120, 256])
fc1.bias torch.Size([120])
fc2.weight torch.Size([84, 120])
fc2.bias torch.Size([84])
fc3.weight torch.Size([10, 84])
fc3.bias torch.Size([10])
from torch.utils.data import Dataset, DataLoader, TensorDataset
word_to_ix = {"hello": 0, "world": 1}
embeds = nn.Embedding(2, 5) # 2 words in vocab, 5 dimensional embeddings
lookup_tensor = torch.tensor([word_to_ix["hello"]], dtype=torch.long)
hello_embed = embeds(lookup_tensor)
print(hello_embed)
tensor([[-0.7765, -0.1868, 1.7467, -0.1098, -0.9604]],
grad_fn=<EmbeddingBackward>)
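The same embedding table can look up several indices at once (a small illustrative extension of the cell above); each row of the result is one word's 5-dimensional vector:
sentence = torch.tensor([word_to_ix["hello"], word_to_ix["world"]], dtype=torch.long)
print(embeds(sentence).shape)  # torch.Size([2, 5])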