Jovian
⭐️
Sign In

문제 1

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
%matplotlib inline
In [6]:
is_cuda = False
if torch.cuda.is_available():
    is_cuda = True
In [7]:
# data으로부터 dataset을 만들 때 적용할 transformation 들을 나열하고, compose 해서
# 구성해놓는다.
#
# transforms.Normalize의 인자는 mean과 std이다.
transformation = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))]
)
In [8]:
train_dataset = datasets.MNIST('data/', train=True, transform=transformation, download=True)
test_dataset = datasets.MNIST('data/', train=False, transform=transformation, download=True)
In [9]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
In [10]:
sample_data = next(iter(train_loader))

(1)

데이터로더에서 샘플링된 batch의 2번 인덱스 이미지를 시각화한다.

In [11]:
def plot_img(image):
    image = image.numpy()[0]
    mean = 0.1307
    std = 0.3081
    image = ((mean * image) + std)
    plt.imshow(image, cmap='gray')

plot_img(sample_data[0][2])
Notebook Image

(2)

데이터로더에서 샘플링된 batch의 1번 인덱스 이미지를 시각화한다.

In [12]:
plot_img(sample_data[0][1])
Notebook Image
In [13]:
# 2층의 convolution layer와 droptout, 2층의 fully connected layer로 구성된 network를 만든다.
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
    
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        #x = F.dropout(x, p=0.1, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = Net()
In [14]:
if is_cuda:
    model.cuda()
In [15]:
# SGD를 사용하여 최적화 할 것이다.
optimizer = optim.SGD(model.parameters(), lr=0.01)

data, target = next(iter(train_loader))
In [16]:
output = model(data.cuda())

(3)

출력층 노드의 개수가 10개이고 배치 사이즈가 32라 output의 size는 [32, 10]이 된다.

In [17]:
output.size()
Out[17]:
torch.Size([32, 10])

(4)

타겟은 데이터 포인트 하나당 하나의 int64 label만 존재한다. 반면에 네트워크의 출력층에서는 10개의 각 레이블에 대한 score가 나왔다.

In [18]:
target.size()
Out[18]:
torch.Size([32])
In [19]:
target.dtype
Out[19]:
torch.int64

(5)

In [20]:
# 모델을 훈련 또는 검증하기 위한 코드이다.
# 20번의 epoch를 돌리며
# running_loss와 running_correct를 계속 업데이트 하고 학습이 끝나면 출력한다.
In [21]:
def fit(epoch, model, data_loader, phase='training', volatile=False):
    if phase == "training":
        model.train()
    if phase == "validation":
        model.eval()
        volatile = True
    running_loss = 0.0
    running_correct = 0
    for batch_idx, (data, target) in enumerate(data_loader):
        if is_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile), Variable(target)
        if phase == "training":
            optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        
        running_loss += F.nll_loss(output, target, size_average=False).item()
        preds = output.data.max(dim=1, keepdim=True)[1]
        running_correct += preds.eq(target.data.view_as(preds)).cpu().sum().item()
        if phase == "training":
            loss.backward()
            optimizer.step()
            
    loss = running_loss/len(data_loader.dataset)
    accuracy = 100. * running_correct/len(data_loader.dataset)
    print(f"{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}")
    return loss, accuracy

train_losses, train_accuracy = [], []
val_losses, val_accuracy = [], []
for epoch in range(1, 20):
    epoch_loss, epoch_accuracy = fit(epoch, model, train_loader, phase='training')
    val_epoch_loss, val_epoch_accuracy = fit(epoch, model, test_loader, phase='validation')
    train_losses.append(epoch_loss)
    train_accuracy.append(epoch_accuracy)
    val_losses.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
/opt/conda/lib/python3.6/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead. warnings.warn(warning.format(ret))
training loss is 0.56 and training accuracy is 49643/60000 82.74 validation loss is 0.15 and validation accuracy is 9534/10000 95.34 training loss is 0.2 and training accuracy is 56457/60000 94.09 validation loss is 0.095 and validation accuracy is 9698/10000 96.98 training loss is 0.16 and training accuracy is 57263/60000 95.44 validation loss is 0.075 and validation accuracy is 9764/10000 97.64 training loss is 0.13 and training accuracy is 57687/60000 96.14 validation loss is 0.066 and validation accuracy is 9782/10000 97.82 training loss is 0.12 and training accuracy is 57918/60000 96.53 validation loss is 0.058 and validation accuracy is 9802/10000 98.02 training loss is 0.11 and training accuracy is 58080/60000 96.8 validation loss is 0.054 and validation accuracy is 9816/10000 98.16 training loss is 0.098 and training accuracy is 58296/60000 97.16 validation loss is 0.052 and validation accuracy is 9824/10000 98.24 training loss is 0.089 and training accuracy is 58430/60000 97.38 validation loss is 0.048 and validation accuracy is 9841/10000 98.41 training loss is 0.086 and training accuracy is 58509/60000 97.52 validation loss is 0.043 and validation accuracy is 9850/10000 98.5 training loss is 0.081 and training accuracy is 58491/60000 97.48 validation loss is 0.039 and validation accuracy is 9865/10000 98.65 training loss is 0.079 and training accuracy is 58623/60000 97.7 validation loss is 0.038 and validation accuracy is 9881/10000 98.81 training loss is 0.073 and training accuracy is 58677/60000 97.8 validation loss is 0.037 and validation accuracy is 9874/10000 98.74 training loss is 0.068 and training accuracy is 58793/60000 97.99 validation loss is 0.035 and validation accuracy is 9887/10000 98.87 training loss is 0.069 and training accuracy is 58795/60000 97.99 validation loss is 0.034 and validation accuracy is 9884/10000 98.84 training loss is 0.065 and training accuracy is 58815/60000 98.03 validation loss is 0.034 and validation accuracy is 9887/10000 98.87 training loss is 0.062 and training accuracy is 58911/60000 98.19 validation loss is 0.032 and validation accuracy is 9896/10000 98.96 training loss is 0.061 and training accuracy is 58894/60000 98.16 validation loss is 0.032 and validation accuracy is 9895/10000 98.95 training loss is 0.059 and training accuracy is 58961/60000 98.27 validation loss is 0.032 and validation accuracy is 9890/10000 98.9 training loss is 0.057 and training accuracy is 58996/60000 98.33 validation loss is 0.029 and validation accuracy is 9904/10000 99.04

(6)

훈련과 검증 과정에서 저장한 loss를 시각화한다.

In [22]:
plt.plot(range(1, len(train_losses)+1), train_losses, 'bo', label = 'training loss')
plt.plot(range(1, len(val_losses)+1), val_losses, 'r', label = 'validation loss')
plt.legend();
Notebook Image

(7)

훈련과 검증 과정에서 저장한 정확도를 시각화한다.

In [23]:
plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, 'bo', label = 'train accuracy')
plt.plot(range(1, len(val_losses) + 1), val_accuracy, 'r', label = 'val accuracy')
plt.legend();
Notebook Image

문제 2

In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
In [25]:
# AlexNet을 구성한다.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # 배치의 axis는 제외하고 각 데이터 포인트에 대해서 flatten 시킨다.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

(1)

Neural Network 를 구성하는 신경망의 구성요소들을 출력한다.

In [27]:
net = Net()
print(net)
Net( (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1)) (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1)) (fc1): Linear(in_features=400, out_features=120, bias=True) (fc2): Linear(in_features=120, out_features=84, bias=True) (fc3): Linear(in_features=84, out_features=10, bias=True) )

(2)

만약 인풋의 채널이 1개이고 width와 height는 32, batch_size 는 1이라면 이 네트워크(AlexNet)은 먼저 크기가 (5,5)인 kernel 6개를 한칸 씩 건너뛰며 적용한다. 그런 뒤에 relu를 거치고 크기가 2인 kernel을 사용하는 max_pooling을 진행한다. 그 다음 크기가 (5, 5)인 kernel 16개를 가진 다른 convolutional layer를 한칸 씩 건너뛰며 적용한다. 마찬가지로 relu와 크기가 2인 kernel을 사용하는 max_pooling을 적용한다. 그런 뒤 data를 flatten하고 활성함수로 relu를 사용하는 두 층의 hidden layer를 거친다. 최종적으로 출력층의 결과는 dimension이 10인 tensor가 된다.

(3)

Alex Net은 두개의 conv layer와 세개의 dense layer로 구성되어있다. 각 layer의 bias tensor까지 포함해서 네트워크의 parameter tensor의 개수는 총 10이 된다.

In [28]:
for param in net.parameters():
    print(param.size())
torch.Size([6, 1, 5, 5]) torch.Size([6]) torch.Size([16, 6, 5, 5]) torch.Size([16]) torch.Size([120, 400]) torch.Size([120]) torch.Size([84, 120]) torch.Size([84]) torch.Size([10, 84]) torch.Size([10])
In [29]:
params = list(net.parameters())
print(len(params))
print(params[0].size())
10 torch.Size([6, 1, 5, 5])

(4)

batch size가 1인 32321의 이미지를 normal distribution으로부터 생성. network를 통과시켜서 output을 출력한다.

In [30]:
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)
tensor([[ 0.0928, 0.0445, 0.0860, -0.0474, -0.0128, -0.0149, -0.0515, -0.0337, -0.0521, 0.0105]], grad_fn=<AddmmBackward>)
In [31]:
net.zero_grad()
out.backward(torch.randn(1, 10))
In [32]:
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss()

loss = criterion(output, target)

(5)

MSE(Mean squared error)로 loss를 계산한다.

In [33]:
print(loss)
tensor(1.5992, grad_fn=<MseLossBackward>)
In [34]:
net.zero_grad()

(6)

In [35]:
print("conv1.bias.grad before backward")
print(net.conv1.bias.grad)
conv1.bias.grad before backward tensor([0., 0., 0., 0., 0., 0.])
In [36]:
loss.backward()

(7)

In [37]:
print("conv1.bias.grad after backward")
print(net.conv1.bias.grad)
conv1.bias.grad after backward tensor([-0.0138, 0.0005, 0.0075, 0.0063, 0.0127, -0.0109])
In [38]:
# grad를 이용해서 직접 업데이트한다.
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
In [39]:
import torch.optim as optim
In [40]:
# optimizer를 이용해서 업데이트하기 위해 SGD optimizer를 initalize한다.
optimizer = optim.SGD(net.parameters(), lr=0.01)
In [41]:
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()

Problem 3

(1)

In [42]:
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10('./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10('./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
Files already downloaded and verified Files already downloaded and verified

(2)

In [43]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    
dataiter = iter(trainloader)
images, labels = dataiter.next()

imshow(torchvision.utils.make_grid(images))

# 배치 하나의 image와 label을 출력한다.
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
truck truck frog truck
Notebook Image
In [44]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
In [45]:
net = Net()

(3)

훈련 데이터 셋 전체에 대해 batch size 4로 SGD를 2 epoch 진행한다. loss는 전반적으로 감소하고 있음을 확인할 수 있다. 훈련이 진행되고 있다고 단정짓기 쉽지만 train loss만 가지고서는 판단할 수 없다. 훈련을 시킨 데이터셋이 아닌 다른 데이터 셋에서 성능을 검증하는 것이 필요하다. 또한 실제 우리에게 중요한 metric인 accuracy를 측정하지 않았기 때문에 훈련셋에서도 어느정도 성능이 향상되었는지 알기 힘들다.

In [47]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 1000 == 999:
            print("[%d, %5d] loss: %.3f" %
                  (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0

print("Finished Training")
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-47-472e7ff86594> in <module> 12 outputs = net(inputs) 13 loss = criterion(outputs, labels) ---> 14 loss.backward() 15 optimizer.step() 16 /opt/conda/lib/python3.6/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph) 116 products. Defaults to ``False``. 117 """ --> 118 torch.autograd.backward(self, gradient, retain_graph, create_graph) 119 120 def register_hook(self, hook): /opt/conda/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables) 91 Variable._execution_engine.run_backward( 92 tensors, grad_tensors, retain_graph, create_graph, ---> 93 allow_unreachable=True) # allow_unreachable flag 94 95 KeyboardInterrupt:
In [ ]:
dataiter = iter(testloader)
images, labels = next(dataiter)

(4)

In [ ]:
imshow(torchvision.utils.make_grid(images))
print("GroundTruth: ", "".join("%7s" % classes[labels[j]] for j in range(4)))

(5)

In [ ]:
outputs = net(images)
_, predicted = torch.max(outputs, 1)
print("Predicted: ", "".join("%7s" % classes[predicted[j]] for j in range(4)))
In [ ]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

(6)

test set에 대해 evaluation을 진행했다. 9% 의 처참한 정확도가 나온걸로 봐서 Overfitting이 일어나고 있음을 확인할 수 있다.

In [ ]:
print("Accuracy of the network on the 10000 test images: %d %%" %(100 * correct/total))

(7)

test set에 대한 모델의 각 클래스 정확도가 출력되었다. 전체적으로 매우 낮은 것을 볼 수 있다.

In [ ]:
class_correct = [0. for i in range(10)]
class_total = [0. for i in range(10)]
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted= torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
            
for i in range(10):
    print("Accuracy of %5s: %2d %%" % (
    classes[i], 100 * class_correct[i] / class_total[i]
    ))

Problem 4

(W-F+2P)/S + 1

(a)

출력의 크기는 (32 - 5 + 22) / 1 + 1 = 32 이고 커널이 10개 이므로 3232*10이다. 매개변수의 수는 필터 당 (5 * 5 * 3 + 1) = 76 이고 커널이 10개 이므로 760이다.

(b)

출력의 크기는 (32 - 3 + 21) / 1 + 1 = 32 이고 커널이 64개 이므로 3232*64이다. 매개변수의 수는 필터 당 (3 * 3 * 3 + 1) = 28 이고 커널이 64개 이므로 1792이다. 출력의 depth가 6.4배 늘었으나 매개변수의 수는 약 2.3배 정도밖에 늘지 않은 것을 확인할 수 있다.

Problem 5

In [ ]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
In [ ]:
transform = transforms.Compose([transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(),
                                transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10('./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10('./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
In [ ]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, 3)
        self.bn2 = nn.BatchNorm2d(32)
        
        self.pool = nn.MaxPool2d(2, 2)
        
        self.conv3 = nn.Conv2d(32, 32, 3)
        self.bn3 = nn.BatchNorm2d(32)
        self.conv4 = nn.Conv2d(32, 32, 3)
        self.bn4 = nn.BatchNorm2d(32)
        
        self.fc = nn.Linear(5 * 5 * 32, 10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.bn2(self.conv2(self.bn1(self.conv1(x))))))
        x = self.pool(F.relu(self.bn4(self.conv4(self.bn3(self.conv3(x))))))
        x = x.view(-1, 5 * 5 * 32)
        x = self.fc(x)
        return x
In [ ]:
net = Net()
if is_cuda:
    net.cuda()
optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(.9, .999), weight_decay=1e-5)
m = nn.LogSoftmax(dim=1)
outputs = net(inputs.cuda())
loss = F.nll_loss(m(outputs), labels.cuda())
In [ ]:

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = F.nll_loss(m(outputs), labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 1000 == 999:
            print("[%d, %5d] loss: %.3f" %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")
In [ ]:
correct = 0
total = 0
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
In [ ]:
print("Accuracy of the network on the 10000 train images: %d %%" %(100 * correct/total))
In [ ]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
In [ ]:
print("Accuracy of the network on the 10000 test images: %d %%" %(100 * correct/total))

네트워크의 파라미터 개수를 줄이기 위해 conv layer의 커널사이즈를 5에서 3으로 줄였고 dense layer를 3층에서 1층으로 줄였다. 또한 local optima 같은 곳에서 잘 빠져나오기 위해 Adam을 적용했고, data augmentation을 통한 데이터 증가 효과를 얻었고 , L2 regularlization으로 가중치를 골고루 쓰도록했다. MSE가 아닌 cross entropy가 아닌 log likelihood를 쓰도록 했다. 또, batch normalization을 적용하여 dying relu나 vanishing gradient 문제를 예방하려 한다.

여전히 epoch가 작아서 훈련 집합과 테스트 집합 전부에서 성능이 좋지 못하다.

In [ ]:
class_correct = [0. for i in range(10)]
class_total = [0. for i in range(10)]
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        outputs = net(images)
        _, predicted= torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
            
for i in range(10):
    print("Accuracy of %5s: %2d %%" % (
    classes[i], 100 * class_correct[i] / class_total[i]
    ))

Problem 6

In [ ]:
torch.Tensor([[.4, 2., .001, .32]]).softmax(dim=1)
In [ ]:
def softmax(x):
    y = x.exp()
    y_sum = y.sum()
    y = y / y_sum
    return y
In [ ]:
softmax(torch.Tensor([[.4, 2., .001, .32]]))

softmax의 수식을 함수로 구현하여 계산했다. Tensor의 method를 사용해서 계산한 결과와 직접 구현한 함수를 사용한 결과가 별 차이 없는 것을 확인할 수 있다.

Problem 7

In [ ]:
y_pred = torch.Tensor([.001, .9, .001, .098])
y = torch.Tensor([0, 0, 0, 1])
In [ ]:
def mean_squared_error(y_pred, y):
    loss = ((y_pred - y) ** 2).mean()
    return loss

def cross_entropy_error(y_pred, y):
    return -(y * y_pred.log2()).sum()

def log_likelihood(y_pred, y):
    return -y_pred[y.argmax()].log2()
In [ ]:
print(mean_squared_error(y_pred, y).item())
print(cross_entropy_error(y_pred, y).item())
print(F.cross_entropy(y_pred.unsqueeze(0), y.argmax().unsqueeze(0), reduction="")) 
print(log_likelihood(y_pred, y).item())