import numpy as np
import h5py
import matplotlib.pyplot as plt
import math
def load_data():
train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels
test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
classes = np.array(test_dataset["list_classes"][:]) # the list of classes
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def sigmoid(Z):
A = 1/ ( 1 + np.exp(-Z))
cache = Z
return A, cache
def relu(Z):
A = np.maximum(0,Z)
assert(A.shape == Z.shape)
cache = Z
return A, cache
def initialize_parameters_deep(layer_dims):
np.random.seed(3)
parameters = {}
L = len(layer_dims)
for l in range(1,L):
parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.001
parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
return parameters
def initialize_parameters_deep_w(layer_dims):
np.random.seed(3)
parameters = {}
L = len(layer_dims)
for l in range(1,L-1):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.1
parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
parameters['W' + str(L-1)] = np.random.randn(layer_dims[L-1], layer_dims[L-2]) * np.sqrt(2/layer_dims[L-2])
parameters['b' + str(L-1)] = np.zeros((layer_dims[L-1],1))
return parameters
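# A minimal, illustrative shape check for the initializers above (the layer sizes here are
# hypothetical; 12288 matches the flattened 64x64x3 images loaded further down).
demo_layer_dims = [12288, 20, 7, 1]
demo_params = initialize_parameters_deep(demo_layer_dims)
for l in range(1, len(demo_layer_dims)):
    print("W" + str(l), demo_params['W' + str(l)].shape, "b" + str(l), demo_params['b' + str(l)].shape)
# Expected: W1 (20, 12288), W2 (7, 20), W3 (1, 7), each b with shape (n_l, 1)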
def linear_forward(A, W, b):
Z = np.dot(W, A) + b
cache = [A, W, b]
return Z, cache
def linear_activation_forward(A_prev, W, b, activation):
linear_cache = tuple()
activation_cache = tuple()
if activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev, W, b) # Z = W.A_prev + b, linear_cache = (A_prev, W, b)
A, activation_cache = sigmoid(Z) # A = sigmoid(Z) , activation_cache = Z
elif activation == 'relu':
        Z, linear_cache = linear_forward(A_prev, W, b) # Z = W.A_prev + b, linear_cache = (A_prev, W, b)
A, activation_cache = relu(Z)
cache = [linear_cache, activation_cache]
return A, cache
def L_model_forward(X, para):
Caches = []
A = X
L = len(para) // 2
for l in range(1,L):
A_prev = A
A, cache = linear_activation_forward(A_prev, para['W'+ str(l)], para['b'+ str(l)], activation = 'relu')
Caches.append(cache)
AL, cache = linear_activation_forward(A, para['W' + str(L)], para['b' + str(L)], activation = 'sigmoid')
Caches.append(cache)
return AL, Caches
def L_model_forward_drop(X, para, dp):
Caches = []
A = X
L = len(para) // 2
for l in range(1,L):
A_prev = A
A, cache = linear_activation_forward(A_prev, para['W'+ str(l)], para['b'+ str(l)], activation = 'relu')
        d = np.random.rand(A.shape[0], A.shape[1])   # random mask, same shape as A
        d = d < dp[l-1]                              # keep each unit with probability dp[l-1]
        A = np.multiply(A, d)                        # shut down the dropped units
        A = A / dp[l-1]                              # inverted dropout: rescale to keep the expected value
Caches.append(cache)
AL, cache = linear_activation_forward(A, para['W' + str(L)], para['b' + str(L)], activation = 'sigmoid')
    # dropout on the output layer (mirrors the original structure, but the mask must be
    # applied to AL, not the stale A; note the masks are not saved in Caches, so a matching
    # dropout-aware backward pass would need them as well)
    d = np.random.rand(AL.shape[0], AL.shape[1])
    d = d < dp[L-1]
    AL = np.multiply(AL, d)
    AL = AL / dp[L-1]
Caches.append(cache)
return AL, Caches
def compute_cost(AL, Y):
m = Y.shape[1]
cost = (-1/m) * np.sum( np.multiply(Y, np.log(AL)) + np.multiply((1-Y), np.log(1-AL)))
cost = np.squeeze(cost)
return cost
def L2_reg_cost(para, Y, lam):
    m = Y.shape[1]
    L = len(para) // 2
    L2_sum = 0
    for l in range(1, L+1):
        L2_sum += np.sum(np.square(para['W' + str(l)]))
    # L2 regularization term: (lam / (2*m)) * sum of squared weights over all layers
    L2_cost = (lam / (2 * m)) * L2_sum
    return L2_cost
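# The L2 term above is intended to be added on top of the cross-entropy cost; a hedged sketch
# of that combination (compute_cost_with_reg is a hypothetical helper, consistent with the
# (lam/m)*W term that linear_backward_reg adds to dW further down):
def compute_cost_with_reg(AL, Y, para, lam):
    # total cost = cross-entropy + (lambda / (2*m)) * sum over layers of ||W_l||^2
    return compute_cost(AL, Y) + L2_reg_cost(para, Y, lam)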
def relu_backward(dA, cache):
Z = cache
dZ = np.array(dA, copy=True) # just converting dz to a correct object.
# When z <= 0, you should set dz to 0 as well.
dZ[Z <= 0] = 0
assert (dZ.shape == Z.shape)
return dZ
def sigmoid_backward(dA, cache):
Z = cache
s = 1/(1+np.exp(-Z))
dZ = dA * s * (1-s)
assert (dZ.shape == Z.shape)
return dZ
def linear_backward(dZ, a_prev_wb):
A_prev, W, b = a_prev_wb
m = A_prev.shape[1]
dW = (1/m) * np.dot(dZ, A_prev.T)
db = (1/m) * np.sum(dZ, axis =1 , keepdims = True)
dA_prev = np.dot(W.T, dZ)
assert (dA_prev.shape == A_prev.shape)
assert (dW.shape == W.shape)
assert (db.shape == b.shape)
return dA_prev, dW, db
def linear_backward_reg(dZ, a_prev_wb, lam):
A_prev, W, b = a_prev_wb
m = A_prev.shape[1]
dW = (1/m) * np.dot(dZ, A_prev.T) + (lam/m) * W
db = (1/m) * np.sum(dZ, axis =1 , keepdims = True)
dA_prev = np.dot(W.T, dZ)
assert (dA_prev.shape == A_prev.shape)
assert (dW.shape == W.shape)
assert (db.shape == b.shape)
return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
linear_cache, Z = cache # linear_cache = a,w,b
if activation == 'relu':
dZ = relu_backward(dA, Z)
dA_prev, dW, db = linear_backward(dZ, linear_cache)
elif activation == 'sigmoid':
dZ = sigmoid_backward(dA, Z)
dA_prev, dW, db = linear_backward(dZ, linear_cache)
return dA_prev, dW, db
def linear_activation_backward_reg(dA, cache,lam, activation):
linear_cache, Z = cache # linear_cache = a,w,b
if activation == 'relu':
dZ = relu_backward(dA, Z)
dA_prev, dW, db = linear_backward_reg(dZ, linear_cache,lam)
elif activation == 'sigmoid':
dZ = sigmoid_backward(dA, Z)
dA_prev, dW, db = linear_backward_reg(dZ, linear_cache,lam)
return dA_prev, dW, db
def L_model_backward(AL, Y, awb_z):
grads = {}
L = len(awb_z) # the number of layers
m = AL.shape[1]
Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL
# Initializing the backpropagation
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
grads["dA"+ str(L-1)], grads["dW"+str(L)], grads["db"+ str(L)] = linear_activation_backward(dAL, awb_z[L-1], activation='sigmoid')
for l in range(L-1,0,-1):
grads["dA"+ str(l-1)], grads["dW"+str(l)], grads["db"+ str(l)] = linear_activation_backward(grads["dA"+str(l)], awb_z[l-1], activation='relu')
return grads
def L_model_backward_reg(AL, Y, awb_z, lam):
grads = {}
L = len(awb_z) # the number of layers
m = AL.shape[1]
Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL
# Initializing the backpropagation
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
grads["dA"+ str(L-1)], grads["dW"+str(L)], grads["db"+ str(L)] = linear_activation_backward_reg(dAL, awb_z[L-1],lam, activation='sigmoid')
for l in range(L-1,0,-1):
grads["dA"+ str(l-1)], grads["dW"+str(l)], grads["db"+ str(l)] = linear_activation_backward_reg(grads["dA"+str(l)], awb_z[l-1],lam, activation='relu')
return grads
def update_parameters(parameters, grads, learning_rate):
L = len(parameters) // 2
nw_para = parameters.copy()
for l in range(L):
nw_para["W"+ str(l+1)] = parameters["W"+ str(l+1)] - learning_rate*(grads["dW"+ str(l+1)])
nw_para["b"+ str(l+1)] = parameters["b"+ str(l+1)] - learning_rate*(grads["db"+ str(l+1)])
return nw_para
#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
def update_parameters_with_gd(parameters, grads, learning_rate):
L = len(parameters) // 2
nw_para = parameters.copy()
for l in range(L):
nw_para["W"+ str(l+1)] = parameters["W"+ str(l+1)] - learning_rate*(grads["dW"+ str(l+1)])
nw_para["b"+ str(l+1)] = parameters["b"+ str(l+1)] - learning_rate*(grads["db"+ str(l+1)])
return nw_para
def random_mini_batches(X, Y, mini_batch_size, seed = 0):
np.random.seed(seed)
m = X.shape[1]
mini_batches = []
#shuffle data
permutation = list(np.random.permutation(m))
shu_x = X[:, permutation]
shu_y = Y[:, permutation].reshape((1,m))
num_compl = math.floor(m/mini_batch_size)
for k in range(0,num_compl):
mini_batch_X = shu_x[: , k * mini_batch_size : (k+1)* mini_batch_size]
mini_batch_Y = shu_y[: , k * mini_batch_size : (k+1)* mini_batch_size]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
if m % mini_batch_size != 0:
mini_batch_X = shu_x[: , num_compl * mini_batch_size:]
mini_batch_Y = shu_y[: , num_compl * mini_batch_size:]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
return mini_batches
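# Illustrative use of random_mini_batches on toy data (all names below are hypothetical):
# with 148 examples and a batch size of 64, it returns two full (X, Y) batches of 64 columns
# and one final batch of 20.
toy_X = np.random.randn(5, 148)
toy_Y = (np.random.rand(1, 148) > 0.5).astype(int)
for batch_X, batch_Y in random_mini_batches(toy_X, toy_Y, mini_batch_size=64, seed=1):
    print(batch_X.shape, batch_Y.shape)   # (5, 64) (1, 64), (5, 64) (1, 64), (5, 20) (1, 20)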
#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
def initialize_velocity_mom(parameters):
L = len(parameters) //2
v = {}
for l in range(L):
v["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
v["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
return v
def initialize_S_RMS_PROP(parameters):
L = len(parameters) //2
S = {}
for l in range(L):
S["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
S["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
return S
def Update_para_with_momentum(parameters, grads, v, beta1, learning_rate):
beta = beta1
L = len(parameters) // 2
for l in range(L):
v["dW" + str(l+1)] = beta * v["dW" + str(l+1)] + (1-beta) * grads["dW"+ str(l+1)]
v["db" + str(l+1)] = beta * v["db" + str(l+1)] + (1-beta) * grads["db"+ str(l+1)]
parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * v["dW" +str(l+1)]
parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * v["db" +str(l+1)]
return parameters, v
def Update_para_with_RMS(parameters, grads, S, beta2, learning_rate, epsilon):
L = len(parameters) // 2
    for l in range(L):
        S["dW" + str(l+1)] = (beta2 * S["dW" + str(l+1)]) + ((1 - beta2) * (grads["dW" + str(l+1)] * grads["dW" + str(l+1)]))
        S["db" + str(l+1)] = (beta2 * S["db" + str(l+1)]) + ((1 - beta2) * (grads["db" + str(l+1)] * grads["db" + str(l+1)]))
        # plain RMSprop applies no bias correction, so S is used directly in the update
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - ((learning_rate * grads["dW" + str(l+1)]) / (np.sqrt(S["dW" + str(l+1)]) + epsilon))
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - ((learning_rate * grads["db" + str(l+1)]) / (np.sqrt(S["db" + str(l+1)]) + epsilon))
return parameters, S
def initialize_adam(parameters):
L = len(parameters) //2
v = {}
s = {}
for l in range(L):
v["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
v["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
s["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
s["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
return v,s
def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate, beta1 = 0.9,
beta2 = 0.999, epsilon = 1e-8):
L = len(parameters) // 2
v_corrected = {}
s_corrected = {}
for l in range(L):
v["dW" + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1 - beta1) * grads["dW" + str(l+1)]
v["db" + str(l+1)] = beta1 * v["db" + str(l+1)] + (1 - beta1) * grads["db" + str(l+1)]
        v_corrected["dW" + str(l+1)] = v["dW" + str(l+1)] / (1 - np.power(beta1, t))
        v_corrected["db" + str(l+1)] = v["db" + str(l+1)] / (1 - np.power(beta1, t))
s["dW" + str(l+1)] = beta2 * s["dW" + str(l+1)] + (1 - beta2) * (grads["dW" + str(l+1)] * grads["dW" + str(l+1)])
s["db" + str(l+1)] = beta2 * s["db" + str(l+1)] + (1 - beta2) * (grads["db" + str(l+1)] * grads["db" + str(l+1)])
        s_corrected["dW" + str(l+1)] = s["dW" + str(l+1)] / (1 - np.power(beta2, t))
        s_corrected["db" + str(l+1)] = s["db" + str(l+1)] / (1 - np.power(beta2, t))
parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - (learning_rate * v_corrected["dW" + str(l+1)]) / (np.sqrt(s_corrected["dW" + str(l+1)] + epsilon))
parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - (learning_rate * v_corrected["db" + str(l+1)]) / (np.sqrt(s_corrected["db" + str(l+1)] + epsilon))
return parameters, v, s
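# A hedged sketch of how the Adam helpers are meant to be driven: v and s come from
# initialize_adam, and the timestep counter t used for bias correction is incremented once
# per update. Layer sizes, gradients, and the learning rate below are illustrative only.
def adam_demo():
    demo_parameters = initialize_parameters_deep([4, 3, 1])
    v, s = initialize_adam(demo_parameters)
    demo_grads = {"dW1": np.ones((3, 4)), "db1": np.ones((3, 1)),
                  "dW2": np.ones((1, 3)), "db2": np.ones((1, 1))}
    t = 0
    for _ in range(3):                                   # three dummy update steps
        t = t + 1                                        # t starts at 1 on the first update
        demo_parameters, v, s = update_parameters_with_adam(demo_parameters, demo_grads, v, s, t,
                                                            learning_rate=0.01)
    return demo_parameters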
#########################################################################
import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
#from dnn_app_utils_v3 import *
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
np.random.seed(1)
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]
print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_x_orig shape: " + str(train_x_orig.shape))
print ("train_y shape: " + str(train_y.shape))
print ("test_x_orig shape: " + str(test_x_orig.shape))
print ("test_y shape: " + str(test_y.shape))
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T
train_x = train_x_flatten /255
test_x = test_x_flatten /255
# Output of the prints above:
# Number of training examples: 209
# Number of testing examples: 50
# Each image is of size: (64, 64, 3)
# train_x_orig shape: (209, 64, 64, 3)
# train_y shape: (1, 209)
# test_x_orig shape: (50, 64, 64, 3)
# test_y shape: (1, 50)
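# A minimal end-to-end sketch of batch gradient descent on the cat data using the helpers
# defined above. The architecture, iteration count, and learning rate are illustrative
# assumptions, not values taken from the original notebook.
layers_dims = [12288, 20, 7, 5, 1]                       # hypothetical 4-layer architecture
parameters = initialize_parameters_deep(layers_dims)
for i in range(2500):
    AL, caches = L_model_forward(train_x, parameters)    # forward pass
    cost = compute_cost(AL, train_y)                     # cross-entropy cost
    grads = L_model_backward(AL, train_y, caches)        # backward pass
    parameters = update_parameters(parameters, grads, learning_rate=0.0075)
    if i % 100 == 0:
        print("Cost after iteration %i: %f" % (i, cost))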