本章节学习神经网络中的正则化

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import scipy.io

# IPython/Jupyter magic (only valid inside a notebook cell): render figures inline.
%matplotlib inline
# Default appearance for every figure drawn below: figure size, and
# un-smoothed grayscale rendering for images.
plt.rcParams.update({
    'figure.figsize': (7.0, 4.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    Args:
        x: Scalar or NumPy array.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    s = 1 / (1 + np.exp(-x))
    return s


def relu(x):
    """Return the element-wise rectified linear unit ``max(0, x)``.

    Args:
        x: Scalar or NumPy array.

    Returns:
        ``x`` with every negative entry replaced by 0.
    """
    s = np.maximum(0, x)
    return s


# He random initialization
def initialize_parameters_he(layers_dims):
    """Build He-initialized weights and zero biases for a dense network.

    Args:
        layers_dims: Sequence of layer sizes; ``layers_dims[0]`` is the
            input size and each later entry is one layer's unit count.

    Returns:
        Dict with ``"W<l>"`` of shape ``(layers_dims[l], layers_dims[l-1])``
        and ``"b<l>"`` of shape ``(layers_dims[l], 1)`` for ``l = 1..L-1``.

    Note:
        Reseeds NumPy's global RNG with 3 on every call, so the returned
        weights are the same each time for a given ``layers_dims``.
    """
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims)
    for layer in range(1, L):
        fan_in = layers_dims[layer - 1]
        # He initialization: scale standard-normal draws by sqrt(2 / fan_in).
        parameters['W' + str(layer)] = (
            np.random.randn(layers_dims[layer], fan_in) * np.sqrt(2. / fan_in)
        )
        parameters['b' + str(layer)] = np.zeros((layers_dims[layer], 1))
    return parameters


def compute_cost(a3, Y):
    """Return the mean binary cross-entropy between ``a3`` and ``Y``.

    Args:
        a3: Predicted probabilities; same shape as ``Y``.
        Y: Labels (0 or 1); the number of examples is ``Y.shape[1]``.

    Returns:
        The summed per-example log-loss divided by the number of examples.
    """
    m = Y.shape[1]
    logprobs = np.multiply(-np.log(a3), Y) + np.multiply(-np.log(1 - a3), 1 - Y)
    # nansum skips NaN terms (e.g. 0 * inf when a3 is exactly 0 or 1)
    # instead of letting them turn the whole cost into NaN.
    cost = 1. / m * np.nansum(logprobs)
    return cost


def load_2D_dataset(path='datasets/data.mat'):
    """Load the 2-D train/test data from a MATLAB file and plot the train set.

    Args:
        path: Location of the ``.mat`` file. It must contain the keys
            ``'X'``, ``'y'``, ``'Xval'`` and ``'yval'``.

    Returns:
        Tuple ``(train_X, train_Y, test_X, test_Y)``; each array is the
        transpose of the corresponding stored array.

    Side effects:
        Draws a scatter plot of the training points, colored by label, on
        the current matplotlib axes.
    """
    data = scipy.io.loadmat(path)
    train_X = data['X'].T
    train_Y = data['y'].T
    test_X = data['Xval'].T
    test_Y = data['yval'].T
    plt.scatter(train_X[0, :], train_X[1, :], c=train_Y[0], s=40, cmap=plt.cm.Spectral)
    return train_X, train_Y, test_X, test_Y
# Three-layer neural network
def forward_propagation(X, parameters):
    """Run the forward pass LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.

    Args:
        X: Input data; it is left-multiplied by ``W1``.
        parameters: Dict holding ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``,
            ``"W3"`` and ``"b3"``.

    Returns:
        Tuple ``(A3, cache)`` where ``A3`` is the sigmoid output of the last
        layer and ``cache`` is
        ``(Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)`` for use by
        backward propagation.
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache
# Backward propagation
def backward_propagation(X, Y, cache):
    """Compute the gradients of the three-layer network's cross-entropy cost.

    Args:
        X: Input data; the number of examples is ``X.shape[1]``.
        Y: Labels, same shape as the network output ``A3``.
        cache: Tuple ``(Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)``
            as produced by ``forward_propagation``.

    Returns:
        Dict with keys ``"dZ3"``, ``"dW3"``, ``"db3"``, ``"dA2"``, ``"dZ2"``,
        ``"dW2"``, ``"db2"``, ``"dA1"``, ``"dZ1"``, ``"dW1"``, ``"db1"``.
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1. / m * np.dot(dZ3, A2.T)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    # ReLU derivative: the gradient only flows where the activation was positive.
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1. / m * np.dot(dZ2, A1.T)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,
                 "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
                 "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1}
    return gradients


def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every ``W<k>`` / ``b<k>`` pair.

    Args:
        parameters: Dict with ``"W<k>"`` and ``"b<k>"`` entries; the layer
            count is taken to be ``len(parameters) // 2``.
        grads: Dict with the matching ``"dW<k>"`` and ``"db<k>"`` entries.
        learning_rate: Step size.

    Returns:
        The same ``parameters`` dict, whose entries have been rebound to the
        updated arrays (the dict is modified in place).
    """
    n = len(parameters) // 2
    for k in range(1, n + 1):
        layer = str(k)
        parameters["W" + layer] = parameters["W" + layer] - learning_rate * grads["dW" + layer]
        parameters["b" + layer] = parameters["b" + layer] - learning_rate * grads["db" + layer]
    return parameters


def predict(X, y, parameters):
    """Predict 0/1 labels for ``X`` and print the accuracy against ``y``.

    Args:
        X: Input data passed to ``forward_propagation``.
        y: True labels; row 0 is compared with the predictions.
        parameters: Trained network parameters.

    Returns:
        Integer array of shape ``(1, number of examples)`` holding 1 where
        the network output exceeds 0.5 and 0 elsewhere.
    """
    a3, _ = forward_propagation(X, parameters)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    p = (a3[:1, :] > 0.5).astype(int)
    print("Accuracy: " + str(np.mean((p[0, :] == y[0, :]))))
    return p


def predict_dec(parameters, X):
    """Return a boolean array that is True where the network output > 0.5.

    Used as the callable handed to the decision-boundary plot.
    """
    a3, _ = forward_propagation(X, parameters)
    predictions = (a3 > 0.5)
    return predictions


def load_planar_dataset(randomness, seed):
    """Generate a synthetic two-class, two-dimensional dataset of 50 points.

    Args:
        randomness: Multiplier for the Gaussian noise added to each radius.
        seed: Seed for NumPy's global RNG (reseeded on every call).

    Returns:
        Tuple ``(X, Y)`` with ``X`` of shape ``(2, 50)`` and ``Y`` of shape
        ``(1, 50)`` (dtype ``uint8``); the first 25 columns are class 0 and
        the last 25 are class 1.
    """
    np.random.seed(seed)
    m = 50            # total number of points
    N = int(m / 2)    # points per class
    D = 2             # dimensionality
    X = np.zeros((m, D))
    Y = np.zeros((m, 1), dtype='uint8')

    for j in range(2):
        rows = slice(N * j, N * (j + 1))
        if j == 0:
            t = np.linspace(j, 4 * 3.1415 * (j + 1), N)
            r = 0.3 * np.square(t) + np.random.randn(N) * randomness
        if j == 1:
            t = np.linspace(j, 2 * 3.1415 * (j + 1), N)
            r = 0.2 * np.square(t) + np.random.randn(N) * randomness
        X[rows] = np.c_[r * np.cos(t), r * np.sin(t)]
        Y[rows] = j

    X = X.T
    Y = Y.T
    return X, Y

问题陈述:您刚刚被法国足球公司聘为AI专家。他们希望你推荐法国队的守门员应该踢球的位置,以便法国队的球员可以用他们的头球击球。

他们为您提供了法国过去10场比赛的以下2D数据集。

# Load the train/test arrays; load_2D_dataset() also scatter-plots the
# training points colored by label.
train_X, train_Y, test_X, test_Y = load_2D_dataset()

每个点对应足球场上的一个位置,在法国守门员从足球场地左侧射门后,足球运动员用头部击球。
-如果圆点是蓝色的,表示法国球员成功地用头部击球
-如果圆点是红色的,表示对方球员用头部击球

你的目标:使用一个深度学习模型来找到场上守门员应该踢球的位置。

数据集分析:这个数据集有点小噪音,但是它看起来像一条对角线,将左上半部(蓝色)和右下半部(红色)分隔开。

您将首先尝试一个非正则化模型。然后您将学习如何对其进行正则化,并决定您将选择哪种模型来解决法国足球队的问题。

1 -非正则化模型

您将使用下面的神经网络(已经在下面实现)。该模型可用于:
-在正则化模式下-通过将lambd输入设置为非零值。我们使用“lambd”而不是“lambda”,因为“lambda”是Python中的一个保留关键字。
-在dropout模式下-通过将keep_prob设置为小于1的值

首先,您将尝试不进行任何正则化的模型。然后,您将实现:

  • L2正则化-函数:“compute_cost_with_regularization()”和“backward_propagation_with_regularization()”
  • Dropout-函数:“forward_propagation_with_dropout()”和“backward_propagation_with_dropout()”

在每个部分中,您将使用正确的输入运行这个模型,以便它调用您已经实现的函数。查看下面的代码,熟悉模型。

def model(X, Y, learning_rate=0.3, num_iterations=30000, print_cost=True, lambd=0, keep_prob=1):
    """Train the three-layer network, optionally with L2 or dropout.

    The architecture is fixed to ``[X.shape[0], 20, 3, 1]``
    (LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID).

    Args:
        X: Input data; ``X.shape[0]`` is the number of features.
        Y: Labels.
        learning_rate: Gradient-descent step size.
        num_iterations: Number of gradient-descent iterations.
        print_cost: If True, print the cost every 10000 iterations and
            record it every 1000 iterations for the final plot.
        lambd: L2 regularization strength; 0 disables L2.
        keep_prob: Probability of keeping a unit during dropout; 1 disables
            dropout.

    Returns:
        Dict of trained parameters.

    Raises:
        ValueError: If ``keep_prob`` is not in ``(0, 1]``.
        AssertionError: If both L2 (``lambd != 0``) and dropout
            (``keep_prob < 1``) are requested; only one may be used at a time.

    Side effects:
        Plots the recorded cost curve with matplotlib and calls ``plt.show()``.
    """
    # Validate before doing any work: previously keep_prob > 1 left `a3`
    # unassigned and keep_prob <= 0 divided activations by a non-positive value.
    if not 0 < keep_prob <= 1:
        raise ValueError("keep_prob must be in (0, 1], got {}".format(keep_prob))
    # This check is loop-invariant, so it is done once up front.
    assert (lambd == 0 or keep_prob == 1), "use either L2 regularization or dropout, not both"

    costs = []
    layers_dims = [X.shape[0], 20, 3, 1]

    # Initialize parameters
    parameters = initialize_parameters_he(layers_dims)

    for i in range(0, num_iterations):
        # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)
        else:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)

        # Cost function
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)

        # Backward propagation
        if lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)
        else:
            grads = backward_propagation(X, Y, cache)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Report / record the cost
        if print_cost and i % 10000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
        if print_cost and i % 1000 == 0:
            costs.append(cost)

    # Plot the cost curve
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (x1,000)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
# Train the baseline model (defaults: lambd = 0, keep_prob = 1, i.e. no
# regularization), then print the accuracy on the training and test sets.
parameters = model(train_X, train_Y)
print ("On the training set:")
predictions_train = predict(train_X, train_Y, parameters)
print ("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

def plot_decision_boundary(model, X, y, h=0.01):
    """Plot a classifier's decision regions together with the data points.

    Args:
        model: Callable taking an array with one grid point per row (two
            columns) and returning one prediction per point.
        X: Data whose row 0 holds the x1 coordinates and row 1 the x2
            coordinates.
        y: Labels; row 0 is used to color the scatter points.
        h: Grid spacing used to sample the plane (default 0.01).

    Side effects:
        Draws on the current matplotlib axes and calls ``plt.show()``.
    """
    # Pad the data range by 1 on every side so points are not on the border.
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1

    # Evaluate the model on a dense grid covering the padded range.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y[0], cmap=plt.cm.Spectral)
    plt.show()
# Plot the decision boundary of the unregularized model over the training
# data, with fixed axis limits.
plt.title("Model without regularization")
axes = plt.gca()
axes.set_xlim([-0.75,0.40])
axes.set_ylim([-0.75,0.65])
# plot_decision_boundary passes grid points one per row, so transpose them
# before handing them to predict_dec.
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)


非正则化模型显然过度拟合了训练集,现在让我们看看两种减少过度拟合的技术。

2 - L2正则化

def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """Return the cross-entropy cost plus the L2 penalty on the weights.

    Args:
        A3: Network output (predicted probabilities).
        Y: Labels; the number of examples is ``Y.shape[1]``.
        parameters: Dict holding at least ``"W1"``, ``"W2"`` and ``"W3"``.
        lambd: L2 regularization strength.

    Returns:
        ``compute_cost(A3, Y) + lambd / (2 * m) * (||W1||^2 + ||W2||^2 + ||W3||^2)``
        where the norms are sums of squared entries and ``m`` is the number
        of examples.
    """
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]

    cross_entropy_cost = compute_cost(A3, Y)

    # L2-norm regularization is also known as weight decay.
    L2_regularization_cost = (1. / m * lambd / 2) * (
        np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3))
    )

    cost = cross_entropy_cost + L2_regularization_cost
    return cost
# Backward propagation with L2 regularization
def backward_propagation_with_regularization(X, Y, cache, lambd):
    """Compute the gradients of the L2-regularized cost.

    Same as plain backward propagation, except that each weight gradient
    gains the extra term ``lambd / m * W``. Bias gradients are unchanged.

    Args:
        X: Input data; the number of examples is ``X.shape[1]``.
        Y: Labels, same shape as the network output ``A3``.
        cache: Tuple ``(Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)``
            as produced by ``forward_propagation``.
        lambd: L2 regularization strength.

    Returns:
        Dict with keys ``"dZ3"``, ``"dW3"``, ``"db3"``, ``"dA2"``, ``"dZ2"``,
        ``"dW2"``, ``"db2"``, ``"dA1"``, ``"dZ1"``, ``"dW1"``, ``"db1"``.
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1. / m * np.dot(dZ3, A2.T) + lambd / m * W3
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    # ReLU derivative: the gradient only flows where the activation was positive.
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1. / m * np.dot(dZ2, A1.T) + lambd / m * W2
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1. / m * np.dot(dZ1, X.T) + lambd / m * W1
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,
                 "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
                 "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1}
    return gradients
# Retrain with L2 regularization (lambd = 0.7; keep_prob stays at its default
# of 1), then print the accuracy on the training and test sets.
parameters = model(train_X, train_Y, lambd = 0.7)
print ("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print ("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

# Plot the decision boundary of the L2-regularized model over the training
# data, with fixed axis limits.
plt.title("Model with L2-regularization")
axes = plt.gca()
axes.set_xlim([-0.75,0.40])
axes.set_ylim([-0.75,0.65])
# plot_decision_boundary passes grid points one per row, so transpose them
# before handing them to predict_dec.
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)


lambd 是L2正则化的一个超参数,它使你的决策边界更平滑;如果lambd值过大,也可能过度平滑
L2 正则化对以下因素的影响:

  • 成本计算:将正则化项添加到了成本中
  • 反向传播函数:在权重矩阵的梯度中有额外的正则化项,权重最终变小。

3 - dropout 正则化(Dropout Regularization)

它在每次迭代中随机关闭一些神经元

def forward_propagation_with_dropout(X, parameters, keep_prob=0.5, seed=1):
    """Run the forward pass with inverted dropout on both hidden layers.

    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Args:
        X: Input data; it is left-multiplied by ``W1``.
        parameters: Dict holding ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``,
            ``"W3"`` and ``"b3"``.
        keep_prob: Probability of keeping each hidden unit active.
        seed: Value used to reseed NumPy's global RNG before the masks are
            drawn. The default (1) keeps the historical behavior, in which
            every call draws the same masks for a given shape. Pass ``None``
            to skip reseeding so that each call draws fresh masks.

    Returns:
        Tuple ``(A3, cache)`` where ``A3`` is the sigmoid output and ``cache``
        is ``(Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)``;
        ``D1`` and ``D2`` are the boolean keep-masks, and ``A1`` / ``A2`` are
        the masked and rescaled activations.
    """
    if seed is not None:
        np.random.seed(seed)

    # Parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # First hidden layer
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Random matrix with the same shape as the layer's activations
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    # Convert to a boolean mask: True means the unit is kept
    D1 = D1 < keep_prob
    # Shut down the units whose mask entry is False
    A1 = A1 * D1
    # Rescale the surviving units so the expected activation is unchanged
    A1 = A1 / keep_prob

    # Second hidden layer: the same four dropout steps
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    # Output layer
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache
# Backward propagation with dropout regularization
def backward_propagation_with_dropout(X, Y, cache, keep_prob):
    """Compute the gradients for a network trained with inverted dropout.

    The masks stored by the forward pass are re-applied to ``dA2`` and
    ``dA1``, which are then divided by ``keep_prob`` to mirror the forward
    rescaling.

    Args:
        X: Input data; the number of examples is ``X.shape[1]``.
        Y: Labels, same shape as the network output ``A3``.
        cache: Tuple
            ``(Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)``
            as produced by ``forward_propagation_with_dropout``.
        keep_prob: The keep probability used in the forward pass.

    Returns:
        Dict with keys ``"dZ3"``, ``"dW3"``, ``"db3"``, ``"dA2"``, ``"dZ2"``,
        ``"dW2"``, ``"db2"``, ``"dA1"``, ``"dZ1"``, ``"dW1"``, ``"db1"``.
    """
    m = X.shape[1]
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Output layer: ordinary derivatives
    dZ3 = A3 - Y
    dW3 = 1. / m * np.dot(dZ3, A2.T)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dA2 = dA2 * D2          # zero the gradient of units dropped in the forward pass
    dA2 = dA2 / keep_prob   # same rescaling as in the forward pass
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1. / m * np.dot(dZ2, A1.T)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = dA1 * D1
    dA1 = dA1 / keep_prob
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,
                 "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
                 "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1}
    return gradients
# Retrain with dropout (keep_prob = 0.86; lambd stays at its default of 0),
# then print the accuracy on the training and test sets.
parameters = model(train_X, train_Y, keep_prob = 0.86, learning_rate = 0.3)
print ("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print ("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)


# Plot the decision boundary of the dropout-trained model over the training
# data, with fixed axis limits.
plt.title("Model with dropout")
axes = plt.gca()
axes.set_xlim([-0.75,0.40])
axes.set_ylim([-0.75,0.65])
# plot_decision_boundary passes grid points one per row, so transpose them
# before handing them to predict_dec.
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)


dropout 很好地提升了我们的精度,并且模型不再过度拟合
关于dropout 应该注意:

  • 只在训练的时候使用dropout,在测试的时候不要使用
  • 在训练期间,每个dropout层的激活值都要除以keep_prob,以保持激活的期望值不变。例如,如果keep_prob为0.5,那么我们平均会关闭一半节点,因此输出将缩小为原来的0.5倍,因为只有剩下的一半对解决方案有贡献。除以0.5相当于乘以2,因此输出现在具有相同的期望值。即使keep_prob的值不是0.5,您也可以验证这一点同样成立

吴恩达深度学习编程作业 part 2-2相关推荐

  1. 吴恩达深度学习编程作业汇总

    以下列表为吴恩达的深度学习课程所对应的编程作业列表,都直接指向了github的连接地址:这些作业也是我在网上购买,可能与官方的内容有所出入:同时由于有的训练集和测试集以及预训练好的参数过大,不便上传, ...

  2. 吴恩达 深度学习 编程作业(2-2)- Optimization Methods

    吴恩达Coursera课程 DeepLearning.ai 编程作业系列,本文为<改善深层神经网络:超参数调试.正则化以及优化 >部分的第二周"优化算法"的课程作业,同 ...

  3. 【吴恩达深度学习编程作业】4.4特殊应用——人脸识别和神经风格转换(问题未解决)

    参考文章:1.人脸识别与神经风格转换 2.神经风格转换编程作业 神经网络风格中遇到的问题已经解决了并将解决方案写在了备注里面,但是人脸识别那里运行到database就出错了,目前仍没有找到解决方案.我 ...

  4. pytorch l2正则化_吴恩达深度学习 编程作业六 正则化(2)

    推荐守门员应该将球踢到哪个位置,才能让自己的队员用头击中. 1.无正则化模型 判别是否有正则化与调用其他计算函数. 准确率:0.948/0.915 明显过拟合overfiting了. 2.L2正则化 ...

  5. 吴恩达 深度学习 编程作业(2-3)- TensorFlow Tutorial

    TensorFlow Tutorial Welcome to this week's programming assignment. Until now, you've always used num ...

  6. 吴恩达 深度学习 编程作业(2-1.1) Initialization

    Initialization Welcome to the first assignment of "Improving Deep Neural Networks". Traini ...

  7. 吴恩达 深度学习 编程作业(1-2.1)- Python Basics with Numpy

    Python Basics with Numpy (optional assignment) Welcome to your first assignment. This exercise gives ...

  8. 吴恩达 深度学习 编程作业(1-3)- Planar data classification with one hidden layer(平面花形状)

    Python Basics with Numpy (optional assignment) Welcome to your first assignment. This exercise gives ...

  9. 吴恩达深度学习编程作业:TensorFlow

最新文章

  1. php 伪静态 page-18.html,PHP 伪静态实现技术原理讲解
  2. (转载)BPM流程管理的将才是你吗
  3. es2017 提供的针对字符串填充的函数:padStart、padEnd
  4. Linux修改hostname(临时或者永久)
  5. linux下weblogic版本,Linux下weblogic10.3.6(jar)版本安装详解
  6. 一款黑科技让普通屏幕秒变触摸屏,厉害了我的哥
  7. 「BZOJ2879」[Noi2012]美食节
  8. SAP License:糟糕的用户比任何系统问题都要危险
  9. mysql安装运行(centos)
  10. linux的sudo apt-get install 和dpkg -i package.deb命令
  11. 操作指南|JumpServer用户权限体系的使用实践
  12. 改变自己,永不会晚!
  13. Backordered even the inventory is sufficient在库存量满足的情况下PICK却BACKORDER
  14. r 语言计算欧氏距离_R语言-KNN算法
  15. 宝塔+云锁nginx自编译web防护 防御CC效果极佳
  16. 软件工作量评估方法(一)
  17. java盘盈盘亏_反映财产物资的盘盈、盘亏和毁损情况,应当设( )科目。
  18. 2020年“有史以来”全网最全1309道BAT大厂java面试题,mongodb原理知识
  19. 深度学习平台demo(一)- C#如何调用python文件
  20. 5分钟看懂│从深蓝到阿尔法狗,人机大战20年进化了什么?

热门文章

  1. Mac OS 10.12 - 如何能够像在Windows一样切换中英文输入法和大小写键?
  2. c语言实验教学软件,C语言实验教学法综述
  3. python画图双纵轴多张图折线柱状图
  4. 试题 算法训练 24点 蓝桥杯 Java
  5. JSP/Servlet构建三层管理信息系统
  6. 数据治理系列(三):主数据管理
  7. keil stm32标准库放在哪里_使用Keil MDK以及标准外设库创建STM32工程
  8. 电脑监控是真的吗?4个实验一探究竟
  9. 1. 批处理常用符号详解:
  10. 北京大学肖臻老师《区块链技术与应用》ETH笔记 - 5.0 ETH中GHOST协议篇