吴恩达深度学习1.3练习_Neural Networks and Deep Learning
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/weixin_42432468
学习心得:
1、每周的视频课程看一到两遍
2、做笔记
3、做每周的作业练习,这个里面的含金量非常高。掌握后一定要自己敲一遍,这样以后用起来才能得心应手。
# Package imports
import numpy as np
import matplotlib.pyplot as plt
from testCases import *
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets

# %matplotlib inline  -- IPython/Jupyter magic; it is not valid Python outside a notebook.
np.random.seed(1)  # set a seed so that the results are consistent

X, Y = load_planar_dataset()

# Type, shape and a sample of each array.
print(type(X), X.shape, X[:, 0])
print(type(Y), Y.shape, Y[:, :3])

# Visualize the data.
# Passing c=Y directly raised a dimension-mismatch error, so the labels are
# flattened to 1-D before being used as colours.
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y[0, :]), s=40, cmap=plt.cm.Spectral)

# The plots show that every coordinate lies within (-4, 4), so the data can
# reasonably be used without normalisation.
# NOTE(review): the two slices below assume 400 samples ordered by class
# (first 200 / next 200) -- confirm against load_planar_dataset.
plt.figure()
# cmap is omitted here: it has no effect when a single fixed colour is given.
plt.scatter(X[0, :200], X[1, :200], c='red', s=40)
plt.figure()
plt.scatter(X[0, 200:400], X[1, 200:400], c='blue', s=40)

# Open question left by the author: why does the data look like a "flower"?
正向传播:
$$z^{[1]} = W^{[1]}X + b^{[1]}\tag{1}$$
$$a^{[1]} = g^{[1]}(z^{[1]})\tag{2}$$
$$z^{[2]} = W^{[2]}a^{[1]} + b^{[2]}\tag{3}$$
$$a^{[2]} = \sigma(z^{[2]})\tag{4}$$
$$J = -\frac{1}{m} \sum_{i=1}^{m} \left( y^{(i)} \log\left(a^{[2](i)}\right) + \left(1-y^{(i)}\right) \log\left(1-a^{[2](i)}\right) \right)\tag{5}$$
反向传播:
$$\mathrm{d}a^{[2]} = \frac{\partial J}{\partial a^{[2]}} = \frac{1}{m} \sum_{i=1}^{m}\left[-\frac{y}{a^{[2]}} + \frac{1-y}{1-a^{[2]}}\right]\tag{1}$$
$$\mathrm{d}z^{[2]} = \frac{\partial J}{\partial a^{[2]}} \times \frac{\partial a^{[2]}}{\partial z^{[2]}} = \frac{1}{m} \sum_{i=1}^{m}\left[a^{[2]}-y\right]\tag{2}$$
$$\mathrm{d}w^{[2]} = \mathrm{d}z^{[2]} \times \frac{\partial z^{[2]}}{\partial w^{[2]}} = \mathrm{d}z^{[2]} \times a^{[1]T}\tag{3}$$
$$\mathrm{d}b^{[2]} = \mathrm{d}z^{[2]} \times \frac{\partial z^{[2]}}{\partial b^{[2]}} = \mathrm{d}z^{[2]}\tag{4}$$
$$\mathrm{d}a^{[1]} = \mathrm{d}z^{[2]} \times \frac{\partial z^{[2]}}{\partial a^{[1]}} = w^{[2]T} \cdot \mathrm{d}z^{[2]}\tag{5}$$
$$\mathrm{d}z^{[1]} = \mathrm{d}a^{[1]} \times {g^{[1]}}'(z^{[1]}) = w^{[2]T} \cdot \mathrm{d}z^{[2]} \cdot {g^{[1]}}'(z^{[1]})\tag{6}$$
$$\mathrm{d}w^{[1]} = \mathrm{d}z^{[1]} \cdot X^{T}\tag{7}$$
$$\mathrm{d}b^{[1]} = \mathrm{d}z^{[1]}\tag{8}$$
#初始化正向传播过程中的4个参数
def initialize_pars(num_in, num_h, num_out):
    """Create the initial parameters of a one-hidden-layer network.

    Args:
        num_in: number of input features (columns of ``w1``).
        num_h: number of hidden units.
        num_out: number of output units (rows of ``w2``).

    Returns:
        dict with keys ``'w1'`` (num_h, num_in), ``'b1'`` (num_h, 1),
        ``'w2'`` (num_out, num_h) and ``'b2'`` (num_out, 1).

    Note:
        The global NumPy RNG is re-seeded with 2 on every call, so the
        returned values are deterministic.
    """
    np.random.seed(2)

    # Both weight matrices are scaled by 0.01: small initial weights keep the
    # activations out of their flat regions so gradient descent does not start
    # out slowly. (The original scaled only w1.)
    w1 = np.random.randn(num_h, num_in) * 0.01
    b1 = np.zeros((num_h, 1))
    w2 = np.random.randn(num_out, num_h) * 0.01
    b2 = np.zeros((num_out, 1))

    # Internal sanity checks on the shapes built above.
    assert w1.shape == (num_h, num_in)
    assert b1.shape == (num_h, 1)
    assert w2.shape == (num_out, num_h)
    assert b2.shape == (num_out, 1)

    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
# Smoke-test initialize_pars on a 2-input, 4-hidden-unit, 1-output network.
pars_test = initialize_pars(num_in=2, num_h=4, num_out=1)
# Show the four arrays in order, separated by " \n ".
print(*(pars_test[key] for key in ('w1', 'b1', 'w2', 'b2')), sep=' \n ')
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: scalar or array-like of real numbers (converted to float).

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp``, which avoids the overflow / divide-by-zero warnings that
    the direct form ``1 / (1 + 1 / exp(x))`` produces for large ``|x|``.
    """
    z = np.asarray(x, dtype=float)
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0.0, -z))
#正向传播
def forward_propagate(X, Y, pars):
    """Run one forward pass and compute the cross-entropy cost.

    Args:
        X: input matrix; it is left-multiplied by ``pars['w1']``, so its first
            dimension must equal the number of columns of ``w1``.
        Y: label array, multiplied element-wise with ``log(a2)``.
        pars: dict with keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``.

    Returns:
        (J_cost, cache) where ``J_cost`` is the mean binary cross-entropy and
        ``cache`` holds ``'z1'``, ``'a1'``, ``'z2'``, ``'a2'`` for use in
        back-propagation.
    """
    # Hidden layer: affine transform followed by tanh.
    z1 = np.dot(pars['w1'], X) + pars['b1']
    a1 = np.tanh(z1)

    # Output layer: affine transform followed by sigmoid.
    z2 = np.dot(pars['w2'], a1) + pars['b2']
    a2 = sigmoid(z2)

    # Mean over all entries of the per-example cross-entropy.
    J_cost = -np.mean(Y * np.log(a2) + (1 - Y) * np.log(1 - a2))

    # These intermediate values are needed again by back-propagation.
    cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
    return J_cost, cache
# Exercise forward_propagate on a toy problem: 2 features x 2 examples.
x_test = np.array([[1, 2], [5, 6]])
y_test = np.array([[0, 1]])
J_test, cache_test = forward_propagate(X=x_test, Y=y_test, pars=pars_test)
# Cost first, then the cached activations, separated by " \n ".
print(J_test, cache_test, sep=' \n ')
# # 检查为什么与课件结果不一样# X_assess, Y_assess = layer_sizes_test_case()# np.random.seed(1)
# X_assess = np.random.randn(2, 3)# parameters = {'w1': np.array([[-0.00416758, -0.00056267],
# [-0.02136196, 0.01640271],
# [-0.01793436, -0.00841747],
# [ 0.00502881, -0.01245288]]),
# 'w2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
# 'b1': np.array([[ 0.],
# [ 0.],
# [ 0.],
# [ 0.]]),
# 'b2': np.array([[ 0.]])}
# J_test,cache_test = forward_propagate(X_assess,Y_assess,parameters)
# print(np.mean(cache_test['z1']) ,np.mean(cache_test['a1']),np.mean(cache_test['z2']),np.mean(cache_test['a2']))# #此处不一致是由于测试数据不一致导致的# np.random.seed(1)
# Y_assess = np.random.randn(1, 3)
# J_test,cache_test = forward_propagate(X_assess,Y_assess,parameters)
# print('cost = '+str(J_test))
def g1_prime(x):
    """Return ``1 - x**2`` element-wise.

    This is the derivative of tanh written in terms of its *output*: when
    ``x = tanh(z)``, ``d tanh(z) / dz = 1 - x**2``. Callers are therefore
    expected to pass the activation, not the pre-activation ``z``.

    Args:
        x: scalar or array.

    Returns:
        ``1 - x**2`` with the same shape as ``x``.
    """
    return 1 - np.power(x, 2)
import sys

# Side experiment: fixed-width NumPy integers wrap around silently on overflow.
print(sys.maxsize)
# No dtype given: NumPy's default integer is used. Its width is platform
# dependent (the author observed int32 on their machine).
print(np.arange(10) ** 10)
print(np.arange(10, dtype=np.int8) ** 10)
print(np.arange(10, dtype=np.int16) ** 10)
print(np.arange(10, dtype=np.int32) ** 10)
print(np.arange(10, dtype=np.int64) ** 10)
# dtype=object stores Python ints, which have arbitrary precision.
print(np.arange(10, dtype=object) ** 20)

# Test g1_prime on a small random matrix.
np.random.seed(1)
xx = np.random.randn(2, 2)
print(xx)
g1_prime_test = g1_prime(xx)
print(g1_prime_test)
#反向传播
def backward_propagate(X, Y, pars, cache):
    """Compute the gradients of the cost for a one-hidden-layer network.

    Args:
        X: input matrix that was fed to the forward pass.
        Y: labels; ``Y.shape[1]`` is taken as the number of examples ``m``.
        pars: dict with keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``.
        cache: dict produced by the forward pass; ``'a1'`` and ``'a2'`` are read.

    Returns:
        dict with keys ``'dw2'``, ``'db2'``, ``'dw1'``, ``'db1'``.
    """
    m = Y.shape[1]

    # Output layer.
    dz2 = cache['a2'] - Y
    dw2 = 1. / m * np.dot(dz2, cache['a1'].T)
    db2 = np.sum(dz2, axis=1, keepdims=True) / m

    # Hidden layer. g1_prime takes the activation a1 (not z1): it evaluates
    # 1 - a1**2. Passing z1 here was an earlier mistake noted by the author.
    da1 = np.dot(pars['w2'].T, dz2)
    dz1 = np.multiply(da1, g1_prime(cache['a1']))
    # The 1/m factor matters: omitting it was the cause of an earlier mismatch
    # with the course's reference values.
    dw1 = 1. / m * np.dot(dz1, X.T)
    db1 = np.sum(dz1, axis=1, keepdims=True) / m

    # Each weight gradient must have the shape of the weight it updates.
    assert dw2.shape == pars['w2'].shape
    assert dw1.shape == pars['w1'].shape

    return {'dw2': dw2, 'db2': db2, 'dw1': dw1, 'db1': db1}
# Gradients for the toy example, using the cache from the forward pass above.
grads_test = backward_propagate(X=x_test, Y=y_test, pars=pars_test, cache=cache_test)
print(grads_test)
# #此处结果一样,之前结果不一样因为backward function dw1 = np.dot(dz1,X.T) 没有除以m造成差异
# np.random.seed(1)
# X_assess = np.random.randn(2, 3)
# Y_assess = np.random.randn(1, 3)
# parameters = {'w1': np.array([[-0.00416758, -0.00056267],
# [-0.02136196, 0.01640271],
# [-0.01793436, -0.00841747],
# [ 0.00502881, -0.01245288]]),
# 'w2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
# 'b1': np.array([[ 0.],
# [ 0.],
# [ 0.],
# [ 0.]]),
# 'b2': np.array([[ 0.]])}# cache = {'a1': np.array([[-0.00616578, 0.0020626 , 0.00349619],
# [-0.05225116, 0.02725659, -0.02646251],
# [-0.02009721, 0.0036869 , 0.02883756],
# [ 0.02152675, -0.01385234, 0.02599885]]),
# 'a2': np.array([[ 0.5002307 , 0.49985831, 0.50023963]]),
# 'z1': np.array([[-0.00616586, 0.0020626 , 0.0034962 ],
# [-0.05229879, 0.02726335, -0.02646869],
# [-0.02009991, 0.00368692, 0.02884556],
# [ 0.02153007, -0.01385322, 0.02600471]]),
# 'z2': np.array([[ 0.00092281, -0.00056678, 0.00095853]])}
# grads = backward_propagate(X_assess,Y_assess,parameters,cache)
# print ("dW1 = "+ str(grads["dw1"]))
# print ("db1 = "+ str(grads["db1"]))
# print ("dW2 = "+ str(grads["dw2"]))
# print ("db2 = "+ str(grads["db2"]))
def update_pars(pars, grads, learning_rate=1.2):
    """Apply one gradient-descent step and return the new parameters.

    Args:
        pars: dict with keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``.
        grads: dict with keys ``'dw1'``, ``'db1'``, ``'dw2'``, ``'db2'``.
        learning_rate: step size multiplying each gradient.

    Returns:
        A new dict with the same four keys as ``pars``; the input dict and its
        arrays are not modified.
    """
    # The gradient for parameter ``k`` is stored under the key ``'d' + k``.
    return {
        name: pars[name] - learning_rate * grads['d' + name]
        for name in ('w1', 'b1', 'w2', 'b2')
    }
# Compare the parameters before and after a single step of size 0.2.
print(pars_test)
update_test = update_pars(pars=pars_test, grads=grads_test, learning_rate=0.2)
print(update_test)
# #check update_paramenters function, it is ok
# parameters = {'w1': np.array([[-0.00615039, 0.0169021 ],
# [-0.02311792, 0.03137121],
# [-0.0169217 , -0.01752545],
# [ 0.00935436, -0.05018221]]),
# 'w2': np.array([[-0.0104319 , -0.04019007, 0.01607211, 0.04440255]]),
# 'b1': np.array([[ -8.97523455e-07],
# [ 8.15562092e-06],
# [ 6.04810633e-07],
# [ -2.54560700e-06]]),
# 'b2': np.array([[ 9.14954378e-05]])}# grads = {'dw1': np.array([[ 0.00023322, -0.00205423],
# [ 0.00082222, -0.00700776],
# [-0.00031831, 0.0028636 ],
# [-0.00092857, 0.00809933]]),
# 'dw2': np.array([[ -1.75740039e-05, 3.70231337e-03, -1.25683095e-03,
# -2.55715317e-03]]),
# 'db1': np.array([[ 1.05570087e-07],
# [ -3.81814487e-06],
# [ -1.90155145e-07],
# [ 5.46467802e-07]]),
# 'db2': np.array([[ -1.08923140e-05]])}# parameters = update_pars(parameters, grads)# print("W1 = " + str(parameters["w1"]))
# print("b1 = " + str(parameters["b1"]))
# print("W2 = " + str(parameters["w2"]))
# print("b2 = " + str(parameters["b2"]))
def nn_model(X, Y, n_h, learning_rate=1.2, num_iterations=10000, print_cost=False):
    """Train a one-hidden-layer network with batch gradient descent.

    Args:
        X: input matrix; ``X.shape[0]`` is used as the input layer size.
        Y: label matrix; ``Y.shape[0]`` is used as the output layer size.
        n_h: number of hidden units.
        learning_rate: gradient-descent step size.
        num_iterations: number of forward/backward/update cycles.
        print_cost: when true, every 1000th iteration's cost is printed *and*
            appended to the returned list; when false the list stays empty.

    Returns:
        (pre_pars, costs). ``pre_pars`` are the parameters that were used for
        the last forward pass, i.e. the state *before* the final update (or the
        initial parameters when ``num_iterations`` is 0).
    """
    np.random.seed(3)
    n_x = X.shape[0]
    n_y = Y.shape[0]

    pars = initialize_pars(n_x, n_h, n_y)
    # Defined up front so that num_iterations == 0 returns the initial
    # parameters instead of raising UnboundLocalError.
    pre_pars = pars
    costs = []

    for i in range(num_iterations):
        cost, cache = forward_propagate(X, Y, pars)
        grads = backward_propagate(X, Y, pars, cache)

        # update_pars builds a new dict of new arrays and leaves its input
        # untouched, so keeping a reference is enough here; no deep copy is
        # needed to preserve the pre-update state.
        pre_pars = pars
        pars = update_pars(pars, grads, learning_rate)

        if print_cost and i % 1000 == 0:
            costs.append(cost)
            print("Cost after iteration %i: %f" % (i, cost))

    return pre_pars, costs
# Side experiment: a deep copy is a snapshot that later mutations of the
# original (including its nested list) do not affect.
import copy

dict1 = {'a': 1, 'b': 2, 'c': [1, 2]}
dict2 = copy.deepcopy(dict1)   # snapshot taken before the changes
dict1['a'] = 2
dict1['c'].append(3)
print(dict2)                   # still shows the old contents
dict2 = copy.deepcopy(dict1)   # fresh snapshot taken after the changes
print(dict2)

# Test nn_model on the toy problem: 2 features x 2 examples.
x_test = np.array([[1, 2], [5, 6]])
y_test = np.array([[0, 1]])
pars_test = initialize_pars(num_in=2, num_h=4, num_out=1)
print(pars_test)
par_test, costs_test = nn_model(
    x_test, y_test, 4,
    learning_rate=0.2, num_iterations=2000, print_cost=True,
)
# Trained parameters, then the last recorded cost, separated by " \n ".
print(par_test, costs_test[-1], sep=' \n ')
plt.plot(costs_test);
# # check nn_model function ,case the difference is the difference of learning_rate
# np.random.seed(1)
# X_assess = np.random.randn(2, 3)
# Y_assess = np.random.randn(1, 3)
# par_test,costs_test = nn_model(X_assess,Y_assess,4,learning_rate=1.2,num_iterations=10000, print_cost=False)
# print("W1 = " + str(par_test["w1"]))
# print("b1 = " + str(par_test["b1"]))
# print("W2 = " + str(par_test["w2"]))
# print("b2 = " + str(par_test["b2"]))
def predict(pars, X):
    """Predict 0/1 labels with a trained one-hidden-layer network.

    Args:
        pars: dict with keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``.
        X: input matrix, left-multiplied by ``pars['w1']``.

    Returns:
        Integer array of 0/1 predictions: 1 where the output activation is
        strictly greater than 0.5, otherwise 0. Singleton dimensions of the
        activation are squeezed away, but the result is always at least 1-D,
        so a single example yields an array of length 1.
    """
    # Same forward pass as in training, without the cost.
    z1 = np.dot(pars['w1'], X) + pars['b1']
    a1 = np.tanh(z1)
    z2 = np.dot(pars['w2'], a1) + pars['b2']
    a2 = sigmoid(z2)

    # Vectorised threshold. atleast_1d keeps the single-example case working:
    # there np.squeeze returns a 0-d array, which cannot be iterated.
    return np.atleast_1d(np.squeeze(a2) > 0.5).astype(int)
# Try predict on three random 2-feature examples, using the toy model's parameters.
np.random.seed(2)
X_predict = np.random.randn(2, 3)
print(X_predict)
Y_predict = predict(pars=par_test, X=X_predict)
print(Y_predict)
# #check predict function ,output --> predictions = np.array([ 0 if i <=0.5 else 1 for i in np.squeeze(a2) ])
# np.random.seed(1)
# X_assess = np.random.randn(2, 3)
# parameters = {'w1': np.array([[-0.00615039, 0.0169021 ],
# [-0.02311792, 0.03137121],
# [-0.0169217 , -0.01752545],
# [ 0.00935436, -0.05018221]]),
# 'w2': np.array([[-0.0104319 , -0.04019007, 0.01607211, 0.04440255]]),
# 'b1': np.array([[ -8.97523455e-07],
# [ 8.15562092e-06],
# [ 6.04810633e-07],
# [ -2.54560700e-06]]),
# 'b2': np.array([[ 9.14954378e-05]])}
# predictions = predict(parameters, X_assess)
# print("predictions mean = " + str(np.mean(predictions)))# #后一步输出的结果一样,为什么这一步结果输出会不一样?
# Build a model with a n_h-dimensional hidden layer
pars, costs = nn_model(X, Y, n_h=4, learning_rate=1.2, num_iterations=10000, print_cost=True)

# Plot the decision boundary
plot_decision_boundary(lambda x: predict(pars, x.T), X, np.squeeze(Y))
plt.title("Decision Boundary for hidden layer size " + str(4))

predictions = predict(pars, X)
# Accuracy is the share of examples whose prediction equals the label. For 0/1
# labels this equals the original Y.p + (1-Y).(1-p) agreement count divided by
# the number of examples. np.mean returns a scalar, which avoids calling
# float() on a one-element array (deprecated in recent NumPy).
accuracy = float(np.mean(predictions == np.squeeze(Y)) * 100)
print('Accuracy: %d' % accuracy + '%')
# This may take about 2 minutes to run
plt.figure(figsize=(16, 32))
hidden_layer_sizes = [1, 2, 3, 4, 5, 20, 50]
for i, n_h in enumerate(hidden_layer_sizes):
    # One subplot per hidden-layer size in a 5 x 2 grid.
    plt.subplot(5, 2, i + 1)
    plt.title('Hidden Layer of size %d' % n_h)
    parameters, costs = nn_model(X, Y, n_h, num_iterations=5000)
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, np.squeeze(Y))
    predictions = predict(parameters, X)
    # Percentage of examples whose prediction equals the label.
    accuracy = float(np.mean(predictions == np.squeeze(Y)) * 100)
    print("Accuracy for {} hidden units: {} %".format(n_h, accuracy))
# Datasets
noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure = load_extra_datasets()
datasets = {
    "noisy_circles": noisy_circles,
    "noisy_moons": noisy_moons,
    "blobs": blobs,
    "gaussian_quantiles": gaussian_quantiles,
}

### START CODE HERE ### (choose your dataset)
dataset = "noisy_moons"
### END CODE HERE ###

X, Y = datasets[dataset]
# Transpose X and reshape Y into a single row, the layout used by the rest of
# this script.
X, Y = X.T, Y.reshape(1, Y.shape[0])

# make blobs binary
if dataset == "blobs":
    Y = Y % 2

# Visualize the data
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)
吴恩达深度学习1.3练习_Neural Networks and Deep Learning相关推荐
- 吴恩达深度学习1.2练习_Neural Networks and Deep Learning
版权声明:本文为博主原创文章,未经博主允许不得转载. https://blog.csdn.net/weixin_42432468 学习心得: 1.每周的视频课程看一到两遍 2.做笔记 3.做每周的作业 ...
- 吴恩达深度学习1.4练习_Neural Networks and Deep Learning
版权声明:本文为博主原创文章,未经博主允许不得转载. https://blog.csdn.net/weixin_42432468 学习心得: 1.每周的视频课程看一到两遍 2.做笔记 3.做每周的作业 ...
- 吴恩达深度学习1.2笔记_Neural Networks and Deep Learning_神经网络基础
版权声明:本文为博主原创文章,未经博主允许不得转载. https://blog.csdn.net/weixin_42432468 前段时间在网易云课堂零散学习了吴恩达老师的机器学习和深度学习课程,能听 ...
- 吴恩达深度学习1.4笔记_Neural Networks and Deep Learning_深层神经网络
版权声明:本文为博主原创文章,未经博主允许不得转载. https://blog.csdn.net/weixin_42432468 学习心得: 1.每周的视频课程看一到两遍 2.做笔记 3.做每周的作业 ...
- 吴恩达深度学习1.3笔记_Neural Networks and Deep Learning_浅层神经网络
版权声明:本文为博主原创文章,未经博主允许不得转载. https://blog.csdn.net/weixin_42432468 学习心得: 1.每周的视频课程看一到两遍 2.做笔记 3.做每周的作业 ...
- 吴恩达深度学习CNN作业:Convolutional Neural Networks: Application
- 吴恩达深度学习CNN作业:Convolutional Neural Networks: Step by Step
- 吴恩达深度学习神经网络基础编程作业Building your Deep Neural Network Step by Step
- [转载]《吴恩达深度学习核心笔记》发布,黄海广博士整理!
红色石头 深度学习专栏 深度学习入门首推课程就是吴恩达的深度学习专项课程系列的 5 门课.该专项课程最大的特色就是内容全面.通俗易懂并配备了丰富的实战项目.今天,给大家推荐一份关于该专项课程的核心笔记 ...
最新文章
- php 一秒操作一次_php守护进程 加linux命令nohup实现任务每秒执行一次
- The Pediatric Cancer Genome Project 儿童癌症基因组计划
- 正则数字和小数点_Python中的正则表达式【不断补充~】
- Facebook 开启元宇宙模式!豪砸百亿是为什么?
- 面试问题_教资面试,结构化面试问题分享
- python json删除字段_如何使用python删除json对象?
- jq使用教程09_ 教程集合帖-伙伴们贡献,不断更新(4.17)
- Android通过堆栈信息解决ANR问题
- android接推流sdk,Android-SDK推流端说明
- 关于预编译和宏定义部分说明
- wordpress不登陆后台禁用插件
- 从“运维”处谈OFO失败
- au计算机内录音乐,电脑中如何使用 Audition 内录声音
- 【142】阿蛮歌霸使用技巧
- php 养宠物,养宠物必知的十个知识,关爱宠物注重细节
- 对时间序列做分段相关性分析(matlab代码)
- 定制Android开发者专属T恤
- 2021强网杯 ezmath writeup
- SAP PS 第2节 项目状态及字段选择
- cocos2dx+JAVA手游《全民挂机》全套源码支持安卓+IOS双端
热门文章
- HTML Table 冻结行列
- UVA 620 - Cellular Structure
- Windows Server 2003下配置IIS6.0+php5+MySql5+PHPMyAdmin环境
- 【python】日志模块以及日志组件使用
- 反射--获取当前子类父类的泛型类型
- 安装Selenium
- java最基础的小总结
- python输入数据的维度_python – Keras LSTM输入维度设置
- 360浏览器没有声音_你用的浏览器好用吗?该国产电脑浏览器,极速、安全,值得使用...
- keil c语言字符型变量的值,面向51单片机的Keil uVision4的四种基本数据类型