回归模块

回归模块中提供了批量梯度下降和随机梯度下降两种学习策略来训练模型：

# coding: utf-8
# linear_regression/regression.py
import numpy as np
import matplotlib as plt
import functools
import time


def exeTime(func):
    """Decorator that measures how long the wrapped call takes.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that returns the tuple ``(result, elapsed_seconds)``
        instead of the bare result of ``func``.
    """
    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def newFunc(*args, **args2):
        t0 = time.time()
        back = func(*args, **args2)
        return back, time.time() - t0
    return newFunc


def loadDataSet(filename):
    """Read a tab-separated data set from a file.

    Each non-blank line is expected to look like
    ``feature1 TAB feature2 TAB ... TAB label``.  The number of features is
    taken from the first non-blank line.

    Args:
        filename: Path of the data file.

    Returns:
        X: Matrix of training samples, one row per sample.
        y: Column matrix of labels, one row per sample.
    """
    X = []
    y = []
    numFeat = None
    # A context manager guarantees the file is closed even if parsing fails.
    with open(filename) as dataFile:
        for line in dataFile:
            stripped = line.strip()
            if not stripped:
                # Blank (e.g. trailing) lines carry no sample; skip them
                # instead of failing on float('').
                continue
            curLine = stripped.split('\t')
            if numFeat is None:
                numFeat = len(curLine) - 1
            X.append([float(curLine[i]) for i in range(numFeat)])
            y.append(float(curLine[-1]))
    # np.mat was removed in NumPy 2.0; np.asmatrix is the equivalent call.
    return np.asmatrix(X), np.asmatrix(y).T


def h(theta, x):
    """Prediction (hypothesis) function of linear regression.

    Args:
        theta: Coefficient column vector, shape (n, 1).
        x: Feature column vector, shape (n, 1).

    Returns:
        The scalar prediction ``theta^T x``.
    """
    return (np.asmatrix(theta).T * np.asmatrix(x))[0, 0]


def J(theta, X, y):
    """Least-squares cost function.

    Args:
        theta: Coefficient column vector, shape (n, 1).
        X: Sample matrix, shape (m, n).
        y: Label column vector, shape (m, 1).

    Returns:
        A 1x1 matrix holding the cost ``||X*theta - y||^2 / (2*m)``.
    """
    X = np.asmatrix(X)
    m = len(X)
    # Compute the residual once and reuse it for both factors.
    residual = X * np.asmatrix(theta) - y
    return residual.T * residual / (2 * m)


@exeTime
def bgd(rate, maxLoop, epsilon, X, y):
    """Batch gradient descent.

    Args:
        rate: Learning rate.
        maxLoop: Maximum number of iterations.
        epsilon: Convergence threshold; iteration stops once the cost
            drops below it.
        X: Sample matrix, shape (m, n).
        y: Label column vector, shape (m, 1).

    Returns:
        ``((theta, errors, thetas), timeConsumed)`` where ``theta`` is the
        (n, 1) coefficient array, ``errors`` is the cost after every
        iteration and ``thetas[j]`` is the history of coefficient ``j``
        (starting with its initial value 0).
    """
    X = np.asmatrix(X)
    m, n = X.shape
    # Initialise theta with zeros.
    theta = np.zeros((n, 1))
    errors = []
    thetas = {j: [theta[j, 0]] for j in range(n)}
    # Exactly maxLoop iterations at most (the former `count <= maxLoop`
    # loop ran one iteration too many).
    for _ in range(maxLoop):
        # Compute the whole gradient from the current theta before touching
        # any coefficient, so that all coefficients are updated
        # simultaneously.
        step = np.asarray(X.T * (y - X * theta)) / m
        theta = theta + rate * step
        for j in range(n):
            thetas[j].append(theta[j, 0])
        error = J(theta, X, y)[0, 0]
        errors.append(error)
        # Stop as soon as the cost is small enough.
        if error < epsilon:
            break
    return theta, errors, thetas


@exeTime
def sgd(rate, maxLoop, epsilon, X, y):
    """Stochastic gradient descent.

    Samples are visited in file order; theta is updated after every single
    sample and the full cost is re-evaluated after each update.

    Args:
        rate: Learning rate.
        maxLoop: Maximum number of passes over the sample set.
        epsilon: Convergence threshold; iteration stops once the cost
            drops below it.
        X: Sample matrix, shape (m, n).
        y: Label column vector, shape (m, 1).

    Returns:
        ``((theta, errors, thetas), timeConsumed)`` where ``theta`` is the
        (n, 1) coefficient array, ``errors`` holds the last cost seen in
        each pass and ``thetas[j]`` is the per-update history of
        coefficient ``j`` (starting with its initial value 0).
    """
    X = np.asmatrix(X)
    m, n = X.shape
    # Initialise theta with zeros.
    theta = np.zeros((n, 1))
    converged = False
    errors = []
    thetas = {j: [theta[j, 0]] for j in range(n)}
    # Exactly maxLoop passes at most (the former `count <= maxLoop` loop
    # ran one pass too many).
    for _ in range(maxLoop):
        # Placeholder, overwritten by the cost after each sample of the pass.
        errors.append(float('inf'))
        for i in range(m):
            sample = X[i].T
            diff = y[i, 0] - h(theta, sample)
            theta = theta + rate * diff * np.asarray(sample)
            for j in range(n):
                thetas[j].append(theta[j, 0])
            error = J(theta, X, y)[0, 0]
            errors[-1] = error
            # Stop as soon as the cost is small enough.
            if error < epsilon:
                converged = True
                break
        if converged:
            break
    return theta, errors, thetas

代码结合注释应该能看懂，借助于Numpy，只是复现了课上的公式。

测试程序

bgd测试程序

# coding: utf-8
# linear_regression/test_bgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np


def main():
    """Train with batch gradient descent on data/ex1.txt and plot the results.

    Shows four figures in turn: the fitted line over the training set, the
    cost per iteration, the cost surface with the descent path, and the cost
    contours with the descent path and the final solution.
    """
    X, y = regression.loadDataSet('data/ex1.txt')

    m = X.shape[0]
    # Prepend a column of ones so that theta[0] acts as the intercept.
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.01
    maxLoop = 1500
    epsilon = 0.01

    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the fitted line.
    fittingFig = plt.figure()
    title = 'bgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds'%(rate,maxLoop,epsilon,timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(np.asarray(X[:, 1]).ravel(), np.asarray(y[:, 0]).ravel())

    xCopy = X.copy()
    # Sorts every column independently; fine here because column 0 is
    # constant and only column 1 carries data.
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(np.asarray(xCopy[:, 1]).ravel(),
                           np.asarray(yHat).ravel(), color='g')

    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # Plot the cost curve.
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')

    plt.show()

    # Plot the cost surface.
    size = 100
    theta0Vals = np.linspace(-10, 10, size)
    theta1Vals = np.linspace(-2, 4, size)
    JVals = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            col = np.matrix([[theta0Vals[i]], [theta1Vals[j]]])
            # J returns a 1x1 matrix; extract the scalar explicitly.
            JVals[i, j] = regression.J(col, X, y)[0, 0]

    theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
    # meshgrid uses (row=theta1, col=theta0) ordering, hence the transpose.
    JVals = JVals.T
    contourSurf = plt.figure()
    # Figure.gca(projection=...) was removed in Matplotlib 3.6.
    ax = contourSurf.add_subplot(111, projection='3d')

    ax.plot_surface(theta0Vals, theta1Vals, JVals,  rstride=2, cstride=2, alpha=0.3,
                    cmap=cm.rainbow, linewidth=0, antialiased=False)
    ax.plot(thetas[0], thetas[1], 'rx')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel(r'$J(\theta)$')

    plt.show()

    # Plot the cost contours.
    contourFig = plt.figure()
    ax = contourFig.add_subplot(111)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')

    CS = ax.contour(theta0Vals, theta1Vals, JVals, np.logspace(-2,3,20))
    plt.clabel(CS, inline=1, fontsize=10)

    # Mark the final solution.
    ax.plot(theta[0,0], theta[1,0], 'rx', markersize=10, linewidth=2)

    # Draw the gradient descent path.
    ax.plot(thetas[0], thetas[1], 'rx', markersize=3, linewidth=1)
    ax.plot(thetas[0], thetas[1], 'r-')

    plt.show()


if __name__ == "__main__":
    main()

拟合状况：

可以看到，bgd 运行得并不慢，这是因为在 regression 程序中，我们采用了向量形式计算 $\theta$，计算机会通过并行计算的手段来优化速度。

误差随迭代次数的关系：

误差函数的下降曲面：

梯度下降过程：

sgd测试

# coding: utf-8
# linear_regression/test_sgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np


def main():
    """Train with stochastic gradient descent on data/ex1.txt and plot the results.

    Shows four figures in turn: the fitted line over the training set, the
    cost per pass, the cost surface with the descent path, and the cost
    contours with the descent path and the final solution.
    """
    X, y = regression.loadDataSet('data/ex1.txt')

    m = X.shape[0]
    # Prepend a column of ones so that theta[0] acts as the intercept.
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.01
    maxLoop = 100
    epsilon = 0.01

    result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the fitted line.
    fittingFig = plt.figure()
    title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds'%(rate,maxLoop,epsilon,timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(np.asarray(X[:, 1]).ravel(), np.asarray(y[:, 0]).ravel())

    xCopy = X.copy()
    # Sorts every column independently; fine here because column 0 is
    # constant and only column 1 carries data.
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(np.asarray(xCopy[:, 1]).ravel(),
                           np.asarray(yHat).ravel(), color='g')

    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # Plot the cost curve.
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')

    plt.show()

    # Plot the cost surface.
    size = 100
    theta0Vals = np.linspace(-10, 10, size)
    theta1Vals = np.linspace(-2, 4, size)
    JVals = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            col = np.matrix([[theta0Vals[i]], [theta1Vals[j]]])
            # J returns a 1x1 matrix; extract the scalar explicitly.
            JVals[i, j] = regression.J(col, X, y)[0, 0]

    theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
    # meshgrid uses (row=theta1, col=theta0) ordering, hence the transpose.
    JVals = JVals.T
    contourSurf = plt.figure()
    # Figure.gca(projection=...) was removed in Matplotlib 3.6.
    ax = contourSurf.add_subplot(111, projection='3d')

    ax.plot_surface(theta0Vals, theta1Vals, JVals,  rstride=8, cstride=8, alpha=0.3,
                    cmap=cm.rainbow, linewidth=0, antialiased=False)
    ax.plot(thetas[0], thetas[1], 'rx')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel(r'$J(\theta)$')

    plt.show()

    # Plot the cost contours.
    contourFig = plt.figure()
    ax = contourFig.add_subplot(111)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')

    CS = ax.contour(theta0Vals, theta1Vals, JVals, np.logspace(-2,3,20))
    plt.clabel(CS, inline=1, fontsize=10)

    # Mark the final solution.
    ax.plot(theta[0,0], theta[1,0], 'rx', markersize=10, linewidth=2)

    # Draw the gradient descent path.
    ax.plot(thetas[0], thetas[1], 'r', linewidth=1)

    plt.show()


if __name__ == "__main__":
    main()

拟合状况：

误差随迭代次数的关系：

梯度下降过程：

在学习率为 $\alpha = 0.01$ 时，随机梯度下降法出现了非常明显的抖动，同时，随机梯度下降法的速度优势也并未在此得到体现，一是样本容量不大，二是其自身很难通过并行计算去优化速度。

1.3 程序示例--梯度下降-机器学习笔记-斯坦福吴恩达教授相关推荐

1.2 线性回归与梯度下降-机器学习笔记-斯坦福吴恩达教授
线性回归：首先，我们明确几个常用的数学符号：特征（feature）：$x_i$，比如，房屋的面积、卧室数量都算房屋的特征；特征向量（输入）：$x$，一套房屋的信息就算一个特征向量，特征向 ...
10.2 梯度下降-机器学习笔记-斯坦福吴恩达教授
梯度下降批量梯度下降法(Batch gradient descent) 拥有了大数据,就意味着,我们的算法模型中得面临一个很大的 m 值.回顾到我们的批量梯度下降法: 重复直到收敛:重复直到收敛:重 ...
8.7 程序示例--异常检测-机器学习笔记-斯坦福吴恩达教授
程序示例–异常检测异常检测模型提供了一般高斯分布模型和多元高斯分布模型.其中,多元高斯分布模型被限制到了同轴分布: # coding: utf8 # anomaly_detection/anoma ...
7.3 程序示例--PCA 模型-机器学习笔记-斯坦福吴恩达教授
程序示例–PCA 模型 # coding: utf8 # pca/pca.pyimport numpy as npdef normalize(X):"""数据标准化处理A ...
5.10 程序示例--模型选择-机器学习笔记-斯坦福吴恩达教授
程序示例–模型选择：在新的一组样本中，我们将通过交叉验证集选择模型，参数 $C$ 和高斯核的参数 $\delta$ 我们都将在以下 8 个值中选取测试，则总共构成了 $8 \times 8 = 64$ 个模 ...
5.9 程序示例--非线性分类-机器学习笔记-斯坦福吴恩达教授
程序示例–非线性分类接下来,我们采用高斯核函数来解决非线性可分问题,由于数据集较大,我们使用性能更好的完整版 SMO 算法进行训练: # coding: utf8 # svm/test_non_li ...
5.8 程序示例--线性分类-机器学习笔记-斯坦福吴恩达教授
程序示例–线性分类首先,我们使用线性核函数来训练线性可分问题,这里,我们使用的是简化版 SMO 算法: # coding: utf8 # svm/test_linear import smo imp ...
4.7 程序示例--算法诊断-机器学习笔记-斯坦福吴恩达教授
程序示例–算法诊断我们手头有一份大坝水的流量与水位关系的数据,首先我们将其划分为训练集.交叉验证集和测试集: # coding: utf-8 # algorithm_analysis/diagnos ...
3.10 程序示例--神经网络设计-机器学习笔记-斯坦福吴恩达教授
神经网络设计在神经网络的结构设计方面,往往遵循如下要点: 输入层的单元数等于样本特征数. 输出层的单元数等于分类的类型数. 每个隐层的单元数通常是越多分类精度越高,但是也会带来计算性能的下降,因此, ...

1.3 程序示例--梯度下降-机器学习笔记-斯坦福吴恩达教授

回归模块

测试程序

bgd测试程序

拟合状况：

误差随迭代次数的关系：

误差函数的下降曲面：

梯度下降过程：

sgd测试

拟合状况：

误差随迭代次数的关系：

梯度下降过程：

1.3 程序示例--梯度下降-机器学习笔记-斯坦福吴恩达教授相关推荐

最新文章

热门文章