1.线性回归、梯度下降法、岭回归、LASSO回归、最小二乘法

文章目录

一、函数形式化表示
二、梯度下降算法
- 梯度下降法代码实现
- 使用梯度下降法求解线性回归问题
- 梯度下降算法变形
三、模型评价指标
- 代码实现
四、岭回归
- 岭回归代码实现
五、LASSO回归
- LASSO回归代码实现
六、Elastic Net回归
七、最小二乘法求线性回归
- 最小二乘法代码实现

一、函数形式化表示

二、梯度下降算法

当目标函数是凸函数时，梯度下降法求得的解就是全局最优解。

梯度下降法代码实现

迭代次数控制

# alpha步长，过小—迭代步数过多，容易局部收敛；过大时，容易在最低点周围来回振荡
import numpy as np
import matplotlib.pyplot as plt# x、y坐标
# Sample the objective f(x) = x^2 + 2x + 5 on [-6, 4] for plotting
x = np.linspace(-6, 4, 100)
y = x ** 2 + 2 * x + 5
# Plot the curve
fig, ax = plt.subplots()
ax.plot(x, y, 'r-', lw=3)
plt.show()

# Starting point x, step size alpha and a fixed iteration budget
# (a gradient tolerance can be used instead of a fixed count)
x = 3
alpha = 0.8
iternum = 100
for _ in range(iternum):
    # f'(x) = 2x + 2; step against the gradient
    x = x - alpha * (2 * x + 2)
y = x ** 2 + 2 * x + 5
# %.4f instead of %d: %d truncates toward zero, so a value such as -0.9999
# would be printed as 0
print("迭代%d次后，最小值点为%.4f,对应的极小值为%.4f" % (iternum, x, y))
# Output: 迭代100次后，最小值点为-1.0000,对应的极小值为4.0000

精度控制

# alpha步长，过小—迭代步数过多，容易局部收敛；过大时，容易在最低点周围来回振荡
import numpy as np
import matplotlib.pyplot as plt# x、y坐标
# Sample the objective f(x) = x^2 + 2x + 5 on [-6, 4] for plotting
x = np.linspace(-6, 4, 100)
y = x ** 2 + 2 * x + 5
# Plot the curve
fig, ax = plt.subplots()
ax.plot(x, y, 'r-', lw=3)
plt.show()

# Starting point x, step size alpha and the stopping tolerance eta on |f'(x)|.
# eta must be positive: with eta = 0 the loop only stops if the gradient
# becomes exactly 0.0 in floating point, which can hang for other step sizes.
x = 3
alpha = 0.8
eta = 1e-8
max_iter = 10000  # safety cap so a badly chosen alpha cannot loop forever
for _ in range(max_iter):
    grad = 2 * x + 2  # f'(x)
    if np.abs(grad) <= eta:
        break
    x = x - alpha * grad
y = x ** 2 + 2 * x + 5
# %.4f instead of %d: %d truncates toward zero, so -0.999999995 would print as 0
print("最小值点为%.4f,对应的极小值为%.4f" % (x, y))
# Output: 最小值点为-1.0000,对应的极小值为4.0000

使用梯度下降法求解线性回归问题

import numpy as np
import matplotlib.pyplot as plt# 1.加载数据
def loaddata(filename):
    """Load a comma-separated numeric file and split it into features/target.

    Every column except the last is treated as a feature; the last column is
    the target.

    Args:
        filename: Path or file-like object accepted by ``np.loadtxt``.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (m, n) and ``y`` of shape (m, 1).
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing works
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    X = data[:, :-1]
    y = data[:, -1].reshape(-1, 1)
    return X, y


# 2. Standardization: reduces the influence of outliers
# Standardization subtracts each column's mean from every value and divides
# the result by that column's standard deviation.
def featureNormized(x):
    """Standardize each column to zero mean and unit sample standard deviation.

    Args:
        x: Array-like of shape (m, n).

    Returns:
        Tuple ``(x_scaled, avg, std)``: the standardized data plus the
        per-column mean and standard deviation used for the scaling.
    """
    x = np.asarray(x, dtype=float)
    avg = np.average(x, axis=0)
    # np.std divides by (N - ddof); ddof=1 gives the n - 1 divisor (sample std).
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std


# 3. Cost function
def computeCost(X, y, theta):
    """Return the squared-error cost J = sum((X @ theta - y)^2) / (2m).

    Args:
        X: Design matrix of shape (m, n).
        y: Targets of shape (m, 1).
        theta: Parameters of shape (n, 1).

    Returns:
        The scalar cost.
    """
    m = X.shape[0]  # number of samples
    # np.dot on 2-D operands is a matrix product: X @ theta gives the predictions
    residual = np.dot(X, theta) - y
    return np.sum(np.power(residual, 2)) / (2 * m)


# 4. Gradient descent
def gradientDescent(X, y, theta, iternum, alpha):
    """Fit linear-regression parameters with batch gradient descent.

    A bias column of ones is prepended to ``X`` internally, so ``theta`` must
    have one more row than ``X`` has columns.

    Args:
        X: Feature matrix of shape (m, n), without the bias column.
        y: Targets of shape (m, 1).
        theta: Initial parameters of shape (n + 1, 1). Not modified.
        iternum: Number of iterations to run.
        alpha: Step size (learning rate).

    Returns:
        Tuple ``(theta, costs)``: the fitted parameters and the cost recorded
        after each iteration.
    """
    # Prepend the bias column x0 = 1
    X = np.insert(X, 0, values=1, axis=1)
    m = X.shape[0]  # number of samples
    # Work on a float copy so the caller's array is not mutated in place
    theta = np.array(theta, dtype=float)
    costs = np.zeros(iternum)
    for i in range(iternum):
        # Simultaneous update: the residual is computed once from the current
        # theta, then every coordinate is moved together.
        residual = y - np.dot(X, theta)
        theta = theta + (alpha / m) * np.dot(X.T, residual)
        costs[i] = computeCost(X, y, theta)
    return theta, costs


# 5. Prediction
def predict(x):
    """Predict targets for raw (unscaled) feature rows.

    Relies on the module-level ``avg``, ``std`` and ``theta`` that the training
    script assigns before this function is called.

    Args:
        x: Array-like of shape (k, n) holding unscaled features.

    Returns:
        Predictions of shape (k, 1).
    """
    # Apply the same standardization that was used for training
    x = (np.asarray(x, dtype=float) - avg) / std
    # Prepend the bias column x0 = 1
    X = np.insert(x, 0, values=1, axis=1)
    return np.dot(X, theta)


# 6. Model evaluation: MSE
def mse(y_true, y_test):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Array-like of true values.
        y_test: Array-like of predicted values with the same shape.

    Returns:
        Sum of squared differences divided by ``len(y_true)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_test = np.asarray(y_test, dtype=float)
    return np.sum(np.power(y_true - y_test, 2)) / len(y_true)


if __name__ == '__main__':
    filename = 'data/data1.txt'
    # Load the data
    X_orign, y = loaddata(filename)
    # Standardize the features
    X, avg, std = featureNormized(X_orign)
    # One parameter per feature plus the bias term
    theta = np.zeros(X.shape[1] + 1).reshape(-1, 1)
    iternum = 100
    alpha = 0.8
    # Solve with gradient descent
    theta, costs = gradientDescent(X, y, theta, iternum, alpha)
    # Predict a single value
    print(predict([[5.734]]))
    # Evaluate the model on the training data
    model_pred = predict(X_orign)
    print(model_pred)
    print('mse=', mse(y, model_pred))
    # Plots
    ax1 = plt.subplot(121)
    ax2 = plt.subplot(122)
    # Cost per iteration
    x_ = np.linspace(1, iternum, iternum)
    ax1.plot(x_, costs)
    # Fitted line over the standardized data
    ax2.scatter(X, y)
    h_theta = theta[0] + theta[1] * X
    ax2.plot(X, h_theta)
    plt.show()

梯度下降算法变形

三、模型评价指标

代码实现

import numpy as np# mse
def mse(y_true, y_pred):
    """Return the mean squared error.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predictions with the same shape.

    Returns:
        Sum of squared errors divided by ``len(y_true)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sum(np.power(y_true - y_pred, 2)) / len(y_true)


# rmse
def rmse(y_true, y_pred):
    """Return the root mean squared error.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predictions with the same shape.

    Returns:
        Square root of (sum of squared errors / ``len(y_true)``).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    squared_error = np.sum(np.power(y_true - y_pred, 2))
    return np.sqrt(squared_error / len(y_true))


# mae
def mae(y_true, y_pred):
    """Return the mean absolute error.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predictions with the same shape.

    Returns:
        Sum of absolute errors divided by ``len(y_true)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sum(np.abs(y_true - y_pred)) / len(y_true)


# mape
def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Args:
        y_true: Array-like of true values. Each error is divided by the
            matching true value, so a zero entry yields inf/nan.
        y_pred: Array-like of predictions with the same shape.

    Returns:
        ``100 / len(y_true) * sum(|(y_true - y_pred) / y_true|)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return 100 / len(y_true) * np.sum(relative_error)


if __name__ == '__main__':
    y_true = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    y_pred = np.array([1.1, 2.1, 3.2, 3.9, 5]).reshape(-1, 1)
    print('mse = %.4f' % mse(y_true, y_pred))
    print('rmse = %.4f' % rmse(y_true, y_pred))
    print('mae = %.4f' % mae(y_true, y_pred))
    print('mape = %.4f' % mape(y_true, y_pred))
    # Output:
    # mse = 0.0140
    # rmse = 0.1183
    # mae = 0.1000
    # mape = 4.8333

四、岭回归

岭回归代码实现

# 岭回归
import numpy as np
import matplotlib.pyplot as plt# 1.加载数据
def loaddata(filename):
    """Load a comma-separated numeric file and split it into features/target.

    Every column except the last is treated as a feature; the last column is
    the target.

    Args:
        filename: Path or file-like object accepted by ``np.loadtxt``.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (m, n) and ``y`` of shape (m, 1).
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing works
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    X = data[:, :-1]
    y = data[:, -1].reshape(-1, 1)
    return X, y


# 2. Standardization
def featureNormized(x):
    """Standardize each column to zero mean and unit sample standard deviation.

    Args:
        x: Array-like of shape (m, n).

    Returns:
        Tuple ``(x_scaled, avg, std)``: the standardized data plus the
        per-column mean and standard deviation used for the scaling.
    """
    x = np.asarray(x, dtype=float)
    avg = np.average(x, axis=0)
    # ddof=1 makes np.std divide by n - 1 (sample standard deviation)
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std


# 3. Cost function
def computeCosts(X, y, lamda, theta):
    """Return the ridge cost: squared error / (2m) + lamda * sum(theta^2).

    Args:
        X: Design matrix of shape (m, n).
        y: Targets of shape (m, 1).
        lamda: L2 regularization strength.
        theta: Parameters of shape (n, 1); every entry is penalized.

    Returns:
        The scalar cost.
    """
    m = X.shape[0]
    residual = np.dot(X, theta) - y
    data_term = np.sum(np.power(residual, 2)) / (2 * m)
    penalty = lamda * np.sum(np.power(theta, 2))
    return data_term + penalty


# 4. Gradient descent
def gradientDescent(X, y, alpha, iternum, lamda, theta):
    """Fit ridge-regression parameters with batch gradient descent.

    Minimizes the cost computed by ``computeCosts``:
    ``sum((X @ theta - y)^2) / (2m) + lamda * sum(theta^2)``.
    A bias column of ones is prepended to ``X`` internally.

    Args:
        X: Feature matrix of shape (m, n), without the bias column.
        y: Targets of shape (m, 1).
        alpha: Step size (learning rate).
        iternum: Number of iterations to run.
        lamda: L2 regularization strength.
        theta: Initial parameters of shape (n + 1, 1). Not modified.

    Returns:
        Tuple ``(costs, theta)``: the cost recorded after each iteration and
        the fitted parameters.
    """
    # Prepend the bias column x0 = 1
    X = np.insert(X, 0, values=1, axis=1)
    m = X.shape[0]  # number of samples
    # Work on a float copy so the caller's array is not mutated in place
    theta = np.array(theta, dtype=float)
    costs = np.ones(iternum)
    for i in range(iternum):
        residual = y - np.dot(X, theta)
        # Gradient of the cost: -(1/m) X^T (y - X theta) + 2 * lamda * theta.
        # The whole gradient, penalty term included, is scaled by alpha.
        grad = -np.dot(X.T, residual) / m + 2 * lamda * theta
        theta = theta - alpha * grad
        costs[i] = computeCosts(X, y, lamda, theta)
    return costs, theta


# 5. Prediction
def predict(x):
    """Predict targets for raw (unscaled) feature rows.

    Relies on the module-level ``avg``, ``std`` and ``theta`` that the training
    script assigns before this function is called.

    Args:
        x: Array-like of shape (k, n) holding unscaled features.

    Returns:
        Predictions of shape (k, 1).
    """
    # Apply the same standardization that was used for training
    x = (np.asarray(x, dtype=float) - avg) / std
    # Prepend the bias column x0 = 1
    x = np.insert(x, 0, values=1, axis=1)
    return np.dot(x, theta)


# 6. Model evaluation
def rmse(y_true, y_pred):
    """Return the root mean squared error.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predictions with the same shape.

    Returns:
        Square root of (sum of squared errors / ``len(y_true)``).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    squared_error = np.sum(np.power(y_true - y_pred, 2))
    return np.sqrt(squared_error / len(y_true))


if __name__ == '__main__':
    filename = '../data/data1.txt'
    # Load the data
    X_Orign, y = loaddata(filename)
    # Standardize the features
    X, avg, std = featureNormized(X_Orign)
    # Hyper-parameters; one theta entry per feature plus the bias term
    theta = np.zeros(X.shape[1] + 1).reshape(-1, 1)
    alpha = 0.01
    iternum = 400
    lamda = 0.001
    # Solve with gradient descent
    costs, theta = gradientDescent(X, y, alpha, iternum, lamda, theta)
    print(costs)
    # Predict a single value
    print(predict([[5]]))
    # Evaluate the model on the training data
    y_pred = predict(X_Orign)
    print('rmse = %.4f' % rmse(y, y_pred))
    # Plots
    ax1 = plt.subplot(121)
    ax2 = plt.subplot(122)
    # Cost per iteration
    x_ = np.linspace(1, iternum, iternum)
    ax1.plot(x_, costs, 'r-', lw=3)
    # Fitted line over the standardized data
    ax2.scatter(X, y)
    h_theta = theta[0] + theta[1] * X
    ax2.plot(X, h_theta, 'r-', lw=3)
    plt.show()
    # Output shown in the original article:
    # [[1.71717285]]
    # rmse = 3.2595

五、LASSO回归

LASSO回归代码实现

# LASSO回归
import numpy as np
import matplotlib.pyplot as plt# 1.加载数据
def loaddata(filename):
    """Load a comma-separated numeric file and split it into features/target.

    Every column except the last is treated as a feature; the last column is
    the target.

    Args:
        filename: Path or file-like object accepted by ``np.loadtxt``.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (m, n) and ``y`` of shape (m, 1).
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing works
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    X = data[:, :-1]
    y = data[:, -1].reshape(-1, 1)
    return X, y


# 2. Standardization
def featureNorimized(x):
    """Standardize each column to zero mean and unit sample standard deviation.

    Args:
        x: Array-like of shape (m, n).

    Returns:
        Tuple ``(x_scaled, avg, std)``: the standardized data plus the
        per-column mean and standard deviation used for the scaling.
    """
    x = np.asarray(x, dtype=float)
    avg = np.average(x, axis=0)
    # ddof=1 makes np.std divide by n - 1 (sample standard deviation)
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std


# 3. Cost function
def computerCosts(X, y, lamda, theta):
    """Return the LASSO cost: squared error / (2m) + lamda * sum(|theta|).

    Args:
        X: Design matrix of shape (m, n).
        y: Targets of shape (m, 1).
        lamda: L1 regularization strength.
        theta: Parameters of shape (n, 1); every entry is penalized.

    Returns:
        The scalar cost.
    """
    m = X.shape[0]
    residual = np.dot(X, theta) - y
    data_term = np.sum(np.power(residual, 2)) / (2 * m)
    penalty = lamda * np.sum(np.abs(theta))
    return data_term + penalty


# 4. Solver
def gradientDescent(X, y, iternum, lamda):
    """Fit LASSO parameters by cyclic coordinate descent.

    Despite the name, no gradient step is taken: each pass updates one
    coordinate at a time with a soft-threshold rule, using lamda / 2 as the
    threshold.

    NOTE(review): the threshold lamda / 2 corresponds to minimizing
    ``sum((X @ theta - y)^2) + lamda * sum(|theta|)``, which is scaled
    differently from the value reported by ``computerCosts`` (squared error
    divided by 2m). Confirm which objective is intended.

    Args:
        X: Design matrix of shape (m, n); the caller supplies the bias column.
        y: Targets of shape (m, 1).
        iternum: Number of full passes over the coordinates.
        lamda: L1 regularization strength.

    Returns:
        Tuple ``(theta, costs)``: ``theta`` is an ``np.matrix`` of shape (n, 1)
        and ``costs`` holds the cost after each pass.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    n = X.shape[1]
    theta = np.zeros((n, 1))
    costs = np.zeros(iternum)
    # z_k = sum_i x_ik^2 does not depend on theta, so compute it once
    z = np.sum(np.power(X, 2), axis=0)
    half = lamda / 2
    for it in range(iternum):
        for k in range(n):  # one coordinate per feature
            x_k = X[:, k].reshape(-1, 1)
            # Residual with feature k's own contribution added back, i.e.
            # y_i - sum_{j != k} x_ij * theta_j for every sample i
            partial = y - np.dot(X, theta) + x_k * theta[k, 0]
            p_k = np.sum(x_k * partial)
            # Soft-threshold p_k
            if p_k < -half:
                w_k = (p_k + half) / z[k]
            elif p_k > half:
                w_k = (p_k - half) / z[k]
            else:
                w_k = 0
            theta[k, 0] = w_k
        costs[it] = computerCosts(X, y, lamda, theta)
    # Returned as np.matrix, the type this function has always returned
    return np.matrix(theta), costs


# 5. Prediction
def predict(x):
    """Predict targets for raw (unscaled) feature rows.

    Relies on the module-level ``avg``, ``std`` and ``theta`` that the training
    script assigns before this function is called.

    Args:
        x: Array-like of shape (k, n) holding unscaled features.

    Returns:
        Predictions of shape (k, 1).
    """
    # Apply the same standardization that was used for training
    x = (np.asarray(x, dtype=float) - avg) / std
    # Prepend the bias column x0 = 1
    x = np.insert(x, 0, values=1, axis=1)
    return np.dot(x, theta)


if __name__ == '__main__':
    filename = '../data/data1.txt'
    iternum = 400
    # Load the data
    X_orgin, y = loaddata(filename)
    # Standardize the features
    X, avg, std = featureNorimized(X_orgin)
    # Insert a column of ones for the bias term
    X_1 = np.insert(X, 0, values=1, axis=1)
    # Solve
    theta, costs = gradientDescent(X_1, y, iternum, lamda=0.01)
    print(theta)
    # Predict a single value
    print(predict([[5.55]]))
    # Plots
    ax1 = plt.subplot(121)
    ax2 = plt.subplot(122)
    # Cost per iteration
    x_ = np.linspace(1, iternum, iternum)
    ax1.plot(x_, costs, 'r-')
    # Fitted line over the standardized data
    ax2.scatter(X, y)
    h_theta = theta[0, 0] + theta[1, 0] * X
    ax2.plot(X, h_theta, 'r-')
    plt.show()
    # Output shown in the original article:
    # [[5.83908351]
    #  [4.61684916]]
    # [[2.72553942]]

六、Elastic Net回归

Elastic Net是一种同时使用L1和L2范数作为正则化项的线性回归模型。当多个特征彼此相关时，弹性网络非常有用：LASSO倾向于从这些相关特征中随机选择一个，而弹性网络更倾向于两个都选择。
ElasticNetCV可以通过交叉验证来设置参数alpha和l1_ratio,l1_ratio可以用来调节L1和L2的凸组合。

七、最小二乘法求线性回归

最小二乘法代码实现

# 最小二乘法
import numpy as np
import matplotlib.pyplot as plt# 1.加载数据
def loaddata(filename):
    """Load a comma-separated numeric file and split it into features/target.

    Every column except the last is treated as a feature; the last column is
    the target.

    Args:
        filename: Path or file-like object accepted by ``np.loadtxt``.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (m, n) and ``y`` of shape (m, 1).
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing works
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    X = data[:, :-1]
    y = data[:, -1].reshape(-1, 1)
    return X, y


# 2. Standardization
def featureNorimized(x):
    """Standardize each column to zero mean and unit sample standard deviation.

    Args:
        x: Array-like of shape (m, n).

    Returns:
        Tuple ``(x_scaled, avg, std)``: the standardized data plus the
        per-column mean and standard deviation used for the scaling.
    """
    x = np.asarray(x, dtype=float)
    avg = np.average(x, axis=0)
    # ddof=1 makes np.std divide by n - 1 (sample standard deviation)
    std = np.std(x, axis=0, ddof=1)
    x = (x - avg) / std
    return x, avg, std


# 3. Objective function
def computerCosts(X, y, theta):
    """Return half the residual sum of squares: sum((X @ theta - y)^2) / 2.

    Args:
        X: Design matrix of shape (m, n).
        y: Targets of shape (m, 1).
        theta: Parameters of shape (n, 1).

    Returns:
        The scalar objective value.
    """
    residual = np.dot(X, theta) - y
    return np.sum(np.power(residual, 2)) / 2


# 4. Least-squares solution (when the inverse of X^T X exists)
def LeastSquaresMethod(X, y):
    """Solve linear regression in closed form through the normal equations.

    Solves ``(X^T X) theta = X^T y``. This requires ``X^T X`` to be
    invertible.

    Args:
        X: Design matrix of shape (m, n), bias column included.
        y: Targets of shape (m, 1).

    Returns:
        Tuple ``(theta, costs)``: the fitted parameters of shape (n, 1) and
        the objective value at that solution.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` is singular.
    """
    # Solving the linear system avoids forming the explicit inverse
    theta = np.linalg.solve(X.T.dot(X), X.T.dot(y))
    costs = computerCosts(X, y, theta)
    return theta, costs


if __name__ == '__main__':
    filename = '../data/data1.txt'
    # Load the data
    X_orgin, y = loaddata(filename)
    # Build the x0 = 1 bias column
    X_1 = np.insert(X_orgin, 0, values=1, axis=1)
    # Solve linear regression by least squares (inverse assumed to exist)
    theta, costs = LeastSquaresMethod(X_1, y)
    print(theta)
    # Plot: scatter of the data and the fitted line
    fig, ax = plt.subplots()
    ax.scatter(X_orgin, y)
    h_theta = theta[0] + theta[1] * X_orgin
    ax.plot(X_orgin, h_theta, 'r-', lw=2)
    plt.show()
    # Output shown in the original article:
    # [[-3.89578088]
    #  [ 1.19303364]]

如果对您有帮助，麻烦点赞关注，这真的对我很重要！！！如果需要互关，请评论留言！

1.线性回归、梯度下降法、岭回归、LASSO回归、最小二乘法相关推荐

Python梯度下降法实现二元逻辑回归
Python梯度下降法实现二元逻辑回归二元逻辑回归假设函数定义当函数值大于等于0.5时,结果为1,当函数值小于0.5时,结果为0.函数的值域是(0, 1). 二元逻辑回归的损失函数上图为二元逻辑 ...
深度学习原理-----线性回归+梯度下降法
系列文章目录深度学习原理-----线性回归+梯度下降法深度学习原理-----逻辑回归算法深度学习原理-----全连接神经网络深度学习原理-----卷积神经网络深度学习原理-----循环神经网 ...
吴恩达《机器学习》笔记（一）【线性回归梯度下降法】
通过在网易云课堂学习吴恩达先生的<机器学习>课程,为了巩固自己的学习且方便读者们共同交流学习,特此做此学习笔记,希望与大家共勉. 吴恩达<机器学习>课程链接:https://s ...
机器学习（七）线性回归与岭回归Lasso回归与弹性网络与正则化
机器学习(七) 线性回归最小二乘法(Least Squares Method,简记为LSE)是一个比较古老的方法,源于天文学和测地学上的应用需要.在早期数理统计方法的发展中,这两门科学起了很大的作用 ...
普通线性回归/岭回归/lasso回归[x.T/np.dot/lrg.coef_/lrg.intercept_/Xw = y/r2_score]
基础运算导包 import numpy as np 随机产生数据集 a = np.random.randint(0,10,size = (2,3)) a Out: array([[2, 8, 2], ...
岭回归Lasso回归
转自:https://blog.csdn.net/dang_boy/article/details/78504258 https://www.cnblogs.com/Belter/p/8536939. ...
python一元线性回归_Python实现——一元线性回归(梯度下降法)
2019/3/25 一元线性回归--梯度下降/最小二乘法又名:一两位小数点的悲剧感觉这个才是真正的重头戏,毕竟前两者都是更倾向于直接使用公式,而不是让计算机一步步去接近真相,而这个梯度下降就不一样了 ...
线性回归 --梯度下降法与标准方程法
线性回归简单线性回归机器学习三要素 – 模型策略算法线性回归输入空间为XXX 输出空间为Y" role="presentation" style=" ...
python实现牛顿法和梯度下降法求解对率回归_最优化问题中，牛顿法为什么比梯度下降法求解需要的迭代次数更少？...
多图预警本文讲你肯定能懂的机器学习多维极值求解,主要讲梯度下降和牛顿法的区别应该能够完美的回答题主的问题事先说明本文面向学习过高等数学统计学和线性代数基础知识的本科生,并假设读者拥有基本的矩阵运 ...
笔记︱范数正则化L0、L1、L2-岭回归Lasso回归（稀疏与特征工程）
机器学习中的范数规则化之(一)L0.L1与L2范数博客的学习笔记,对一些要点进行摘录.规则化也有其他名称,比如统计学术中比较多的叫做增加惩罚项:还有现在比较多的正则化. -------------- ...