Machine Learning ex1: Linear Regression

Python实现了ex1线性回归
线性回归的核心函数是第2章。其余函数均为调用这章的函数来实现计算。

1. 简单练习

1.1 输出一个5*5的单位矩阵

# Build the 5x5 integer identity matrix.
a = np.identity(5, dtype=int)

2. 梯度下降：训练线性回归的参数θ

2.1 计算代价损失函数Cost

def computeCost(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    The inputs are expected to be ``np.matrix`` objects so that ``*`` is a
    matrix product.

    Args:
        X: design matrix, one row per training example.
        y: column of target values, one row per training example.
        theta: row vector of parameters, one column per column of X.

    Returns:
        sum((X * theta.T - y) ** 2) / (2 * m), where m is the number of rows
        of X.
    """
    m = len(X)  # m: number of training examples
    residual = X * theta.T - y  # hypothesis h_theta(x) minus the targets
    return np.sum(np.power(residual, 2)) / (2 * m)

2.2 梯度下降函数

def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent for linear regression.

    The inputs are expected to be ``np.matrix`` objects so that ``*`` is a
    matrix product.

    Args:
        X: design matrix, one row per training example.
        y: column of target values.
        theta: initial parameters as a 1 x n row vector; it is not modified.
        alpha: learning rate.
        num_iters: number of iterations to run.

    Returns:
        (cost, all_theta). ``cost[i]`` and ``all_theta[i]`` are the cost and
        the parameters at the start of iteration ``i``, i.e. before that
        iteration's update. ``all_theta[0]`` is therefore the initial theta
        and ``all_theta[-1]`` has received ``num_iters - 1`` updates.
    """
    m = len(X)
    n_params = int(theta.shape[1])
    cost = np.zeros(num_iters)
    all_theta = np.zeros((num_iters, n_params))

    for i in range(num_iters):
        cost[i] = computeCost(X, y, theta)
        all_theta[i] = np.asarray(theta).ravel()

        error = X * theta.T - y  # h_theta(x) - y, one row per example
        # Simultaneous update of every parameter. error.T * X is the 1 x n row
        # of sums over examples of (h(x_i) - y_i) * x_ij. A fresh theta is
        # created each iteration, so no buffer is shared between iterations.
        theta = theta - (alpha / m) * (error.T * X)

    return cost, all_theta

2.3 正规方程 normal equation

def normalequation(X, y):
    """Solve linear regression in closed form with the normal equation.

    Computes theta = pinv(X^T X) X^T y. The pseudo-inverse is used instead of
    a plain inverse so that a singular X^T X (for example, linearly dependent
    feature columns) yields the minimum-norm solution rather than an error.

    Args:
        X: design matrix, one row per training example.
        y: column of target values.

    Returns:
        theta as a 1 x n row vector, the same orientation that computeCost
        expects.
    """
    X = np.asmatrix(X)
    y = np.asmatrix(y)
    theta = np.asmatrix(np.linalg.pinv(X.T * X)) * X.T * y
    # Transpose so the shape matches the theta convention used by computeCost.
    return theta.T

3 线性回归

3.1 单变量线性回归

在本次练习中，需要实现一个单变量的线性回归。
假设有一组历史数据<城市人口，开店利润>，现需要预测在哪个城市中开店利润比较好？
历史数据如下：第一列表示城市人口数，单位为万人；第二列表示利润，单位为10,000$

3.1.1 读取数据,然后展示数据

    data_path = 'ex1data1.txt'
    # The file is read without a header row; the two columns are named here.
    data = pd.read_csv(data_path, header=None, names=['Population', 'Profit'])
    # Prepend a constant column of ones (the feature paired with theta0).
    data.insert(0, 'Theta0', 1)
    # Preview the raw data with pandas' built-in scatter plot.
    data.plot(kind='scatter', x='Population', y='Profit')

3.1.2 线性回归函数

def liner_regression():
    """Placeholder for the single-variable regression driver; returns None."""
    return None

3.2 多变量的线性回归函数

ex1data2.txt里的数据，第一列是房屋大小，第二列是卧室数量，第三列是房屋售价
根据已有数据，建立模型，预测房屋的售价

def multi_liner_regression():
    """Placeholder for the multi-variable regression driver; returns None."""
    return None

4 绘图

4.1 绘制线性回归

def plotLinerRegression(data, theta):
    """Plot the fitted line over the training data on a new figure.

    Args:
        data: object exposing ``Population`` and ``Profit`` columns
            (a pandas DataFrame in this file).
        theta: indexable pair; ``theta[0]`` is the intercept and ``theta[1]``
            the slope.

    The figure is created but not shown; ``plt.show()`` is not called here.
    """
    # 100 evenly spaced x values spanning the observed population range.
    xs = np.linspace(data.Population.min(), data.Population.max(), 100)
    prediction = theta[0] + theta[1] * xs

    fig, ax = plt.subplots()
    ax.plot(xs, prediction, color='blue', alpha=0.4, label='Prediction')
    ax.scatter(data.Population, data.Profit, color='green', alpha=0.4,
               label='Training Data')
    ax.legend()
    ax.set_xlabel('Population')
    ax.set_ylabel('Profit')
    ax.set_title('Prediction vs Population size')

4.2 绘制J_theta

def plotJtheta(cost, num_iters, title):
    """Scatter-plot the cost J(theta) against the iteration index.

    Args:
        cost: sequence of cost values, one per iteration.
        num_iters: number of iterations; x values are 0 .. num_iters - 1.
        title: title of the figure.

    The figure is created but not shown; ``plt.show()`` is not called here.
    """
    fig, ax = plt.subplots()
    ax.scatter(np.arange(num_iters), cost, alpha=0.4)
    ax.set_xlabel('num_iters')
    ax.set_ylabel('J(θ)')
    ax.set_title(title)

4.3 绘制3D图

def plotThreeD(X, Y, fig_title):
    """Draw the cost surface J(theta0, theta1) as a 3D plot on a new figure.

    Args:
        X: design matrix passed through to computeCost.
        Y: target column passed through to computeCost.
        fig_title: title of the figure.

    The figure is created but not shown; ``plt.show()`` is not called here.
    """
    fig = plt.figure()
    # fig.gca(projection='3d') no longer accepts keyword arguments in recent
    # Matplotlib releases; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(111, projection='3d')

    theta0 = np.linspace(-10, 10, 100)
    theta1 = np.linspace(-1, 4, 100)

    # J_theta[i, j] is the cost at (theta0[i], theta1[j]).
    J_theta = np.zeros((len(theta0), len(theta1)))
    for i, t0 in enumerate(theta0):
        for j, t1 in enumerate(theta1):
            J_theta[i, j] = computeCost(X, Y, np.matrix([t0, t1]))

    # indexing='ij' makes grid0[i, j] == theta0[i] and grid1[i, j] == theta1[j],
    # matching the layout of J_theta. The default 'xy' indexing would plot the
    # surface transposed relative to its axes.
    grid0, grid1 = np.meshgrid(theta0, theta1, indexing='ij')

    # Other colormaps that work well here: gist_rainbow, hsv.
    surf = ax.plot_surface(grid0, grid1, J_theta, cmap=cm.rainbow,
                           linewidth=0, antialiased=False)
    ax.set_xlabel('theta0')
    ax.set_ylabel('theta1')
    ax.set_zlabel('J_theta')
    ax.set_title(fig_title)
    fig.colorbar(surf, shrink=0.5, aspect=5)

数据集

完整代码

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.preprocessing import StandardScaler
from mpl_toolkits.mplot3d import Axes3D# 1.简单练习
# 输出一个5*5的单位矩阵
a = np.identity(5, dtype=int)
# print(a)

# 2. Gradient descent: training the linear-regression parameters θ
# 在本次练习中，需要实现一个单变量的线性回归。
# 假设有一组历史数据<城市人口，开店利润>，现需要预测在哪个城市中开店利润比较好？
# 历史数据如下：第一列表示城市人口数，单位为万人；第二列表示利润，单位为10,000$# 2.1 计算代价损失函数Cost,训练线性回归的参数θ,特征X=[x0, x1, x2, …, xm], 某个特征theta=[θ0, θ1, θ2, …, θm]
def computeCost(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    The inputs are expected to be ``np.matrix`` objects so that ``*`` is a
    matrix product.

    Args:
        X: design matrix, one row per training example.
        y: column of target values, one row per training example.
        theta: row vector of parameters, one column per column of X.

    Returns:
        sum((X * theta.T - y) ** 2) / (2 * m), where m is the number of rows
        of X.
    """
    m = len(X)  # m: number of training examples
    residual = X * theta.T - y  # hypothesis h_theta(x) minus the targets
    return np.sum(np.power(residual, 2)) / (2 * m)


# 2.2 Gradient descent
def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent for linear regression.

    The inputs are expected to be ``np.matrix`` objects so that ``*`` is a
    matrix product.

    Args:
        X: design matrix, one row per training example.
        y: column of target values.
        theta: initial parameters as a 1 x n row vector; it is not modified.
        alpha: learning rate.
        num_iters: number of iterations to run.

    Returns:
        (cost, all_theta). ``cost[i]`` and ``all_theta[i]`` are the cost and
        the parameters at the start of iteration ``i``, i.e. before that
        iteration's update. ``all_theta[0]`` is therefore the initial theta
        and ``all_theta[-1]`` has received ``num_iters - 1`` updates.
    """
    m = len(X)
    n_params = int(theta.shape[1])
    cost = np.zeros(num_iters)
    all_theta = np.zeros((num_iters, n_params))

    for i in range(num_iters):
        cost[i] = computeCost(X, y, theta)
        all_theta[i] = np.asarray(theta).ravel()

        error = X * theta.T - y  # h_theta(x) - y, one row per example
        # Simultaneous update of every parameter. error.T * X is the 1 x n row
        # of sums over examples of (h(x_i) - y_i) * x_ij. A fresh theta is
        # created each iteration, so no buffer is shared between iterations.
        theta = theta - (alpha / m) * (error.T * X)

    return cost, all_theta


# 2.3 Normal equation
def normalequation(X, y):
    """Solve linear regression in closed form with the normal equation.

    Computes theta = pinv(X^T X) X^T y. The pseudo-inverse is used instead of
    a plain inverse so that a singular X^T X (for example, linearly dependent
    feature columns) yields the minimum-norm solution rather than an error.

    Args:
        X: design matrix, one row per training example.
        y: column of target values.

    Returns:
        theta as a 1 x n row vector, the same orientation that computeCost
        expects.
    """
    X = np.asmatrix(X)
    y = np.asmatrix(y)
    theta = np.asmatrix(np.linalg.pinv(X.T * X)) * X.T * y
    # Transpose so the shape matches the theta convention used by computeCost.
    return theta.T


# 4 Plotting
# 4.1 绘制线性回归
def plotLinerRegression(data, theta):
    """Plot the fitted line over the training data on a new figure.

    Args:
        data: object exposing ``Population`` and ``Profit`` columns
            (a pandas DataFrame in this file).
        theta: indexable pair; ``theta[0]`` is the intercept and ``theta[1]``
            the slope.

    The figure is created but not shown; ``plt.show()`` is not called here.
    """
    # 100 evenly spaced x values spanning the observed population range.
    xs = np.linspace(data.Population.min(), data.Population.max(), 100)
    prediction = theta[0] + theta[1] * xs

    fig, ax = plt.subplots()
    ax.plot(xs, prediction, color='blue', alpha=0.4, label='Prediction')
    ax.scatter(data.Population, data.Profit, color='green', alpha=0.4,
               label='Training Data')
    ax.legend()
    ax.set_xlabel('Population')
    ax.set_ylabel('Profit')
    ax.set_title('Prediction vs Population size')


# 4.2 Plot J_theta
def plotJtheta(cost, num_iters, title):
    """Scatter-plot the cost J(theta) against the iteration index.

    Args:
        cost: sequence of cost values, one per iteration.
        num_iters: number of iterations; x values are 0 .. num_iters - 1.
        title: title of the figure.

    The figure is created but not shown; ``plt.show()`` is not called here.
    """
    fig, ax = plt.subplots()
    ax.scatter(np.arange(num_iters), cost, alpha=0.4)
    ax.set_xlabel('num_iters')
    ax.set_ylabel('J(θ)')
    ax.set_title(title)


# 4.3 Plot J_theta as a 3D surface
def plotThreeD(X, Y, fig_title):
    """Draw the cost surface J(theta0, theta1) as a 3D plot on a new figure.

    Args:
        X: design matrix passed through to computeCost.
        Y: target column passed through to computeCost.
        fig_title: title of the figure.

    The figure is created but not shown; ``plt.show()`` is not called here.
    """
    fig = plt.figure()
    # fig.gca(projection='3d') no longer accepts keyword arguments in recent
    # Matplotlib releases; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(111, projection='3d')

    theta0 = np.linspace(-10, 10, 100)
    theta1 = np.linspace(-1, 4, 100)

    # J_theta[i, j] is the cost at (theta0[i], theta1[j]).
    J_theta = np.zeros((len(theta0), len(theta1)))
    for i, t0 in enumerate(theta0):
        for j, t1 in enumerate(theta1):
            J_theta[i, j] = computeCost(X, Y, np.matrix([t0, t1]))

    # indexing='ij' makes grid0[i, j] == theta0[i] and grid1[i, j] == theta1[j],
    # matching the layout of J_theta. The default 'xy' indexing would plot the
    # surface transposed relative to its axes.
    grid0, grid1 = np.meshgrid(theta0, theta1, indexing='ij')

    # Other colormaps that work well here: gist_rainbow, hsv.
    surf = ax.plot_surface(grid0, grid1, J_theta, cmap=cm.rainbow,
                           linewidth=0, antialiased=False)
    ax.set_xlabel('theta0')
    ax.set_ylabel('theta1')
    ax.set_zlabel('J_theta')
    ax.set_title(fig_title)
    fig.colorbar(surf, shrink=0.5, aspect=5)


# 3 Linear regression
# 3.1 单变量线性回归
def liner_regression():
    """Fit and visualise single-variable linear regression on ex1data1.txt.

    Reads the data file, fits theta with gradientDescent, then draws the
    fitted line, the cost per iteration and the 3D cost surface. Figures are
    created but not shown; ``plt.show()`` is not called here.
    """
    # 1. Read the data and preview it.
    data_path = 'ex1data1.txt'
    data = pd.read_csv(data_path, header=None, names=['Population', 'Profit'])
    # Prepend a constant column of ones (the feature paired with theta0).
    data.insert(0, 'X0', 1)
    data.plot(kind='scatter', x='Population', y='Profit', alpha=0.4,
              color='green')

    # 2. Initialise.
    num_iters = 400  # number of gradient-descent iterations
    alpha = 0.01  # learning rate
    fig_title = 'liner regression'  # figure title

    # Features are every column except the last; the last column is the target.
    list_x = data.iloc[:, :-1]
    list_y = data.iloc[:, -1:]
    X = np.matrix(list_x.values)
    y = np.matrix(list_y.values)
    # Initial theta is a 1 x n row vector of zeros (1 x 2 here).
    theta = np.matrix(np.zeros(X.shape[1]))

    # 3. Fit; the last recorded row of all_theta is used as the result.
    cost, all_theta = gradientDescent(X, y, theta, alpha, num_iters)
    theta = all_theta[-1]

    # Alternative: closed-form solution via the normal equation.
    # theta1 = normalequation(X, y)
    # cost1 = computeCost(X, y, theta1)

    plotLinerRegression(data, theta)
    plotJtheta(cost, num_iters, fig_title)
    plotThreeD(X, y, fig_title)
    return


# 3.2 Multi-variable linear regression
def multi_liner_regression():
    """Fit multi-variable linear regression on ex1data2.txt and plot the cost.

    Reads the data file, standardises every column, fits theta with
    gradientDescent and plots the cost per iteration. The figure is created
    but not shown; ``plt.show()`` is not called here.
    """
    # 1. Read the data.
    path = 'ex1data2.txt'
    # header=None keeps the first row as data, matching how ex1data1.txt is
    # read in liner_regression. Without it pandas would consume the first
    # training example as column names.
    # NOTE(review): assumes the file has no header row - confirm.
    raw_data = pd.read_csv(path, header=None,
                           names=['Size', 'Bedrooms', 'Price'])

    # 2. Initialise.
    num_iters = 1500  # number of gradient-descent iterations
    alpha = 0.01  # learning rate
    fig_title = 'multi_liner regression'  # figure title

    # 2.1 Preprocess: standardise all columns, the target included.
    # fit_transform returns a numpy ndarray.
    scaler = StandardScaler()
    data = scaler.fit_transform(raw_data)
    # Prepend a constant column of ones (the feature paired with theta0).
    data = np.insert(data, obj=0, values=1, axis=1)

    # Features are every column except the last; the last column is the target.
    X = np.asmatrix(data[:, :-1])
    y = np.asmatrix(data[:, -1:])
    theta = np.matrix(np.zeros(X.shape[1]))  # initial theta: 1 x n zeros

    # 3. Fit; the last recorded row of all_theta is the result.
    cost, all_theta = gradientDescent(X, y, theta, alpha, num_iters)
    theta = all_theta[-1]

    # Alternative: closed-form solution via the normal equation.
    # theta1 = normalequation(X, y)
    # cost1 = computeCost(X, y, theta1)

    # 4. Plot the cost per iteration.
    plotJtheta(cost, num_iters, fig_title)


if __name__ == '__main__':
    liner_regression()
    multi_liner_regression()
    plt.show()

Machine Learning相关推荐

初步认识机器学习（Machine Leaning）
一.概述 1.人工智能>机器学习>深度学习|强化学习 2.机器学习(ML:machine leaning):一门通过优化方法(线性回归.逻辑回归.决策树.向量机.贝叶斯模型等)挖掘数据中规 ...
学习Machine Leaning In Action（四）：逻辑回归
第一眼看到逻辑回归(Logistic Regression)这个词时,脑海中没有任何概念,读了几页后,发现这非常类似于神经网络中单个神经元的分类方法. 书中逻辑回归的思想是用一个超平面将数据集分为两部 ...
Coursera Machine Leaning 课程总结
最近机器学习比较火热,身边很多同学都有兴趣,恰好Coursera上面有这门课.讲授这门课的Andrew教授任职斯坦福大学,是coursera的联合创建者,在机器学习领域颇有成就,身边的同学也有几位上这 ...
machine leaning 1
什么是机器学习? 机器学习就是从数据中自动分析获得规律,并利用规律对未知数据进行预测. 什么是模型? 特殊的对象.在对象内部已经被集成或者封装好了某种形式的方程式,只不过该方程还没有求出解.就是一堆数 ...
REAL-WORD MACHINE LEANING(翻译本--第一部分)
机器学习工作流程本书的第一部分介绍了机器学的基本流程,第一部分中的每一章节都是工作流程中的一部分: 第一章介绍了机器学习主要的用途,以及为什么要阅读本书. 第二章本章中您将深入了解ML领域中数据处理 ...
学习Machine Leaning In Action（二）：kNN
kNN算法又称为k最近邻算法,是各种分类算法中较简单的一种(有可能是最简单的).他的思路很好理解,即将待分类向量和所有已知向量求距离,再统计k个最小距离向量所属的类型,最多的类型即为待分类向量的类型. ...
Tianchi发布最新AI知识树！
↑↑↑关注后"星标"Datawhale 每日干货 & 每月组队学习,不错过 Datawhale干货来源:Tianchi,方向:AI内容近期Tianchi开放了9大训练营 ...
Galaxy Release (v 21.05)，众多核心技术栈变更
2021年6月初,Galaxy Project 正式发布了 release 21.05 版本:随后6月中旬,发布该版本的 announcement 文档.这里总结一下该版本一些主要的更新内容,为关注和 ...
RSS - 简单方便的follow资讯
2019独角兽企业重金招聘Python工程师标准>>> rss服务提供信息推送.主题订阅等服务,无论是网站.博客.还是各领域论文.主题消息,凡是提供rss服务的地方都可以订阅,并在更 ...

Machine Learning