机器学习基础-逻辑回归-09

逻辑回归

正确率(Precision)/召回率(Recall)/F1指标

梯度下降法-逻辑回归

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
# Whether the features are standardised before training and prediction
scale = True

# Load the data set: every column but the last is a feature, the last column
# is used as the class label.
data = np.genfromtxt("LR-testSet.csv", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1]


def plot():
    """Scatter-plot the two classes held in the module-level ``x_data``/``y_data``.

    Samples whose label equals 0 are drawn as blue circles, every other sample
    as a red cross, and a legend is attached.  The figure is not shown; the
    caller decides when to call ``plt.show()``.
    """
    # ravel() lets the same code handle labels stored as a flat vector or as
    # an (n, 1) column.
    is_class0 = np.ravel(y_data) == 0
    scatter0 = plt.scatter(x_data[is_class0, 0], x_data[is_class0, 1],
                           c='b', marker='o')
    scatter1 = plt.scatter(x_data[~is_class0, 0], x_data[~is_class0, 1],
                           c='r', marker='x')
    # Legend
    plt.legend(handles=[scatter0, scatter1], labels=['label0', 'label1'],
               loc='best')


plot()
plt.show()

# Data preparation: rebuild the feature matrix and add the bias term
x_data = data[:, :-1]
# Keep the labels as a column (one row per sample) so that they line up with
# the matrix arithmetic used during training.
y_data = data[:, -1, np.newaxis]
print(np.asmatrix(x_data).shape)
print(np.asmatrix(y_data).shape)
# Prepend a column of ones (the bias input) to every sample.  The row count is
# taken from the data instead of being hard-coded.
X_data = np.concatenate((np.ones((len(x_data), 1)), x_data), axis=1)
print(X_data.shape)

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))`` of ``x``."""
    return 1.0 / (1 + np.exp(-x))


def cost(xMat, yMat, ws):
    """Return the mean cross-entropy loss of the logistic model.

    Args:
        xMat: ``np.matrix`` of samples, one row per sample.
        yMat: ``np.matrix`` column of 0/1 labels, one row per sample.
        ws: ``np.matrix`` column of weights, one row per column of ``xMat``.

    Returns:
        The loss summed over all samples and divided by ``len(xMat)``.
    """
    # Clip the predictions so that a saturated output (exactly 0.0 or 1.0)
    # gives a large finite loss instead of log(0) = -inf / nan.
    eps = 1e-15
    h = np.clip(sigmoid(xMat * ws), eps, 1 - eps)
    left = np.multiply(yMat, np.log(h))
    right = np.multiply(1 - yMat, np.log(1 - h))
    return np.sum(left + right) / -(len(xMat))


def gradAscent(xArr, yArr, lr=0.001, epochs=10000):
    """Fit logistic-regression weights with batch gradient descent.

    Despite the name, each step subtracts ``lr`` times the gradient of the
    cross-entropy loss, i.e. the loss is minimised.  The module-level flag
    ``scale`` decides whether the inputs are standardised first.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 2-D array-like column of 0/1 labels, one row per sample.
        lr: Learning rate (step size).
        epochs: Number of update steps; the loop runs ``epochs + 1`` times.

    Returns:
        A tuple ``(ws, costList)``: ``ws`` is an ``np.matrix`` column holding
        one weight per input column, ``costList`` holds the loss recorded at
        every 50th iteration (starting with iteration 0).
    """
    if scale:
        # NOTE(review): every column is standardised, including a constant
        # bias column if the caller added one; such a column presumably ends
        # up all zeros, so its weight never changes.  Kept as is so that
        # training stays consistent with predict().
        xArr = preprocessing.scale(xArr)
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    costList = []
    # Rows are samples, columns are inputs; one weight per column.
    m, n = np.shape(xMat)
    # Start from all-ones weights.
    ws = np.asmatrix(np.ones((n, 1)))
    for i in range(epochs + 1):
        # Predicted probabilities for the current weights.
        h = sigmoid(xMat * ws)
        # Gradient of the mean cross-entropy loss with respect to ws.
        ws_grad = xMat.T * (h - yMat) / m
        ws = ws - lr * ws_grad
        if i % 50 == 0:
            costList.append(cost(xMat, yMat, ws))
    return ws, costList

# Train the model; returns the fitted weights and the recorded loss history
ws, costList = gradAscent(X_data, y_data)
print(ws)

if not scale:
    # Draw the decision boundary ws[0] + ws[1]*x + ws[2]*y = 0 on top of the
    # samples.  Only done for unscaled data, because the weights of a scaled
    # model do not live in the coordinate system of the raw scatter plot.
    plot()
    x_test = np.array([[-4], [3]])
    y_test = (-ws[0] - x_test * ws[1]) / ws[2]
    plt.plot(x_test, y_test, 'k')
    plt.show()

# Plot how the loss evolves.  gradAscent records one value every 50
# iterations, so the x positions are derived from the history length instead
# of a hard-coded point count.
x = np.arange(len(costList)) * 50
plt.plot(x, costList, c='r')
plt.title('Train')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

# Prediction
def predict(x_data, ws):
    """Return a list of 0/1 class predictions for ``x_data``.

    Args:
        x_data: 2-D array-like of samples with the same columns that were
            used for training.
        ws: Weight column as returned by ``gradAscent``.

    Returns:
        A list with one int per sample: 1 where the predicted probability is
        at least 0.5, otherwise 0.
    """
    if scale:
        # NOTE(review): the data passed in is standardised with its own
        # statistics, which matches training only when the same samples are
        # used for both.
        x_data = preprocessing.scale(x_data)
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    xMat = np.asmatrix(x_data)
    ws = np.asmatrix(ws)
    probabilities = np.asarray(sigmoid(xMat * ws)).ravel()
    return (probabilities >= 0.5).astype(int).tolist()


predictions = predict(X_data, ws)
print(classification_report(y_data, predictions))

sklearn-逻辑回归

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn import linear_model
# Whether the features are standardised; the decision boundary below is only
# drawn when this is False.
scale = False

# Load the data set: every column but the last is a feature, the last column
# is used as the class label.
data = np.genfromtxt("LR-testSet.csv", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1]


def plot():
    """Scatter-plot the two classes held in the module-level ``x_data``/``y_data``.

    Samples whose label equals 0 are drawn as blue circles, every other sample
    as a red cross, and a legend is attached.  The figure is not shown; the
    caller decides when to call ``plt.show()``.
    """
    is_class0 = np.ravel(y_data) == 0
    scatter0 = plt.scatter(x_data[is_class0, 0], x_data[is_class0, 1],
                           c='b', marker='o')
    scatter1 = plt.scatter(x_data[~is_class0, 0], x_data[~is_class0, 1],
                           c='r', marker='x')
    # Legend
    plt.legend(handles=[scatter0, scatter1], labels=['label0', 'label1'],
               loc='best')


plot()
plt.show()

# Fit scikit-learn's logistic regression on the raw features
logistic = linear_model.LogisticRegression()
logistic.fit(x_data, y_data)

if not scale:
    # Draw the decision boundary
    # intercept + coef[0]*x + coef[1]*y = 0 on top of the samples.
    plot()
    x_test = np.array([[-4], [3]])
    y_test = (-logistic.intercept_ - x_test * logistic.coef_[0][0]) / logistic.coef_[0][1]
    plt.plot(x_test, y_test, 'k')
    plt.show()

# Evaluate on the training samples
predictions = logistic.predict(x_data)
print(classification_report(y_data, predictions))

梯度下降法-非线性逻辑回归

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
# Whether the features are standardised before training
scale = False

# Load the data set: every column but the last is a feature, the last column
# is used as the class label and kept as a column (one row per sample).
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1, np.newaxis]


def plot():
    """Scatter-plot the two classes held in the module-level ``x_data``/``y_data``.

    Samples whose label equals 0 are drawn as blue circles, every other sample
    as a red cross, and a legend is attached.  The figure is not shown; the
    caller decides when to call ``plt.show()``.
    """
    # ravel() turns the (n, 1) label column into a flat mask.
    is_class0 = np.ravel(y_data) == 0
    scatter0 = plt.scatter(x_data[is_class0, 0], x_data[is_class0, 1],
                           c='b', marker='o')
    scatter1 = plt.scatter(x_data[~is_class0, 0], x_data[~is_class0, 1],
                           c='r', marker='x')
    # Legend
    plt.legend(handles=[scatter0, scatter1], labels=['label0', 'label1'],
               loc='best')


plot()
plt.show()

# Polynomial feature expansion; `degree` controls the highest power generated
poly_reg = PolynomialFeatures(degree=3)
poly_reg.fit(x_data)
# Expanded feature matrix that the model is trained on
x_poly = poly_reg.transform(x_data)

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))`` of ``x``."""
    return 1.0 / (1 + np.exp(-x))


def cost(xMat, yMat, ws):
    """Return the mean cross-entropy loss of the logistic model.

    Args:
        xMat: ``np.matrix`` of samples, one row per sample.
        yMat: ``np.matrix`` column of 0/1 labels, one row per sample.
        ws: ``np.matrix`` column of weights, one row per column of ``xMat``.

    Returns:
        The loss summed over all samples and divided by ``len(xMat)``.
    """
    # Clip the predictions so that a saturated output (exactly 0.0 or 1.0)
    # gives a large finite loss instead of log(0) = -inf / nan.
    eps = 1e-15
    h = np.clip(sigmoid(xMat * ws), eps, 1 - eps)
    left = np.multiply(yMat, np.log(h))
    right = np.multiply(1 - yMat, np.log(1 - h))
    return np.sum(left + right) / -(len(xMat))


def gradAscent(xArr, yArr, lr=0.03, epochs=50000):
    """Fit logistic-regression weights with batch gradient descent.

    Despite the name, each step subtracts ``lr`` times the gradient of the
    cross-entropy loss, i.e. the loss is minimised.  The module-level flag
    ``scale`` decides whether the inputs are standardised first.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 2-D array-like column of 0/1 labels, one row per sample.
        lr: Learning rate (step size).
        epochs: Number of update steps; the loop runs ``epochs + 1`` times.

    Returns:
        A tuple ``(ws, costList)``: ``ws`` is an ``np.matrix`` column holding
        one weight per input column, ``costList`` holds the loss recorded at
        every 50th iteration (starting with iteration 0).
    """
    if scale:
        xArr = preprocessing.scale(xArr)
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    costList = []
    # The number of columns determines the number of weights.
    m, n = np.shape(xMat)
    # Start from all-ones weights.
    ws = np.asmatrix(np.ones((n, 1)))
    for i in range(epochs + 1):
        # Predicted probabilities for the current weights.
        h = sigmoid(xMat * ws)
        # Gradient of the mean cross-entropy loss with respect to ws.
        ws_grad = xMat.T * (h - yMat) / m
        ws = ws - lr * ws_grad
        if i % 50 == 0:
            costList.append(cost(xMat, yMat, ws))
    return ws, costList

# Train the model; returns the fitted weights and the recorded loss history
ws, costList = gradAscent(x_poly, y_data)
print(ws)


# Range covered by the two features, padded by 1 on every side
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build a dense grid over that range
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# np.r_ joins arrays along rows, np.c_ joins them as columns:
# >>> a = np.array([1,2,3])
# >>> b = np.array([5,2,5])
# >>> np.r_[a,b]
# array([1, 2, 3, 5, 2, 5])
# >>> np.c_[a,b]
# array([[1, 5],
#        [2, 2],
#        [3, 5]])
# >>> np.c_[a,[0,0,0],b]
# array([[1, 0, 5],
#        [2, 0, 2],
#        [3, 0, 5]])
# ravel and flatten both flatten to 1-D; flatten always returns a copy, while
# ravel returns a view when it can, so writing to its result may change the
# original array.
grid_points = np.c_[xx.ravel(), yy.ravel()]
# The transformer was already fitted on the training data, so the grid is only
# transformed, not re-fitted.
z = sigmoid(poly_reg.transform(grid_points).dot(np.array(ws)))
# Turn the probabilities into hard 0/1 classes (threshold 0.5)
z = np.where(z > 0.5, 1.0, 0.0)
z = z.reshape(xx.shape)
# Filled contour plot of the predicted class, with the samples on top
cs = plt.contourf(xx, yy, z)
plot()
plt.show()

# Prediction
def predict(x_data, ws):
    """Return a list of 0/1 class predictions for ``x_data``.

    Feature standardisation is not applied here; the scaling step was
    commented out in the original code.

    Args:
        x_data: 2-D array-like of samples with the same columns that were
            used for training.
        ws: Weight column as returned by ``gradAscent``.

    Returns:
        A list with one int per sample: 1 where the predicted probability is
        at least 0.5, otherwise 0.
    """
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    xMat = np.asmatrix(x_data)
    ws = np.asmatrix(ws)
    probabilities = np.asarray(sigmoid(xMat * ws)).ravel()
    return (probabilities >= 0.5).astype(int).tolist()


predictions = predict(x_poly, ws)
print(classification_report(y_data, predictions))

# One hand-made sample used to inspect what the polynomial expansion produces
test = [[2, 3]]
# Polynomial feature expansion; `degree` controls the highest power generated
poly_reg = PolynomialFeatures(degree=3)
poly_reg.fit(test)
# Expanded features of the sample
x_poly = poly_reg.transform(test)

# Bare expression: shows the expanded row when run in an interactive session
x_poly

# Range covered by the two features, padded by 1 on every side
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build a dense grid over that range
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# Show every grid point to visualise what meshgrid generated
plt.scatter(xx, yy)
plt.show()

sklearn-非线性逻辑回归

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.datasets import make_gaussian_quantiles
from sklearn.preprocessing import PolynomialFeatures

# Draw 500 samples with 2 features from a Gaussian and split them into two
# classes by quantile; the generator can also produce more than two classes.
x_data, y_data = make_gaussian_quantiles(n_samples=500, n_features=2,
                                         n_classes=2)
# Show the samples coloured by class
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Baseline: logistic regression on the two raw features
logistic = linear_model.LogisticRegression()
logistic.fit(x_data, y_data)

# Range covered by the two features, padded by 1 on every side
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build a dense grid over that range
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# np.r_ joins arrays along rows, np.c_ joins them as columns:
# >>> a = np.array([1,2,3])
# >>> b = np.array([5,2,5])
# >>> np.r_[a,b]
# array([1, 2, 3, 5, 2, 5])
# >>> np.c_[a,b]
# array([[1, 5],
#        [2, 2],
#        [3, 5]])
# >>> np.c_[a,[0,0,0],b]
# array([[1, 0, 5],
#        [2, 0, 2],
#        [3, 0, 5]])
# ravel and flatten both flatten to 1-D; flatten always returns a copy, while
# ravel returns a view when it can, so writing to its result may change the
# original array.
z = logistic.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Filled contour plot of the predicted class
cs = plt.contourf(xx, yy, z)
# Samples on top, coloured by their true class
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

print('score:', logistic.score(x_data, y_data))

# Polynomial feature expansion; `degree` controls the highest power generated
poly_reg = PolynomialFeatures(degree=5)
# Expand the training features
x_poly = poly_reg.fit_transform(x_data)
# Logistic-regression model
logistic = linear_model.LogisticRegression()
# Train on the expanded features
logistic.fit(x_poly, y_data)

# Range covered by the two features, padded by 1 on every side
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build a dense grid over that range
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# np.r_ joins arrays along rows, np.c_ joins them as columns:
# >>> a = np.array([1,2,3])
# >>> b = np.array([5,2,5])
# >>> np.r_[a,b]
# array([1, 2, 3, 5, 2, 5])
# >>> np.c_[a,b]
# array([[1, 5],
#        [2, 2],
#        [3, 5]])
# >>> np.c_[a,[0,0,0],b]
# array([[1, 0, 5],
#        [2, 0, 2],
#        [3, 0, 5]])
# ravel and flatten both flatten to 1-D; flatten always returns a copy, while
# ravel returns a view when it can, so writing to its result may change the
# original array.
grid_points = np.c_[xx.ravel(), yy.ravel()]
# The transformer is already fitted on the training data, so the grid is only
# transformed, not re-fitted.
z = logistic.predict(poly_reg.transform(grid_points))
z = z.reshape(xx.shape)
# Filled contour plot of the predicted class
cs = plt.contourf(xx, yy, z)
# Samples on top, coloured by their true class
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

print('score:', logistic.score(x_poly, y_data))

机器学习基础-逻辑回归-09相关推荐

logit回归模型假设_机器学习基础---逻辑回归（假设函数与线性回归不同）
一:分类 (一)分类基础在分类问题中,你要预测的变量y是离散的值,我们将学习一种叫做逻辑回归 (Logistic Regression) 的算法,这是目前最流行使用最广泛的一种学习算法. 在分类问题 ...
机器学习实战-逻辑回归-19
机器学习实战-逻辑回归-用户流失预测 import numpy as np train_data = np.genfromtxt('Churn-Modelling.csv',delimiter=',' ...
机器学习_2逻辑回归
机器学习_逻辑回归分类问题二分类--Sigmoid函数 Sigmoid函数代码实现逻辑回归数学原理求解方式正则化逻辑回归数据集应用样例--代码实现样例1:有清晰的线性决策边界决策边界 ...
机器学习：逻辑回归(logistics regression)
title: 机器学习:逻辑回归(logistics regression) date: 2019-11-30 20:55:06 mathjax: true categories: 机器学习 tags ...
传统机器学习之逻辑回归的分类预测，以威斯康辛州乳腺癌数据集为例
传统机器学习之逻辑回归的分类预测,以威斯康辛州乳腺癌数据集为例文章目录传统机器学习之逻辑回归的分类预测,以威斯康辛州乳腺癌数据集为例 1导入基本库 2读取数据并且变换类型 3输出数据 4可视化数据 ...
吴恩达机器学习之逻辑回归(二分类)
吴恩达机器学习之逻辑回归逻辑回归二分类逻辑回归二分类逻辑回归案例 python代码实现(含详细代码注释): 案例中几个问题的解析不同于线性回归,逻辑回归的hθ(x)还需要用sigmoid函数处 ...
【机器学习】逻辑回归原理介绍
[机器学习]逻辑回归原理介绍 [机器学习]逻辑回归python实现 [机器学习]逻辑回归sklearn实现 Logistic 回归模型是目前广泛使用的学习算法之一,通常用来解决二分类问题,虽然名字中有 ...
【机器学习】逻辑回归-基础认识与鸢尾花分类实操案例
文章目录前言一.基本理解二.数学原理三.简单二元分类算法实现四.实战案例总结前言本文将会对逻辑回归的基础理解,数学原理,简单算法实现,鸢尾花分类问题实操案例去学习我们的逻辑回归. 一. ...
机器学习算法基础——逻辑回归
45.模型的保存与加载 sklearn模型的保存和加载 from sklearn.externals import joblib 保存和加载API joblib.dump(rf,"" ...

机器学习基础-逻辑回归-09

逻辑回归

正确率(Precision)/召回率(Recall)/F1指标

梯度下降法-逻辑回归

sklearn-逻辑回归

梯度下降法-非线性逻辑回归

sklearn-非线性逻辑回归

机器学习基础-逻辑回归-09相关推荐

最新文章

热门文章