逻辑回归实现鸢尾花分类

from sklearn.datasets import load_iris
import numpy as  np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

# Load the iris dataset bundled with scikit-learn.
iris = load_iris()
print(iris.target_names)  # names of the classes

# Feature matrix (150 samples x 4 features) and the integer class labels.
data, target = iris.data, iris.target
print(data.shape)
print(target.shape)

['setosa' 'versicolor' 'virginica']
(150, 4)
(150,)

1.使用前两个特征进行分类

对于多分类问题来说，由于逻辑回归一次只能对两个类别进行分类，所以如果有n个类别需要分类，则需要为每个类别重新制作y数据集（以类别1为例，将类别1的样本设置为1，其他类别的样本都设置为0），最终用n次分类得到的n组参数分别与x[i]相运算，哪个概率最大，预测出来的y就属于哪个类别。

# Only the first two feature columns are used here so that the samples can
# be drawn on a 2-D plot.
x = data[:, 0:2]
y = target

# Split both feature columns by class label (0, 1, 2) using boolean masks.
a0, b0 = x[y == 0, 0], x[y == 0, 1]
a1, b1 = x[y == 1, 0], x[y == 1, 1]
a2, b2 = x[y == 2, 0], x[y == 2, 1]

# One scatter series per class so each gets its own colour, marker and
# legend entry.
scatter1 = plt.scatter(a0, b0, c='b', marker='o')
scatter2 = plt.scatter(a1, b1, c='r', marker='x')
scatter3 = plt.scatter(a2, b2, c='y', marker='s')
plt.legend(
    handles=[scatter1, scatter2, scatter3],
    labels=['setosa', 'versicolor', 'virginica'],
    loc='best',
)
plt.show()

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``.

    Args:
        x: A scalar, ``numpy`` array or ``numpy`` matrix. The function is
            applied element-wise.

    Returns:
        A value of the same shape as ``x`` with every element in (0, 1).
    """
    return 1.0 / (1 + np.exp(-x))
def cost(x, y, theta):
    """Return the mean binary cross-entropy of a logistic-regression model.

    With ``z = x @ theta`` and ``h = sigmoid(z)`` the loss per sample is
    ``-(y*log(h) + (1-y)*log(1-h))``. It is evaluated here in the
    algebraically equivalent form ``log(1 + exp(z)) - y*z``, which never
    takes ``log(0)`` and so stays finite when predictions saturate at 0 or 1.

    Args:
        x: Design matrix of shape (m, n); a 1-D input is treated as one row.
        y: The m binary labels (0 or 1), as a column, a row or a flat
            sequence.
        theta: The n model parameters, in any of the same layouts.

    Returns:
        The average loss over the m samples, as a float.
    """
    x = np.atleast_2d(np.asarray(x, dtype=float))
    # Force column vectors so a flat label or parameter list cannot
    # silently broadcast into an (m, m) result.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    z = x @ theta
    # logaddexp(0, z) == log(1 + exp(z)), computed without overflow.
    losses = np.logaddexp(0, z) - y * z
    return np.sum(losses) / len(x)
def grad(x, y, theta, epochs=1000, lr=0.001, record_every=50):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        x: Design matrix of shape (m, n).
        y: The m binary labels (0 or 1); reshaped to a column internally.
        theta: Initial values of the n parameters; reshaped to a column
            internally.
        epochs: The loop runs for ``epochs + 1`` update steps
            (``i = 0 .. epochs``).
        lr: Learning rate applied to every update.
        record_every: The cost is recorded whenever ``i`` is a multiple of
            this value. Defaults to 50.

    Returns:
        A tuple ``(theta, costList)``: ``theta`` is the fitted (n, 1)
        ``numpy.matrix`` and ``costList`` holds the recorded cost values,
        ``epochs // record_every + 1`` entries in total.
    """
    x = np.matrix(x)
    # Column shapes are enforced so that ``h - y`` subtracts element-wise
    # instead of broadcasting a row against a column.
    y = np.matrix(y).reshape(-1, 1)
    theta = np.matrix(theta).reshape(-1, 1)

    m = x.shape[0]
    costList = []
    for i in range(epochs + 1):
        h = sigmoid(x * theta)        # predicted probabilities, (m, 1)
        delta = x.T * (h - y) / m     # average gradient, (n, 1)
        theta = theta - lr * delta
        if i % record_every == 0:
            # The cost is taken after the update of this iteration.
            costList.append(cost(x, y, theta))
    return theta, costList

1.对setosa分类，分出setosa和其他种类

# Prepend a column of ones so that theta[0] acts as the intercept term.
# NOTE: this rebinds ``x``; running this block twice adds a second column.
x = np.concatenate((np.ones((len(x), 1)), x), axis=1)
theta = np.ones((x.shape[1], 1))

# One-vs-rest labels for class 0 (setosa): 1 for class 0, 0 otherwise.
y1 = [[1] if label == 0 else [0] for label in y]
theta, costList = grad(x, y1, theta, 3000, 0.6)

# x-axis for the cost curve: one evenly spaced point per recorded cost
# value over the 3000 epochs.
a = np.linspace(0, 3000, len(costList))
plt.plot(a, costList, c='y')
plt.show()

theta

matrix([[ 5.0040943 ],[-6.76191622],[10.16162326]])

# Samples coloured by true class; columns 1 and 2 of ``x`` are the two
# features (column 0 is the constant bias column).
plt.scatter(x[:, 1], x[:, 2], c=y)

# The decision boundary satisfies theta[0] + theta[1]*x1 + theta[2]*x2 = 0.
# Solve for x2 at the two x1 endpoints in ``m`` and join them with a line.
m = [[4.5], [8.0]]
n = -(theta[0] + m * theta[1]) / theta[2]
plt.plot(m, n, c='r')
plt.show()

可以看出，只用两个特征就可以很好地将setosa与其他两类（versicolor、virginica）区分开。

2.对versicolor分类

theta2 = np.ones((x.shape[1], 1))

# One-vs-rest labels for class 1 (versicolor): 1 for class 1, 0 otherwise.
y2 = [[1] if label == 1 else [0] for label in y]
theta2, costList = grad(x, y2, theta2, 8000, 0.28)

# x-axis for the cost curve. It must span the 8000 epochs actually run,
# with one point per recorded cost value.
a = np.linspace(0, 8000, len(costList))
plt.plot(a, costList, c='y')
plt.show()

theta2

matrix([[ 8.80218283],[ 0.25495515],[-3.45489597]])

# Samples coloured by true class (columns 1 and 2 of ``x`` are the features).
plt.scatter(x[:, 1], x[:, 2], c=y)

# Each boundary is the line t[0] + t[1]*x1 + t[2]*x2 = 0, evaluated at the
# two x1 endpoints in ``m``.
m = [[4.5], [8.0]]
n = -(theta[0] + m * theta[1]) / theta[2]     # class 0 vs rest
l = -(theta2[0] + m * theta2[1]) / theta2[2]  # class 1 vs rest
plt.plot(m, n, c='b')
plt.plot(m, l, c='r')
plt.show()

3.对virginica分类

theta3 = np.ones((x.shape[1], 1))

# One-vs-rest labels for class 2 (virginica): 1 for class 2, 0 otherwise.
y3 = [[1] if label == 2 else [0] for label in y]
theta3, costList = grad(x, y3, theta3, 6000, 0.1)

# x-axis for the cost curve: one evenly spaced point per recorded cost
# value over the 6000 epochs.
a = np.linspace(0, 6000, len(costList))
plt.plot(a, costList, c='y')
plt.show()

![png](output_14_0.png)


theta3

matrix([[-6.42102857],[ 1.79488608],[-1.6847187 ]])

# Samples coloured by true class (columns 1 and 2 of ``x`` are the features).
plt.scatter(x[:, 1], x[:, 2], c=y)

# Each one-vs-rest boundary is the line t[0] + t[1]*x1 + t[2]*x2 = 0,
# evaluated at the two x1 endpoints in ``m``.
m = [[4.5], [8.0]]
n = -(theta[0] + m * theta[1]) / theta[2]     # class 0 vs rest
l = -(theta2[0] + m * theta2[1]) / theta2[2]  # class 1 vs rest
q = -(theta3[0] + m * theta3[1]) / theta3[2]  # class 2 vs rest
plt.plot(m, n, c='b')
plt.plot(m, l, c='r')
plt.plot(m, q, c='y')
plt.show()

从setosa（0）、versicolor（1）、virginica（2）来看，0分类和其他两个分类可以用一条直线很好地分开，但是对1和2分类来说，一条直线显然无法将其分隔开，所以需要我们考虑更多的样本特征。

2. 使用全部4个特征来评估模型

# Design matrix for the model that uses every feature: a leading column of
# ones (intercept) followed by all columns of ``data``.
bias_column = np.ones((data.shape[0], 1))
X = np.hstack((bias_column, data))
Y = target
print(X.shape, Y.shape)

(150, 5) (150,)

Theta1 = np.ones((X.shape[1], 1))

# One-vs-rest labels for class 0: 1 for class 0, 0 otherwise.
Y1 = [[1] if label == 0 else [0] for label in Y]
Theta1, costList = grad(X, Y1, Theta1, 3000, 0.6)

# x-axis for the cost curve: one evenly spaced point per recorded cost
# value over the 3000 epochs.
a = np.linspace(0, 3000, len(costList))
plt.plot(a, costList, c='y')
plt.show()
Theta1  # bare expression: shown as the cell result when run in a notebook

matrix([[ 1.33220841],[ 0.52729505],[ 3.3411161 ],[-4.85587593],[-1.63431095]])

Theta2 = np.ones((X.shape[1], 1))

# One-vs-rest labels for class 1: 1 for class 1, 0 otherwise.
Y2 = [[1] if label == 1 else [0] for label in Y]
Theta2, costList = grad(X, Y2, Theta2, 8000, 0.1)

# x-axis for the cost curve. It must span the 8000 epochs actually run,
# with one point per recorded cost value.
a = np.linspace(0, 8000, len(costList))
plt.plot(a, costList, c='y')
plt.show()
Theta2  # bare expression: shown as the cell result when run in a notebook

matrix([[ 4.75032157],[ 0.11228482],[-2.45581587],[ 1.16623831],[-2.70084823]])

Theta3 = np.ones((X.shape[1], 1))

# One-vs-rest labels for class 2: 1 for class 2, 0 otherwise.
Y3 = [[1] if label == 2 else [0] for label in Y]
Theta3, costList = grad(X, Y3, Theta3, 3000, 0.6)

# x-axis for the cost curve: one evenly spaced point per recorded cost
# value over the 3000 epochs.
a = np.linspace(0, 3000, len(costList))
plt.plot(a, costList, c='y')
plt.show()
Theta3  # bare expression: shown as the cell result when run in a notebook

matrix([[-5.40077764],[-4.5160182 ],[-4.88791515],[ 6.77915362],[ 8.57906837]])

# Probability of each class according to its own one-vs-rest model.
predict_y1 = sigmoid(X * Theta1)
predict_y2 = sigmoid(X * Theta2)
predict_y3 = sigmoid(X * Theta3)

# Put the three probabilities side by side, one row per sample, and pick
# the class with the largest one. argmax returns the first maximum, so a
# tie goes to the lower class index.
probs = np.asarray(np.hstack((predict_y1, predict_y2, predict_y3)))
predict_y = [[int(k)] for k in probs.argmax(axis=1)]

# Show the three probabilities next to the chosen class for every sample.
for p1, p2, p3, label in zip(predict_y1, predict_y2, predict_y3, predict_y):
    print(p1, ' ', p2, ' ', p3, label)

# Per-class precision / recall / F1 of the hand-written one-vs-rest model
# (the recorded run reports an accuracy of 0.97).
report = classification_report(Y, predict_y)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.96      0.96      0.96        50
           2       0.96      0.96      0.96        50

    accuracy                           0.97       150
   macro avg       0.97      0.97      0.97       150
weighted avg       0.97      0.97      0.97       150

3.用sklearn进行逻辑回归

from sklearn.linear_model import LogisticRegression

# Fit scikit-learn's logistic regression on all feature columns and predict
# on the same samples it was trained on.
model = LogisticRegression()
model.fit(data, target)
prediction = model.predict(data)
print(model.intercept_)
print(model.coef_)

[  9.84186228   2.21913963 -12.06100191]
[[-0.41943756  0.96749376 -2.5205723  -1.084326  ][ 0.53147635 -0.3150198  -0.20094963 -0.94785159][-0.11203879 -0.65247397  2.72152193  2.03217759]]

# Per-class precision / recall / F1 of the scikit-learn model
# (the recorded run reports an accuracy of 0.97).
sk_report = classification_report(target, prediction)
print(sk_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.98      0.94      0.96        50
           2       0.94      0.98      0.96        50

    accuracy                           0.97       150
   macro avg       0.97      0.97      0.97       150
weighted avg       0.97      0.97      0.97       150

逻辑回归实现鸢尾花分类相关推荐

基于逻辑回归的鸢尾花分类预测
基于逻辑回归的分类预测 1 逻辑回归的介绍和应用 1.1 逻辑回归的介绍 1.2 逻辑回归的应用 2 学习目标 3 代码流程 4 算法实战 4.1 Demo实践 Step1:库函数导入 Step2:模 ...
基于逻辑回归的鸢尾花分类
二分类实现辨别是否是鸢尾花尽管名为逻辑回归,但实际上是一个分类模型,尤其是在我们只有两个类时.逻辑回归的名称来源于将输入的任意实值x转换成值在0到1 采用sigmoid,划为[0,1]之间的数据 , ...
课堂笔记：逻辑回归和鸢尾花数据集
一.机器学习机器学习过程: 二.逻辑回归 1.作用:(1)估计某事物的可能性 :(2)适用于流行病学资料的危险因素分析. 2.介绍:简单来说它是线性回归的一种,事实上它是一个被logistic方程归 ...
用逻辑回归实现鸢尾花数据集分类（1）
鸢尾花数据集的分类问题指导 -- 对数几率回归(逻辑回归)问题研究 (1) 这一篇Notebook是应用对数几率回归(Logit Regression)对鸢尾花数据集进行品种分类的.首先会带大家探索一 ...
鸢尾花分类python_二元逻辑回归实现鸢尾花数据分类（python）
说明: 本文利用python实现二元逻辑回归,没有加正则项.挑选iris数据前100个样本作为训练集,他们分属于两个类别,样本特征选择第1列(花萼长度x1)和第2列(花萼宽度x2). 程序以函数形式实 ...
【机器学习基础】(三)：理解逻辑回归及二分类、多分类代码实践
本文是机器学习系列的第三篇,算上前置机器学习系列是第八篇.本文的概念相对简单,主要侧重于代码实践. 上一篇文章说到,我们可以用线性回归做预测,但显然现实生活中不止有预测的问题还有分类的问题.我们可以从 ...
机器学习：理解逻辑回归及二分类、多分类代码实践
作者 | caiyongji 责编 | 张红月来源 | 转载自 caiyongji(ID:cai-yong-ji) 本文的概念相对简单,主要侧重于代码实践.现实生活中不止有预测的问题还有分类的问 ...
机器学习(三)：理解逻辑回归及二分类、多分类代码实践
本文是机器学习系列的第三篇,算上前置机器学习系列是第八篇.本文的概念相对简单,主要侧重于代码实践. 上一篇文章说到,我们可以用线性回归做预测,但显然现实生活中不止有预测的问题还有分类的问题.我们可以从 ...
Keras【Deep Learning With Python】逻辑回归·softmax多分类与交叉熵
文章目录 1 逻辑回归 1.2 Sigmod函数 1.2 逻辑回归损失函数 2 交叉熵 3 softmax分类 1 逻辑回归回答0或1 1.2 Sigmod函数 0.9是好人 0.1是坏人二分类 ...
stanford coursera 机器学习编程作业 exercise 3（逻辑回归实现多分类问题）
本作业使用逻辑回归(logistic regression)和神经网络(neural networks)识别手写的阿拉伯数字(0-9) 关于逻辑回归的一个编程练习,可参考:http://www.cnb ...