注:数据集在文章末尾

(1)决策树–线性二分类

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import tree

# Load the dataset: each row is "feature1,feature2,label"
data = np.genfromtxt("LR-testSet.csv", delimiter=",")
x_data = data[:, :-1]  # all columns except the last are features
y_data = data[:, -1]   # last column is the class label
# Scatter plot of the samples, colored by class
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

输出:

# Create the decision-tree classifier (default parameters: tree grows
# until all leaves are pure)
model = tree.DecisionTreeClassifier()
# Fit the model on the full dataset
model.fit(x_data, y_data)

# Export the fitted tree for rendering with graphviz
import graphviz  # http://www.graphviz.org/
dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=['x', 'y'],
    class_names=['label0', 'label1'],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)

# Determine the range covered by the data (with a 1-unit margin)
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build a dense grid of points over that range
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# Predict the class of every grid point.
# ravel, like flatten, turns a multi-dimensional array into 1-D;
# flatten always returns a copy, while ravel returns a view of the
# original data when possible (neither modifies the original array).
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Filled contour plot of the decision regions
cs = plt.contourf(xx, yy, z)
# Overlay the sample scatter plot
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

输出:

# Evaluate the model on the data it was trained on.
predictions = model.predict(x_data)
# classification_report expects (y_true, y_pred) in that order; the
# original passed them swapped, which mislabels precision vs. recall.
print(classification_report(y_data, predictions))

输出:

(2)决策树–非线性二分类

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import tree
from sklearn.model_selection import train_test_split

# Load the (non-linearly separable) dataset: "feature1,feature2,label"
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:, :-1]  # all columns except the last are features
y_data = data[:, -1]   # last column is the class label
# Scatter plot of the samples, colored by class
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

输出:

# Split the data into training and test sets (default 75/25 split)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)

# Create the decision-tree classifier, regularized to curb overfitting:
# max_depth: maximum depth of the tree
# min_samples_split: minimum samples required to split an internal node
model = tree.DecisionTreeClassifier(max_depth=7, min_samples_split=4)
# Fit the model on the training data only
model.fit(x_train, y_train)

# Export the fitted tree for rendering with graphviz
import graphviz  # http://www.graphviz.org/
dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=['x', 'y'],
    class_names=['label0', 'label1'],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)

# Determine the range covered by the data (with a 1-unit margin)
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build a dense grid of points over that range
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# Predict the class of every grid point.
# ravel, like flatten, turns a multi-dimensional array into 1-D;
# flatten always returns a copy, while ravel returns a view of the
# original data when possible (neither modifies the original array).
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Filled contour plot of the decision regions
cs = plt.contourf(xx, yy, z)
# Overlay the sample scatter plot
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Evaluate on the training set.
# classification_report expects (y_true, y_pred) in that order; the
# original passed them swapped, which mislabels precision vs. recall.
predictions = model.predict(x_train)
print(classification_report(y_train, predictions))

# Evaluate on the held-out test set
predictions = model.predict(x_test)
print(classification_report(y_test, predictions))

数据集:“LR-testSet.csv”:

-0.017612,14.053064,0
-1.395634,4.662541,1
-0.752157,6.53862,0
-1.322371,7.152853,0
0.423363,11.054677,0
0.406704,7.067335,1
0.667394,12.741452,0
-2.46015,6.866805,1
0.569411,9.548755,0
-0.026632,10.427743,0
0.850433,6.920334,1
1.347183,13.1755,0
1.176813,3.16702,1
-1.781871,9.097953,0
-0.566606,5.749003,1
0.931635,1.589505,1
-0.024205,6.151823,1
-0.036453,2.690988,1
-0.196949,0.444165,1
1.014459,5.754399,1
1.985298,3.230619,1
-1.693453,-0.55754,1
-0.576525,11.778922,0
-0.346811,-1.67873,1
-2.124484,2.672471,1
1.217916,9.597015,0
-0.733928,9.098687,0
-3.642001,-1.618087,1
0.315985,3.523953,1
1.416614,9.619232,0
-0.386323,3.989286,1
0.556921,8.294984,1
1.224863,11.58736,0
-1.347803,-2.406051,1
1.196604,4.951851,1
0.275221,9.543647,0
0.470575,9.332488,0
-1.889567,9.542662,0
-1.527893,12.150579,0
-1.185247,11.309318,0
-0.445678,3.297303,1
1.042222,6.105155,1
-0.618787,10.320986,0
1.152083,0.548467,1
0.828534,2.676045,1
-1.237728,10.549033,0
-0.683565,-2.166125,1
0.229456,5.921938,1
-0.959885,11.555336,0
0.492911,10.993324,0
0.184992,8.721488,0
-0.355715,10.325976,0
-0.397822,8.058397,0
0.824839,13.730343,0
1.507278,5.027866,1
0.099671,6.835839,1
-0.344008,10.717485,0
1.785928,7.718645,1
-0.918801,11.560217,0
-0.364009,4.7473,1
-0.841722,4.119083,1
0.490426,1.960539,1
-0.007194,9.075792,0
0.356107,12.447863,0
0.342578,12.281162,0
-0.810823,-1.466018,1
2.530777,6.476801,1
1.296683,11.607559,0
0.475487,12.040035,0
-0.783277,11.009725,0
0.074798,11.02365,0
-1.337472,0.468339,1
-0.102781,13.763651,0
-0.147324,2.874846,1
0.518389,9.887035,0
1.015399,7.571882,0
-1.658086,-0.027255,1
1.319944,2.171228,1
2.056216,5.019981,1
-0.851633,4.375691,1
-1.510047,6.061992,0
-1.076637,-3.181888,1
1.821096,10.28399,0
3.01015,8.401766,1
-1.099458,1.688274,1
-0.834872,-1.733869,1
-0.846637,3.849075,1
1.400102,12.628781,0
1.752842,5.468166,1
0.078557,0.059736,1
0.089392,-0.7153,1
1.825662,12.693808,0
0.197445,9.744638,0
0.126117,0.922311,1
-0.679797,1.22053,1
0.677983,2.556666,1
0.761349,10.693862,0
-2.168791,0.143632,1
1.38861,9.341997,0
0.317029,14.739025,0

数据集:“LR-testSet2.txt”:

0.051267,0.69956,1
-0.092742,0.68494,1
-0.21371,0.69225,1
-0.375,0.50219,1
-0.51325,0.46564,1
-0.52477,0.2098,1
-0.39804,0.034357,1
-0.30588,-0.19225,1
0.016705,-0.40424,1
0.13191,-0.51389,1
0.38537,-0.56506,1
0.52938,-0.5212,1
0.63882,-0.24342,1
0.73675,-0.18494,1
0.54666,0.48757,1
0.322,0.5826,1
0.16647,0.53874,1
-0.046659,0.81652,1
-0.17339,0.69956,1
-0.47869,0.63377,1
-0.60541,0.59722,1
-0.62846,0.33406,1
-0.59389,0.005117,1
-0.42108,-0.27266,1
-0.11578,-0.39693,1
0.20104,-0.60161,1
0.46601,-0.53582,1
0.67339,-0.53582,1
-0.13882,0.54605,1
-0.29435,0.77997,1
-0.26555,0.96272,1
-0.16187,0.8019,1
-0.17339,0.64839,1
-0.28283,0.47295,1
-0.36348,0.31213,1
-0.30012,0.027047,1
-0.23675,-0.21418,1
-0.06394,-0.18494,1
0.062788,-0.16301,1
0.22984,-0.41155,1
0.2932,-0.2288,1
0.48329,-0.18494,1
0.64459,-0.14108,1
0.46025,0.012427,1
0.6273,0.15863,1
0.57546,0.26827,1
0.72523,0.44371,1
0.22408,0.52412,1
0.44297,0.67032,1
0.322,0.69225,1
0.13767,0.57529,1
-0.0063364,0.39985,1
-0.092742,0.55336,1
-0.20795,0.35599,1
-0.20795,0.17325,1
-0.43836,0.21711,1
-0.21947,-0.016813,1
-0.13882,-0.27266,1
0.18376,0.93348,0
0.22408,0.77997,0
0.29896,0.61915,0
0.50634,0.75804,0
0.61578,0.7288,0
0.60426,0.59722,0
0.76555,0.50219,0
0.92684,0.3633,0
0.82316,0.27558,0
0.96141,0.085526,0
0.93836,0.012427,0
0.86348,-0.082602,0
0.89804,-0.20687,0
0.85196,-0.36769,0
0.82892,-0.5212,0
0.79435,-0.55775,0
0.59274,-0.7405,0
0.51786,-0.5943,0
0.46601,-0.41886,0
0.35081,-0.57968,0
0.28744,-0.76974,0
0.085829,-0.75512,0
0.14919,-0.57968,0
-0.13306,-0.4481,0
-0.40956,-0.41155,0
-0.39228,-0.25804,0
-0.74366,-0.25804,0
-0.69758,0.041667,0
-0.75518,0.2902,0
-0.69758,0.68494,0
-0.4038,0.70687,0
-0.38076,0.91886,0
-0.50749,0.90424,0
-0.54781,0.70687,0
0.10311,0.77997,0
0.057028,0.91886,0
-0.10426,0.99196,0
-0.081221,1.1089,0
0.28744,1.087,0
0.39689,0.82383,0
0.63882,0.88962,0
0.82316,0.66301,0
0.67339,0.64108,0
1.0709,0.10015,0
-0.046659,-0.57968,0
-0.23675,-0.63816,0
-0.15035,-0.36769,0
-0.49021,-0.3019,0
-0.46717,-0.13377,0
-0.28859,-0.060673,0
-0.61118,-0.067982,0
-0.66302,-0.21418,0
-0.59965,-0.41886,0
-0.72638,-0.082602,0
-0.83007,0.31213,0
-0.72062,0.53874,0
-0.59389,0.49488,0
-0.48445,0.99927,0
-0.0063364,0.99927,0
0.63265,-0.030612,0

【机器学习】监督学习--(分类)决策树②相关推荐

  1. 机器学习——监督学习之决策树分类模型

    概念 a.一种树形结构的分类器. b.通过顺序询问分类点的属性决定分类点的最终类别. c.决策树的构建通常根据特征的信息增益或其他指标. d.分类时,只需要按照决策树中的结点依次进行判断,即可得到样本 ...

  2. 机器学习之分类决策树与回归决策树—基于python实现

    大家好,我是带我去滑雪! 本期为大家介绍决策树算法,它一种基学习器,广泛应用于集成学习,用于大幅度提高模型的预测准确率.决策树在分区域时,会考虑特征向量对响应变量的影响,且每次仅使用一个分裂变量,这使 ...

  3. 【机器学习】分类决策树与回归决策树案例

    一.回顾 什么是决策树,信息熵 构建决策树的过程 ID3.C4.5和CRAT算法 上面三篇,主要介绍了相关的理论知识,其中构建决策树的过程可以很好地帮助我们理解决策树的分裂属性的选择. 本篇所有源代码 ...

  4. 机器学习之分类-决策树随机森林

    决策树 原理(信息论) 信息熵 信息:消除随机不定性的东西 信息熵公式,单位bit H ( X ) = − ∑ i = 1 n ( P ( x i ) log ⁡ b P ( x i ) ) H(X) ...

  5. 机器学习之分类决策树节点划分指标

    目录 信息熵 基尼Gini 指数 传送门 信息熵 基尼Gini 指数 其它情况以此类推 传送门 集成学习:XGBoost, lightGBM 通俗理解信息熵

  6. 监督学习--分类之决策树

    监督学习-分类-决策树 决策树使用树形分支结构分类事物 例: 小丽找对象,要求:高.帅.富 小明找对象,要求:美美美 if height >= 172:if hansom = '帅':if ri ...

  7. [Python从零到壹] 十四.机器学习之分类算法五万字总结全网首发(决策树、KNN、SVM、分类对比实验)

    欢迎大家来到"Python从零到壹",在这里我将分享约200篇Python系列文章,带大家一起去学习和玩耍,看看Python这个有趣的世界.所有文章都将结合案例.代码和作者的经验讲 ...

  8. 机器学习----监督学习算法之决策树(Decision Tree)

    感谢Jack-Cui大佬的知识分享 机器学习专栏点击这里 目录 感谢Jack-Cui大佬的知识分享 0. 概述 1. 使用决策树做预测需要以下过程: 2. 决策树构建步骤 2.1 特征选择 2.1.1 ...

  9. 【机器学习基础】CH2 - 监督学习(5)决策树

    2.5 决策树 到目前为止,我们已经研究了线性或线性基模型及其核变量. 在本节中,我们将考虑一种不同的分类或回归方法,在这种方法中,分类器是分段常数函数. 这类方法中最简单的是决策树,它将输入空间 分 ...

  10. [监督学习] 分类(决策树)

    决策树 决策树(decision tree) 是一种基本的分类与回归方法.本博客主要讨论用于分类的决策树.决策树模型呈树形结构,在分类问题中,表示基于特征对实例进行分类的过程.学习时,利用训练数据,根 ...

最新文章

  1. .NET手撸绘制TypeScript类图——下篇
  2. php auth和rbac区别,php中比rbac更好的权限认证的方式auth类认证
  3. 怎么查询共享使用人_企业微信微盘怎么共享使用?企业微信如何设置微盘权限?...
  4. [转载] 动态口令,动态密码生成(OTP)
  5. 无线路由不能上网问题的解决的方法
  6. mybatis 3.2.3 maven dependency pom.xml 配置
  7. iOS底层探索之Block(五)——Block源码分析(__block 底层都做了什么?)
  8. 中文版Latex常用语法大全教程
  9. java实习简历_怎么样写一份比较好的Java实习生的简历?
  10. 使用MediaRecorder录制音频和视频(Camera1)
  11. python requests ‘latin-1‘ codec can‘t encode characters in position 374-379: ordinal not in
  12. C语言笔记 隐藏光标函数 带注释(详细)
  13. 17-11-01模拟赛
  14. 《MySQL DBA修炼之道》——2.2 官方版本的安装
  15. labview文件写入与读取
  16. 【ANSYS APDL】如何将变量、矩阵等数据导出到TXT文件?
  17. 场景编程集锦 - 吉米的总统梦想
  18. WSL2 中 docker volume 的位置
  19. 结构体 struct 的深入理解
  20. prefix-list前缀列表

热门文章

  1. html 中avi视频插件,JDG让一追二击败V5!Kanavi降维打击,逆版本选英雄不按常理出牌...
  2. 我喜欢c语言用英文版,说说我喜欢英语的几个理由
  3. efcore 有值才加where_lol手游怎么加好友 日服英雄联盟手游邀请好友一起玩方法[多图]...
  4. popen() 函数 讲解
  5. python的csv标准库,Python标准库: csv模块——CSV文件的读写
  6. 【LeetCode】剑指 Offer 09. 用两个栈实现队列
  7. GIT 自动转换行符的案例
  8. Linux基础_Hadoop环境搭建必备
  9. Windows 下python的tab自动补全
  10. QT中使用全局变量在多个源程序中传递变量