极限森林

from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt

# Decision tree: each split picks the column with the largest information gain.
# Extremely randomized trees ("extra trees"): 1) random samples, 2) random
# split conditions (not necessarily the best split).  As in a random forest a
# random subset of candidate features is used, but instead of searching for
# the most discriminative threshold, a threshold is drawn at random for each
# candidate feature and the best of these random thresholds is taken as the
# split rule.
X, y = datasets.load_wine(return_X_y=True)  # positional True is deprecated

# Compare a single tree, a random forest, and extra trees by 6-fold CV accuracy.
clf = DecisionTreeClassifier()
print(cross_val_score(clf, X, y, cv=6, scoring="accuracy").mean())

forest = RandomForestClassifier(n_estimators=100)
print(cross_val_score(forest, X, y, cv=6, scoring="accuracy").mean())

extra = ExtraTreesClassifier(n_estimators=100)
print(cross_val_score(extra, X, y, cv=6, scoring="accuracy").mean())

结果:

0.8653256704980842, 0.9777777777777779, 0.9833333333333334

梯度提升树的使用

import numpy as np
from sklearn import datasets  # was missing in this snippet
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Gradient boosting (GBDT) classification on the iris data set.
X, y = datasets.load_iris(return_X_y=True)  # positional True is deprecated
# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
gbdt = GradientBoostingClassifier()
gbdt.fit(X_train, y_train)
print(gbdt.score(X_test, y_test))

结果:

0.9666666666666667
import numpy as np
import matplotlib.pyplot as plt

# Regression as the limiting case of classification:
# with enough distinct classes, classification becomes regression.
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn import tree

# X features: time spent online and shopping amount.
# y target: age — 14 (grade 10), 16 (grade 12), 24 (fresh graduate),
# 26 (two years into a job).
X = np.array([[800, 3], [1200, 1], [1800, 4], [2500, 2]])
y = np.array([14, 16, 24, 26])
gbdt = GradientBoostingRegressor(n_estimators=10)
gbdt.fit(X, y)
print(gbdt.predict(X))

结果:

array([16.09207064, 17.39471376, 22.60528624, 23.90792936])
# Use a CJK-capable font so the Chinese feature names render correctly.
plt.rcParams["font.sans-serif"]="KaiTi"
plt.figure(figsize=(9,6))
# Draw one tree of the fitted ensemble — presumably boosting stage 0,
# output 0; verify against sklearn's ensemble __getitem__ semantics.
# NOTE(review): earlier text describes the X columns as (online time,
# shopping amount) but feature_names lists spending first — confirm order.
_ = tree.plot_tree(gbdt[0,0],filled=True,feature_names=["消费","上网"])

friedman_mse = ((y[:2] - y[:2].mean())**2).mean() = 1

value是14,16,24,26和20的差,即残差,残差越小——>越好——>越准确

# Use a CJK-capable font so the Chinese feature names render correctly.
plt.rcParams["font.sans-serif"]="KaiTi"
plt.figure(figsize=(9,6))
# Draw the next tree of the fitted ensemble — presumably boosting stage 1,
# output 0; verify against sklearn's ensemble __getitem__ semantics.
_ = tree.plot_tree(gbdt[1,0],filled=True,feature_names=["消费","上网"])
# learning_rate = 0.1
# Residuals after the first boosting stage.
gbdt1 = np.array([-6, -4, 6, 4])
# Gradient-boosting update with learning rate 0.1:
# each residual shrinks by 10% of itself per stage.
step = gbdt1 * 0.1
gbdt1 - step

结果:

array([-5.4, -3.6,  5.4,  3.6])

# learning_rate = 0.1
# Residuals after the second boosting stage.
gbdt2 = np.array([-5.4, -3.6, 5.4, 3.6])
# One more gradient-boosting step at learning rate 0.1:
# again remove 10% of the current residual.
shrinkage = gbdt2 * 0.1
gbdt2 - shrinkage

结果:

array([-4.86, -3.24,  4.86,  3.24])
# Use a CJK-capable font so the Chinese feature names render correctly.
plt.rcParams["font.sans-serif"]="KaiTi"
plt.figure(figsize=(9,6))
# Draw another tree of the fitted ensemble — presumably boosting stage 2,
# output 0; verify against sklearn's ensemble __getitem__ semantics.
_ = tree.plot_tree(gbdt[2,0],filled=True,feature_names=["消费","上网"])

最后一棵树

# Use a CJK-capable font so the Chinese feature names render correctly.
plt.rcParams["font.sans-serif"]="KaiTi"
plt.figure(figsize=(9,6))
# Draw the last tree of the fitted ensemble (index -1), as the surrounding
# text ("最后一棵树" / "the last tree") indicates.
_ = tree.plot_tree(gbdt[-1,0],filled=True,feature_names=["消费","上网"])

# learning_rate = 0.1
# Residuals at a later boosting stage.
gbdt3 = np.array([-2.325, -1.55, 2.325, 1.55])
# Apply the same shrinking update (learning rate 0.1):
# subtract 10% of the current residual.
delta = gbdt3 * 0.1
gbdt3 - delta

结果:

array([-2.0925,-1.395,2.0925,1.395])
array([-2.0925,-1.395,1.395,2.0925])

14,16,24,26下减上

16.0925,17.395,22.605,23.9075

gbdt.predict(X)

结果:

array([16.09207064, 17.39471376, 22.60528624, 23.90792936])

梯度上升与梯度下降

下降——减法求最小值;上升——加法求最大值

import numpy as np
import matplotlib.pyplot as plt

# Objective to minimize.
f = lambda x: (x - 3)**2 + 2.5*x - 7.5
# Derivative (= gradient in 1-D): f'(x) = 2*(x - 3) + 2.5.
# Setting f'(x) = 0 gives the minimum at x = 1.75.
x = np.linspace(-2, 5, 100)
y = f(x)
plt.plot(x, y)

import numpy as np
import matplotlib.pyplot as plt

# Objective to minimize; its derivative is d(x) = 2*(x - 3) + 2.5,
# which is zero at x = 1.75 (the minimum).
f = lambda x: (x - 3)**2 + 2.5*x - 7.5
x = np.linspace(-2, 5, 100)
y = f(x)
plt.plot(x, y)

learning_rate = 0.1
# Derivative (= gradient) function.
d = lambda x: 2*(x - 3) + 2.5

# Random starting point for the search.
min_value = np.random.randint(-3, 5, size=1)[0]
print("---------------", min_value)
# Track the previous value so we can detect convergence; the exit condition
# is that one update moves less than the tolerance.
min_value_last = min_value + 0.1
tol = 0.0001
count = 0
while True:
    if np.abs(min_value - min_value_last) < tol:
        break
    # Gradient descent: remember the old value, then step AGAINST the gradient.
    min_value_last = min_value
    min_value = min_value - learning_rate*d(min_value)
    print("+++++++++++++++++%d" % (count), min_value)
    count = count + 1
print("****************", min_value)

结果:

----------------- 4
+++++++++++++++++0 3.55
+++++++++++++++++1 3.19
+++++++++++++++++2 2.902
+++++++++++++++++3 2.6716
+++++++++++++++++4 2.48728
+++++++++++++++++5 2.339824
+++++++++++++++++6 2.2218592
+++++++++++++++++7 2.12748736
+++++++++++++++++8 2.051989888
+++++++++++++++++9 1.9915919104
+++++++++++++++++10 1.94327352832
+++++++++++++++++11 1.904618822656
+++++++++++++++++12 1.8736950581248
+++++++++++++++++13 1.84895604649984
+++++++++++++++++14 1.829164837199872
+++++++++++++++++15 1.8133318697598977
+++++++++++++++++16 1.8006654958079182
+++++++++++++++++17 1.7905323966463347
+++++++++++++++++18 1.7824259173170678
+++++++++++++++++19 1.7759407338536541
+++++++++++++++++20 1.7707525870829233
+++++++++++++++++21 1.7666020696663387
+++++++++++++++++22 1.763281655733071
+++++++++++++++++23 1.760625324586457
+++++++++++++++++24 1.7585002596691655
+++++++++++++++++25 1.7568002077353324
+++++++++++++++++26 1.755440166188266
+++++++++++++++++27 1.7543521329506127
+++++++++++++++++28 1.7534817063604902
+++++++++++++++++29 1.7527853650883922
+++++++++++++++++30 1.7522282920707137
+++++++++++++++++31 1.751782633656571
+++++++++++++++++32 1.7514261069252568
+++++++++++++++++33 1.7511408855402055
+++++++++++++++++34 1.7509127084321645
+++++++++++++++++35 1.7507301667457316
+++++++++++++++++36 1.7505841333965853
+++++++++++++++++37 1.7504673067172682
+++++++++++++++++38 1.7503738453738147
***************** 1.7503738453738147
import numpy as np
import matplotlib.pyplot as plt

# Objective to maximize; its derivative d2(x) = -2*(x - 3) + 2.5 vanishes
# at x = 4.25 (the maximum).
f2 = lambda x: -(x - 3)**2 + 2.5*x - 7.5
# Gradient ascent: derivative function and the iterate trajectory.
result = []
d2 = lambda x: -2*(x - 3) + 2.5
learning_rate = 0.1
# max_value: random initial guess; the goal is to reach the optimum quickly.
# Too large a learning rate can make updates vanish or explode.
max_value = np.random.randint(2, 8, size=1)[0]
# max_value = 1000
result.append(max_value)
print('-------------------', max_value)
# Track the previous value; the exit condition compares against it.
max_value_last = max_value + 0.001
# tolerance / precision: stop once one update moves less than 1e-4.
precision = 0.0001
count = 0
while True:
    if np.abs(max_value - max_value_last) < precision:
        break
    # Gradient ascent: remember the old value, then step ALONG the gradient.
    max_value_last = max_value
    max_value = max_value + learning_rate*d2(max_value)
    result.append(max_value)
    count += 1
    print('+++++++++++++++++++++%d' % (count), max_value)
print('**********************', max_value)
# Visualize the objective and the trajectory of the iterates.
plt.figure(figsize=(12, 9))
x = np.linspace(4, 8, 100)
y = f2(x)
plt.plot(x, y)
result = np.asarray(result)
plt.plot(result, f2(result), '*')

结果:

------------------- 5
+++++++++++++++++++++1 4.85
+++++++++++++++++++++2 4.7299999999999995
+++++++++++++++++++++3 4.6339999999999995
+++++++++++++++++++++4 4.5572
+++++++++++++++++++++5 4.49576
+++++++++++++++++++++6 4.4466079999999994
+++++++++++++++++++++7 4.407286399999999
+++++++++++++++++++++8 4.37582912
+++++++++++++++++++++9 4.350663296
+++++++++++++++++++++10 4.3305306368
+++++++++++++++++++++11 4.31442450944
+++++++++++++++++++++12 4.301539607552
+++++++++++++++++++++13 4.2912316860416
+++++++++++++++++++++14 4.2829853488332805
+++++++++++++++++++++15 4.276388279066625
+++++++++++++++++++++16 4.2711106232533
+++++++++++++++++++++17 4.26688849860264
+++++++++++++++++++++18 4.263510798882112
+++++++++++++++++++++19 4.260808639105689
+++++++++++++++++++++20 4.2586469112845515
+++++++++++++++++++++21 4.256917529027641
+++++++++++++++++++++22 4.255534023222113
+++++++++++++++++++++23 4.254427218577691
+++++++++++++++++++++24 4.2535417748621525
+++++++++++++++++++++25 4.252833419889722
+++++++++++++++++++++26 4.252266735911777
+++++++++++++++++++++27 4.251813388729422
+++++++++++++++++++++28 4.251450710983538
+++++++++++++++++++++29 4.251160568786831
+++++++++++++++++++++30 4.250928455029465
+++++++++++++++++++++31 4.250742764023572
+++++++++++++++++++++32 4.250594211218858
+++++++++++++++++++++33 4.250475368975087
+++++++++++++++++++++34 4.2503802951800695
********************** 4.2503802951800695

机器学习Sklearn实战——极限森林、梯度提升树算法相关推荐

  1. 机器学习sklearn实战-----随机森林调参乳腺癌分类预测

    机器学习sklearn随机森林乳腺癌分类预测 机器学习中调参的基本思想: 1)非常正确的调参思路和方法 2)对模型评估指标有深入理解 3)对数据的感觉和经验 文章目录 机器学习sklearn随机森林乳 ...

  2. gtb分类器参数调节_Ensemble Learning——随机森林\极限森林\梯度提升树\GBDT

    文章目录 Bagging (套袋法) Boosting(提升法) Bagging/Boosting的主要区别 1.随机森林 算法 API 2.极限森林 算法 API 3.Adaboost 算法 基本流 ...

  3. 菜菜的机器学习sklearn实战-----sklearn入门与决策树

    菜菜的机器学习sklearn实战-----sklearn入门与决策树 菜菜的机器学习sklearn实战-----sklearn入门与决策树 sklearn入门 决策树 概述 决策树是如何工作的 skl ...

  4. svd降维 python案例_菜菜的机器学习sklearn实战-----sklearn中的降维算法PCA和SVD

    菜菜的机器学习sklearn实战-----sklearn中的降维算法PCA和SVD 概述 从什么叫维度说开来 简单讲,shape中返回了几个数字就是几维. 一张表最多就是一维 当一个数组中存在2张3行 ...

  5. 机器学习Sklearn实战——梯度提升树二分类原理

    一.算法使用 (一)创建 (二)参数调整 cross_val_score:求单一参数最合适的值(KNN) GridSearchCV网格搜索:多参数组合最优的值 标准:准确率,精确率,召回率,F1 (三 ...

  6. HuaPu在学:机器学习——sklearn【随机森林】

    随机森林 文章目录 随机森林 一.集成算法 二.RandomForestClassifier [控制基评估器的参数] [n_estimators] [random_state] [bootstrap ...

  7. 梯度提升树算法原理小结

    点击上方"小白学视觉",选择加"星标"或"置顶" 重磅干货,第一时间送达 本文转自|机器学习算法那些事 前言 本文介绍了boosting族的 ...

  8. 【机器学习】集成学习之梯度提升树GBDT

    Boosting方法的核心思想是对错分类的样本给予更高关注度,也就是样本权重,在提升树中与之对应的就是残差,在梯度提升树中就是梯度了. Regression Decision Tree:回归树 回归树 ...

  9. 机器学习Sklearn实战——adaboost

    pandas批量处理体测成绩 import numpy as np import pandas as pd from pandas import Series,DataFrame import mat ...

最新文章

  1. FT报源检测到目标无法恢复解决过程
  2. jvm垃圾内存回收问题
  3. 解决ERROR: cannot download default sources list from:https://raw.githubusercontent.com/ros/rosdistro/m
  4. python获取文件夹下文件_Python 获取目录下的文件列表与内容
  5. 美团DSP广告策略实践
  6. uva 11374(Dijkstra) HappyNewYear!!!
  7. nocount on_在SQL Server中设置NOCOUNT ON语句的用法和性能优势
  8. KICKSTART无人值守安装系统
  9. Debian更新软件源提示There is no public key available for the following key IDs的解决方法
  10. 易筋经:现代化支付系统脉络梳理
  11. 淘宝镜像(浏览器驱动等等等)
  12. 简单的C语言顺序结构例题介绍
  13. 【opencv】支付宝AR实景红包领取方法
  14. linux双显卡配置_linux双显卡解决方案
  15. quill富文本编辑器——修改默认图片、视频上传功能
  16. windows下补丁手动下载和安装
  17. CentOS 7安装指南
  18. mysql建表语句增加注释_mysql建表语句加注释
  19. 第一章 MEMS惯性器件-加速度计误差分析
  20. FFmpeg解封装、解码音频和视频(分别使用OpenGL和OpenAL播放)

热门文章

  1. 资源共享冲突问题概述
  2. adnroidstudio debug手机就自动退出程序_苹果官方表示 iPhone关闭后台程序或将缩短电池寿命...
  3. MySQL是自主可控的吗_国产处理器那么多,究竟有哪些,是真正的“自主可控”?...
  4. 解决Cannot convert a symbolic Tensor (lstm/strided_slice:0) to a numpy array.
  5. php的wsgi框架结构,理解 WSGI 框架
  6. python 笔记 pickle json
  7. NTU课程笔记 MAS714(8) 分治与排序
  8. python库整理: Collections.Counter
  9. 【毕业求职季】-听说你想去大厂看学妹,带你看看阿里后端实习面经长啥样?
  10. 【毕业求职季】-听说你想去大厂看学妹,带你看看字节跳动抖音电商后端面试长啥样?