《机器学习实战》代码记录--决策树

数据格式：

	no surfacing	flippers	labels
1	1	1	yes
2	1	1	yes
3	1	0	no
4	0	1	no
5	0	1	no

输出样例：

打印决策树并对[1,1]分类

代码：

decision_tree.py

# -*- coding:utf-8 -*-
from math import log
import operator
import sys
# Split the data set on a given feature
def splitDataSet(dataSet, axis, value):
    """Return the rows whose feature at index ``axis`` equals ``value``.

    The matching column is removed from every returned row, so each result
    row is one element shorter than its source row.  ``dataSet`` and its rows
    are not modified; new row objects are built by slicing.

    Args:
        dataSet: sequence of rows (each row a sliceable sequence).
        axis: index of the feature column to test and drop.
        value: feature value a row must have to be kept.

    Returns:
        A new list of reduced rows (empty when nothing matches).
    """
    return [
        featVec[:axis] + featVec[axis + 1:]
        for featVec in dataSet
        if featVec[axis] == value
    ]
# Compute the Shannon entropy of a data set
def calcShannonEnt(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in ``dataSet``.

    The class label of a row is its last element.  An empty data set yields
    ``0.0`` because there are no labels to sum over.

    Args:
        dataSet: sequence of rows; ``row[-1]`` is the (hashable) class label.

    Returns:
        The entropy as a float.
    """
    numEntries = len(dataSet)
    # Count how many rows carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
# Choose the best way to split the data set (maximum information gain)
def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature whose split gives the largest information gain.

    Every column except the last (the class label) is treated as a feature.
    For each feature the data set is partitioned by feature value with
    ``splitDataSet`` and the size-weighted entropy of the partitions is
    compared with the entropy of the whole set.

    Args:
        dataSet: non-empty sequence of rows; ``row[-1]`` is the class label.

    Returns:
        The index of the best feature, or ``-1`` when no feature has a
        strictly positive information gain (including when there are no
        feature columns at all).
    """
    numFeatures = len(dataSet[0]) - 1
    baseEntropy = calcShannonEnt(dataSet)
    totalRows = float(len(dataSet))
    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataSet)
        # Entropy remaining after splitting on feature i, weighted by subset size.
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / totalRows
            newEntropy += prob * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature
# Majority vote
def majorityCnt(classList):
    """Return the class label that occurs most often in ``classList``.

    Args:
        classList: non-empty sequence of hashable class labels.

    Returns:
        The most frequent label.

    Raises:
        IndexError: if ``classList`` is empty.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    # .items() (rather than the Python 2-only .iteritems()) works on both
    # Python 2 and Python 3.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
# Build the decision tree
def createTree(dataSet, labels):
    """Recursively build a decision tree from ``dataSet``.

    The tree is a nested dict of the form
    ``{feature_label: {feature_value: subtree_or_class_label, ...}}``.

    Args:
        dataSet: non-empty sequence of rows; ``row[-1]`` is the class label
            and the other columns are feature values.
        labels: list of feature names, one per feature column.  It is not
            modified.

    Returns:
        A class label when the rows cannot or need not be split further,
        otherwise a nested dict as described above.
    """
    classList = [example[-1] for example in dataSet]
    # Stop splitting when every row has the same class.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # All features consumed: fall back to the most common class.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)
    # -1 means no feature separates the classes; using it as an index would
    # silently split on the class column, so take the majority vote instead.
    if bestFeat < 0:
        return majorityCnt(classList)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    # Build the reduced label list as a copy so the caller's list is untouched.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]
    uniqueVals = set(example[bestFeat] for example in dataSet)
    for value in uniqueVals:
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels[:]
        )
    return myTree


def classify(inputTree, featLabels, testVec):
    """Classify ``testVec`` by walking the decision tree ``inputTree``.

    Args:
        inputTree: nested dict as produced by ``createTree``.
        featLabels: list of feature names; the position of a name gives the
            index of that feature in ``testVec``.
        testVec: sequence of feature values to classify.

    Returns:
        The class label stored at the leaf that is reached.

    Raises:
        ValueError: if a feature name in the tree is missing from
            ``featLabels`` (raised by ``list.index``) or if ``testVec`` holds
            a feature value the tree has no branch for.
    """
    firstStr = next(iter(inputTree))  # the single root key; works on Py2 and Py3
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    for key, child in secondDict.items():
        if testVec[featIndex] == key:
            if isinstance(child, dict):
                return classify(child, featLabels, testVec)
            return child
    raise ValueError(
        'no branch for value %r of feature %r' % (testVec[featIndex], firstStr)
    )


def storeTree(inputTree, filename):
    """Pickle ``inputTree`` to the file ``filename``."""
    import pickle
    # Pickle data is binary; text mode breaks on Python 3.
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)


def grabTree(filename):
    """Load and return a tree previously written by ``storeTree``.

    NOTE: unpickling can execute arbitrary code; only load files you trust.
    """
    import pickle
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


if __name__ == '__main__':
    import ast

    dataset = [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']]
    labels = ['no surfacing', 'flippers']
    tree = createTree(dataset, labels)
    print(tree)
    storeTree(tree, 'firstTry.txt')
    tree2 = grabTree('firstTry.txt')
    # The test vector is given as a Python literal, e.g. "[1,1]".
    # ast.literal_eval replaces eval() so the argument cannot run code.
    # Defaults to [1, 1] when no argument is given.
    testVec = ast.literal_eval(sys.argv[1]) if len(sys.argv) > 1 else [1, 1]
    print(classify(tree2, ['no surfacing', 'flippers'], testVec))

treePlotter.py

#-*-coding:utf-8 -*-
import matplotlib.pyplot as plt
import matplotlib
import decision_tree

# Box style passed as ``bbox`` when annotating a decision (internal) node.
decisionNode = dict(boxstyle="sawtooth", fc="0.8")
# Box style passed as ``bbox`` when annotating a leaf node.
leafNode = dict(boxstyle="round4", fc="0.8")
# Arrow style used for the edge drawn from a node to its parent.
arrow_args = dict(arrowstyle="<-")

# Font file used so that Chinese node text renders correctly.
_ZH_FONT_PATH = '/usr/share/fonts/truetype/arphic/ukai.ttc'


def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Draw one boxed node at ``centerPt`` with an arrow linking it to ``parentPt``.

    Draws on ``createPlot.ax1``, so ``createPlot`` must have been called first.

    Args:
        nodeTxt: text shown inside the node box.
        centerPt: (x, y) of the node, in axes-fraction coordinates.
        parentPt: (x, y) of the parent node, in axes-fraction coordinates.
        nodeType: dict of box properties passed as ``bbox``.
    """
    import os
    from matplotlib.font_manager import FontProperties

    fontKwargs = {}
    # Only request the font when the file exists, so the plot still works
    # (with matplotlib's default font) on machines that lack it.
    if os.path.exists(_ZH_FONT_PATH):
        fontKwargs['fontproperties'] = FontProperties(fname=_ZH_FONT_PATH)
    createPlot.ax1.annotate(
        nodeTxt,
        xy=parentPt, xycoords='axes fraction',
        xytext=centerPt, textcoords='axes fraction',
        va="center", ha="center",
        bbox=nodeType, arrowprops=arrow_args,
        **fontKwargs
    )


def createPlot(inTree):
    """Draw the decision tree ``inTree`` in figure 1 and show it.

    Stores the axes on ``createPlot.ax1`` and the layout state on
    ``plotTree.totalW``, ``plotTree.totalD``, ``plotTree.xOff`` and
    ``plotTree.yOff`` (function attributes used as shared state).
    """
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    # Start half a leaf-slot to the left of the axes so that the first leaf
    # lands in the middle of its slot.
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()


def getNumLeafs(myTree):
    """Return the number of leaves (non-dict values) in the nested-dict tree."""
    numLeafs = 0
    firstStr = next(iter(myTree))  # the single root key; works on Py2 and Py3
    secondDict = myTree[firstStr]
    for child in secondDict.values():
        if isinstance(child, dict):
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs


def getTreeDepth(myTree):
    """Return the number of decision levels on the longest root-to-leaf path."""
    maxDepth = 0
    firstStr = next(iter(myTree))
    secondDict = myTree[firstStr]
    for child in secondDict.values():
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth


def plotMidTex(cntrPt, parentPt, txtString):
    """Write ``txtString`` at the midpoint between ``cntrPt`` and ``parentPt``."""
    xMid = (parentPt[0] - cntrPt[0]) / 2.0 + cntrPt[0]
    yMid = (parentPt[1] - cntrPt[1]) / 2.0 + cntrPt[1]
    createPlot.ax1.text(xMid, yMid, txtString)


def plotTree(myTree, parentPt, nodeTxt):
    """Recursively draw ``myTree`` below ``parentPt``.

    Args:
        myTree: nested-dict (sub)tree to draw.
        parentPt: (x, y) of the parent node in axes-fraction coordinates.
        nodeTxt: text for the edge from the parent to this subtree's root.

    Reads and updates the layout state kept on ``plotTree.xOff`` and
    ``plotTree.yOff``; ``createPlot`` initialises it.
    """
    numLeafs = getNumLeafs(myTree)
    firstStr = next(iter(myTree))
    # Centre this node horizontally above the leaves it spans.
    cntrPt = (
        plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
        plotTree.yOff,
    )
    plotMidTex(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Move down one level for the children ...
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key in secondDict:
        child = secondDict[key]
        if isinstance(child, dict):
            plotTree(child, cntrPt, str(key))
        else:
            # Each leaf takes the next horizontal slot.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            plotNode(child, (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
            plotMidTex((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    # ... and back up again once this subtree is drawn.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD


if __name__ == '__main__':
    dataset = [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']]
    labels = ['no surfacing', 'flippers']
    tree = decision_tree.createTree(dataset, labels)
    createPlot(tree)

转载于:https://my.oschina.net/daimeng/blog/374360

《机器学习实战》代码记录--决策树相关推荐

机器学习实战代码注释svm_使用经典机器学习模型动手进行毒性分类并最大程度地减少注释的意外偏见...
机器学习实战代码注释svm In this blog, I will try to explain a Toxicity polarity problem solution implementatio ...
机器学习实战3.4决策树项目案例03：使用Sklearn预测隐形眼镜类型
搜索微信公众号:'AI-ming3526'或者'计算机视觉这件小事' 获取更多人工智能.机器学习干货 csdn:https://blog.csdn.net/baidu_31657889/ github ...
机器学习实战学习记录（4-5章）
参考:机器学习实战Peter Harrington (11条消息) 机器学习实战教程(13篇)_chenyanlong_v的博客-CSDN博客_机器学习实战四.朴素贝叶斯:(1)选择具有最高概率的决 ...
机器学习实战2（决策树篇）
目录 1.决策树 2.决策树的构造 3.决策树的可视化 4.测试和存储决策树 1.决策树你是否玩过二十个问题的游戏,游戏的规则很简单:参与游戏的一方在脑海里想某个事物,其他参与者向他提问题,只允许提 ...
机器学习实战笔记：决策树（Decision Tree）
PS 该部分内容所设计到的程序源码已经存在我的github上,地址奉上: https://github.com/AdventureSJ/ML-Notes/tree/master/DecisionTre ...
机器学习实战6-sklearn训练决策树实现分类和回归
简介: 与SVM一样,决策树也是一种多功能的机器学习算法,它可以实现分类和回归任务,甚至是多输出任务.它们功能强大,能够拟合复杂的数据集.决策树同时也是随机森林(参见第7章)的基本组成部分,后者是现今 ...
机器学习实战ch03: 使用决策树预测隐形眼镜类型
决策树的一般流程 1.收集数据 2.准备数据:树构造算法只适用标称型数据,因此数据值型数据必须离散化 3.分析数据 4.训练算法 5.测试数据 6.使用算法决策树的优点 1.数据形式非常容易理解 2 ...
机器学习实战3.3决策树项目案例02：预测隐形眼镜类型
搜索微信公众号:'AI-ming3526'或者'计算机视觉这件小事' 获取更多人工智能.机器学习干货 csdn:https://blog.csdn.net/baidu_31657889/ github ...
机器学习实战——3.1 决策树的构造
目录 1. 信息增益 2. 划分数据集 2.1 按照给定特征划分数据集 2.2 选择最好的数据集划分方式 3. 递归构建决策树 3.1 多数表决的方法 3.2 创建树 1. 信息增益在划分数据集之前 ...

《机器学习实战》代码记录--决策树

《机器学习实战》代码记录--决策树相关推荐

最新文章

热门文章