% mtry  = number of predictors sampled for splitting at each node.

% votes (classification only) a matrix with one row for each input data point and one

%       column for each class, giving the fraction or number of 'votes' from the random

%       forest.

% oob_times number of times cases are 'out-of-bag' (and thus used in computing OOB error

%       estimate)

% proximity if proximity=TRUE when randomForest is called, a matrix of proximity

%       measures among the input (based on the frequency that pairs of data points are

%       in the same terminal nodes).

% errtr = first column is OOB Err rate, second is for class 1 and so on

function model = classRF_train(X,Y,ntree,mtry, extra_options)
% classRF_train  Validate the inputs/options and train a classification
% random forest by calling the MEX routine mexClassRF_train.
%
% Inputs
%   X             N-by-D predictor matrix (one row per data point). NaNs
%                 are rejected.
%   Y             class labels, one per row of X. The distinct labels are
%                 remapped to 1..nclass before training; the mapping is
%                 returned in model.orig_labels / model.new_labels.
%   ntree         number of trees. Missing, empty or <=0 selects 500.
%   mtry          number of predictors sampled at each split. Missing,
%                 empty, <=0 or >D selects floor(sqrt(D)).
%   extra_options optional struct. Recognised fields: DEBUG_ON, replace,
%                 classwt, cutoff, strata, sampsize, nodesize, importance,
%                 localImp, nPerm, proximity, oob_prox, do_trace,
%                 keep_inbag, categories.
%
% Output
%   model         struct holding the values returned by mexClassRF_train
%                 (nrnodes, ntree, xbestsplit, classwt, cutoff, treemap,
%                 nodestatus, nodeclass, bestvar, ndbigtree, mtry, outcl,
%                 counttr, localImp, importance, importanceSD, errtr,
%                 inbag) plus orig_labels, new_labels, nclass, proximity
%                 (empty unless requested), votes (= counttr') and
%                 oob_times (= sum(counttr)').
%
% Errors are raised for: fewer than two classes, empty X, empty Y, a Y/X
% size mismatch, NaNs in X or Y, a bad categories/cutoff/classwt/sampsize
% specification.

    DEFAULTS_ON = 0;   % set when ntree/mtry fall back to defaults (triggers a notice)
    TRUE  = 1;
    FALSE = 0;

    % ---- map the user's labels onto 1..nclass ---------------------------
    % Y_new is built as a double array pre-filled with NaN. This keeps the
    % remapping correct for integer/logical label types and for labels equal
    % to Inf (an in-place Inf sentinel only works for finite floating-point
    % labels). Entries that match no label (i.e. NaN labels) stay NaN and
    % are reported by the NaN check further down.
    orig_labels = sort(unique(Y));
    new_labels  = 1:length(orig_labels);
    Y_new = nan(size(Y));
    for i = 1:length(orig_labels)
        Y_new(Y == orig_labels(i)) = new_labels(i);
    end
    Y = Y_new;

    % ---- pull user supplied options into local variables ----------------
    if exist('extra_options','var')
        if isfield(extra_options,'DEBUG_ON');    DEBUG_ON   = extra_options.DEBUG_ON;    end
        if isfield(extra_options,'replace');     replace    = extra_options.replace;     end
        if isfield(extra_options,'classwt');     classwt    = extra_options.classwt;     end
        if isfield(extra_options,'cutoff');      cutoff     = extra_options.cutoff;      end
        if isfield(extra_options,'strata');      strata     = extra_options.strata;      end
        if isfield(extra_options,'sampsize');    sampsize   = extra_options.sampsize;    end
        if isfield(extra_options,'nodesize');    nodesize   = extra_options.nodesize;    end
        if isfield(extra_options,'importance');  importance = extra_options.importance;  end
        if isfield(extra_options,'localImp');    localImp   = extra_options.localImp;    end
        if isfield(extra_options,'nPerm');       nPerm      = extra_options.nPerm;       end
        if isfield(extra_options,'proximity');   proximity  = extra_options.proximity;   end
        if isfield(extra_options,'oob_prox');    oob_prox   = extra_options.oob_prox;    end
        if isfield(extra_options,'do_trace');    do_trace   = extra_options.do_trace;    end
        if isfield(extra_options,'keep_inbag');  keep_inbag = extra_options.keep_inbag;  end
    end

    keep_forest = 1;   % always save the trees

    % ---- defaults for everything not supplied ---------------------------
    % classwt, cutoff, strata, proximity and oob_prox are handled later
    % because their defaults depend on nclass / addclass / Stratify.
    if ~exist('DEBUG_ON','var'),    DEBUG_ON = FALSE;   end
    if ~exist('replace','var'),     replace  = TRUE;    end
    if ~exist('sampsize','var')
        if (replace)
            sampsize = size(X,1);
        else
            sampsize = ceil(0.632*size(X,1));
        end
    end
    if ~exist('nodesize','var'),    nodesize   = 1;     end   % classification=1, regression=5
    if ~exist('importance','var'),  importance = FALSE; end
    if ~exist('localImp','var'),    localImp   = FALSE; end
    if ~exist('nPerm','var'),       nPerm      = 1;     end
    if ~exist('do_trace','var'),    do_trace   = FALSE; end
    if ~exist('keep_inbag','var'),  keep_inbag = FALSE; end

    % An empty ntree/mtry ([]) is treated like a missing one, so callers can
    % write classRF_train(X,Y,[],[],opts) to request the defaults.
    if ~exist('ntree','var') || isempty(ntree) || ntree<=0
        ntree = 500;
        DEFAULTS_ON = 1;
    end
    if ~exist('mtry','var') || isempty(mtry) || mtry<=0 || mtry>size(X,2)
        mtry = floor(sqrt(size(X,2)));
    end

    % ---- basic sanity checks on X and Y ---------------------------------
    addclass = isempty(Y);
    if (~addclass && length(unique(Y))<2)
        error('need atleast two classes for classification');
    end

    [N, D] = size(X);
    if N==0; error(' data (X) has 0 rows'); end

    if (mtry<1 || mtry>D)
        DEFAULTS_ON = 1;
    end
    mtry = max(1,min(D,round(mtry)));   % integer in [1, D]
    if DEFAULTS_ON
        fprintf('\tSetting to defaults %d trees and mtry=%d\n',ntree,mtry);
    end

    if isempty(Y)
        % training without labels (addclass) is not implemented
        error('have to fill stuff here')
    end
    if length(Y)~=N
        error('Y size is not the same as X size');
    end
    addclass = FALSE;

    if any(isnan(X(:)));  error('NaNs in X');   end
    if any(isnan(Y(:)));  error('NaNs in Y');   end

    % ---- categorical predictors -----------------------------------------
    % The user lists, per column/feature, 1 if the values are real valued,
    % otherwise the number of categories of that feature.
    if exist('extra_options','var') && isfield(extra_options,'categories')
        ncat = extra_options.categories;
        if numel(ncat)~=D
            % ncat is handed to the MEX routine as a per-feature vector;
            % refuse a vector that does not match the number of columns.
            error('categories must have one entry per column of X');
        end
    else
        ncat = ones(1,D);
    end
    maxcat = max(ncat);
    if maxcat>32
        error('Can not handle categorical predictors with more than 32 categories');
    end

    % ---- class dependent options (cf. randomForest.default.R) -----------
    nclass = length(unique(Y));

    if ~exist('cutoff','var')
        cutoff = ones(1,nclass)*(1/nclass);
    else
        % nclass*eps of slack so that cutoffs which sum to 1 up to
        % floating-point round-off are not rejected.
        cutoff_sum = sum(cutoff(:));
        if cutoff_sum>1+nclass*eps || cutoff_sum<0 || any(cutoff(:)<=0) || length(cutoff)~=nclass
            error('Incorrect cutoff specified');
        end
    end

    % ipi tells the MEX routine whether class weights were user supplied
    if ~exist('classwt','var')
        classwt = ones(1,nclass);
        ipi = 0;
    else
        if length(classwt)~=nclass
            error('Length of classwt not equal to the number of classes')
        end
        if any(classwt(:)<=0)
            error('classwt must be positive');
        end
        ipi = 1;
    end

    % When proximity is not given, both proximity and oob_prox follow
    % addclass (a user supplied oob_prox is overridden in that case).
    if ~exist('proximity','var')
        proximity = addclass;
        oob_prox  = proximity;
    end
    if ~exist('oob_prox','var')
        oob_prox = proximity;
    end

    % Local importance implies importance. The importance/proximity work
    % arrays themselves are allocated inside the MEX file.
    if localImp
        importance = TRUE;
    end
    if importance
        % NOTE(review): nPerm is normalised here but is not among the
        % arguments passed to mexClassRF_train below - confirm whether the
        % MEX routine is meant to receive it.
        if (nPerm<1)
            nPerm = int32(1);
        else
            nPerm = int32(nPerm);
        end
    end

    % ---- sampling specification -----------------------------------------
    % A vector valued sampsize requests stratified sampling.
    Stratify = (length(sampsize)>1);
    if (~Stratify && sampsize>N)
        error('Sampsize too large')
    end
    if Stratify
        if ~exist('strata','var')
            strata = Y;
        end
        nsum = sum(sampsize);
        if (any(sampsize(:)<=0) || nsum==0)
            error('Bad sampsize specification');
        end
        strata = int32(strata);
    else
        nsum   = sampsize;
        strata = int32(1);   % placeholder, not stratifying
    end

    % Flag vector for the MEX routine; the order of the entries is fixed.
    Options = int32([addclass, importance, localImp, proximity, oob_prox, ...
                     do_trace, keep_forest, replace, Stratify, keep_inbag]);

    if DEBUG_ON
        % print the parameters that are being sent in
        fprintf('size(x) %d\n',size(X));
        fprintf('size(y) %d\n',size(Y));
        fprintf('nclass %d\n',nclass);
        fprintf('size(ncat) %d\n',size(ncat));
        fprintf('maxcat %d\n',maxcat);
        fprintf('size(sampsize) %d\n',size(sampsize));
        fprintf('sampsize[0] %d\n',sampsize(1));
        fprintf('Stratify %d\n',Stratify);
        fprintf('Proximity %d\n',proximity);
        fprintf('oob_prox %d\n',oob_prox);
        fprintf('strata %d\n',strata);
        fprintf('ntree %d\n',ntree);
        fprintf('mtry %d\n',mtry);
        fprintf('ipi %d\n',ipi);
        fprintf('classwt %f\n',classwt);
        fprintf('cutoff %f\n',cutoff);
        fprintf('nodesize %f\n',nodesize);
    end

    % ---- train ----------------------------------------------------------
    % X is passed transposed; integer valued arguments are cast to int32.
    [nrnodes,ntree,xbestsplit,classwt,cutoff,treemap,nodestatus,nodeclass,bestvar,ndbigtree,mtry, ...
        outcl, counttr, prox, impmat, impout, impSD, errtr, inbag] ...
        = mexClassRF_train(X',int32(Y),nclass,ntree,mtry,int32(ncat), ...
                           int32(maxcat), int32(sampsize), strata, Options, int32(ipi), ...
                           classwt, cutoff, int32(nodesize),int32(nsum));

    % ---- package the results --------------------------------------------
    model.nrnodes     = nrnodes;
    model.ntree       = ntree;
    model.xbestsplit  = xbestsplit;
    model.classwt     = classwt;
    model.cutoff      = cutoff;
    model.treemap     = treemap;
    model.nodestatus  = nodestatus;
    model.nodeclass   = nodeclass;
    model.bestvar     = bestvar;
    model.ndbigtree   = ndbigtree;
    model.mtry        = mtry;
    model.orig_labels = orig_labels;
    model.new_labels  = new_labels;
    model.nclass      = nclass;
    model.outcl       = outcl;
    model.counttr     = counttr;
    if proximity
        model.proximity = prox;
    else
        model.proximity = [];
    end
    model.localImp     = impmat;
    model.importance   = impout;
    model.importanceSD = impSD;
    model.errtr        = errtr';          % first column is OOB error rate, then one per class
    model.inbag        = inbag;
    model.votes        = counttr';        % one row per data point, one column per class
    model.oob_times    = sum(counttr)';

    clear mexClassRF_train   % unload the MEX routine from memory

随机森林降维matlab代码,随机森林代码实现问题相关推荐

  1. 随机森林降维matlab,随机森林在高光谱遥感数据中降维与分类的应用

    高光谱遥感图像技术(hyperspectral images)能够获取地物在几十甚至几百个波段的光谱信息.与多光谱数据相比,高光谱数据具有图谱合一.可识别更多地物等优势,它在环境监测.植被的精细分类. ...

  2. 随机森林算法Matlab实现

    随机森林算法Matlab实现 瞎BB 代码 计算当前自身gini系数 求最优划分点及其gini系数 对data中按decision属性值从小到大排列 生成结点 生成随机采样样本数据 生成决策树 评价函 ...

  3. 数学建模_随机森林分类模型详解Python代码

    数学建模_随机森林分类模型详解Python代码 随机森林需要调整的参数有: (1) 决策树的个数 (2) 特征属性的个数 (3) 递归次数(即决策树的深度)''' from numpy import ...

  4. 基于python的随机森林回归实现_随机森林理论与python代码实现

    1,初品随机森林 随机森林,森林就是很多决策树放在一起一起叫森林,而随机体现在数据集的随机采样中和特征的随机选取中,具体下面再讲.通俗的说随机森林就是建立多颗决策树(CART),来做分类(回归),以多 ...

  5. 随机森林用matlab实现,matlab实现随机森林

    MATLAB中的分类器_金融/投资_经管营销_专业资料.MATLAB 中的分类器目前了解到的 MATLAB 中分类器有:K 近邻分类器,随机森林分类器,朴素贝叶斯,集成学习 方法,鉴别...... 机 ...

  6. Matlab实现随机森林、神经网络、Lasso回归

    实现之前所说的上传Matlab实现随机森林.神经网络.Lasso回归的承诺. Lasso lasso具有降维的功能,但区别于PCA,lasso直接减少特征数,做的是特征选择,PCA是通过空间转换将特征 ...

  7. matlab中随机森林实现,随机森林实现 MATLAB

    matlab 中随机森林工具箱的下载地址: http://code.google.com/p/randomforest-matlab/downloads/detail?name=Windows-Pre ...

  8. 使用matlab实现随机森林仿真

    这几天在网上看到一篇关于随机森林的文章,感觉挺有趣的,就在这简单地总结了一下随机森林,并使用matlab来仿真实现. 随机森林 随机森林使用了元胞自动机模型,其中每个元胞有三种状态:无树.着火.有树. ...

  9. 随机森林图像分类实战:随机森林分类聚类(Kmeans)降维后的数据、随机森林分类聚类(Kmeans)降维后的合成(append)数据

    随机森林图像分类实战:随机森林分类聚类(Kmeans)降维后的数据.随机森林分类聚类(Kmeans)降维后的合成(append)数据 目录

最新文章

  1. 在博客中加入“花絮”效果
  2. qq在线咨询代码,MSN在线代码,贸易通在线留言源代码!
  3. windows 下搭建python虚拟环境
  4. 2017.0613.《计算机组成原理》总线控制-通信控制
  5. [密码学基础][每个信息安全博士生应该知道的52件事][Bristol Cryptography][第26篇]描述NAF标量乘法算法
  6. android不能在主线程,android.os.NetworkOnMainThreadException 在4.0之后谷歌强制要求连接网络不能在主线程进行访问(示例代码)...
  7. angularjs--控制器的显示与隐示使用
  8. php 内置mail 包,PHP使用pear自带的mail类库发邮件的方法
  9. 大数运算(Java)
  10. java 读取本地文件_java 读取本地文件实例详解
  11. vi 方向键 ABC
  12. access 更新整列数据_创建和运行更新查询
  13. 数字电路课程设计---电子钟
  14. 一:LAMP 架构简介
  15. WM8978音频模块梳理
  16. php挂马攻击,PHP批量挂马脚本
  17. iconfont是什么?
  18. 100天成就卓越领导力:新晋领导者的First100训练法
  19. 三星Galaxy S20:打开手势并更改导航栏按钮顺序
  20. CouchDB与CouchBase的比较

热门文章

  1. java tostring方法_Java虚拟机如执行方法调用的(二)?
  2. 查找会议论文的会议地址
  3. halconC++类:HDevWindowStack
  4. Android之IPC机制
  5. Ts_半分查找猜数字游戏(初始版),
  6. Spring Data JPA 从入门到精通~思维导图
  7. MC缓存序列化php,PHP serialize()序列化的使用
  8. java正则匹配的坑_java正则表达式入坑指南
  9. 两个oracle数据库外网同步,利用DBLink+JOB实现两个Oracle数据库之间的数据同步
  10. linux用u盘上传文件,linux如何挂载U盘和文件系统(或需要用到).doc