sklearn--各分类算法简单应用
KNN
from sklearn.neighbors import KNeighborsClassifier
import numpy as np


def KNN(X, y, XX):
    """Classify ``XX`` with a k-nearest-neighbours model trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    # n_neighbors is set to 10 here; the library default is 5.
    # fit() returns the estimator itself, so training and prediction chain.
    return KNeighborsClassifier(n_neighbors=10).fit(X, y).predict(XX)
SVM
from sklearn.svm import SVC


def SVM(X, y, XX):
    """Classify ``XX`` with a support vector classifier trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    # The regularisation keyword is the upper-case ``C``; the lower-case
    # ``c`` is not an SVC parameter and makes the constructor raise TypeError.
    model = SVC(C=5.0)
    model.fit(X, y)
    predicted = model.predict(XX)
    return predicted
SVM Classifier using cross validation
def svm_cross_validation(train_x, train_y):
    """Tune an RBF-kernel SVC with a grid search and return the fitted model.

    A ``GridSearchCV`` over ``C`` and ``gamma`` is run on the training data,
    every parameter of the best estimator is printed, and a fresh ``SVC`` is
    then trained with the selected ``C`` and ``gamma``.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        An ``SVC`` fitted on the training data with the selected parameters.
    """
    # sklearn.grid_search was removed in scikit-learn 0.20; GridSearchCV now
    # lives in sklearn.model_selection. Fall back for very old installs.
    try:
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        from sklearn.grid_search import GridSearchCV
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    }
    grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
    grid_search.fit(train_x, train_y)

    best_parameters = grid_search.best_estimator_.get_params()
    for para, val in best_parameters.items():
        print(para, val)

    model = SVC(
        kernel='rbf',
        C=best_parameters['C'],
        gamma=best_parameters['gamma'],
        probability=True,
    )
    model.fit(train_x, train_y)
    return model
LR
from sklearn.linear_model import LogisticRegression


def LR(X, y, XX):
    """Classify ``XX`` with a logistic regression model trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    # fit() returns the estimator itself, so training and prediction chain.
    return LogisticRegression().fit(X, y).predict(XX)
决策树(CART)
from sklearn.tree import DecisionTreeClassifier


# NOTE(review): the random-forest and GBDT snippets further down this file
# are also named CTRA, so a later definition shadows this one if the snippets
# are loaded together as a single module.
def CTRA(X, y, XX):
    """Classify ``XX`` with a decision tree trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    # fit() returns the estimator itself, so training and prediction chain.
    return DecisionTreeClassifier().fit(X, y).predict(XX)
随机森林
from sklearn.ensemble import RandomForestClassifier


# NOTE(review): this random-forest helper shares the name CTRA with the
# decision-tree and GBDT snippets in this file; whichever is defined last
# wins if they are loaded together as a single module.
def CTRA(X, y, XX):
    """Classify ``XX`` with a random forest trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    # fit() returns the estimator itself, so training and prediction chain.
    return RandomForestClassifier().fit(X, y).predict(XX)
GBDT(Gradient Boosting Decision Tree)
from sklearn.ensemble import GradientBoostingClassifier


# NOTE(review): this gradient-boosting helper shares the name CTRA with the
# decision-tree and random-forest snippets in this file; whichever is defined
# last wins if they are loaded together as a single module.
def CTRA(X, y, XX):
    """Classify ``XX`` with gradient-boosted trees trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    # fit() returns the estimator itself, so training and prediction chain.
    return GradientBoostingClassifier().fit(X, y).predict(XX)
朴素贝叶斯:sklearn 提供三种实现——GaussianNB 基于高斯分布求概率,MultinomialNB 基于多项式分布求概率,BernoulliNB 基于伯努利分布求概率。
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB


def GNB(X, y, XX):
    """Classify ``XX`` with Gaussian naive Bayes trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    model = GaussianNB()
    model.fit(X, y)
    predicted = model.predict(XX)
    return predicted


def MNB(X, y, XX):
    """Classify ``XX`` with multinomial naive Bayes trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    model = MultinomialNB()
    model.fit(X, y)
    # The original call was missing its closing parenthesis.
    predicted = model.predict(XX)
    return predicted


def BNB(X, y, XX):
    """Classify ``XX`` with Bernoulli naive Bayes trained on ``X``/``y``.

    Args:
        X: Training samples.
        y: Training labels for ``X``.
        XX: Test samples to classify.

    Returns:
        The labels predicted for ``XX``.
    """
    model = BernoulliNB()
    model.fit(X, y)
    # The original call was missing its closing parenthesis.
    predicted = model.predict(XX)
    return predicted
# coding=gbk
'''
Created on 2016年6月4日 @author: bryan
'''
import time
from sklearn import metrics
import pickle as pickle
import pandas as pd # Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y):
    """Train a multinomial naive Bayes classifier.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``MultinomialNB`` estimator, built with ``alpha=0.01``.
    """
    from sklearn.naive_bayes import MultinomialNB

    # fit() returns the estimator itself, so build and train in one step.
    return MultinomialNB(alpha=0.01).fit(train_x, train_y)


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Train a k-nearest-neighbours classifier with default settings.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``KNeighborsClassifier`` estimator.
    """
    from sklearn.neighbors import KNeighborsClassifier

    # fit() returns the estimator itself, so build and train in one step.
    return KNeighborsClassifier().fit(train_x, train_y)


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Train an L2-penalised logistic regression classifier.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    from sklearn.linear_model import LogisticRegression

    # fit() returns the estimator itself, so build and train in one step.
    return LogisticRegression(penalty='l2').fit(train_x, train_y)


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Train a random forest classifier with 8 trees.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``RandomForestClassifier`` estimator.
    """
    from sklearn.ensemble import RandomForestClassifier

    # fit() returns the estimator itself, so build and train in one step.
    return RandomForestClassifier(n_estimators=8).fit(train_x, train_y)


# Decision Tree Classifier
def decision_tree_classifier(train_x, train_y):
    """Train a decision tree classifier with default settings.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``DecisionTreeClassifier`` estimator.
    """
    from sklearn.tree import DecisionTreeClassifier

    # fit() returns the estimator itself, so build and train in one step.
    return DecisionTreeClassifier().fit(train_x, train_y)


# GBDT(Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y):
    """Train a gradient boosting classifier with 200 boosting stages.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``GradientBoostingClassifier`` estimator.
    """
    from sklearn.ensemble import GradientBoostingClassifier

    # fit() returns the estimator itself, so build and train in one step.
    return GradientBoostingClassifier(n_estimators=200).fit(train_x, train_y)


# SVM Classifier
def svm_classifier(train_x, train_y):
    """Train an RBF-kernel support vector classifier with probabilities on.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        The fitted ``SVC`` estimator.
    """
    from sklearn.svm import SVC

    # fit() returns the estimator itself, so build and train in one step.
    return SVC(kernel='rbf', probability=True).fit(train_x, train_y)


# SVM Classifier using cross validation
def svm_cross_validation(train_x, train_y):
    """Tune an RBF-kernel SVC with a grid search and return the fitted model.

    A ``GridSearchCV`` over ``C`` and ``gamma`` is run on the training data,
    every parameter of the best estimator is printed, and a fresh ``SVC`` is
    then trained with the selected ``C`` and ``gamma``.

    Args:
        train_x: Training samples.
        train_y: Training labels for ``train_x``.

    Returns:
        An ``SVC`` fitted on the training data with the selected parameters.
    """
    # sklearn.grid_search was removed in scikit-learn 0.20; GridSearchCV now
    # lives in sklearn.model_selection. Fall back for very old installs.
    try:
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        from sklearn.grid_search import GridSearchCV
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    }
    grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
    grid_search.fit(train_x, train_y)

    best_parameters = grid_search.best_estimator_.get_params()
    for para, val in best_parameters.items():
        print(para, val)

    model = SVC(
        kernel='rbf',
        C=best_parameters['C'],
        gamma=best_parameters['gamma'],
        probability=True,
    )
    model.fit(train_x, train_y)
    return model


def read_data(data_file):
    """Load a labelled CSV and split it 90/10 into train and test sets.

    The split is positional: the first 90% of the rows form the training set
    and the remaining rows form the test set. The ``label`` column is the
    target; all other columns are features.

    Args:
        data_file: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``.
    """
    data = pd.read_csv(data_file)
    split_at = int(len(data) * 0.9)
    train = data[:split_at]
    test = data[split_at:]
    train_y = train.label
    train_x = train.drop('label', axis=1)
    test_y = test.label
    test_x = test.drop('label', axis=1)
    return train_x, train_y, test_x, test_y


if __name__ == '__main__':
    data_file = "H:\\Research\\data\\trainCG.csv"
    thresh = 0.5  # not used anywhere in this script
    model_save_file = None
    model_save = {}

    test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'SVMCV', 'GBDT']
    # Maps each short name to the function that trains that classifier.
    classifiers = {'NB': naive_bayes_classifier,
                   'KNN': knn_classifier,
                   'LR': logistic_regression_classifier,
                   'RF': random_forest_classifier,
                   'DT': decision_tree_classifier,
                   'SVM': svm_classifier,
                   'SVMCV': svm_cross_validation,
                   'GBDT': gradient_boosting_classifier
                   }

    print('reading training and testing data...')
    train_x, train_y, test_x, test_y = read_data(data_file)

    for classifier in test_classifiers:
        print('******************* %s ********************' % classifier)
        start_time = time.time()
        model = classifiers[classifier](train_x, train_y)
        print('training took %fs!' % (time.time() - start_time))
        predict = model.predict(test_x)
        if model_save_file is not None:
            model_save[classifier] = model
        precision = metrics.precision_score(test_y, predict)
        recall = metrics.recall_score(test_y, predict)
        print('precision: %.2f%%, recall: %.2f%%' % (100 * precision, 100 * recall))
        accuracy = metrics.accuracy_score(test_y, predict)
        print('accuracy: %.2f%%' % (100 * accuracy))

    if model_save_file is not None:
        # Use a context manager so the file handle is closed after dumping.
        with open(model_save_file, 'wb') as out_file:
            pickle.dump(model_save, out_file)
结果:
reading training and testing data...
******************* NB ********************
training took 0.004986s!
precision: 78.08%, recall: 71.25%
accuracy: 74.17%
******************* KNN ********************
training took 0.017545s!
precision: 97.56%, recall: 100.00%
accuracy: 98.68%
******************* LR ********************
training took 0.061161s!
precision: 89.16%, recall: 92.50%
accuracy: 90.07%
******************* RF ********************
training took 0.040111s!
precision: 96.39%, recall: 100.00%
accuracy: 98.01%
******************* DT ********************
training took 0.004513s!
precision: 96.20%, recall: 95.00%
accuracy: 95.36%
******************* SVM ********************
training took 0.242145s!
precision: 97.53%, recall: 98.75%
accuracy: 98.01%
******************* SVMCV ********************
Fitting 3 folds for each of 14 candidates, totalling 42 fits
[Parallel(n_jobs=1)]: Done 42 out of 42 | elapsed: 6.8s finished
probability True
verbose False
coef0 0.0
degree 3
tol 0.001
shrinking True
cache_size 200
gamma 0.001
max_iter -1
C 1000
decision_function_shape None
random_state None
class_weight None
kernel rbf
training took 7.434668s!
precision: 98.75%, recall: 98.75%
accuracy: 98.68%
******************* GBDT ********************
training took 0.521916s!
precision: 97.56%, recall: 100.00%
accuracy: 98.68%
sklearn--各分类算法简单应用相关推荐
- python分类算法的应用_Python基于sklearn库的分类算法简单应用示例
Python基于sklearn库的分类算法简单应用示例 来源:中文源码网 浏览: 次 日期:2018年9月2日 [下载文档: Python基于sklearn库的分类算法简单应用示例.tx ...
- python分类算法的应用_Python使用sklearn库实现的各种分类算法简单应用小结
本文实例讲述了Python使用sklearn库实现的各种分类算法简单应用.分享给大家供大家参考,具体如下: KNN from sklearn.neighbors import KNeighborsCl ...
- python使用欧氏距离knn_python运用sklearn实现KNN分类算法
KNN(K-Nearest-Neighbours Classiflication)分类算法,供大家参考,具体内容如下 最简单的分类算法,易于理解和实现 实现步骤:通过选取与该点距离最近的k个样本,在这 ...
- sklearn分类算法-决策树、随机森林
sklearn分类算法-决策树.随机森林 一.决策树 1.概念 决策树思想的来源非常朴素,程序设计中的条件分支结构就是if-then结构,最早的决策树就是利用这类结构分割数据的一种分类学习方法 比如: ...
- 简单明了的分类算法:OneR。
在之前介绍的kNN算法属于一种分类算法,之后会介绍的决策树也是属于分类算法.分类算法的目的就是根据训练集的特征将新的数据进行预测,当然能够找到特征之间的联系越多那么最后的分类结果也就应该越准确.但是有 ...
- sklearn实现GBDT算法(分类)
阿喽哈~小天才们,今天我们聊一聊GBDT 上一篇文章我们详细地说了GBDT算法原理,包括为什么拟合负梯度.负梯度为何可以替代残差.二分类GBDT算法公式和实例演算,感兴趣的童鞋请移步GBDT算法详解& ...
- 几种sklearn库直接实现分类算法
机器学习入门--直接调用sklearn实现几种简单算法 刚学习机器学习,希望大佬们勿喷,望指点 几种分类算法针对鸢尾花数据的分析 1. LR线性回归分类算法 # 引入数据集,sklearn包含众多数据 ...
- 机器学习Sklearn总结2——分类算法
目录 一.转换器与估计器 二.分类算法 K-近邻算法 案例代码: 模型选择与调优 案例代码: 朴素贝叶斯算法: 朴素贝叶斯算法总结 案例代码: 决策树总结: 案例代码: 使用随机森林来实现: 随机森林 ...
- 15分钟带你入门sklearn与机器学习——分类算法篇
作者 | 何从庆 本文转载自AI算法之心(ID:AIHeartForYou) [导读]众所周知,Scikit-learn(以前称为scikits.learn)是一个用于Python编程语言的免费软件机 ...
- 15 分钟带你入门 sklearn 与机器学习(分类算法篇)
众所周知,Scikit-learn(以前称为scikits.learn)是一个用于Python编程语言的免费软件机器学习库.它具有各种分类,回归和聚类算法,包括支持向量机,随机森林,梯度增强,k-me ...
最新文章
- golang etcd 报错 undefined: resolver.BuildOption 解决方案
- 4g模块注册上网 移远_Openwrt实现4G模块上网功能
- Confluence 6 Windows 中以服务方式自动重启的原因
- 记录gulp报错The following tasks did not complete: cssmin或类似任务
- 11-使用NSPersistentContainer搭建CoreData Stack
- C语言创建map,遍历map
- html中加入js,html嵌入js
- 【渝粤教育】国家开放大学2018年秋季 0273-22T中国现代文学 参考试题
- 【转】开机出现 error:file “/boot/grub/i386-pc/normal.mod“ not found 错误提示
- Win7+VMware10.0+CentOS 6.4+Tomcat,Win7访问不了CentOS6.4上的Tomcat
- Swift - 数组排序方法(附样例)
- html5 canvas签字,HTML5 canvas实现电子签名
- java old区_一次Jvm old过高的排查过程实战记录
- 还不会动效?优秀的可临摹素材,给你做个示范
- 对于spring的一些巩固一些难点的理解 2021-04-18
- 细节真的能决定成败么?
- 二叉查找树之 Java的实现
- 京东的交易系统 之 高并发架构分享
- 中兴通讯和江苏电信携手推进SDN IPRAN创新进程
- 手机计算机不支持此操作系统,Win10计算机投影屏幕此设备不支持Miracast