Sklearn_LearningCurve

SklearnLearningCurveSklearn_LearningCurveSklearnLearningCurve

# 运行 xgboost安装包中的示例程序
import xgboost as xgb
from xgboost import XGBClassifier# 加载LibSVM格式数据模块
from sklearn.datasets import load_svmlight_filefrom sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_scorefrom matplotlib import pyplot

# read in data，数据在xgboost安装的路径下的demo目录,现在copy到代码目录下的data目录
my_workpath = './data/'
X_train,y_train = load_svmlight_file(my_workpath + 'agaricus.txt.train')
X_test,y_test = load_svmlight_file(my_workpath + 'agaricus.txt.test')X_train.shape

训练集-校验集分离

假设我们取1/3的训练数据做为校验数据
ps: 为什么要校验？

# split data into train and test sets, 1/3的训练数据作为校验数据
seed = 7
test_size = 0.33
X_train_part, X_validate, y_train_part, y_validate= train_test_split(X_train, y_train, test_size=test_size,random_state=seed)X_train_part.shape
X_validate.shape

训练模型

# 设置boosting迭代计算次数
num_round = 100#bst = XGBClassifier(param)
#bst = XGBClassifier()
bst =XGBClassifier(max_depth=2, learning_rate=0.1, n_estimators=num_round, silent=True, objective='binary:logistic')eval_set = [(X_train_part, y_train_part), (X_validate, y_validate)]
bst.fit(X_train_part, y_train_part, eval_metric=["error", "logloss"], eval_set=eval_set, verbose=True)

模型在每次校验集上的性能存在模型中，可用来进一步进行分析 model.evals result() 返回一个字典：评估数据集和分数

显示学习曲线

# retrieve performance metrics
results = bst.evals_result()
#print(results)epochs = len(results['validation_0']['error'])
x_axis = range(0, epochs)# plot log loss
fig, ax = pyplot.subplots()
ax.plot(x_axis, results['validation_0']['logloss'], label='Train')
ax.plot(x_axis, results['validation_1']['logloss'], label='Test')
ax.legend()
pyplot.ylabel('Log Loss')
pyplot.title('XGBoost Log Loss')
pyplot.show()# plot classification error
fig, ax = pyplot.subplots()
ax.plot(x_axis, results['validation_0']['error'], label='Train')
ax.plot(x_axis, results['validation_1']['error'], label='Test')
ax.legend()
pyplot.ylabel('Classification Error')
pyplot.title('XGBoost Classification Error')
pyplot.show()

测试

模型训练好后，可以用训练好的模型对测试数据进行预测

XGBoost预测的输出是概率，输出值是样本为第一类的概率。我们需要将概率值转换为0或1。

# make prediction
preds = bst.predict(X_test)
predictions = [round(value) for value in preds]test_accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))

Sklearn_LearningCurve

SklearnLearningCurveSklearn_LearningCurveSklearnLearningCurve

训练集-校验集分离

训练模型

测试

Sklearn_LearningCurve相关推荐

最新文章

热门文章

Sklearn_LearningCurve

SklearnLearningCurveSklearn_LearningCurveSklearnL​earningCurve

训练集-校验集分离

训练模型

测试

Sklearn_LearningCurve相关推荐

最新文章

热门文章

SklearnLearningCurveSklearn_LearningCurveSklearnLearningCurve