


打开 xgboost 安装目录下的 plotting.py，找到 xgboost.plot_importance 函数的定义：先在函数签名中增加参数 fmap=''，再把其中的 booster.get_score(importance_type=importance_type) 改成 booster.get_score(importance_type=importance_type, fmap=fmap)。亲测有效。


# Caller script: load a saved model and inspect it using real feature names.
import xgboost as xgb
import numpy as np
import json
import matplotlib.pyplot as plt
from matplotlib import pyplot
from xgboost import plot_tree

# The feature-map file is expected to sit next to the model, with a ".fmap"
# suffix appended to the model path.
model_file = "data/xgboost.model"
fmap = model_file + ".fmap"

bst = xgb.Booster(model_file=model_file)

# Parenthesised print works on both Python 2 and Python 3.
print(bst.get_fscore(fmap=fmap))

xgb.to_graphviz(bst, num_trees=10, fmap=fmap)

# Passing fmap here relies on the patched plot_importance that accepts it.
xgb.plot_importance(bst, fmap=fmap)


# Patched plot_importance
# Python/2.7/lib/python/site-packages/xgboost/plotting.py
def plot_importance(booster, ax=None, height=0.2,
                    xlim=None, ylim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
                    importance_type='weight', max_num_features=None,
                    grid=True, show_values=True, fmap='', **kwargs):
    """Plot importance based on fitted trees.

    Parameters
    ----------
    booster : Booster, XGBModel or dict
        Booster or XGBModel instance, or dict taken by Booster.get_fscore()
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    grid : bool, Turn the axes grids on or off.  Default is True (On).
    importance_type : str, default "weight"
        How the importance is calculated: either "weight", "gain", or "cover"

        * "weight" is the number of times a feature appears in a tree
        * "gain" is the average gain of splits which use the feature
        * "cover" is the average coverage of splits which use the feature
          where coverage is defined as the number of samples affected by
          the split
    max_num_features : int, default None
        Maximum number of top features displayed on plot. If None, all
        features will be displayed.
    height : float, default 0.2
        Bar height, passed to ax.barh()
    xlim : tuple, default None
        Tuple passed to axes.xlim()
    ylim : tuple, default None
        Tuple passed to axes.ylim()
    title : str, default "Feature importance"
        Axes title. To disable, pass None.
    xlabel : str, default "F score"
        X axis title label. To disable, pass None.
    ylabel : str, default "Features"
        Y axis title label. To disable, pass None.
    show_values : bool, default True
        Show values on plot. To disable, pass False.
    fmap : str, default ''
        Path of the feature-map file forwarded to ``get_score`` so that the
        scores are keyed by the names in that file instead of f0, f1, ...
        Ignored when ``booster`` is already a dict.
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes
    """
    # TODO: move this to compat.py
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError('You must install matplotlib to plot importance')

    if isinstance(booster, XGBModel):
        # Forward fmap here as well, so a wrapped model gets the same
        # feature names as a bare Booster.
        importance = booster.get_booster().get_score(
            importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, Booster):
        # This is the patched call: fmap is passed through to get_score.
        importance = booster.get_score(
            importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, dict):
        importance = booster
    else:
        raise ValueError('tree must be Booster, XGBModel or dict instance')

    # NOTE(review): this excerpt stops here. The plotting code that follows
    # in xgboost's plotting.py is not shown; per the original note it is left
    # unchanged by this patch.


# Feature names in column order; the position in this list becomes the
# feature index written to the feature-map file.
features2 = ['core_id', 'what_id', 'where_id', 'extra_id',
             'category_group_id', 'chain_id', 'address_id']


def write_feature_map(path, features):
    """Write a feature-map file with one line per feature.

    Each line has the form ``<index>\\t<name>\\tq``, where ``index`` is the
    zero-based position of the name in ``features``.

    Parameters
    ----------
    path : str
        Destination file; it is created or overwritten.
    features : iterable of str
        Feature names, in column order.
    """
    # The context manager closes the file even if a write fails.
    with open(path, "w") as fmap_file:
        for index, name in enumerate(features):
            # feature type, use i for indicator and q for quantity
            fmap_file.write('{0}\t{1}\tq\n'.format(index, name))


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: {0} OUTPUT_FMAP_PATH".format(sys.argv[0]))

    write_feature_map(sys.argv[1], features2)
    print("Done!")


  1. 成功解决 将xgboost的plot_importance绘图时出现的f0、f1、f2、f3、f4、f5等改为对应特征的字段名

