


# -*- coding: utf-8 -*-
import time
import urllib

import numpy as np
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression


def main(data_path=r'D:\sample.csv'):
    """Load a CSV data set, rank its features and fit a logistic regression.

    The file is read as a comma-delimited numeric matrix. Columns 0-3 are
    taken as the feature attributes and column 4 as the target. The function
    prints, in order: the ExtraTreesClassifier feature importances, the
    fitted LogisticRegression estimator, a classification report and a
    confusion matrix. The report and matrix are computed on the same rows
    the model was trained on, so they describe training fit only.

    Args:
        data_path: Path of the CSV file to load. Defaults to the location
            the script originally hard-coded (a raw string, so the
            backslash is not parsed as an escape sequence).
    """
    # --- Data loading ---
    # Load the CSV file as a numpy matrix.
    dataset = np.loadtxt(data_path, delimiter=",")
    # Separate the data from the target attribute.
    X = dataset[:, 0:4]
    y = dataset[:, 4]

    # --- Data normalization / standardization ---
    # NOTE(review): neither result is consumed below; both models are fit on
    # the raw X. Kept so the script still demonstrates the two calls.
    # Normalize the data attributes.
    normalized_X = preprocessing.normalize(X)
    # Standardize the data attributes.
    standardized_X = preprocessing.scale(X)

    # --- Feature selection ---
    # Alternative kept for reference (recursive feature elimination):
    #   model = LogisticRegression()
    #   rfe = RFE(model, 4)
    #   rfe = rfe.fit(X, y)
    #   print(rfe.support_)
    #   print(rfe.ranking_)
    forest = ExtraTreesClassifier()
    forest.fit(X, y)
    # Display the relative importance of each attribute.
    print(forest.feature_importances_)

    # --- Model training ---
    model = LogisticRegression()
    model.fit(X, y)
    print(model)

    # --- Model prediction ---
    expected = y
    predicted = model.predict(X)
    # Summarize the fit of the model.
    print(metrics.classification_report(expected, predicted))
    print(metrics.confusion_matrix(expected, predicted))


# Script entry point: run the pipeline and report the elapsed time.
if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    start = time.perf_counter()
    main()
    end = time.perf_counter()
    print('finish all in %s' % (end - start))




# Sample output from one run of this script:
#
# [ 0.15453444  0.00727297  0.63061708  0.2075755 ]
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
#           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
#           verbose=0, warm_start=False)
#              precision    recall  f1-score   support
#
#         0.0       0.66      0.87      0.75    268498
#         1.0       0.66      0.35      0.45    188407
#
# avg / total       0.66      0.66      0.63    456905
#
# [[234680  33818]
#  [123182  65225]]
# finish all in 12.8994016037


