About the Author

Su Gaosheng holds a master's degree in statistics from Southwestern University of Finance and Economics and now works at China Telecom, focusing on big-data analysis and modeling of the existing enterprise customer base. Research interest: machine learning. Favorite programming language: R, bar none.

E-mail: sugs01@outlook.com

Previous posts:

The XGBoost algorithm: a Kaggle case

The rxfastforest algorithm case of kaggle


Picking up where we left off: lightgbm algorithm case of kaggle (Part 1).

Dear readers, let's continue...

V. Second-Round Tuning

1. Tuning the weight parameter

grid_search <- expand.grid(
  learning_rate = .125,
  num_leaves = 600,
  max_bin = 30,
  min_data_in_bin = 64,
  feature_fraction = .64,
  min_sum_hessian = .004,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_weight_2 <- numeric(20)  # one slot per candidate weight multiplier 1..20

for (i in 1:20) {
  lgb_weight <- (lgb_tr$TARGET * i + 1) / sum(lgb_tr$TARGET * i + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[1, 'learning_rate'],
    num_leaves = grid_search[1, 'num_leaves'],
    max_bin = grid_search[1, 'max_bin'],
    min_data_in_bin = grid_search[1, 'min_data_in_bin'],
    feature_fraction = grid_search[1, 'feature_fraction'],
    min_sum_hessian = grid_search[1, 'min_sum_hessian'],
    lambda_l1 = grid_search[1, 'lambda_l1'],
    lambda_l2 = grid_search[1, 'lambda_l2'],
    drop_rate = grid_search[1, 'drop_rate'],
    max_drop = grid_search[1, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_weight_2[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

ggplot(data.frame(num = 1:length(perf_weight_2), perf = perf_weight_2), aes(x = num, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: the plot shows that AUC stabilizes once weight >= 2 and peaks at weight = 8.
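The optimum can also be read off the vector programmatically instead of eyeballing the smoothed curve (a minimal sketch; best_weight is my name, not the author's):

best_weight <- which.max(perf_weight_2)
best_weight          # 8, per the conclusion above
max(perf_weight_2)   # the corresponding CV AUC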

2. Tuning the learning_rate parameter

grid_search <- expand.grid(
  learning_rate = seq(.05, .5, .01),
  num_leaves = 600,
  max_bin = 30,
  min_data_in_bin = 64,
  feature_fraction = .64,
  min_sum_hessian = .004,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_learning_rate_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_learning_rate_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_learning_rate_1

ggplot(data = grid_search, aes(x = learning_rate, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: AUC peaks at learning_rate = .2.
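Steps 3 through 10 below repeat this exact skeleton, changing only the column being swept. As a hedged refactor (my sketch, not the author's code; cv_auc is a hypothetical helper), the repetition can be wrapped once and each step reduced to a single sapply call:

# run 10-fold CV for row i of a tuning grid; return the final validation AUC
# (assumes lgb_tr is in scope, as in the loops above)
cv_auc <- function(grid, i, weight = 8, nrounds = 300) {
  lgb_weight <- (lgb_tr$TARGET * weight + 1) / sum(lgb_tr$TARGET * weight + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  params <- c(list(objective = 'binary', metric = 'auc'), as.list(grid[i, ]))
  mod <- lgb.cv(params, data = lgb_train, nrounds = nrounds,
                stratified = TRUE, nfold = 10, num_threads = 2,
                early_stopping_rounds = 10)
  auc <- unlist(mod$record_evals$valid$auc$eval)
  auc[length(auc)]
}

# usage, e.g. for the num_leaves grid of step 3:
# perf <- sapply(1:nrow(grid_search), function(i) cv_auc(grid_search, i))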

3. Tuning the num_leaves parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = seq(50, 800, 50),
  max_bin = 30,
  min_data_in_bin = 64,
  feature_fraction = .64,
  min_sum_hessian = .004,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_num_leaves_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_num_leaves_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_num_leaves_1

ggplot(data = grid_search, aes(x = num_leaves, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: AUC peaks at num_leaves = 300.
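For intuition (my note, not the author's): num_leaves is LightGBM's main complexity knob under leaf-wise growth, and a depth-d full binary tree has 2^d leaves, so 300 leaves corresponds to trees between depth 8 and depth 9:

log2(300)  # ~8.23: between 2^8 = 256 and 2^9 = 512 leaves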

4. Tuning the max_bin parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = seq(30, 150, 10),
  min_data_in_bin = 64,
  feature_fraction = .64,
  min_sum_hessian = .004,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_max_bin_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_max_bin_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_max_bin_1

ggplot(data = grid_search, aes(x = max_bin, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: AUC peaks at max_bin = 120.

5. Tuning the min_data_in_bin parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = 120,
  min_data_in_bin = seq(20, 100, 5),
  feature_fraction = .64,
  min_sum_hessian = .004,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_min_data_in_bin_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_min_data_in_bin_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_min_data_in_bin_1

ggplot(data = grid_search, aes(x = min_data_in_bin, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: AUC peaks at min_data_in_bin = 20, the lower edge of the range searched.

6. Tuning the feature_fraction parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = 120,
  min_data_in_bin = 20,
  # as published, a single value; a sequence such as seq(.5, .9, .02)
  # is needed for the sweep and plot below to vary anything
  feature_fraction = .5,
  min_sum_hessian = .004,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_feature_fraction_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_feature_fraction_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_feature_fraction_1

ggplot(data = grid_search, aes(x = feature_fraction, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: AUC peaks at feature_fraction = .5, with .62 also performing well.

7. Tuning the min_sum_hessian parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = 120,
  min_data_in_bin = 20,
  feature_fraction = .5,
  # as published, a single value; the conclusion below implies a swept
  # sequence, e.g. seq(0, .01, .001)
  min_sum_hessian = 0,
  lambda_l1 = .002,
  lambda_l2 = .008,
  drop_rate = .3,
  max_drop = 5
)

perf_min_sum_hessian_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_min_sum_hessian_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_min_sum_hessian_1

ggplot(data = grid_search, aes(x = min_sum_hessian, y = perf)) +
  geom_point() +
  geom_smooth()

Conclusion: AUC is negatively correlated with min_sum_hessian, so keep min_sum_hessian = 0.

8. Tuning the lambda parameters

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = 120,
  min_data_in_bin = 20,
  feature_fraction = .5,
  min_sum_hessian = 0,
  lambda_l1 = seq(0, .01, .002),
  lambda_l2 = seq(0, .01, .002),
  drop_rate = .3,
  max_drop = 5
)

perf_lambda_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_lambda_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_lambda_1

ggplot(data = grid_search, aes(x = lambda_l1, y = perf)) +
  geom_point() +
  facet_wrap(~ lambda_l2, nrow = 5)

Conclusion: AUC is negatively correlated with the lambdas; take lambda_l1 = .002 and lambda_l2 = .01.
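Since the two penalties are swept jointly here, the best cell can also be read straight off the grid rather than from the faceted plot (a minimal sketch):

grid_search[which.max(grid_search$perf), c('lambda_l1', 'lambda_l2', 'perf')]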

9. Tuning the drop_rate parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = 120,
  min_data_in_bin = 20,
  feature_fraction = .5,
  min_sum_hessian = 0,
  lambda_l1 = .002,
  lambda_l2 = .01,
  drop_rate = seq(0, .5, .05),
  max_drop = 5
)

perf_drop_rate_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_drop_rate_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_drop_rate_1

ggplot(data = grid_search, aes(x = drop_rate, y = perf)) +
  geom_point()

Conclusion: AUC peaks at drop_rate = .3, unchanged from the first tuning round.
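One caveat worth flagging (my observation, not from the original post): in LightGBM, drop_rate and max_drop only take effect under the DART booster, and none of the params lists above set boosting = 'dart', so the default gbdt booster likely ignores these two sweeps. If DART is actually intended, it must be requested explicitly; a minimal sketch:

params <- list(
  objective = 'binary',
  metric = 'auc',
  boosting = 'dart',  # required for drop_rate / max_drop to have any effect
  drop_rate = .3,
  max_drop = 23
  # ... plus the remaining tuned parameters as above
)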

10. Tuning the max_drop parameter

grid_search <- expand.grid(
  learning_rate = .2,
  num_leaves = 300,
  max_bin = 120,
  min_data_in_bin = 20,
  feature_fraction = .5,
  min_sum_hessian = 0,
  lambda_l1 = .002,
  lambda_l2 = .01,
  drop_rate = .3,
  max_drop = seq(19, 29, 2)
)

perf_max_drop_1 <- numeric(length = nrow(grid_search))

for (i in 1:nrow(grid_search)) {
  lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
  lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    free_raw_data = FALSE,
    weight = lgb_weight
  )
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search[i, 'learning_rate'],
    num_leaves = grid_search[i, 'num_leaves'],
    max_bin = grid_search[i, 'max_bin'],
    min_data_in_bin = grid_search[i, 'min_data_in_bin'],
    feature_fraction = grid_search[i, 'feature_fraction'],
    min_sum_hessian = grid_search[i, 'min_sum_hessian'],
    lambda_l1 = grid_search[i, 'lambda_l1'],
    lambda_l2 = grid_search[i, 'lambda_l2'],
    drop_rate = grid_search[i, 'drop_rate'],
    max_drop = grid_search[i, 'max_drop']
  )
  # cross-validation
  lgb_tr_mod <- lgb.cv(
    params,
    data = lgb_train,
    nrounds = 300,
    stratified = TRUE,
    nfold = 10,
    num_threads = 2,
    early_stopping_rounds = 10
  )
  perf_max_drop_1[i] <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)[length(unlist(lgb_tr_mod$record_evals$valid$auc$eval))]
}

grid_search$perf <- perf_max_drop_1

ggplot(data = grid_search, aes(x = max_drop, y = perf)) +
  geom_point()

Conclusion: AUC peaks at max_drop = 23.
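Collecting the conclusions of this second round (together with weight = 8 from step 1) gives the working configuration carried into the ensembling stage below; this summary list is mine, assembled from the conclusions above:

final_params <- list(
  objective        = 'binary',
  metric           = 'auc',
  learning_rate    = .2,
  num_leaves       = 300,
  max_bin          = 120,
  min_data_in_bin  = 20,
  feature_fraction = .5,
  min_sum_hessian  = 0,
  lambda_l1        = .002,
  lambda_l2        = .01,
  drop_rate        = .3,
  max_drop         = 23
)
# class weights: (TARGET * 8 + 1), normalized to sum to 1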

VI. Ensemble Learning

1) Parameters

set.seed(1)

grid_search <- expand.grid(
  # note: as published this yields learning rates of 1.15-1.25, far above the
  # tuned .2; dividing by 1000 (.115-.125, near the first-round optimum of
  # .125) looks like the intent
  learning_rate = sample(115:125, 10, replace = FALSE) / 100,
  num_leaves = sample(250:350, 10, replace = FALSE),
  max_bin = sample(115:125, 5, replace = FALSE),
  min_data_in_bin = sample(18:22, replace = FALSE),  # no size given: a permutation of all five values
  feature_fraction = c(.5, .62),
  min_sum_hessian = 0,
  lambda_l1 = .002,
  lambda_l2 = c(.008, .009, .01),
  drop_rate = sample(126:134, 4, replace = FALSE) / 1000,
  max_drop = c(23, 27, 29)
)

# random search: keep 100 of the 180,000 grid combinations
sample_ind <- sample(dim(grid_search)[1], 100, replace = FALSE)

lgb.pred <- list()
grid_search2 <- grid_search[sample_ind, ]
rm(grid_search)

2) Weights

lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)

3) Training dataset

lgb_train <- lgb.Dataset(
  data = data.matrix(lgb_tr[, 1:137]),
  label = lgb_tr$TARGET,
  free_raw_data = FALSE,
  weight = lgb_weight
)

4) Training

# train one model per sampled parameter set
for (i in 1:nrow(grid_search2)) {
  # parameters
  params <- list(
    objective = 'binary',
    metric = 'auc',
    learning_rate = grid_search2[i, 'learning_rate'],
    num_leaves = grid_search2[i, 'num_leaves'],
    max_bin = grid_search2[i, 'max_bin'],
    min_data_in_bin = grid_search2[i, 'min_data_in_bin'],
    feature_fraction = grid_search2[i, 'feature_fraction'],
    min_sum_hessian = grid_search2[i, 'min_sum_hessian'],
    lambda_l1 = grid_search2[i, 'lambda_l1'],
    lambda_l2 = grid_search2[i, 'lambda_l2'],
    drop_rate = grid_search2[i, 'drop_rate'],
    max_drop = grid_search2[i, 'max_drop']
  )
  # model
  lgb_mod <- lightgbm(
    params = params,
    data = lgb_train,
    nrounds = 300,
    early_stopping_rounds = 10,
    num_threads = 2
  )
  # prediction
  lgb.pred[[i]] <- predict(lgb_mod, data.matrix(lgb_te))
}

5) Results

# column-bind the 100 prediction vectors; row means give the ensemble probability
lgb.pred2 <- matrix(unlist(lgb.pred), ncol = 100)
lgb.pred3 <- data.frame(prob1 = apply(lgb.pred2, 1, mean))

6) Output

write.csv(lgb.pred3, "C:/Users/Administrator/Documents/kaggle/scs_lgb/lgb.pred1.csv")
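Two details matter for an actual Kaggle upload (my note, not in the original): write.csv keeps row names by default, and the competition scorer expects an ID column alongside TARGET. A hedged sketch, assuming the test-set IDs were saved in a vector te_id when the raw files were read:

submission <- data.frame(ID = te_id, TARGET = lgb.pred3$prob1)  # te_id is hypothetical
write.csv(submission,
          "C:/Users/Administrator/Documents/kaggle/scs_lgb/submission.csv",
          row.names = FALSE)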

