python sklearn库 rnn_如何使用Tensorflow计算RNN和LSTM模型的AUC并生成ROC曲线？

我使用一个自定义函数 trainDNN 来运行 RNN 和 LSTM 模型：

import tensorflow as tf

from tensorflow.contrib.layers import fully_connected

import h5py

import time

from sklearn.utils import shuffle

def trainDNN(path, n_days, n_features, n_neurons,
             train_sequences, train_lengths, train_y,
             test_sequences, test_y, test_lengths,
             lstm=False, n_epochs=50, batch_size=256,
             learning_rate=0.0003, TRAIN_REC=8, TEST_REC=8):
    """Train a single-cell RNN or LSTM binary classifier and log it for TensorBoard.

    Builds a fresh TF1 graph (dynamic RNN -> fully connected layer with two
    outputs), trains it with Adam on mini-batches, periodically writes loss and
    accuracy summaries for the training batch and for the whole test set, saves
    a final checkpoint and reports the last test accuracy through ``log``.

    Args:
        path: Prefix under which ``tensorflow_logs/...`` is created. It is
            concatenated as a plain string, so it should end with a separator.
        n_days: Number of time steps per sequence (second input dimension).
        n_features: Number of features per time step (third input dimension).
        n_neurons: Number of units in the recurrent cell.
        train_sequences: Training inputs fed to a
            ``[None, n_days, n_features]`` float32 placeholder.
        train_lengths: Per-sequence lengths for the training inputs.
        train_y: Training labels; ``.ravel()`` is called on every batch, so this
            is presumably a NumPy array -- TODO confirm against the caller.
        test_sequences: Validation inputs, same layout as ``train_sequences``.
        test_y: Validation labels; flattened with ``.ravel()`` before feeding.
        test_lengths: Per-sequence lengths for the validation inputs.
        lstm: Use ``BasicLSTMCell`` when true, ``BasicRNNCell`` otherwise.
        n_epochs: Number of passes over the training data.
        batch_size: Mini-batch size; a trailing partial batch is dropped.
        learning_rate: Initial learning rate handed to Adam.
        TRAIN_REC: Write a training summary every ``TRAIN_REC`` iterations.
        TEST_REC: Write a validation summary every ``TEST_REC`` iterations.

    Returns:
        None. Results are written to the log directories, the checkpoint file
        and the ``log`` helper.
    """
    # we're doing binary classification
    n_outputs = 2

    # epsilon is Adam's numerical-stability term (added to the denominator of
    # the update); it does not control any learning-rate decay schedule.
    epsilon = 0.001

    # setup the graph
    tf.reset_default_graph()

    # inputs to the network
    X = tf.placeholder(tf.float32, [None, n_days, n_features])
    y = tf.placeholder(tf.int32, [None])
    seq_length = tf.placeholder(tf.int32, [None])

    # the network itself
    if lstm:
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)
    else:
        cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
    outputs, states = tf.nn.dynamic_rnn(
        cell, X, dtype=tf.float32, sequence_length=seq_length)

    # For the LSTM cell the final state is a tuple, so its last element is used;
    # for the basic RNN cell the state is used directly.
    # NOTE(review): fully_connected is called without activation_fn, so its
    # default activation is applied to the logits -- confirm this is intended.
    final_state = states[-1] if lstm else states
    logits = fully_connected(final_state, n_outputs)

    # the training process (minimize loss) including the training operation
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate, epsilon=epsilon)
    training_op = optimizer.minimize(loss)

    # hold onto the accuracy for the log writers
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # this saves the network for later querying; it currently only saves once,
    # after all epochs are complete
    saver = tf.train.Saver()

    # this is where we save the log files for tensorboard
    now = int(time.time())
    name = 'lstm' if lstm else 'rnn'
    root_logdir = path+"tensorflow_logs/{}/{}-{}/".format(name.upper(), name, now)
    train_logdir = "{}train".format(root_logdir)
    eval_logdir = "{}eval".format(root_logdir)
    print('train_logdir', train_logdir)
    print('eval_logdir', eval_logdir)

    # scalars that are written to the log files
    loss_summary = tf.summary.scalar('loss', loss)
    acc_summary = tf.summary.scalar('accuracy', accuracy)

    # summary operation and writer for the training data
    train_summary_op = tf.summary.merge([loss_summary, acc_summary])
    train_writer = tf.summary.FileWriter(train_logdir, tf.get_default_graph())

    # summary operation and writer for the validation data
    eval_summary_op = tf.summary.merge([loss_summary, acc_summary])
    eval_writer = tf.summary.FileWriter(eval_logdir, tf.get_default_graph())

    # initialize variables
    init = tf.global_variables_initializer()

    n_batches = len(train_sequences) // batch_size
    print(n_batches, 'batches of size', batch_size, n_epochs, 'epochs,', n_neurons, 'neurons')

    # The validation data is never shuffled, so its feed dict is built once.
    test_feed = {X: test_sequences, y: test_y.ravel(), seq_length: test_lengths}

    # Stays None when no epoch runs, so the final report can be skipped instead
    # of failing on an unbound name.
    acc_test = None

    try:
        with tf.Session() as sess:
            # actually run the initialization
            init.run()

            for epoch in range(n_epochs):
                # at the beginning of each epoch, shuffle the training data
                train_sequences, train_y, train_lengths = shuffle(
                    train_sequences, train_y, train_lengths)

                # Feed dict of the most recent training batch; stays None when
                # the training set is smaller than one batch.
                batch_feed = None

                for iteration in range(n_batches):
                    # extract the batch of training data for this iteration
                    start = iteration*batch_size
                    end = start+batch_size
                    batch_feed = {
                        X: train_sequences[start:end],
                        y: train_y[start:end].ravel(),
                        seq_length: train_lengths[start:end],
                    }
                    step = epoch * n_batches + iteration

                    # every TRAIN_REC steps, save a summary of training
                    # accuracy & loss
                    if iteration % TRAIN_REC == 0:
                        train_summary_str = train_summary_op.eval(
                            feed_dict=batch_feed)
                        train_writer.add_summary(train_summary_str, step)
                        # without this flush, tensorboard isn't always current
                        train_writer.flush()

                    # every TEST_REC steps, save a summary of validation
                    # accuracy & loss
                    # TODO: this runs all validation data at once. if
                    # validation is sufficiently large, this will fail. better
                    # would be to run validation in multiple batches and
                    # aggregate the accuracy & loss over those batches.
                    if iteration % TEST_REC == 0:
                        summary_str = eval_summary_op.eval(feed_dict=test_feed)
                        eval_writer.add_summary(summary_str, step)
                        # without this flush, tensorboard isn't always current
                        eval_writer.flush()

                    # run training. this is where the network learns.
                    sess.run(training_op, feed_dict=batch_feed)

                # after every epoch, calculate the accuracy of the last seen
                # training batch (there is none if n_batches == 0)
                acc_train = None
                if batch_feed is not None:
                    acc_train = accuracy.eval(feed_dict=batch_feed)

                # after each epoch, calculate the accuracy of the test data
                acc_test = accuracy.eval(feed_dict=test_feed)

                # print the training & validation accuracy to the console
                print(epoch, time.strftime('%m/%d %H:%M:%S'), "Accuracy train:", acc_train, "test:", acc_test)

            # save the model (for more training or inference) after all
            # training is complete
            saver.save(sess, root_logdir+"model_final.ckpt")
    finally:
        # close the writers even when training or evaluation raised
        train_writer.close()
        eval_writer.close()

    # Only report a score when at least one epoch produced one.
    if acc_test is not None:
        log(["{}-{} model score".format(name.upper(), now), percent(acc_test)])

上述函数在时序数据上训练 RNN 和 LSTM 模型，并输出二元分类的准确率。训练集和测试集的准确率都能打印出来，但我想知道如何为 RNN 和 LSTM 的二元分类计算 AUC 并绘制 ROC 曲线。

更新：

我使用以下脚本评估了逻辑和预测：

^{pr2}$

这会返回 probs，它基本上是一个矩阵：行数等于测试样本数，两列分别是两个类别各自的概率。predictions 对象则表示每个预测是否正确。

我对此持怀疑态度，因为 ReLU 输出得到的概率分数不像 sigmoid 的分数那样直观：它不再基于区分正负预测的默认 0.5 阈值，而是取两个类别中概率更大的那个作为预测。真的可以从 ReLU 输出生成 ROC 曲线吗？

python sklearn库 rnn_如何使用Tensorflow计算RNN和LSTM模型的AUC并生成ROC曲线？相关推荐

Python: sklearn库——数据预处理
Python: sklearn库 -- 数据预处理数据集转换之预处理数据: 将输入的数据转化成机器学习算法可以使用的数据.包含特征提取和标准化. 原因:数据集的标准化(服从均 ...
Python——sklearn库的安装
Python--sklearn库的安装做个笔记,时间久了,都忘了sklearn库的标准名字--scikit-learn.所以,不要用 pip install sklearn 应该是 pip inst ...
分类模型-评估指标（2）：ROC曲线、 AUC值（ROC曲线下的面积）【只能用于二分类模型的评价】【不受类别数量不平衡的影响；不受阈值取值的影响】【AUC的计算方式：统计所有正负样本对中的正序对】
评价二值分类器的指标很多,比如precision.recall.F1 score.P-R曲线等.但这些指标或多或少只能反映模型在某一方面的性能.相比而言,ROC曲线则有很多优点,经常作为评估二值分类器 ...
逻辑回归模型混淆矩阵评价与ROC曲线最佳阈值的好处（附Accuracy,TPR,FPR计算函数）
一.得到阈值在0.5和0.8下模型的混淆矩阵 y_prob=result.predict(X_test)#得到概率值y_predict1=pd.DataFrame(y_prob>0.5).ast ...
Python Sklearn库源码学习--kmeans
前言: 分析体检数据希望不拘泥于Sklearn库中已有的聚类算法,想着改一下Kmeans算法.本着学习的目的,现在开始查看sklearn的源代码.希望能够写成一个通用的包. 有必要先交代一下我使用的p ...
使用FDDB人脸样本检测库，测试自己的人脸检测算法性能并生成ROC曲线。
一,说明及环境网上有关FDDB人脸检测库的使用以及ROC文件生成的文章太少,并且都无法检测opencv中自带的人脸检测算法.最近工作的原因,需要用到FDDB库检测我们自己的人脸检测算法性能.所以认 ...
【Python 标准库学习】数据科学计算库 — math
欢迎加入 Python 官方文档翻译团队:https://www.transifex.com/python-doc/ math 模块官方文档:https://docs.python.org/3/lib ...
python扩展库不是用于科学计算的有_有哪些科学计算的开源库
2017-03-19 回答比较火爆的android开源库有以下: 1.volley 主要有以下模块: (1) json,图像等的异步下载: (2) 网络请求的排序(scheduling) (3) 网 ...
python sklearn库silhouette_score轮廓系数接口调用
官方接口说明:https://scikit-learn.org/stable/modules/generated/sklearn.metrics.silhouette_score.html#sklea ...

python sklearn库 rnn_如何使用Tensorflow计算RNN和LSTM模型的AUC并生成ROC曲线？

python sklearn库 rnn_如何使用Tensorflow计算RNN和LSTM模型的AUC并生成ROC曲线？相关推荐

最新文章

热门文章