I use a custom, predefined function, trainDNN, to run the RNN and LSTM models:

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
import h5py
import time
from sklearn.utils import shuffle

def trainDNN(path, n_days, n_features, n_neurons,
             train_sequences, train_lengths, train_y,
             test_sequences, test_y, test_lengths,
             lstm=False, n_epochs=50, batch_size=256,
             learning_rate=0.0003, TRAIN_REC=8, TEST_REC=8):

    # we're doing binary classification
    n_outputs = 2

    # learning_rate is only the initial step size; the Adam optimizer
    # adapts the effective step automatically, and epsilon influences
    # how that adaptation behaves
    # learning_rate = 0.0001
    epsilon = 0.001

    # set up the graph
    tf.reset_default_graph()

    # inputs to the network
    X = tf.placeholder(tf.float32, [None, n_days, n_features])
    y = tf.placeholder(tf.int32, [None])
    seq_length = tf.placeholder(tf.int32, [None])

    # the network itself
    cell = (tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons) if lstm
            else tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))
    outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32,
                                        sequence_length=seq_length)
    # for the LSTM, states is a (c, h) tuple, so states[-1] is the hidden
    # state h; note that fully_connected applies ReLU activation by default
    logits = fully_connected(states[-1] if lstm else states, n_outputs)

    # the training process (minimize loss), including the training op itself
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=epsilon)
    training_op = optimizer.minimize(loss)

    # hold onto the accuracy for the log writer
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # this saves the network for later querying. currently it only saves
    # after all epochs are complete, but we could, for example, save
    # checkpoints on a regular basis
    saver = tf.train.Saver()

    # this is where we save the log files for TensorBoard
    now = int(time.time())
    name = 'lstm' if lstm else 'rnn'
    root_logdir = path + "tensorflow_logs/{}/{}-{}/".format(name.upper(), name, now)
    train_logdir = "{}train".format(root_logdir)
    eval_logdir = "{}eval".format(root_logdir)
    print('train_logdir', train_logdir)
    print('eval_logdir', eval_logdir)

    # scalars that are written to the log files
    loss_summary = tf.summary.scalar('loss', loss)
    acc_summary = tf.summary.scalar('accuracy', accuracy)

    # summary operation and writer for the training data
    train_summary_op = tf.summary.merge([loss_summary, acc_summary])
    train_writer = tf.summary.FileWriter(train_logdir, tf.get_default_graph())

    # summary operation and writer for the validation data
    eval_summary_op = tf.summary.merge([loss_summary, acc_summary])
    eval_writer = tf.summary.FileWriter(eval_logdir, tf.get_default_graph())

    # initialize variables
    init = tf.global_variables_initializer()

    n_batches = len(train_sequences) // batch_size
    print(n_batches, 'batches of size', batch_size, n_epochs, 'epochs,', n_neurons, 'neurons')

    with tf.Session() as sess:
        # actually run the initialization
        init.run()
        start_time = time.time()

        for epoch in range(n_epochs):
            # at the beginning of each epoch, shuffle the training data
            train_sequences, train_y, train_lengths = shuffle(
                train_sequences, train_y, train_lengths)

            for iteration in range(n_batches):
                # extract the batch of training data for this iteration
                start = iteration * batch_size
                end = start + batch_size
                X_batch = train_sequences[start:end]
                y_batch = train_y[start:end].ravel()
                seq_length_batch = train_lengths[start:end]

                # every TRAIN_REC steps, save a summary of training accuracy & loss
                if iteration % TRAIN_REC == 0:
                    train_summary_str = train_summary_op.eval(
                        feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch}
                    )
                    step = epoch * n_batches + iteration
                    train_writer.add_summary(train_summary_str, step)
                    # without this flush, TensorBoard isn't always current
                    train_writer.flush()

                # every TEST_REC steps, save a summary of validation accuracy & loss.
                # TODO: this runs all the validation data at once. if the validation
                # set is sufficiently large, this will fail. better would be to
                # pick a random subset of the validation data or, better still, run
                # validation in multiple batches and save the validation accuracy
                # & loss based on the aggregation of all of the validation batches.
                if iteration % TEST_REC == 0:
                    summary_str = eval_summary_op.eval(
                        feed_dict={X: test_sequences, y: test_y.ravel(),
                                   seq_length: test_lengths}
                    )
                    step = epoch * n_batches + iteration
                    eval_writer.add_summary(summary_str, step)
                    # without this flush, TensorBoard isn't always current
                    eval_writer.flush()

                # run training. this is where the network learns.
                sess.run(
                    training_op,
                    feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch}
                )

            # after every epoch, calculate the accuracy of the last seen training batch
            acc_train = accuracy.eval(
                feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch}
            )

            # after each epoch, calculate the accuracy of the test data
            acc_test = accuracy.eval(
                feed_dict={X: test_sequences, y: test_y.ravel(), seq_length: test_lengths}
            )

            # print the training & validation accuracy to the console
            print(epoch, time.strftime('%m/%d %H:%M:%S'),
                  "Accuracy train:", acc_train, "test:", acc_test)

        # save the model (for more training or inference) after all
        # training is complete
        save_path = saver.save(sess, root_logdir + "model_final.ckpt")

        # close the writers
        train_writer.close()
        eval_writer.close()

        # log() and percent() are helper functions defined elsewhere
        log(["{}-{} model score".format(name.upper(), now), percent(acc_test)])
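The TODO in the snippet flags that validation feeds the entire test set through a single feed_dict. A minimal sketch of the batched alternative it suggests (eval_in_batches is a hypothetical helper, assumed to be defined inside trainDNN where X, y, seq_length, loss, accuracy, and sess are in scope):

def eval_in_batches(sequences, labels, lengths, eval_batch_size=256):
    # size-weighted aggregation so the final ragged batch doesn't skew the average
    total_loss, total_acc, n = 0.0, 0.0, 0
    for start in range(0, len(sequences), eval_batch_size):
        end = start + eval_batch_size
        feed = {X: sequences[start:end],
                y: labels[start:end].ravel(),
                seq_length: lengths[start:end]}
        batch_loss, batch_acc = sess.run([loss, accuracy], feed_dict=feed)
        size = len(sequences[start:end])
        total_loss += batch_loss * size
        total_acc += batch_acc * size
        n += size
    return total_loss / n, total_acc / n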

The function above trains RNN and LSTM models on time-series data and prints binary classification scores for both the training and the test sets. But I would like to know how to calculate AUC and generate a ROC curve for the binary classification done by the RNN and LSTM models.
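For reference, a minimal sketch of the usual scikit-learn route, assuming you already have the true test labels and an (n_test, 2) array of class probabilities such as the probs described in the update below (matplotlib is used for the plot; neither appears in the original function):

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# assumed inputs: test_y (true 0/1 labels) and probs, an (n_test, 2)
# array where column 1 holds the predicted probability of class 1
y_true = test_y.ravel()
y_score = probs[:, 1]

# roc_curve sweeps all thresholds; roc_auc_score integrates the curve
fpr, tpr, thresholds = roc_curve(y_true, y_score)
auc = roc_auc_score(y_true, y_score)

plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(auc))
plt.plot([0, 1], [0, 1], linestyle='--')  # chance diagonal
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.show()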

Update:

I evaluated the logits and predictions with the following script:

[the evaluation script was lost from the original post]

This returns probs, which is essentially a matrix with as many rows as there are test cases and 2 columns holding the probability of each of the two binary classes. The predictions object holds whether each prediction was correct.
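The script itself did not survive the post's formatting, but judging from that description it presumably did something like the following inside the training session, with probs coming from a softmax over the logits and predictions from the correct op (a guess at the lost snippet, not the original code):

probs = tf.nn.softmax(logits).eval(
    feed_dict={X: test_sequences, seq_length: test_lengths})
predictions = correct.eval(
    feed_dict={X: test_sequences, y: test_y.ravel(), seq_length: test_lengths})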

I'm skeptical about this, because probability scores coming out of a ReLU aren't as intuitive as those from a sigmoid: they are no longer based on the default 0.5 cutoff between positive and negative predictions. Instead, the prediction is whichever class has the greater probability. Can a ROC curve really be generated from ReLU outputs?
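For what it's worth, a ROC curve never assumes a 0.5 cutoff: roc_curve sweeps every possible threshold over the scores itself, so all it needs is a score that ranks positive cases above negative ones, and probs[:, 1] provides exactly that. The caveat worth flagging (an observation about the code above, not something from the original post) is that tf.contrib.layers.fully_connected applies ReLU by default, which clips the pre-softmax outputs at zero and can leave many test cases with tied scores; passing activation_fn=None produces proper, untruncated logits:

# linear output layer: let the softmax cross-entropy see raw logits
logits = fully_connected(states[-1] if lstm else states, n_outputs,
                         activation_fn=None)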
