
  • 1.GRU的原理
  • 2.GRU实战
    • 1.layer
    • 2.cell




import os

# Must be set before TensorFlow is imported so that the reduced log
# verbosity applies to the library's start-up messages as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Seed TensorFlow's global random generator.
tf.random.set_seed(22)
# This script is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')

# Number of samples per mini-batch, used by the training and test pipelines.
batchsz = 128
# Vocabulary size: keep only the most frequent words.
total_words = 10000
max_review_len = 80  # number of tokens kept per review after padding
embedding_len = 100  # size of each word-embedding vector

# Load the IMDB reviews with the vocabulary limited to `total_words` entries.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words)

# Bring every review to the same length.
# x_train: [b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(
    x_test, maxlen=max_review_len)

# Build the batched input pipelines; drop_remainder=True discards the final
# partial batch so that every batch holds exactly `batchsz` samples.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)


class MyRNN(keras.Model):
    """Sentiment classifier: embedding -> two stacked GRU layers -> dense.

    Uses the high-level ``layers.GRU`` API, which iterates over the time
    axis internally.
    """

    def __init__(self, units):
        """Create the sub-layers.

        :param units: size of the GRU hidden state (64 in ``main``).
        """
        super().__init__()

        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)

        # [b, 80, 100] => [b, units]
        # The first GRU returns the full sequence so the second one can
        # consume it; the second returns only its final output.
        #
        # unroll: Boolean (default False). If True, the network will be
        # unrolled, else a symbolic loop will be used. Unrolling can speed-up
        # a RNN, although it tends to be more memory-intensive. Unrolling is
        # only suitable for short sequences.
        #
        # SimpleRNN alternative that was tried here:
        #   layers.SimpleRNN(units, dropout=0.5, return_sequences=True, unroll=True)
        #   layers.SimpleRNN(units, dropout=0.5, unroll=True)
        self.rnn = keras.Sequential([
            layers.GRU(units, dropout=0.5, return_sequences=True, unroll=False),
            layers.GRU(units, dropout=0.5, unroll=False),
        ])

        # fc, [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        ``net(x)`` / ``net(x, training=True)`` runs in train mode,
        ``net(x, training=False)`` runs in test mode.

        :param inputs: integer word ids, [b, 80]
        :param training: train/test mode flag forwarded to the GRU stack
        :return: probability that each review is positive, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # rnn compute: [b, 80, 100] => [b, units]
        # Forward `training` explicitly so the GRU dropout mode does not
        # depend on implicit propagation.
        x = self.rnn(x, training=training)
        # out: [b, units] => [b, 1]
        x = self.outlayer(x)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob


def main():
    """Train the model for a few epochs, evaluate it, and print the time."""
    import time

    units = 64
    epochs = 4

    t0 = time.time()

    model = MyRNN(units)
    # The model already outputs probabilities (sigmoid in `call`), which
    # matches BinaryCrossentropy's default of expecting probabilities.
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['accuracy'],
                  experimental_run_tf_function=False)
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    model.evaluate(db_test)

    t1 = time.time()
    # Measurements recorded by the author:
    #   unroll=True:  LSTM 69.3 seconds, 83%;  GRU 100 seconds, 83.4%
    #   unroll=False: LSTM 23.71, 81.24;       GRU 23.05, 83.11
    # The observed results disagree with the official documentation.
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()


import os

# Must be set before TensorFlow is imported so that the reduced log
# verbosity applies to the library's start-up messages as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Seed TensorFlow's global random generator.
tf.random.set_seed(22)
# This script is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')

# Number of samples per mini-batch, used by the training and test pipelines.
batchsz = 128
# Vocabulary size: keep only the most frequent words.
total_words = 10000
max_review_len = 80  # number of tokens kept per review after padding
embedding_len = 100  # size of each word-embedding vector

# Load the IMDB reviews with the vocabulary limited to `total_words` entries.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words)

# Bring every review to the same length.
# x_train: [b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(
    x_test, maxlen=max_review_len)

# Build the batched input pipelines; drop_remainder=True discards the final
# partial batch so that every batch holds exactly `batchsz` samples.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)


class MyRNN(keras.Model):
    """Sentiment classifier: embedding -> two stacked GRU cells -> dense.

    Uses the low-level ``layers.GRUCell`` API: ``call`` steps through the
    time axis itself and threads the hidden state of each cell from one
    word to the next.
    """

    def __init__(self, units):
        """Create the sub-layers.

        :param units: size of each GRU cell's hidden state (64 in ``main``).
        """
        super().__init__()

        # Remembered so `call` can build zero initial states of shape
        # [b, units] for whatever batch size it receives.
        self.units = units

        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)

        # [b, 80, 100] , h_dim: units
        # Two stacked recurrent cells: cell0 feeds cell1 at every time step.
        # SimpleRNN alternative that was tried here:
        #   layers.SimpleRNNCell(units, dropout=0.5)
        #   layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell0 = layers.GRUCell(units, dropout=0.5)
        self.rnn_cell1 = layers.GRUCell(units, dropout=0.5)

        # fc, [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        ``net(x)`` / ``net(x, training=True)`` runs in train mode,
        ``net(x, training=False)`` runs in test mode.

        :param inputs: integer word ids, [b, 80]
        :param training: train/test mode flag forwarded to both GRU cells
        :return: probability that each review is positive, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Zero initial hidden states sized from the runtime batch dimension,
        # so the model is not tied to the module-level `batchsz`.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]

        # rnn cell compute
        # [b, 80, 100] => [b, units]
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # out0: [b, units]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # out1: [b, units]
            out1, state1 = self.rnn_cell1(out0, state1, training=training)

        # Only the output at the last time step feeds the classifier.
        # out: [b, units] => [b, 1]
        x = self.outlayer(out1)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob


def main():
    """Train the model for a few epochs, evaluate it, and print the time."""
    import time

    units = 64
    epochs = 4

    t0 = time.time()

    model = MyRNN(units)
    # The model already outputs probabilities (sigmoid in `call`), which
    # matches BinaryCrossentropy's default of expecting probabilities.
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['accuracy'],
                  experimental_run_tf_function=False)
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    model.evaluate(db_test)

    t1 = time.time()
    # Measurements recorded by the author:
    #   LSTM: 64.3 seconds, 83.4%
    #   GRU:  96.7s, 83.4%
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()


