TensorFlow练习25: 使用深度学习做阅读理解+完形填空
记的在学生时代,英语考试有这么一种类型的题,叫:阅读理解。首先让你读一段洋文材料,然后回答一些基于这个洋文材料提的问题。
我先给你出一道阅读理解
Big Panda learned to code when he was 21. He lives in China, has no life, and feels like a big loser. But here is one thing Panda wants you to remember… it's never too late! You can do anything if you put your heart into it!
____ is the loser.(下划线处该填什么呢?)
我出的这道填空题,对人来说轻而易举,但是要让机器回答就很难了。机器阅读和理解人类语言是非常有挑战性的。
本帖就使用TensorFlow练习一个阅读理解,看看准确率能到什么程度。
使用的数据集
- https://research.fb.com/projects/babi/
- http://cs.nyu.edu/~kcho/DMQA/
本帖只使用“非死不可”(Facebook)提供的《Children’s Book Test》数据集。
数据预处理
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
import re
import random
import ast
import itertools
import pickle
import numpy as np
# Paths to the CBT (Children's Book Test) named-entity split.
train_data_file = './CBTest/data/cbtest_NE_train.txt'
valid_data_file = './CBTest/data/cbtest_NE_valid_2000ex.txt'
def _tokenize(text):
    """Split *text* into word and punctuation tokens.

    Keeps non-word delimiter runs as their own tokens and drops
    pure-whitespace fragments. Note: r'(\W+)' matches the exact same
    spans as the original '(\W+)+' (the outer + was redundant), but the
    raw string avoids the invalid-escape-sequence warning.
    """
    return [t.strip() for t in re.split(r'(\W+)', text) if t.strip()]


def preprocess_data(data_file, out_file):
    """Parse a CBT data file into (context, question, answer) samples.

    CBT format: each story block is numbered context lines followed by a
    numbered question line of the form ``question\tanswer\t\tcandidates``,
    with a blank line separating blocks.

    Writes one ``str([context_tokens, question_tokens, answer])`` sample
    per line to *out_file*, in shuffled order, and prints the number of
    samples (same output format as before).
    """
    stories = []  # (context_sentences, question_tokens, answer) triples
    with open(data_file) as f:
        story = []
        for line in f:
            line = line.strip()
            if not line:
                # Blank line: a new story block starts next.
                story = []
                continue
            # Drop the leading line number.
            _, line = line.split(' ', 1)
            if not line:
                continue
            if '\t' in line:
                # Question line: question, answer, (empty), candidates.
                q, a, _, _candidates = line.split('\t')
                stories.append((story, _tokenize(q), a))
            else:
                story.append(_tokenize(line))

    # Flatten each story's context sentences into one token list.
    samples = []
    for context_sentences, question, answer in stories:
        content = []
        for sentence in context_sentences:
            content += sentence
        samples.append([content, question, answer])

    random.shuffle(samples)
    print(len(samples))
    with open(out_file, "w") as f:
        for sample in samples:
            f.write(str(sample))
            f.write('\n')
# Generate shuffled token samples for both splits.
preprocess_data(train_data_file, 'train.data')
preprocess_data(valid_data_file, 'valid.data')
# Build the vocabulary.
def read_data(data_file):
    """Load samples written by preprocess_data: one Python-literal list
    per line, parsed back with ast.literal_eval."""
    with open(data_file) as f:
        return [ast.literal_eval(row.strip()) for row in f]
# Combine train and validation samples to size the model inputs.
stories = read_data('train.data') + read_data('valid.data')
# Longest context and longest question (in tokens) across all samples.
content_length = max([len(s) for s, _, _ in stories])
question_length = max([len(q) for _, q, _ in stories])
print(content_length, question_length)
# Vocabulary over contexts, questions and answers. Index 0 is reserved
# for padding, so real words are numbered from 1 and vocab_size is
# len(vocab) + 1.
vocab = sorted(set(itertools.chain(*(story + q + [answer] for story, q, answer in stories))))
vocab_size = len(vocab) + 1
print(vocab_size)
word2idx = dict((w, i + 1) for i,w in enumerate(vocab))
pickle.dump((word2idx, content_length, question_length, vocab_size), open('vocab.data', "wb"))
# Sequence padding helper (adapted from Keras).
def pad_sequences(sequences, maxlen=None, dtype='int32',
                  padding='post', truncating='post', value=0.):
    """Pad/truncate *sequences* to a common length.

    Returns an array of shape (len(sequences), maxlen) + sample_shape
    filled with *value* where a sequence is shorter than *maxlen*.
    If *maxlen* is None it defaults to the longest sequence. *padding*
    and *truncating* are each 'pre' or 'post' (note: both default to
    'post' here, unlike upstream Keras).

    Raises ValueError for an unknown padding/truncating mode or a
    sequence whose trailing shape disagrees with the first non-empty one.
    """
    if maxlen is None:
        maxlen = max(len(seq) for seq in sequences)
    # Trailing per-element shape comes from the first non-empty sequence;
    # consistency is enforced inside the copy loop below.
    sample_shape = next(
        (np.asarray(seq).shape[1:] for seq in sequences if len(seq) > 0),
        tuple())
    out = (np.ones((len(sequences), maxlen) + sample_shape) * value).astype(dtype)
    for row, seq in enumerate(sequences):
        if len(seq) == 0:
            continue  # empty sequence: leave the row as padding
        if truncating == 'pre':
            kept = seq[-maxlen:]
        elif truncating == 'post':
            kept = seq[:maxlen]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)
        kept = np.asarray(kept, dtype=dtype)
        if kept.shape[1:] != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (kept.shape[1:], row, sample_shape))
        if padding == 'post':
            out[row, :len(kept)] = kept
        elif padding == 'pre':
            out[row, -len(kept):] = kept
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
    return out
# Convert token samples to fixed-length index vectors.
def to_vector(data_file, output_file):
    """Map each sample in *data_file* to word indices, pad contexts and
    questions to the global lengths stored in vocab.data, and write one
    ``str([context, question, [answer]])`` line to *output_file*."""
    with open('vocab.data', "rb") as vf:
        word2idx, content_length, question_length, _ = pickle.load(vf)
    contexts, questions, answers = [], [], []
    with open(data_file) as f_i:
        for raw in f_i:
            sample = ast.literal_eval(raw.strip())
            contexts.append([word2idx[w] for w in sample[0]])
            questions.append([word2idx[w] for w in sample[1]])
            answers.append([word2idx[sample[2]]])
    contexts = pad_sequences(contexts, content_length)
    questions = pad_sequences(questions, question_length)
    with open(output_file, "w") as f_o:
        for i in range(len(contexts)):
            f_o.write(str([contexts[i].tolist(), questions[i].tolist(), answers[i]]))
            f_o.write('\n')
# Vectorize both splits using the shared vocab.data lookup table.
to_vector('train.data', 'train.vec')
to_vector('valid.data', 'valid.vec')
"""
# to_word
word2idx, content_length, question_length, _ = pickle.load(open('vocab.data', "rb"))
def get_value(dic,value):
for name in dic:
if dic[name] == value:
return name
with open('train.vec') as f:
for line in f:
line = ast.literal_eval(line.strip())
for word in line[0]:
print(get_value(word2idx, word))
"""
|
生成的文件:vocab.data词汇表、train.vec、valid.vec数据的向量表示。
训练
![TensorFlow练习25: 使用深度学习做阅读理解+完形填空](http://blog.topspeedsnail.com/wp-content/uploads/2017/01/%E5%B1%8F%E5%B9%95%E5%BF%AB%E7%85%A7-2017-01-13-%E4%B8%8B%E5%8D%889.39.59.png)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
import tensorflow as tf
import pickle
import numpy as np
import ast
from collections import defaultdict
# Vectorized data produced by the preprocessing script.
train_data = 'train.vec'
valid_data = 'valid.vec'
# Vocabulary lookup plus the fixed context/question lengths and
# vocabulary size (padding index 0 included).
word2idx, content_length, question_length, vocab_size = pickle.load(open('vocab.data', "rb"))
print(content_length, question_length, vocab_size)
batch_size = 64
# Kept open for the whole run; get_next_batch() reads it sequentially
# and rewinds at EOF.
train_file = open(train_data)
def get_next_batch():
    """Return the next (contexts, questions, answers) training batch.

    Reads batch_size samples from the module-level ``train_file``,
    treating it as a circular stream: on EOF it rewinds and keeps
    filling the batch. Answers are unwrapped from their one-element
    lists. Raises ValueError if the training file holds no samples.

    The original used an obscure ``for line in f: ...; break`` to read
    one line, and recursed at EOF — discarding any partially built batch
    and risking RecursionError when the file has fewer than batch_size
    lines.
    """
    X, Q, A = [], [], []
    while len(X) < batch_size:
        raw = train_file.readline()
        if not raw:
            # EOF: wrap to the beginning and continue filling the batch.
            train_file.seek(0)
            raw = train_file.readline()
            if not raw:
                raise ValueError('no training samples in %r' % train_data)
        sample = ast.literal_eval(raw.strip())
        X.append(sample[0])
        Q.append(sample[1])
        A.append(sample[2][0])
    return X, Q, A
def get_test_batch():
    """Load the whole validation split as parallel lists.

    Returns (contexts, questions, answers); answers are scalar word
    indices, unwrapped from their one-element lists.
    """
    with open(valid_data) as f:
        samples = [ast.literal_eval(raw.strip()) for raw in f]
    contexts = [s[0] for s in samples]
    questions = [s[1] for s in samples]
    answers = [s[2][0] for s in samples]
    return contexts, questions, answers
# Graph inputs: token-index matrices for the passage and the question,
# plus each sample's answer word index.
X = tf.placeholder(tf.int32, [batch_size, content_length])  # passage (token indices)
Q = tf.placeholder(tf.int32, [batch_size, question_length])  # question (token indices)
A = tf.placeholder(tf.int32, [batch_size])  # answer word index
# Dropout keep-probability (0.7 while training, 1.0 at eval time).
keep_prob = tf.placeholder(tf.float32)
def glimpse(weights, bias, encodings, inputs):
    """One attention "glimpse": score each time step of *encodings*
    against a linear projection of *inputs*, softmax the scores, and
    return (attention_weights, attention-weighted sum of encodings).

    NOTE(review): written against a pre-1.0 TensorFlow API —
    tf.batch_matmul was removed in TF 1.0.
    """
    weights = tf.nn.dropout(weights, keep_prob)
    inputs = tf.nn.dropout(inputs, keep_prob)
    # Project the inference state into encoding space: (batch, 2*enc_dim).
    attention = tf.transpose(tf.matmul(weights, tf.transpose(inputs)) + bias)
    # Dot every time-step encoding with the projected state: (batch, len, 1).
    attention = tf.batch_matmul(encodings, tf.expand_dims(attention, -1))
    attention = tf.nn.softmax(tf.squeeze(attention, -1))
    # The glimpse is the attention-weighted sum over time steps.
    return attention, tf.reduce_sum(tf.expand_dims(attention, -1) * encodings, 1)
def neural_attention(embedding_dim=384, encoding_dim=128):
    """Build the iterative alternating attention graph over the global
    X/Q placeholders and return the per-token document attention,
    masked to real (non-padding) tokens.

    NOTE(review): the source lost its indentation when scraped; the
    nesting below is reconstructed (return assumed at function level,
    after the attention loop) — confirm against the original post.
    """
    embeddings = tf.Variable(tf.random_normal([vocab_size, embedding_dim], stddev=0.22), dtype=tf.float32)
    tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-4), [embeddings])
    with tf.variable_scope('encode'):
        with tf.variable_scope('X'):
            # Sequence length = count of non-zero (non-padding) indices.
            X_lens = tf.reduce_sum(tf.sign(tf.abs(X)), 1)
            embedded_X = tf.nn.embedding_lookup(embeddings, X)
            encoded_X = tf.nn.dropout(embedded_X, keep_prob)
            gru_cell = tf.nn.rnn_cell.GRUCell(encoding_dim)
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_cell, gru_cell, encoded_X, sequence_length=X_lens, dtype=tf.float32, swap_memory=True)
            # Concatenate fw/bw outputs: (batch, len, 2*encoding_dim).
            encoded_X = tf.concat(2, outputs)
        with tf.variable_scope('Q'):
            Q_lens = tf.reduce_sum(tf.sign(tf.abs(Q)), 1)
            embedded_Q = tf.nn.embedding_lookup(embeddings, Q)
            encoded_Q = tf.nn.dropout(embedded_Q, keep_prob)
            gru_cell = tf.nn.rnn_cell.GRUCell(encoding_dim)
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_cell, gru_cell, encoded_Q, sequence_length=Q_lens, dtype=tf.float32, swap_memory=True)
            encoded_Q = tf.concat(2, outputs)
    # Glimpse projections: W_q/b_q score the question encodings against
    # the 4*enc-dim inference state; W_d/b_d score the document encodings
    # against [state, q_glimpse] (6*enc-dim).
    W_q = tf.Variable(tf.random_normal([2*encoding_dim, 4*encoding_dim], stddev=0.22), dtype=tf.float32)
    b_q = tf.Variable(tf.random_normal([2*encoding_dim, 1], stddev=0.22), dtype=tf.float32)
    W_d = tf.Variable(tf.random_normal([2*encoding_dim, 6*encoding_dim], stddev=0.22), dtype=tf.float32)
    b_d = tf.Variable(tf.random_normal([2*encoding_dim, 1], stddev=0.22), dtype=tf.float32)
    # Gates over [state, q_glimpse, d_glimpse, q*d] (10*enc-dim total).
    g_q = tf.Variable(tf.random_normal([10*encoding_dim, 2*encoding_dim], stddev=0.22), dtype=tf.float32)
    g_d = tf.Variable(tf.random_normal([10*encoding_dim, 2*encoding_dim], stddev=0.22), dtype=tf.float32)
    with tf.variable_scope('attend') as scope:
        infer_gru = tf.nn.rnn_cell.GRUCell(4*encoding_dim)
        infer_state = infer_gru.zero_state(batch_size, tf.float32)
        # 8 alternating question/document glimpses, GRU variables reused
        # after the first iteration.
        for iter_step in range(8):
            if iter_step > 0:
                scope.reuse_variables()
            _, q_glimpse = glimpse(W_q, b_q, encoded_Q, infer_state)
            d_attention, d_glimpse = glimpse(W_d, b_d, encoded_X, tf.concat_v2([infer_state, q_glimpse], 1))
            gate_concat = tf.concat_v2([infer_state, q_glimpse, d_glimpse, q_glimpse * d_glimpse], 1)
            r_d = tf.sigmoid(tf.matmul(gate_concat, g_d))
            r_d = tf.nn.dropout(r_d, keep_prob)
            r_q = tf.sigmoid(tf.matmul(gate_concat, g_q))
            r_q = tf.nn.dropout(r_q, keep_prob)
            combined_gated_glimpse = tf.concat_v2([r_q * q_glimpse, r_d * d_glimpse], 1)
            _, infer_state = infer_gru(combined_gated_glimpse, infer_state)
    # Zero out attention on padding positions; d_attention is the
    # document attention from the final iteration.
    return tf.to_float(tf.sign(tf.abs(X))) * d_attention
def train_neural_attention():
    """Train the attention model, checkpointing and reporting validation
    accuracy (on one batch) every 1000 steps.

    NOTE(review): indentation was lost in the scraped source; the
    checkpoint/accuracy section is assumed to sit inside the
    ``step % 1000 == 0`` branch (per the comment above it) — confirm
    against the original post.
    """
    X_attentions = neural_attention()
    # Loss: attention mass on positions whose token equals the answer;
    # maximize its log (epsilon guards against log(0)).
    loss = -tf.reduce_mean(tf.log(tf.reduce_sum(tf.to_float(tf.equal(tf.expand_dims(A, -1), X)) * X_attentions, 1) + tf.constant(0.00001)))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    grads_and_vars = optimizer.compute_gradients(loss)
    # Clip each gradient to norm 5 for stability.
    capped_grads_and_vars = [(tf.clip_by_norm(g, 5), v) for g,v in grads_and_vars]
    train_op = optimizer.apply_gradients(capped_grads_and_vars)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # writer = tf.summary.FileWriter()
        # Restore the previous checkpoint from the working dir, if any.
        ckpt = tf.train.get_checkpoint_state('.')
        if ckpt != None:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("没找到模型")
        for step in range(20000):
            train_x, train_q, train_a = get_next_batch()
            loss_, _ = sess.run([loss, train_op], feed_dict={X:train_x, Q:train_q, A:train_a, keep_prob:0.7})
            print(loss_)
            # Save the model and compute accuracy every 1000 steps.
            if step % 1000 == 0:
                path = saver.save(sess, 'machine_reading.model', global_step=step)
                print(path)
                # Evaluate on the first batch_size validation samples.
                test_x, test_q, test_a = get_test_batch()
                test_x, test_q, test_a = np.array(test_x[:batch_size]), np.array(test_q[:batch_size]), np.array(test_a[:batch_size])
                attentions = sess.run(X_attentions, feed_dict={X:test_x, Q:test_q, keep_prob:1.})
                correct_count = 0
                for x in range(test_x.shape[0]):
                    # Sum attention per distinct word; the guess is the
                    # word with the largest total attention.
                    probs = defaultdict(int)
                    for idx, word in enumerate(test_x[x,:]):
                        probs[word] += attentions[x, idx]
                    guess = max(probs, key=probs.get)
                    if guess == test_a[x]:
                        correct_count += 1
                print(correct_count / test_x.shape[0])
train_neural_attention()
|
我只想说,这个东西比我水平高!至少在速度上能甩我十条大街。
- Attention-over-Attention Neural Networks for Reading Comprehension
- Iterative Alternating Neural Attention for Machine Reading
- TensorFlow练习13: 制作一个简单的聊天机器人
如要转载,请保持本文完整,并注明作者@斗大的熊猫和本文原始地址:http://blog.topspeedsnail.com/archives/11062
TensorFlow练习25: 使用深度学习做阅读理解+完形填空相关推荐
- 全国大学生英语竞赛培训:听力、词汇语法、完形填空、阅读理解、翻译、改错、IQ题、大小作文、语法等部分快速得分、备战策略和获奖技巧讲解!
目 录 大学生英语竞赛培训--介绍.学习网址 PPT课件.视频--下载 [2020-04-05]P1 如何准备大学生英语竞赛才能拿奖?2:27:54[竞赛简介vs赛前准备] [2020-04-11 ...
- TensorFlow领衔,七大深度学习框架大对比!
作者|黄文坚 唐源 编辑|小智 TensorFlow 在 2015 年年底一出现就受到了极大的关注,在一个月内获得了 GitHub上超过一万颗星的关注,目前在所有的机器学习.深度学习项目中排名第一,甚 ...
- 如何用深度学习做自然语言处理?这里有份最佳实践清单
如何用深度学习做自然语言处理?这里有份最佳实践清单 By 机器之心2017年7月26日 14:16 对于如何使用深度学习进行自然语言处理,本文作者 Sebastian Ruder 给出了一份详细的最佳 ...
- Tensorflow【实战Google深度学习框架】用卷积神经网络打造图片识别应用
文章目录 1 Tensorflow model 2 卷积神经网络的基础单元 2.1 卷积 2.2 激活函数 2.3 池化 2.4 批归一化 2.5 Dropout 3 主流的25个深度学习模型 4 训 ...
- 用深度学习做命名实体识别(五)-模型使用
通过本文,你将了解如何基于训练好的模型,来编写一个rest风格的命名实体提取接口,传入一个句子,接口会提取出句子中的人名.地址.组织.公司.产品.时间信息并返回. 核心模块entity_extract ...
- 用深度学习做命名实体识别(四)——模型训练
通过本文你将了解如何训练一个人名.地址.组织.公司.产品.时间,共6个实体的命名实体识别模型. 准备训练样本 下面的链接中提供了已经用brat标注好的数据文件以及brat的配置文件,因为标注内容较多放 ...
- 手把手教你用深度学习做物体检测(四):模型使用
上一篇<手把手教你用深度学习做物体检测(三):模型训练>中介绍了如何使用yolov3训练我们自己的物体检测模型,本篇文章将重点介绍如何使用我们训练好的模型来检测图片或视频中的物体. 如 ...
- 深度学习论文阅读目标检测篇(一):R-CNN《Rich feature hierarchies for accurate object detection and semantic...》
深度学习论文阅读目标检测篇(一):R-CNN<Rich feature hierarchies for accurate object detection and semantic segmen ...
- 深度学习论文阅读目标检测篇(三):Faster R-CNN《 Towards Real-Time Object Detection with Region Proposal Networks》
深度学习论文阅读目标检测篇(三):Faster R-CNN< Towards Real-Time Object Detection with Region Proposal Networks&g ...
最新文章
- python课程水平测试成绩查询_学业水平测试成绩查询
- 红帽虚拟化RHEV3.2创建虚拟机(图文Step by Step)
- C++ STL 算法精选之查找篇
- redis 获取服务器信息,StringRedisTemplate获取redis信息
- python如何进行大到小排序_Python3基础 list sort 从小(大)到大(小)排序
- 在vs2012下编译出现Msvcp120d.dll 丢失的问题
- python和excel的结合新软件_Python处理Excel模块的对比分析!
- FLAT:中文NER屠榜之作!
- OPPO全球营销总裁沈义人宣布卸任,网友:第二天宣布入职小米?
- 开门红讨采头,开工喜庆红色PSD分层海报模板
- 服务器ip算是虚拟资产吗,云服务器算资产吗
- 十三、this关键字
- GhostNet论文
- 电脑监控软件应该怎样安装?安装简单吗?
- 体重测试仪软件,一键校准身高体重测量仪
- android ftp播放器,超强本地播放器一款支持samba、FTP/Windows共享服务-简单不折腾...
- codeigniter3 全面集成 phpunit
- 浏览器劫持事件处置(麻辣香锅)
- Spartan6系列之SelectIO---IOB深入详解
- Android 控件右上角角标的实现方案
热门文章
- 电力电子技术第五版王兆安pdf_电力电子技术笔记(考试必备)
- php 检测服务器网速_php测试用户网速
- dpo指标详解买入绝技_巴菲特点破A股市场:5年前5178点买入5万元上峰水泥股票,持有到现在会有怎样的收益?从贫穷到富有...
- mysql sql 语句事务_MySQL: 3、SQL语言 ②约束、事务
- 对Julia社区不熟悉?创始人来告诉你
- Python 列表 insert() 方法
- Oracle PL/SQL的安装
- java 算法--洗牌算法
- HDUOJ-------单词数
- 谈谈WEB开发中的苦大难字符集问题