pytorch RNN原理实现词性判别以及预测下一个词

卷积神经网络利用卷积核来共享参数，在大大降低参数量的同时还可以利用空间信息，但是对于与先后顺序有关的数据就没有多大优势。

对这类模型来说，改变元素的位置之后数据仍被当作原来的数据，不会有变化；但语言并非如此，比如“我喜欢你”和“你喜欢我”，虽然用的词完全一样，表达的意思却不一样，所以产生了 RNN。

时间序列

是指将同一统计指标的数值按其发生的时间先后顺序排列而成的数列。时间序列分析的主要目的是根据已有的历史数据对未来进行预测。

在时间序列问题上，观察值具有时间先后的特征，历史数据可以影响未来数据的表达，因此需要网络具有记忆能力。

最开始有自回归模型：预测某个数据时，要把它前面的所有数据全部用上，但是这样参数过多且运算麻烦。

之后产生 N-gram 语言模型：使用固定宽度的窗口

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-Yni789FS-1619622800219)(attachment:image.png)]

缺点：受限于窗口的宽度，N并不能取很大的值，会出现长期依赖缺失的问题。

例如，一般我们可以根据前文的“他在法国”推测出后文的“他学会了说法语”；但如果两者之间的跨度太大，超出了窗口宽度，前面的信息就容易丢失。

RNN参数

torch.nn.RNN(input_size, hidden_size, num_layers)

必选参数 input_size，指定输入序列中单个样本的尺寸大小，例如可能用一个 1000 长度的向量表示一个单词，则 input_size=1000
必选参数 hidden_size，指的是隐藏层中输出特征的大小
可选参数 num_layers，指的是纵向堆叠的隐藏层个数，一般设置为 1~10，默认值 default=1

pytorch 实现词性判别

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

# Training corpus: each entry pairs a tokenised sentence with one
# part-of-speech tag per token.
training_data = [
    (
        ["The", "cat", "ate", "the", "fish"],
        ["DET", "NN", "V", "DET", "NN"],
    ),
    (
        ["They", "read", "that", "book"],
        ["NN", "V", "DET", "NN"],
    ),
]

# Test corpus: tokenised sentences only, without tags.
testing_data = [["They", "ate", "the", "fish"]]

# Notebook cell: echo the test data.
testing_data

[['They', 'ate', 'the', 'fish']]

# Vocabulary: map every distinct training word to an integer id, assigned in
# first-seen order (so ids run 0..len-1 without gaps).
word_to_ix = {}
for sent, _tags in training_data:
    for word in sent:
        # setdefault only inserts when the word has not been seen yet, so a
        # repeated word keeps the id it received the first time.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

{'The': 0, 'cat': 1, 'ate': 2, 'the': 3, 'fish': 4, 'They': 5, 'read': 6, 'that': 7, 'book': 8}

# Hand-written mapping from part-of-speech tag to class index.
tag_to_ix = dict(DET=0, NN=1, V=2)

构建网络

class LSTMTagger(nn.Module):
    """Part-of-speech tagger: embedding -> LSTM -> linear -> log-softmax.

    The recurrent state is stored on the instance as ``self.hidden``. Every
    ``forward`` call feeds that state into the LSTM and overwrites it with the
    state the LSTM returns. A caller that wants a sentence tagged
    independently of whatever was processed before must therefore assign
    ``model.hidden = model.init_hidden()`` before calling the model.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        """Create the layers and the initial hidden state.

        Args:
            embedding_dim: size of each word-embedding vector.
            hidden_dim: size of the LSTM hidden and cell state.
            vocab_size: number of entries in the embedding table.
            tagset_size: number of output tag classes.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero (hidden state, cell state) pair.

        Each tensor has shape ``(1, 1, hidden_dim)``.
        """
        return (torch.zeros(1, 1, self.hidden_dim),
                torch.zeros(1, 1, self.hidden_dim))

    def forward(self, sentence):
        """Score every word of one sentence against every tag.

        Args:
            sentence: 1-D tensor of word indices.

        Returns:
            Tensor of shape ``(len(sentence), tagset_size)`` holding
            log-probabilities (each row is a log-softmax over the tags).
        """
        seq_len = len(sentence)
        # Look up one embedding vector per word.
        embeds = self.word_embeddings(sentence)
        # Reshape to (seq_len, batch=1, embedding_dim) as passed to the LSTM,
        # and keep the returned state on the instance.
        lstm_out, self.hidden = self.lstm(
            embeds.view(seq_len, 1, -1), self.hidden)
        # Drop the batch axis so the linear layer sees one row per word.
        tag_space = self.hidden2tag(lstm_out.view(seq_len, -1))
        # Normalise each row into log-probabilities over the tags.
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

def prepare_sequence(seq, to_ix):
    """Convert a sequence of tokens into a 1-D int64 tensor of indices.

    Args:
        seq: iterable of tokens (words or tags).
        to_ix: mapping from token to integer index.

    Returns:
        A 1-D ``torch.long`` tensor holding ``to_ix[token]`` for each token,
        in the order the tokens appear in ``seq``.

    Raises:
        KeyError: if a token of ``seq`` is not a key of ``to_ix``.
    """
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)

# Notebook cell: echo the number of tag classes.
len(tag_to_ix)

EMBEDDING_DIM = 10
HIDDEN_DIM = 3  # set equal to the number of part-of-speech tags

# The tagger must be created as its own statement: the loss/optimizer lines
# below and all later cells read the module-level name `model`.
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Sanity check before any training step: run the first training sentence
# through the model and show the tokens, their indices, the score matrix and
# the highest-scoring tag per word.
first_sentence = training_data[0][0]
inputs = prepare_sequence(first_sentence, word_to_ix)
tag_scores = model(inputs)
for shown in (first_sentence, inputs, tag_scores, tag_scores.max(dim=1)):
    print(shown)

['The', 'cat', 'ate', 'the', 'fish']
tensor([0, 1, 2, 3, 4])
tensor([[-1.2455, -0.7574, -1.4134],[-1.2678, -0.7575, -1.3875],[-1.0882, -0.9860, -1.2375],[-1.1371, -0.9226, -1.2667],[-1.2417, -0.7801, -1.3754]], grad_fn=<LogSoftmaxBackward>)
torch.return_types.max(
values=tensor([-0.7574, -0.7575, -0.9860, -0.9226, -0.7801], grad_fn=<MaxBackward0>),
indices=tensor([1, 1, 1, 1, 1]))

# Number of passes over training_data.
# NOTE(review): the original comment on this loop said 400 passes, but the
# code runs 4, and the output recorded below (every word assigned class 1) is
# consistent with so few passes. Kept at 4 to match that recorded output;
# presumably a few hundred passes are needed for the tags to be learned.
NUM_EPOCHS = 4

for epoch in range(NUM_EPOCHS):
    for sentence, tags in training_data:
        # Clear the gradients left over from the previous step.
        model.zero_grad()
        # Start each sentence from a fresh zero hidden state.
        model.hidden = model.init_hidden()
        # Encode the words and the gold tags as index tensors.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        # Forward pass.
        tag_scores = model(sentence_in)
        # Compute the loss, back-propagate and update the parameters.
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

# Inspect the result of training on the first training sentence. The hidden
# state is reset first so the scores do not depend on the last sentence that
# happened to be processed during training.
model.hidden = model.init_hidden()
inputs = prepare_sequence(training_data[0][0], word_to_ix)
tag_scores = model(inputs)
print(training_data[0][0])
print(tag_scores)
print(torch.max(tag_scores, 1))

['The', 'cat', 'ate', 'the', 'fish']
tensor([[-1.2365, -0.7653, -1.4090],[-1.2602, -0.7555, -1.3997],[-1.0703, -0.9892, -1.2545],[-1.1131, -0.9252, -1.2910],[-1.2313, -0.7583, -1.4287]], grad_fn=<LogSoftmaxBackward>)
torch.return_types.max(
values=tensor([-0.7653, -0.7555, -0.9892, -0.9252, -0.7583], grad_fn=<MaxBackward0>),
indices=tensor([1, 1, 1, 1, 1]))

# Tag the test sentence. The model keeps its recurrent state between calls,
# so reset it first; otherwise the scores would depend on whichever sentence
# was run through the model previously.
model.hidden = model.init_hidden()
test_inputs = prepare_sequence(testing_data[0], word_to_ix)
tag_scores01 = model(test_inputs)
print(testing_data[0])
print(test_inputs)
print(tag_scores01)
print(torch.max(tag_scores01, 1))

['They', 'ate', 'the', 'fish']
tensor([5, 2, 3, 4])
tensor([[-1.3469, -0.5845, -1.7006],[-1.0754, -0.9459, -1.3075],[-1.1031, -0.9052, -1.3329],[-1.2269, -0.7529, -1.4447]], grad_fn=<LogSoftmaxBackward>)
torch.return_types.max(
values=tensor([-0.5845, -0.9459, -0.9052, -0.7529], grad_fn=<MaxBackward0>),
indices=tensor([1, 1, 1, 1]))

RNN 实现预测一句话的下一个词

'''code by Tae Hwan Jung(Jeff Jung) @graykode, modify by wmathor
'''
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data

dtype = torch.FloatTensor

# Toy corpus: the first two words of each sentence are the model input and
# the third word is the prediction target.
sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
# Sort the distinct words so that word indices are the same on every run;
# the iteration order of a set of strings is not stable across interpreter
# runs, which would make the index assignment non-reproducible.
vocab = sorted(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for i, w in enumerate(vocab)}
n_class = len(vocab)

# TextRNN Parameter
batch_size = 2
n_step = 2  # number of cells(= number of Step)
n_hidden = 5  # number of hidden units in one cell


def make_data(sentences):
    """Build one-hot inputs and integer targets from whitespace-split sentences.

    For each sentence, every word except the last is one-hot encoded to form
    the input, and the index of the last word is the target. Reads the
    module-level ``word2idx`` and ``n_class``.

    Args:
        sentences: iterable of sentence strings.

    Returns:
        ``(input_batch, target_batch)``: a list with one array of shape
        ``(number_of_input_words, n_class)`` per sentence, and a list with
        one target word index per sentence.

    Raises:
        KeyError: if a word is not a key of ``word2idx``.
    """
    # Row i of the identity matrix is the one-hot vector of word i; build it
    # once instead of once per sentence.
    one_hot_rows = np.eye(n_class)
    input_batch = []
    target_batch = []
    for sen in sentences:
        words = sen.split()
        context_ids = [word2idx[w] for w in words[:-1]]
        input_batch.append(one_hot_rows[context_ids])
        target_batch.append(word2idx[words[-1]])
    return input_batch, target_batch


input_batch, target_batch = make_data(sentences)
# Stack the per-sentence one-hot arrays into a single ndarray before handing
# them to torch: building a tensor directly from a Python list of ndarrays is
# the slow path that PyTorch warns about.
input_batch = torch.as_tensor(np.array(input_batch), dtype=torch.float32)
target_batch = torch.as_tensor(target_batch, dtype=torch.long)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size=batch_size, shuffle=True)


class TextRNN(nn.Module):
    """``nn.RNN`` layer followed by a linear layer that scores the next word.

    Layer sizes are read from the module-level ``n_class`` and ``n_hidden``.
    """

    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        # fc
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, hidden, X):
        """Return next-word scores for a batch of one-hot word sequences.

        Args:
            hidden: initial RNN state, [num_layers(=1), batch_size, n_hidden].
            X: one-hot inputs, [batch_size, n_step, n_class].

        Returns:
            Scores of shape [batch_size, n_class], computed from the RNN
            output at the last time step.
        """
        # nn.RNN is used here with the time axis first, so swap the axes.
        X = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]
        out, hidden = self.rnn(X, hidden)
        # out : [n_step, batch_size, num_directions(=1) * n_hidden]
        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        out = out[-1]  # [batch_size, num_directions(=1) * n_hidden] ⭐
        logits = self.fc(out)
        return logits


model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(500):
    for x, y in loader:
        # hidden : [num_layers * num_directions, batch, hidden_size]
        # Sized from the batch itself because the last batch of an epoch can
        # be smaller than batch_size.
        hidden = torch.zeros(1, x.shape[0], n_hidden)
        # x : [batch_size, n_step, n_class]
        pred = model(hidden, x)

        # pred : [batch_size, n_class], y : [batch_size] (LongTensor, not one-hot)
        loss = criterion(pred, y)
        # Report the loss of every batch on each 100th epoch.
        if (epoch + 1) % 100 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# First two words of every sentence.
# NOTE: this name shadows the builtin input(); it is kept because code later
# in the file may read it.
input = [sen.split()[:2] for sen in sentences]
# Predict
# One zero initial state per sentence; the count is taken from the input
# tensor itself rather than from a separately built list.
hidden = torch.zeros(1, input_batch.shape[0], n_hidden)
# No gradients are needed for prediction, so run the model under no_grad
# instead of stripping the graph afterwards with the legacy `.data` attribute.
with torch.no_grad():
    predict = model(hidden, input_batch).argmax(dim=1, keepdim=True)  # [n_sentences, 1]
# squeeze(1) removes only the trailing axis, so this also works when there is
# a single sentence (a bare squeeze() would yield a 0-d tensor that cannot be
# iterated).
print([sen.split()[:2] for sen in sentences], '->', [idx2word[n.item()] for n in predict.squeeze(1)])

Epoch: 0100 cost = 1.591890
Epoch: 0100 cost = 1.145270
Epoch: 0200 cost = 1.079182
Epoch: 0200 cost = 0.912082
Epoch: 0300 cost = 0.791074
Epoch: 0300 cost = 0.866361
Epoch: 0400 cost = 0.604370
Epoch: 0400 cost = 0.703954
Epoch: 0500 cost = 0.458180
Epoch: 0500 cost = 0.529244
[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']