本部分基于论文《Convolutional Neural Networks for Sentence Classification》(Yoon Kim,EMNLP 2014)。









对每个卷积结果 $C_i$ 进行 max-over-time pooling 处理,即取每一组卷积结果中的最大值,因此每一组卷积结果最终只得到一个标量。将池化得到的结果进行堆叠,获得一个 $1 \times m$ 的向量。





import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

# Tensor type used for all trainable weights of the model.
dtype = torch.FloatTensor

# Text-CNN parameters
embedding_size = 2        # dimensionality of each word vector
sequence_length = 3       # number of words in every sentence
num_classes = 2           # sentences are classified into two classes
filter_sizes = [2, 2, 2]  # height (number of words) of each convolution window
num_filters = 3           # number of filters (output channels) per window size

# 3-word sentences (= sequence_length is 3)
sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

# Join all sentences with spaces, then split on whitespace to get a flat token list.
word_list = " ".join(sentences).split()
# Deduplicate the tokens. Sorting (instead of relying on set iteration order)
# keeps the word -> index mapping identical from one run to the next.
word_list = sorted(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}  # word -> integer index
vocab_size = len(word_dict)                          # number of distinct words

# One integer array of word indices per sentence.
inputs = [np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences]

# Class-index targets (not one-hot), the form nn.CrossEntropyLoss consumes.
targets = list(labels)

# Stack the per-sentence arrays into one [num_sentences, sequence_length] array
# before building the tensor; this avoids the slow list-of-ndarrays path and
# pins the dtype to int64 regardless of the platform's default NumPy integer.
input_batch = torch.tensor(np.array(inputs), dtype=torch.long)
# Class-index targets as an int64 tensor of shape [batch_size].
target_batch = torch.tensor(targets, dtype=torch.long)


class TextCNN(nn.Module):
    """Convolutional sentence classifier (embedding -> conv -> max-pool -> linear).

    The model reads its hyper-parameters from the module-level names
    ``vocab_size``, ``embedding_size``, ``filter_sizes``, ``num_filters``,
    ``num_classes`` and ``dtype``.

    Attributes:
        num_filters_total: Length of the pooled feature vector,
            ``num_filters * len(filter_sizes)``.
        W: Word-embedding matrix, ``[vocab_size, embedding_size]``.
        Weight: Fully connected weight, ``[num_filters_total, num_classes]``.
        Bias: Fully connected bias, ``[num_classes]``.
        filter_list: One ``nn.Conv2d`` per entry of ``filter_sizes``.
    """

    def __init__(self):
        super(TextCNN, self).__init__()
        # Size of the concatenated pooled features; fixes the FC weight shape.
        self.num_filters_total = num_filters * len(filter_sizes)

        # Cast the raw tensor *before* wrapping it in nn.Parameter, so the
        # attribute is always a registered leaf Parameter whatever `dtype` is.
        self.W = nn.Parameter(
            torch.empty(vocab_size, embedding_size).uniform_(-1, 1).type(dtype)
        )  # word embeddings
        self.Weight = nn.Parameter(
            torch.empty(self.num_filters_total, num_classes).uniform_(-1, 1).type(dtype)
        )  # fully connected weight
        self.Bias = nn.Parameter(
            (0.1 * torch.ones([num_classes])).type(dtype)
        )  # fully connected bias

        # The convolutions are created once and registered here. Building them
        # inside forward() would re-initialise them randomly on every call and
        # hide them from model.parameters(), so they would never be trained.
        # Conv2d args: in_channels=1, out_channels=num_filters,
        # kernel=(filter_height, filter_width=embedding_size).
        self.filter_list = nn.ModuleList(
            [
                nn.Conv2d(1, num_filters, (filter_size, embedding_size), bias=True)
                for filter_size in filter_sizes
            ]
        )

    def forward(self, X):
        """Compute class logits for a batch of index-encoded sentences.

        Args:
            X: Integer tensor of word indices, ``[batch_size, sequence_length]``.

        Returns:
            Tensor of unnormalised class scores, ``[batch_size, num_classes]``.
        """
        embedded_chars = self.W[X]                    # [batch, seq_len, embedding_size]
        embedded_chars = embedded_chars.unsqueeze(1)  # add channel: [batch, 1, seq_len, embedding_size]

        pooled_outputs = []
        for conv in self.filter_list:
            # [batch, num_filters, seq_len - filter_size + 1, 1]
            h = F.relu(conv(embedded_chars))
            # Max-over-time pooling: pool over the whole height of the feature
            # map, which for stride 1 and no padding is seq_len - filter_size + 1.
            pooled = F.max_pool2d(h, (h.size(2), 1))  # [batch, num_filters, 1, 1]
            # Move channels last: [batch, 1, 1, num_filters]
            pooled_outputs.append(pooled.permute(0, 3, 2, 1))

        # Concatenate along the channel axis (dim 3 after the permute):
        # [batch, 1, 1, num_filters * len(filter_sizes)]
        h_pool = torch.cat(pooled_outputs, dim=3)
        # Flatten to [batch, num_filters_total]
        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total])

        # Fully connected layer: [batch, num_classes]
        return torch.mm(h_pool_flat, self.Weight) + self.Bias


model = TextCNN()
criterion = nn.CrossEntropyLoss()                     # cross-entropy loss on the class scores
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimiser

# Training
for epoch in range(5000):
    optimizer.zero_grad()        # clear gradients accumulated by the previous step
    output = model(input_batch)  # forward pass over the whole batch

    # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:  # report progress every 1000 epochs
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

    loss.backward()   # back-propagate
    optimizer.step()  # update the parameters

# Test
test_text = 'sorry hate you'
# Encode the test sentence as word indices; every word must be in word_dict.
tests = [np.asarray([word_dict[n] for n in test_text.split()])]
# Stack into a [1, num_words] int64 tensor.
test_batch = torch.tensor(np.array(tests), dtype=torch.long)

# Predict
# Inference needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    # Index of the highest-scoring class, kept as a [1, 1] tensor.
    predict = model(test_batch).max(1, keepdim=True)[1]

if predict[0][0] == 0:
    print(test_text, "is Bad Mean...")
else:
    print(test_text, "is Good Mean!!")


