Tensorflow rnn-word2vec-电影评论

import pandas as pd
import re
import numpy as np
import os
复制代码

from gensim.models import word2vec
复制代码

# Labelled training reviews (tab-separated file).
data_t = pd.read_csv('labeledTrainData.tsv', sep='\t')

data_t.shape
复制代码

(25000, 3)
复制代码

# Train the word2vec embeddings once and cache them in 'mymodel'; later runs
# simply reload the saved model.
if not os.path.exists('mymodel'):
    if not os.path.exists('imdb_text'):
        # Build the training corpus from the unlabelled reviews: one review
        # per line, keeping only runs of ASCII letters, lower-cased.
        data_un = pd.read_csv('unlabeledTrainData.tsv', header=0, delimiter="\t", quoting=3)
        pat = re.compile(r'[A-Za-z]+')
        with open('imdb_text', 'a', encoding='utf-8') as f:
            for rev in data_un.review:
                str_list = [x.lower() for x in pat.findall(rev)]
                f.write(' '.join(str_list) + '\n')
        del data_un
    # Load the corpus.
    # NOTE(review): Text8Corpus appears to treat the file as one continuous
    # word stream rather than one sentence per line - confirm this is intended
    # (word2vec.LineSentence would keep reviews separate).
    sentences = word2vec.Text8Corpus("imdb_text")
    # 50-dimensional vectors, default window=5. No `sg` argument is passed, so
    # gensim uses its default training algorithm (CBOW); pass sg=1 if
    # skip-gram is actually wanted.
    model = word2vec.Word2Vec(sentences, size=50)
    model.save('mymodel')
else:
    model = word2vec.Word2Vec.load('mymodel')

# Only the keyed vectors are needed from here on; drop the full model.
word_vectors = model.wv
del model
复制代码

word_vectors复制代码

<gensim.models.keyedvectors.Word2VecKeyedVectors at 0x1a24968da0>
复制代码

# Turn every labelled review into a list of word vectors. The review is
# tokenized the same way the word2vec corpus was written (runs of ASCII
# letters, lower-cased); a plain split() would leave capitalised or punctuated
# tokens that can never match the vocabulary. Tokens without a vector are
# dropped.
data_t['vec'] = data_t.review.apply(
    lambda x: [
        word_vectors[w]
        for w in (t.lower() for t in re.findall(r'[A-Za-z]+', x))
        if w in word_vectors
    ]
)
复制代码

# The raw text and the embedding table are no longer needed once every review
# has been converted to vectors.
del data_t['review']
del word_vectors

import gc
gc.collect()
复制代码

14
复制代码

# Keep only reviews that produced at least one word vector, then display the
# class balance.
data_t = data_t.loc[data_t['vec'].map(len) > 0]
data_t.sentiment.value_counts()
复制代码

0    12499
1    12495
Name: sentiment, dtype: int64
复制代码

# Length (in word vectors) of the longest review.
maxlength = max(map(len, data_t.vec))
maxlength
复制代码

1622
复制代码

# Number of reviews longer than 300 word vectors.
sum(data_t.vec.apply(len) > 300)

3246
复制代码

def pad(x, max_len=300, dim=50):
    """Truncate or zero-pad a sequence of vectors to a fixed-size array.

    Args:
        x: Sequence of ``dim``-dimensional vectors (list of arrays, list of
            lists, or a 2-D array).
        max_len: Number of rows in the result; longer inputs are cut to their
            first ``max_len`` vectors.
        dim: Length of each vector.

    Returns:
        A NumPy array of shape ``(max_len, dim)``. Rows beyond ``len(x)`` are
        all zeros.
    """
    padded = np.zeros((max_len, dim))
    n = min(len(x), max_len)
    # An empty slice cannot be broadcast into a (0, dim) view, so skip the
    # copy when there is nothing to copy.
    if n:
        padded[:n] = np.asarray(x[:n])
    return padded

data_t['vec'] = data_t.vec.apply(pad)
复制代码

import tensorflow as tf
复制代码

/anaconda3/envs/py35/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5
  return f(*args, **kwds)
/anaconda3/envs/py35/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
复制代码

# Model and training hyperparameters.
learning_rate = 0.002  # step size handed to the optimizer
batch_size = 100       # reviews per training batch
n_input = 50           # size of one word vector
n_steps = 300          # time steps (word vectors) per review
n_hidden = 300         # units in the recurrent cell
n_classes = 2          # number of sentiment classes


# Graph inputs: a batch of reviews as (batch, n_steps, n_input) word vectors,
# one integer class label per review, and the dropout keep probability.
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.int64, [None])
keep_prob = tf.placeholder(tf.float32)

def length(shuru):
    """Count the non-zero time steps of every sequence in a batch.

    A time step counts as used when at least one of its components is
    non-zero, so all-zero padding rows are not counted.

    Args:
        shuru: Rank-3 tensor indexed as (sequence, time step, feature).

    Returns:
        A rank-1 tensor with one count per sequence, in the input's dtype.
    """
    # 1.0 for time steps with any non-zero component, 0.0 for all-zero ones.
    used = tf.sign(tf.reduce_max(tf.abs(shuru), axis=2))
    return tf.reduce_sum(used, axis=1)

# GRU cell with dropout applied to its outputs.
cell = tf.contrib.rnn.DropoutWrapper(
    cell=tf.contrib.rnn.GRUCell(n_hidden),
    output_keep_prob=keep_prob,
)
复制代码

# Unroll the cell over the input; the per-review lengths are passed so the
# RNN knows where each sequence ends. The final state is not used.
output, _ = tf.nn.dynamic_rnn(
    cell,
    x,
    sequence_length=length(x),
    dtype=tf.float32,
)
复制代码

output.get_shape()
复制代码

TensorShape([Dimension(None), Dimension(300), Dimension(300)])
复制代码

# Select each review's RNN output at its last non-padded time step.
# `output` is flattened to (batch * n_steps, n_hidden) and row
# i * n_steps + (length_i - 1) is gathered for review i. The batch size is
# read from the tensor at run time instead of the `batch_size` constant, so
# the graph also accepts batches of a different size.
index = tf.range(0, tf.shape(output)[0]) * n_steps + (tf.cast(length(x), tf.int32) - 1)
flat = tf.reshape(output, [-1, int(output.get_shape()[2])])
last = tf.gather(flat, index)
复制代码

# Linear read-out from the last RNN output to class logits, followed by a
# softmax to obtain class probabilities.
weight = tf.Variable(tf.truncated_normal((n_hidden, n_classes), stddev=0.001))
bias = tf.Variable(tf.constant(0.1, shape=[n_classes]))
com_out = tf.add(tf.matmul(last, weight), bias)
prediction = tf.nn.softmax(com_out)
复制代码

# Mean cross-entropy over the batch, computed from the raw logits and the
# integer class labels.
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=com_out)
)

# Adam optimizer with per-gradient norm clipping.
optimizer = tf.train.AdamOptimizer(learning_rate)
grads = optimizer.compute_gradients(cross_entropy)
# Clip every gradient to a norm of at most 5; variables that received no
# gradient (None) are passed through unchanged.
grads = [
    (tf.clip_by_norm(g, 5), v) if g is not None else (g, v)
    for g, v in grads
]
train_op = optimizer.apply_gradients(grads)

/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py:97: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
WARNING:tensorflow:From /anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/clip_ops.py:110: calling reduce_sum (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
复制代码

# Fraction of the batch whose most probable class equals the label.
correct_pred = tf.equal(tf.argmax(prediction, 1), y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

def generatebatch(X, Y, n_examples, batch_size):
    """Yield consecutive, equally sized mini-batches from X and Y.

    Args:
        X: Sliceable collection of inputs.
        Y: Sliceable collection of labels, aligned with ``X``.
        n_examples: Number of examples to draw batches from.
        batch_size: Number of examples per batch.

    Yields:
        ``(batch_xs, batch_ys)`` slices of ``X`` and ``Y``. Only full batches
        are produced: the trailing ``n_examples % batch_size`` examples are
        skipped.
    """
    for batch_i in range(n_examples // batch_size):
        start = batch_i * batch_size
        end = start + batch_size
        yield X[start:end], Y[start:end]

# Start a session, initialise all variables and create the checkpoint saver.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver()

# Train for 10 passes over the data, reshuffling the reviews on every pass.
for step in range(10):
    index = np.random.permutation(int(len(data_t.vec.values)))
    for batch_x, batch_y in generatebatch(data_t.vec.values[index],
                                          data_t.sentiment.values[index],
                                          len(data_t.vec.values),
                                          batch_size):
        # Stack the per-review arrays into one (batch, steps, features) array.
        # astype returns a new array, so its result has to be kept.
        batch_x = (np.concatenate(batch_x)
                   .reshape(batch_size, n_steps, n_input)
                   .astype(np.float32))
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
    # Report loss and accuracy on the last mini-batch of the pass, with
    # dropout disabled, and checkpoint the model.
    loss, acc = sess.run([cross_entropy, accuracy],
                         feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
    saver.save(sess, './lesson0', global_step=step)
    print("Iter " + str(step) + ", Minibatch Loss= " + "{}".format(loss) + ", Training Accuracy= " + "{}".format(acc))
print("Optimization Finished!")
复制代码

Iter 0, Minibatch Loss= 0.3504045009613037, Training Accuracy= 0.8799999952316284
Iter 1, Minibatch Loss= 0.2799288034439087, Training Accuracy= 0.8899999856948853
Iter 2, Minibatch Loss= 0.25252586603164673, Training Accuracy= 0.8700000047683716
Iter 3, Minibatch Loss= 0.2636661231517792, Training Accuracy= 0.9300000071525574
复制代码

Tensorflow rnn-word2vec-电影评论相关推荐

4.使用Keras和Tensorflow Hub对电影评论进行文本分类
使用Keras和Tensorflow Hub对电影评论进行文本分类本指南使用tf.keras(一个在TensorFlow中用于构建和训练模型的高级API)和tensorflow_hub(一个用于在一 ...
python电影评论情感分析_20行Tensorflow代码实现电影评论情感分析
原标题:20行Tensorflow代码实现电影评论情感分析背景情感分析有很多的应用场景,比如做一个电商网站,卖家需要时刻关心用户对于商品的评论是否是正面的.再比如做一个电影的宣传和策划,电影在键盘 ...
Pytorch+Text-CNN+Word2vec+电影评论情感分析实战
文章目录 0.前言 1.电影评论数据集 2.数据读取 3.数据预处理 4.准备训练和测试集 5.加载词向量模型Word2vec 6.定义网络 7.训练网络 8.测试网络和可视化 9.总结 0.前言很 ...
情感分析之电影评论分析-基于Tensorflow的LSTM
1. 深度学习在自然语言处理中的应用自然语言处理是教会机器如何去处理或者读懂人类语言的系统,目前比较热门的方向,包括如下几类: 对话系统 - 比较著名的案例有:Siri,Alexa 和 Cortan ...
Tensorflow 笔记 Ⅺ——NLP 实现电影评论情感分析
文章目录特别说明数据集 IMDB 简介 IMDB 数据集下载地址目录结构示例文本自然语言处理基础分词词的数字化表示方法与词嵌入循环神经网络 RNN与LSTM 数据的时序与含义 RNN ...
TensorFlow 教程——电影评论文本分类
https://tensorflow.google.cn/tutorials/keras/text_classification 解决方案 import tensorflow as tf from t ...
Tensorflow2.*教程之使用Tensorflow Hub 对IMDB电影评论数据集进行文本分类(2)
使用数据集: IMDB 数据集库文件: tensorflow tensorflow_hub:用于迁移学习的库和平台 tensorflow_datasets:提供常用数据集我们使用 Tensorfl ...
【深度学习kears+tensorflow】电影评论分类：二分类问题
目录 Classifying movie reviews: a binary classification example 电影评论分类:二分类问题 The IMDB dataset IMDB 数据集 ...
自然语言处理--Keras 实现LSTM循环神经网络分类 IMDB 电影评论数据集
LSTM 对于循环网络的每一层都引入了状态(state)的概念,状态作为网络的记忆(memory).但什么是记忆呢?记忆将由一个向量来表示,这个向量与元胞中神经元的元素数量相同.记忆单元将是一个由 n ...
lstm训练情感分析的优点_LSTM对电影评论进行简单的情感分析
今天自己尝试使用LSTM对电影评论进行简单的情感分析代码中npy文件: 代码使用的数据集是IMDB,网盘地址: 首先读取已经做好的词向量模型 import numpy as np # 这里有两个表, ...

Tensorflow rnn-word2vec-电影评论

Tensorflow rnn-word2vec-电影评论相关推荐

最新文章

热门文章