"""
寒小阳老师CNN_sentence/conv_net_sentence.py
数据清洗代码:https://blog.csdn.net/BTUJACK/article/details/80666643
CPU版本句子分类代码
CNN_sentence/conv_net_classes.py
copy from https://github.com/yoonkim/CNN_sentence/blob/master/conv_net_classes.py
Sample code for
Convolutional Neural Networks for Sentence Classification
http://arxiv.org/pdf/1408.5882v2.pdf
Much of the code is modified from
- deeplearning.net (for ConvNet classes)
- https://github.com/mdenil/dropout (for dropout)
- https://groups.google.com/forum/#!topic/pylearn-dev/3QbKtCumAW4 (for Adadelta)
"""import numpy
import theano.tensor.shared_randomstreams
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
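
A note on running this listing: it targets the Theano 0.7-era API, and both downsample and the old conv module were removed in later releases. Below is a hedged sketch of the newer imports, in case your Theano is 0.9 or later (the pooling keyword changed from ds to ws across versions, so check your install):

    # Hypothetical modernization, assuming Theano >= 0.9; not part of the original file.
    from theano.tensor.signal.pool import pool_2d  # replaces downsample.max_pool_2d
    from theano.tensor.nnet import conv2d          # replaces conv.conv2d (image_shape becomes input_shape)

The listing is also Python 2 code: activation.func_name (used in HiddenLayer below) is spelled activation.__name__ on Python 3, and zip() there no longer returns an indexable list.
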
def ReLU(x):
    y = T.maximum(0.0, x)
    return y

def Sigmoid(x):
    y = T.nnet.sigmoid(x)
    return y

def Tanh(x):
    y = T.tanh(x)
    return y

def Iden(x):
    y = x
    return y


class HiddenLayer(object):
    """
    Class for HiddenLayer
    """
    def __init__(self, rng, input, n_in, n_out, activation, W=None, b=None,
                 use_bias=False):

        self.input = input
        self.activation = activation

        if W is None:
            if activation.func_name == "ReLU":
                W_values = numpy.asarray(0.01 * rng.standard_normal(size=(n_in, n_out)),
                                         dtype=theano.config.floatX)
            else:
                W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_in + n_out)),
                                                     high=numpy.sqrt(6. / (n_in + n_out)),
                                                     size=(n_in, n_out)),
                                         dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W')
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b')

        self.W = W
        self.b = b

        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)

        self.output = (lin_output if activation is None else activation(lin_output))

        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]


def _dropout_from_layer(rng, layer, p):
    """p is the probability of dropping a unit
    """
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    # p=1-p because 1's indicate keep and p is prob of dropping
    mask = srng.binomial(n=1, p=1-p, size=layer.shape)
    # The cast is important because
    # int * float32 = float64 which pulls things off the gpu
    output = layer * T.cast(mask, theano.config.floatX)
    return output


class DropoutHiddenLayer(HiddenLayer):
    def __init__(self, rng, input, n_in, n_out,
                 activation, dropout_rate, use_bias, W=None, b=None):
        super(DropoutHiddenLayer, self).__init__(
                rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b,
                activation=activation, use_bias=use_bias)

        self.output = _dropout_from_layer(rng, self.output, p=dropout_rate)


class MLPDropout(object):
    """A multilayer perceptron with dropout"""
    def __init__(self, rng, input, layer_sizes, dropout_rates, activations, use_bias=True):

        # Set up all the hidden layers
        self.weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])
        self.layers = []
        self.dropout_layers = []
        self.activations = activations
        next_layer_input = input
        # dropout the input
        next_dropout_layer_input = _dropout_from_layer(rng, input, p=dropout_rates[0])
        layer_counter = 0
        for n_in, n_out in self.weight_matrix_sizes[:-1]:
            next_dropout_layer = DropoutHiddenLayer(rng=rng,
                    input=next_dropout_layer_input,
                    activation=activations[layer_counter],
                    n_in=n_in, n_out=n_out, use_bias=use_bias,
                    dropout_rate=dropout_rates[layer_counter])
            self.dropout_layers.append(next_dropout_layer)
            next_dropout_layer_input = next_dropout_layer.output

            # Reuse the parameters from the dropout layer here, in a different
            # path through the graph.
            next_layer = HiddenLayer(rng=rng,
                    input=next_layer_input,
                    activation=activations[layer_counter],
                    # scale the weight matrix W with (1-p)
                    W=next_dropout_layer.W * (1 - dropout_rates[layer_counter]),
                    b=next_dropout_layer.b,
                    n_in=n_in, n_out=n_out,
                    use_bias=use_bias)
            self.layers.append(next_layer)
            next_layer_input = next_layer.output
            layer_counter += 1

        # Set up the output layer
        n_in, n_out = self.weight_matrix_sizes[-1]
        dropout_output_layer = LogisticRegression(
                input=next_dropout_layer_input,
                n_in=n_in, n_out=n_out)
        self.dropout_layers.append(dropout_output_layer)

        # Again, reuse parameters in the dropout output.
        output_layer = LogisticRegression(
            input=next_layer_input,
            # scale the weight matrix W with (1-p)
            W=dropout_output_layer.W * (1 - dropout_rates[-1]),
            b=dropout_output_layer.b,
            n_in=n_in, n_out=n_out)
        self.layers.append(output_layer)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors

        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        # Grab all the parameters together.
        self.params = [param for layer in self.dropout_layers for param in layer.params]

    def predict(self, new_data):
        next_layer_input = new_data
        for i, layer in enumerate(self.layers):
            if i < len(self.layers) - 1:
                next_layer_input = self.activations[i](T.dot(next_layer_input, layer.W) + layer.b)
            else:
                p_y_given_x = T.nnet.softmax(T.dot(next_layer_input, layer.W) + layer.b)
        y_pred = T.argmax(p_y_given_x, axis=1)
        return y_pred

    def predict_p(self, new_data):
        next_layer_input = new_data
        for i, layer in enumerate(self.layers):
            if i < len(self.layers) - 1:
                next_layer_input = self.activations[i](T.dot(next_layer_input, layer.W) + layer.b)
            else:
                p_y_given_x = T.nnet.softmax(T.dot(next_layer_input, layer.W) + layer.b)
        return p_y_given_x
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one layer or more of hidden units and nonlinear activations.
    Intermediate layers usually have as activation function tanh or the
    sigmoid function (defined here by a ``HiddenLayer`` class) while the
    top layer is a softmax layer (defined here by a ``LogisticRegression``
    class).
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        """

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(rng=rng, input=input,
                                       n_in=n_in, n_out=n_hidden,
                                       activation=T.tanh)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out)

        # L1 norm; one regularization option is to enforce the L1 norm to
        # be small

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layers it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params


class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out, W=None, b=None):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        """

        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        if W is None:
            self.W = theano.shared(
                value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
                name='W')
        else:
            self.W = W

        # initialize the biases b as a vector of n_out 0s
        if b is None:
            self.b = theano.shared(
                value=numpy.zeros((n_out,), dtype=theano.config.floatX),
                name='b')
        else:
            self.b = b

        # compute vector of class-membership probabilities in symbolic form
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # compute prediction as class whose probability is maximal in
        # symbolic form
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch.
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1]. T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class. LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch;
        zero-one loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network"""

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), non_linear="tanh"):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.non_linear = non_linear
        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        if self.non_linear == "none" or self.non_linear == "relu":
            self.W = theano.shared(numpy.asarray(rng.uniform(low=-0.01, high=0.01, size=filter_shape),
                                                 dtype=theano.config.floatX),
                                   borrow=True, name="W_conv")
        else:
            W_bound = numpy.sqrt(6. / (fan_in + fan_out))
            self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                                 dtype=theano.config.floatX),
                                   borrow=True, name="W_conv")
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True, name="b_conv")

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W,
                               filter_shape=self.filter_shape, image_shape=self.image_shape)
        if self.non_linear == "tanh":
            conv_out_tanh = T.tanh(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
            self.output = downsample.max_pool_2d(input=conv_out_tanh, ds=self.poolsize, ignore_border=True)
        elif self.non_linear == "relu":
            conv_out_relu = ReLU(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
            self.output = downsample.max_pool_2d(input=conv_out_relu, ds=self.poolsize, ignore_border=True)
        else:
            pooled_out = downsample.max_pool_2d(input=conv_out, ds=self.poolsize, ignore_border=True)
            self.output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
        self.params = [self.W, self.b]

    def predict(self, new_data, batch_size):
        """
        predict for new data
        """
        img_shape = (batch_size, 1, self.image_shape[2], self.image_shape[3])
        conv_out = conv.conv2d(input=new_data, filters=self.W,
                               filter_shape=self.filter_shape, image_shape=img_shape)
        if self.non_linear == "tanh":
            conv_out_tanh = T.tanh(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
            output = downsample.max_pool_2d(input=conv_out_tanh, ds=self.poolsize, ignore_border=True)
        elif self.non_linear == "relu":
            conv_out_relu = ReLU(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
            output = downsample.max_pool_2d(input=conv_out_relu, ds=self.poolsize, ignore_border=True)
        else:
            pooled_out = downsample.max_pool_2d(input=conv_out, ds=self.poolsize, ignore_border=True)
            output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
        return output
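
For orientation, here is a minimal sketch of how a LeNetConvPoolLayer is typically wired up for sentence classification in the companion conv_net_sentence.py (the concrete numbers here, e.g. batch size 50, 64-word sentences, 300-d word vectors, 100 feature maps, and filter height 3, are illustrative assumptions rather than values fixed by this file):

    # Hypothetical usage sketch of one filter size from the Kim (2014) model.
    rng = numpy.random.RandomState(3435)
    batch_size, sent_len, emb_dim, filter_h = 50, 64, 300, 3
    x = T.tensor4('x')  # sentences as (batch, 1, sent_len, emb_dim) "images"
    conv_layer = LeNetConvPoolLayer(rng, input=x,
                                    image_shape=(batch_size, 1, sent_len, emb_dim),
                                    filter_shape=(100, 1, filter_h, emb_dim),
                                    # pool over the whole feature map: the
                                    # paper's max-over-time pooling
                                    poolsize=(sent_len - filter_h + 1, 1),
                                    non_linear="relu")
    # (batch, 100, 1, 1) feature maps, flattened to (batch, 100) for the MLP
    mlp_input = conv_layer.output.flatten(2)

Because the filter width equals the embedding dimension, each filter produces a single column of length sent_len - filter_h + 1, and max-pooling over that column yields one feature per filter, which is exactly the max-over-time pooling described in the paper.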
