Run the three files in this order:

cnews_loader.py

cnn_model.py

run_cnn.py

Download link for the cnews news data folder: https://pan.baidu.com/s/1H3K94E7JGJdIrGuTKBXvIg  password: fmdq

Put them all in one folder; see the notes inside each file for the details of running them.

Source: https://github.com/BTUJACK/text-classification-cnn-rnn

cnews_loader.py

This is the data preprocessing file.

# coding: utf-8
# Runs fine on Python 3.7.
"""cnews_loader.py is the data preprocessing file.

read_file(): reads the raw file data;
build_vocab(): builds the vocabulary using a character-level representation and stores it, so it does not have to be rebuilt on every run;
read_vocab(): reads the vocabulary stored in the previous step and converts it into a {word: id} mapping;
read_category(): fixes the list of category names and converts it into a {category: id} mapping;
to_words(): converts a piece of id-encoded data back into text;
process_file(): converts a dataset from text into fixed-length id sequences;
batch_iter(): prepares shuffled batches of data for training the neural network.

After preprocessing, the data has the following shapes:

    Data     Shape          Data     Shape
    x_train  [50000, 600]   y_train  [50000, 10]
    x_val    [5000, 600]    y_val    [5000, 10]
    x_test   [10000, 600]   y_test   [10000, 10]
"""

import sys
from collections import Counter

import numpy as np
import tensorflow.contrib.keras as kr

if sys.version_info[0] > 2:
    is_py3 = True
else:
    reload(sys)
    sys.setdefaultencoding("utf-8")
    is_py3 = False


def native_word(word, encoding='utf-8'):
    """If a model trained under Python 3 is used under Python 2, consider calling this function to convert the character encoding."""
    if not is_py3:
        return word.encode(encoding)
    else:
        return word


def native_content(content):
    if not is_py3:
        return content.decode('utf-8')
    else:
        return content


def open_file(filename, mode='r'):
    """Common file helper that works under both Python 2 and Python 3. mode: 'r' or 'w' for read or write."""
    if is_py3:
        return open(filename, mode, encoding='utf-8', errors='ignore')
    else:
        return open(filename, mode)


def read_file(filename):
    """Read the file data."""
    contents, labels = [], []
    with open_file(filename) as f:
        for line in f:
            try:
                label, content = line.strip().split('\t')
                if content:
                    contents.append(list(native_content(content)))
                    labels.append(native_content(label))
            except:
                pass
    return contents, labels


def build_vocab(train_dir, vocab_dir, vocab_size=5000):
    """Build the vocabulary from the training set and store it."""
    data_train, _ = read_file(train_dir)

    all_data = []
    for content in data_train:
        all_data.extend(content)

    counter = Counter(all_data)
    count_pairs = counter.most_common(vocab_size - 1)
    words, _ = list(zip(*count_pairs))
    # Add a <PAD> token so that all texts can be padded to the same length
    words = ['<PAD>'] + list(words)
    open_file(vocab_dir, mode='w').write('\n'.join(words) + '\n')


def read_vocab(vocab_dir):
    """Read the vocabulary."""
    # words = open_file(vocab_dir).read().strip().split('\n')
    with open_file(vocab_dir) as fp:
        # Under Python 2, convert every value to unicode
        words = [native_content(_.strip()) for _ in fp.readlines()]
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


def read_category():
    """Return the fixed list of categories."""
    categories = ['体育', '财经', '房产', '家居', '教育', '科技', '时尚', '时政', '游戏', '娱乐']
    categories = [native_content(x) for x in categories]
    cat_to_id = dict(zip(categories, range(len(categories))))
    return categories, cat_to_id


def to_words(content, words):
    """Convert id-encoded content back into text."""
    return ''.join(words[x] for x in content)


def process_file(filename, word_to_id, cat_to_id, max_length=600):
    """Convert a file into an id representation."""
    contents, labels = read_file(filename)

    data_id, label_id = [], []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
        label_id.append(cat_to_id[labels[i]])

    # Use pad_sequences from keras to pad the texts to a fixed length
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_length)
    y_pad = kr.utils.to_categorical(label_id, num_classes=len(cat_to_id))  # convert the labels to one-hot vectors

    return x_pad, y_pad


def batch_iter(x, y, batch_size=64):
    """Generate batches of data."""
    data_len = len(x)
    num_batch = int((data_len - 1) / batch_size) + 1

    indices = np.random.permutation(np.arange(data_len))
    x_shuffle = x[indices]
    y_shuffle = y[indices]

    for i in range(num_batch):
        start_id = i * batch_size
        end_id = min((i + 1) * batch_size, data_len)
        yield x_shuffle[start_id:end_id], y_shuffle[start_id:end_id]
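To see how these functions fit together, here is a minimal usage sketch of my own (not part of the repo), assuming cnews_loader.py sits next to the script and the cnews files live under data/cnews, the default path commented out in run_cnn.py:

from cnews_loader import build_vocab, read_vocab, read_category, process_file, batch_iter

train_dir = 'data/cnews/cnews.train.txt'
vocab_dir = 'data/cnews/cnews.vocab.txt'

build_vocab(train_dir, vocab_dir, vocab_size=5000)  # writes one character per line
words, word_to_id = read_vocab(vocab_dir)           # {char: id}
categories, cat_to_id = read_category()             # {category: id}

# Fixed-length id sequences and one-hot labels: shapes (50000, 600) and (50000, 10)
x_train, y_train = process_file(train_dir, word_to_id, cat_to_id, max_length=600)

for x_batch, y_batch in batch_iter(x_train, y_train, batch_size=64):
    pass  # each iteration yields one shuffled batch of up to 64 examples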

cnn_model.py

The model definition file.

# coding: utf-8
"""CNN model for text classification.

Architecture: Embedding -> CNN -> max pooling -> fully connected -> fully connected -> softmax -> category id.
See the implementation below. All configurable hyperparameters of the CNN live in the TCNNConfig class.
"""

import tensorflow as tf


class TCNNConfig(object):
    """CNN configuration parameters."""

    embedding_dim = 64  # word-embedding dimension
    seq_length = 600  # sequence length
    num_classes = 10  # number of classes
    num_filters = 256  # number of convolution filters
    kernel_size = 5  # convolution kernel size
    vocab_size = 5000  # vocabulary size

    hidden_dim = 128  # number of units in the fully connected layer

    dropout_keep_prob = 0.5  # dropout keep probability
    learning_rate = 1e-3  # learning rate

    batch_size = 64  # training batch size
    num_epochs = 10  # total number of epochs

    print_per_batch = 100  # print results every this many batches
    save_per_batch = 10  # write to TensorBoard every this many batches


class TextCNN(object):
    """CNN model for text classification."""

    def __init__(self, config):
        self.config = config

        # The three input placeholders
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        self.cnn()

    def cnn(self):
        """Build the CNN model."""
        # Word-embedding lookup
        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.embedding_dim])
            embedding_inputs = tf.nn.embedding_lookup(embedding, self.input_x)

        with tf.name_scope("cnn"):
            # CNN layer
            conv = tf.layers.conv1d(embedding_inputs, self.config.num_filters, self.config.kernel_size, name='conv')
            # Global max pooling layer
            gmp = tf.reduce_max(conv, reduction_indices=[1], name='gmp')

        with tf.name_scope("score"):
            # Fully connected layer, followed by dropout and ReLU activation
            fc = tf.layers.dense(gmp, self.config.hidden_dim, name='fc1')
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)

            # Classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes, name='fc2')
            self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits), 1)  # predicted class

        with tf.name_scope("optimize"):
            # Loss function: cross-entropy
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)
            # Optimizer
            self.optim = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate).minimize(self.loss)

        with tf.name_scope("accuracy"):
            # Accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), self.y_pred_cls)
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
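As a quick sanity check of the architecture, the sketch below (my own, not part of the repo) builds the graph once and prints the main tensor shapes; it follows the Embedding -> conv -> global max pooling -> fc -> fc pipeline described above:

# Build the graph and inspect shapes (TensorFlow 1.x, as used by this project).
import tensorflow as tf
from cnn_model import TCNNConfig, TextCNN

config = TCNNConfig()
model = TextCNN(config)
print(model.input_x.shape)     # (?, 600)  batch of id sequences
print(model.logits.shape)      # (?, 10)   one unnormalized score per class
print(model.y_pred_cls.shape)  # (?,)      predicted class id per example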

run_cnn.py

The training/testing script.

Training and validation
In a terminal, run python3.5 run_cnn.py train to start training.

If you have trained before, delete tensorboard/textcnn first, so that TensorBoard results from different runs do not overlap.
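For example, a small cleanup helper could look like this (my own sketch; the path matches the tensorboard_dir used in run_cnn.py):

import os
import shutil

# Delete stale TensorBoard logs from a previous run before retraining.
tb_dir = 'tensorboard/textcnn'
if os.path.exists(tb_dir):
    shutil.rmtree(tb_dir)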

Testing
In a terminal, run python3.5 run_cnn.py test to evaluate on the test set.

Under Python 3.7 the script errors out, complaining that self is used before it is defined; under Python 3.5 there is no such error.

Remember to adjust these paths:
base_dir = '/Users/apple/Documents/ST/python/python项目/CNN_RNN_text_classification/cnews'
save_dir = '/Users/apple/Documents/ST/python/python项目/CNN_RNN_text_classification/checkpoints/textcnn'
Download link for the cnews news data folder: https://pan.baidu.com/s/1H3K94E7JGJdIrGuTKBXvIg  password: fmdq

After downloading, put the cnews folder together with the other files.
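One detail worth checking: run_cnn.py imports the loader as from data.cnews_loader import ..., which assumes cnews_loader.py lives in a data/ sub-folder (as in the original repository) rather than next to run_cnn.py. If you keep all three files in one folder as described above, change that line to from cnews_loader import .... For reference, the layout the import expects would look roughly like this (my reading of the repo, an assumption rather than something stated here):

CNN_RNN_text_classification/
    run_cnn.py
    cnn_model.py
    data/
        cnews_loader.py
    cnews/
        cnews.train.txt
        cnews.val.txt
        cnews.test.txt
        cnews.vocab.txt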

The following content comes from: https://pan.baidu.com/s/1PstPh6d-cx5mlMOZF8KMEg  password: 5ikj

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Training and validation:
Run python3.5 run_cnn.py train in a terminal to start training.
If you have trained before, delete tensorboard/textcnn first, so that TensorBoard results from different runs do not overlap.

Testing:
Run python3.5 run_cnn.py test in a terminal to evaluate on the test set.

Python 3.7 raises an error saying that self is used before it is defined; Python 3.5 does not.

Remember to adjust these paths:
base_dir = '/Users/apple/Documents/ST/python/python项目/CNN_RNN_text_classification/cnews'
save_dir = '/Users/apple/Documents/ST/python/python项目/CNN_RNN_text_classification/checkpoints/textcnn'
Download link for the cnews folder: https://pan.baidu.com/s/1H3K94E7JGJdIrGuTKBXvIg  password: fmdq
"""

from __future__ import print_function

import os
import sys
import time
from datetime import timedelta

import numpy as np
import tensorflow as tf
from sklearn import metrics

from cnn_model import TCNNConfig, TextCNN
from data.cnews_loader import read_vocab, read_category, batch_iter, process_file, build_vocab

# base_dir = 'data/cnews'
base_dir = '/Users/apple/Documents/ST/python/python项目/CNN_RNN_text_classification/cnews'
train_dir = os.path.join(base_dir, 'cnews.train.txt')
test_dir = os.path.join(base_dir, 'cnews.test.txt')
val_dir = os.path.join(base_dir, 'cnews.val.txt')
vocab_dir = os.path.join(base_dir, 'cnews.vocab.txt')

save_dir = '/Users/apple/Documents/ST/python/python项目/CNN_RNN_text_classification/checkpoints/textcnn'
save_path = os.path.join(save_dir, 'best_validation')  # path of the best validation checkpoint


def get_time_dif(start_time):
    """Return the elapsed time."""
    end_time = time.time()
    time_dif = end_time - start_time
    return timedelta(seconds=int(round(time_dif)))


def feed_data(x_batch, y_batch, keep_prob):
    feed_dict = {
        model.input_x: x_batch,
        model.input_y: y_batch,
        model.keep_prob: keep_prob
    }
    return feed_dict


def evaluate(sess, x_, y_):
    """Evaluate the loss and accuracy on the given data."""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len
    return total_loss / data_len, total_acc / data_len


def train():
    print("Configuring TensorBoard and Saver...")
    # Configure TensorBoard; when retraining, delete the tensorboard folder first, otherwise the graphs will overlap
    tensorboard_dir = 'tensorboard/textcnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Configure the Saver
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Loading training and validation data...")
    # Load the training and validation sets
    start_time = time.time()
    x_train, y_train = process_file(train_dir, word_to_id, cat_to_id, config.seq_length)
    x_val, y_val = process_file(val_dir, word_to_id, cat_to_id, config.seq_length)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # Create the session
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    print('Training and evaluating...')
    start_time = time.time()
    total_batch = 0  # total batch count
    best_acc_val = 0.0  # best validation accuracy
    last_improved = 0  # batch index of the last improvement
    require_improvement = 1000  # stop training early after more than 1000 batches without improvement

    flag = False
    for epoch in range(config.num_epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter(x_train, y_train, config.batch_size)
        for x_batch, y_batch in batch_train:
            feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob)

            if total_batch % config.save_per_batch == 0:
                # Write the training results to the TensorBoard scalars every save_per_batch batches
                s = session.run(merged_summary, feed_dict=feed_dict)
                writer.add_summary(s, total_batch)

            if total_batch % config.print_per_batch == 0:
                # Print the performance on the training and validation sets every print_per_batch batches
                feed_dict[model.keep_prob] = 1.0
                loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=feed_dict)
                loss_val, acc_val = evaluate(session, x_val, y_val)  # todo

                if acc_val > best_acc_val:
                    # Save the best result
                    best_acc_val = acc_val
                    last_improved = total_batch
                    saver.save(sess=session, save_path=save_path)
                    improved_str = '*'
                else:
                    improved_str = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str))

            session.run(model.optim, feed_dict=feed_dict)  # run the optimizer
            total_batch += 1

            if total_batch - last_improved > require_improvement:
                # Validation accuracy has not improved for a long time: stop training early
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break  # break out of the batch loop
        if flag:  # same as above, break out of the epoch loop
            break


def test():
    print("Loading test data...")
    start_time = time.time()
    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)  # restore the saved model

    print('Testing...')
    loss_test, acc_test = evaluate(session, x_test, y_test)
    msg = 'Test Loss: {0:>6.2}, Test Acc: {1:>7.2%}'
    print(msg.format(loss_test, acc_test))

    batch_size = 128
    data_len = len(x_test)
    num_batch = int((data_len - 1) / batch_size) + 1

    y_test_cls = np.argmax(y_test, 1)
    y_pred_cls = np.zeros(shape=len(x_test), dtype=np.int32)  # store the predictions
    for i in range(num_batch):  # process batch by batch
        start_id = i * batch_size
        end_id = min((i + 1) * batch_size, data_len)
        feed_dict = {
            model.input_x: x_test[start_id:end_id],
            model.keep_prob: 1.0
        }
        y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls, feed_dict=feed_dict)

    # Evaluation
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories))

    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()
Running on CPU on an Apple machine with 16 GB of RAM and a 512 GB disk produced the output below.
A GPU is recommended; otherwise an ordinary computer is far too slow.
appledeMBP:CNN_RNN_text_classification apple$ python3.5 run_cnn.py train
Configuring CNN model...
Configuring TensorBoard and Saver...
Loading training and validation data...
Time usage: 0:00:18
2018-11-25 08:52:08.149886: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2018-11-25 08:52:08.149909: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2018-11-25 08:52:08.149925: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2018-11-25 08:52:08.149930: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
Training and evaluating...
Epoch: 1
Iter:      0, Train Loss:    2.3, Train Acc:   7.81%, Val Loss:    2.3, Val Acc:   9.44%, Time: 0:00:08 *
Iter:    100, Train Loss:   0.79, Train Acc:  81.25%, Val Loss:   0.99, Val Acc:  69.72%, Time: 0:01:21 *
Iter:    200, Train Loss:   0.36, Train Acc:  89.06%, Val Loss:   0.65, Val Acc:  81.68%, Time: 0:02:34 *
Iter:    300, Train Loss:   0.34, Train Acc:  90.62%, Val Loss:   0.42, Val Acc:  88.58%, Time: 0:03:44 *
Iter:    400, Train Loss:   0.28, Train Acc:  90.62%, Val Loss:   0.37, Val Acc:  89.48%, Time: 0:04:54 *
Iter:    500, Train Loss:   0.25, Train Acc:  93.75%, Val Loss:    0.3, Val Acc:  92.16%, Time: 0:06:05 *
Iter:    600, Train Loss:   0.33, Train Acc:  89.06%, Val Loss:   0.31, Val Acc:  91.16%, Time: 0:07:17
Iter:    700, Train Loss:  0.087, Train Acc:  96.88%, Val Loss:   0.28, Val Acc:  91.70%, Time: 0:08:30
Epoch: 2
Iter:    800, Train Loss:   0.11, Train Acc:  96.88%, Val Loss:   0.27, Val Acc:  91.68%, Time: 0:09:40
Iter:    900, Train Loss:  0.031, Train Acc:  98.44%, Val Loss:   0.22, Val Acc:  93.68%, Time: 0:10:51 *
Iter:   1000, Train Loss:   0.15, Train Acc:  93.75%, Val Loss:   0.23, Val Acc:  93.64%, Time: 0:12:04
Iter:   1100, Train Loss:    0.2, Train Acc:  95.31%, Val Loss:   0.24, Val Acc:  92.46%, Time: 0:13:15
Iter:   1200, Train Loss:  0.048, Train Acc: 100.00%, Val Loss:   0.19, Val Acc:  95.02%, Time: 0:14:26 *
Iter:   1300, Train Loss:   0.08, Train Acc:  96.88%, Val Loss:    0.2, Val Acc:  94.60%, Time: 0:15:37
Iter:   1400, Train Loss:   0.14, Train Acc:  95.31%, Val Loss:   0.24, Val Acc:  92.78%, Time: 0:16:47
Iter:   1500, Train Loss:   0.11, Train Acc:  96.88%, Val Loss:   0.22, Val Acc:  94.36%, Time: 0:17:57
Epoch: 3
Iter:   1600, Train Loss:  0.049, Train Acc:  98.44%, Val Loss:    0.2, Val Acc:  94.72%, Time: 0:19:07
Iter:   1700, Train Loss:   0.13, Train Acc:  96.88%, Val Loss:   0.23, Val Acc:  92.84%, Time: 0:20:22
Iter:   1800, Train Loss:  0.062, Train Acc:  98.44%, Val Loss:   0.19, Val Acc:  94.98%, Time: 0:21:35
Iter:   1900, Train Loss:  0.031, Train Acc: 100.00%, Val Loss:   0.22, Val Acc:  93.82%, Time: 0:22:48
Iter:   2000, Train Loss:  0.094, Train Acc:  95.31%, Val Loss:   0.24, Val Acc:  93.66%, Time: 0:23:59
Iter:   2100, Train Loss:  0.063, Train Acc:  96.88%, Val Loss:   0.22, Val Acc:  94.10%, Time: 0:25:11
Iter:   2200, Train Loss:  0.049, Train Acc:  98.44%, Val Loss:   0.24, Val Acc:  92.64%, Time: 0:26:21
No optimization for a long time, auto-stopping...
appledeMBP:CNN_RNN_text_classification apple$

Testing

Run python run_cnn.py test to evaluate on the test set. The output:

Accuracy on the test set reaches 96.04%, and the precision, recall, and f1-score of every class are above 0.9.

The confusion matrix also shows that the classification performance is very good.


Configuring CNN model...
Loading test data...
Testing...
Test Loss:   0.14, Test Acc:  96.04%
Precision, Recall and F1-Score...
             precision    recall  f1-score   support

         体育       0.99      0.99      0.99      1000
         财经       0.96      0.99      0.97      1000
         房产       1.00      1.00      1.00      1000
         家居       0.95      0.91      0.93      1000
         教育       0.95      0.89      0.92      1000
         科技       0.94      0.97      0.95      1000
         时尚       0.95      0.97      0.96      1000
         时政       0.94      0.94      0.94      1000
         游戏       0.97      0.96      0.97      1000
         娱乐       0.95      0.98      0.97      1000

avg / total       0.96      0.96      0.96     10000

Confusion Matrix...
[[991   0   0   0   2   1   0   4   1   1]
 [  0 992   0   0   2   1   0   5   0   0]
 [  0   1 996   0   1   1   0   0   0   1]
 [  0  14   0 912   7  15   9  29   3  11]
 [  2   9   0  12 892  22  18  21  10  14]
 [  0   0   0  10   1 968   4   3  12   2]
 [  1   0   0   9   4   4 971   0   2   9]
 [  1  16   0   4  18  12   1 941   1   6]
 [  2   4   1   5   4   5  10   1 962   6]
 [  1   0   1   6   4   3   5   0   1 979]]
Time usage: 0:00:05
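As a sanity check, the per-class precision and recall in the report can be recomputed directly from the confusion matrix; a minimal sketch of my own (the matrix is copied from the output above):

import numpy as np

# Rows are true classes, columns are predicted classes, in the order
# 体育, 财经, 房产, 家居, 教育, 科技, 时尚, 时政, 游戏, 娱乐.
cm = np.array([
    [991,   0,   0,   0,   2,   1,   0,   4,   1,   1],
    [  0, 992,   0,   0,   2,   1,   0,   5,   0,   0],
    [  0,   1, 996,   0,   1,   1,   0,   0,   0,   1],
    [  0,  14,   0, 912,   7,  15,   9,  29,   3,  11],
    [  2,   9,   0,  12, 892,  22,  18,  21,  10,  14],
    [  0,   0,   0,  10,   1, 968,   4,   3,  12,   2],
    [  1,   0,   0,   9,   4,   4, 971,   0,   2,   9],
    [  1,  16,   0,   4,  18,  12,   1, 941,   1,   6],
    [  2,   4,   1,   5,   4,   5,  10,   1, 962,   6],
    [  1,   0,   1,   6,   4,   3,   5,   0,   1, 979],
])

diag = np.diag(cm)
precision = diag / cm.sum(axis=0)  # correct / everything predicted as that class
recall = diag / cm.sum(axis=1)     # correct / everything truly in that class
accuracy = diag.sum() / cm.sum()   # 9604 / 10000 = 0.9604, matching Test Acc above
print(precision.round(2), recall.round(2), accuracy)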
