python中文文本情感分析

导语

最近做的一个项目需要用到中文文本情感分析。我查阅了多种资料，在网上看了很多博客之后终于完成。对我帮助最大的两篇博客是《【python机器学习】中文情感分析》和《Python开发之Sklearn的模型和CountVectorizer Transformer保存和使用》（主要参考其中模型的加载与保存部分）。之后又沿用之前手写数字识别中的做法，分别采用 svm、决策树、朴素贝叶斯、knn 等算法训练模型，数据集为 data1.csv。

训练模型保存并测试正确率

import os
import pickle

import jieba
import joblib
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
# Multinomial (discrete-feature) naive Bayes
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from snownlp import SnowNLP
def make_label(star):
    """Convert a star rating into a binary sentiment label.

    Args:
        star: Numeric rating of a review.

    Returns:
        1 when ``star`` is greater than 3, otherwise 0.
    """
    return 1 if star > 3 else 0


def snow_result(comment):
    """Classify a comment using SnowNLP's sentiment score.

    Args:
        comment: Text handed to ``SnowNLP``.

    Returns:
        1 when ``SnowNLP(comment).sentiments`` is at least 0.5, otherwise 0.
    """
    return 1 if SnowNLP(comment).sentiments >= 0.5 else 0


# Word segmentation with jieba
def chinese_word_cut(mytext):
    """Segment text with jieba and join the tokens with single spaces.

    Args:
        mytext: Text to segment.

    Returns:
        A string made of the tokens yielded by ``jieba.cut`` separated by
        a space.
    """
    return " ".join(jieba.cut(mytext))


def get_custom_stopwords(stop_words_file):
    """Load a stop-word list from a UTF-8 text file, one word per line.

    Args:
        stop_words_file: Path of the stop-word file.

    Returns:
        A list with one entry per non-blank line, with surrounding
        whitespace removed.
    """
    with open(stop_words_file, 'r', encoding='UTF-8') as f:
        # Blank lines (for example a trailing newline) would otherwise
        # produce empty-string "stop words".
        return [word for word in (line.strip() for line in f) if word]


def nb_mode_train(x_train, y_train, Vectorizer):
    """Fit a multinomial naive Bayes model and persist it with its vocabulary.

    The fitted model is written to ``testModel/nb.pkl`` with joblib and
    ``Vectorizer.vocabulary_`` is pickled to ``testFeature/nb.pkl``.

    Args:
        x_train: Training feature matrix passed to ``MultinomialNB.fit``.
        y_train: Training labels passed to ``MultinomialNB.fit``.
        Vectorizer: Fitted vectorizer exposing a ``vocabulary_`` attribute.

    Returns:
        The fitted ``MultinomialNB`` instance.
    """
    nb = MultinomialNB()
    nb.fit(x_train, y_train)

    # exist_ok avoids the race between checking for and creating the directory.
    model_dir = 'testModel'
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(nb, os.path.join(model_dir, 'nb.pkl'))

    feature_dir = 'testFeature'
    os.makedirs(feature_dir, exist_ok=True)
    with open(os.path.join(feature_dir, 'nb.pkl'), 'wb') as fw:
        pickle.dump(Vectorizer.vocabulary_, fw)
    return nb


# Train a model with SVM
def svm_model_train(x_train, y_train, Vectorizer):
    """Fit a linear SVM classifier and persist it with its vocabulary.

    The fitted model is written to ``testModel/svm_model.pkl`` with joblib
    and ``Vectorizer.vocabulary_`` is pickled to
    ``testFeature/svm_model.pkl``.

    Args:
        x_train: Training feature matrix passed to ``LinearSVC.fit``.
        y_train: Training labels passed to ``LinearSVC.fit``.
        Vectorizer: Fitted vectorizer exposing a ``vocabulary_`` attribute.

    Returns:
        The fitted ``svm.LinearSVC`` instance.
    """
    svm_model = svm.LinearSVC()
    svm_model.fit(x_train, y_train)

    # exist_ok avoids the race between checking for and creating the directory.
    model_dir = 'testModel'
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(svm_model, os.path.join(model_dir, 'svm_model.pkl'))

    feature_dir = 'testFeature'
    os.makedirs(feature_dir, exist_ok=True)
    with open(os.path.join(feature_dir, 'svm_model.pkl'), 'wb') as fw:
        pickle.dump(Vectorizer.vocabulary_, fw)
    return svm_model


# Train a model with the decision-tree algorithm
def tree_model_train(x_train, y_train, Vectorizer):
    """Fit an entropy-criterion decision tree and persist it with its vocabulary.

    The fitted model is written to ``testModel/tree_model.pkl`` with joblib
    and ``Vectorizer.vocabulary_`` is pickled to
    ``testFeature/tree_model.pkl``.

    Args:
        x_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        Vectorizer: Fitted vectorizer exposing a ``vocabulary_`` attribute.

    Returns:
        The fitted ``DecisionTreeClassifier`` instance.
    """
    tree_model = DecisionTreeClassifier(criterion="entropy")
    tree_model.fit(x_train, y_train)

    # exist_ok avoids the race between checking for and creating the directory.
    model_dir = 'testModel'
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(tree_model, os.path.join(model_dir, 'tree_model.pkl'))

    feature_dir = 'testFeature'
    os.makedirs(feature_dir, exist_ok=True)
    with open(os.path.join(feature_dir, 'tree_model.pkl'), 'wb') as fw:
        pickle.dump(Vectorizer.vocabulary_, fw)
    return tree_model


# Train a model with the KNN algorithm
def knn_model_train(x_train, y_train, Vectorizer):
    """Fit a 3-nearest-neighbours classifier and persist it with its vocabulary.

    The fitted model is written to ``testModel/knn_model.pkl`` with joblib
    and ``Vectorizer.vocabulary_`` is pickled to
    ``testFeature/knn_model.pkl``.

    Args:
        x_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        Vectorizer: Fitted vectorizer exposing a ``vocabulary_`` attribute.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    knn_model = KNeighborsClassifier(n_neighbors=3)
    knn_model.fit(x_train, y_train)

    # exist_ok avoids the race between checking for and creating the directory.
    model_dir = 'testModel'
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(knn_model, os.path.join(model_dir, 'knn_model.pkl'))

    feature_dir = 'testFeature'
    os.makedirs(feature_dir, exist_ok=True)
    with open(os.path.join(feature_dir, 'knn_model.pkl'), 'wb') as fw:
        pickle.dump(Vectorizer.vocabulary_, fw)
    return knn_model


if __name__ == '__main__':
    # Reads the 'star' and 'comment' columns of data1.csv.
    data = pd.read_csv('data1.csv')
    data['sentiment'] = data.star.apply(make_label)
    data['snlp_result'] = data.comment.apply(snow_result)

    # Baseline: share of rows where SnowNLP agrees with the star-derived label.
    # Columns are compared by name so the result does not depend on the
    # column order of the CSV.
    snlp_accuracy = float((data['sentiment'] == data['snlp_result']).mean())
    print("snowNLP构建模型测试准确率", snlp_accuracy)

    # Segment every comment with jieba.
    data['cut_comment'] = data.comment.apply(chinese_word_cut)

    # Split into training and test sets (20% held out, fixed seed).
    X = data['cut_comment']
    y = data.sentiment
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=19113122)

    # Load the stop-word list.
    stop_words_file = '哈工大停用词表.txt'
    stopwords = get_custom_stopwords(stop_words_file)

    Vectorizer = CountVectorizer(
        max_df=0.8,
        min_df=3,
        token_pattern=r'(?u)\b[^\d\W]\w+\b',
        stop_words=frozenset(stopwords),
    )
    # Fit on the training split only; the test split reuses that vocabulary.
    X_train_vect = Vectorizer.fit_transform(X_train)
    X_test_vect = Vectorizer.transform(X_test)

    # Train each model (which also saves the model and the vocabulary),
    # then report its accuracy on the held-out test set.
    trainers = (
        ('朴素贝叶斯构建模型测试集测试准确率：', nb_mode_train),
        ('svm构建模型测试集测试准确率：', svm_model_train),
        ('决策树构建模型测试集测试准确率：', tree_model_train),
        ('knn构建模型测试集测试准确率：', knn_model_train),
    )
    for label, train in trainers:
        model = train(X_train_vect, y_train, Vectorizer)
        z = model.predict(X_test_vect)
        print(label, np.sum(z == y_test) / z.size)

使用保存的模型

import pickle

import jieba
import joblib
from sklearn.feature_extraction.text import CountVectorizer


def chinese_word_cut(mytext):
    """Segment text with jieba and join the tokens with single spaces.

    Args:
        mytext: Text to segment.

    Returns:
        A string made of the tokens yielded by ``jieba.cut`` separated by
        a space.
    """
    return " ".join(jieba.cut(mytext))


def get_custom_stopwords(stop_words_file):
    """Load a stop-word list from a UTF-8 text file, one word per line.

    Args:
        stop_words_file: Path of the stop-word file.

    Returns:
        A list with one entry per non-blank line, with surrounding
        whitespace removed.
    """
    with open(stop_words_file, 'r', encoding='UTF-8') as f:
        # Blank lines (for example a trailing newline) would otherwise
        # produce empty-string "stop words".
        return [word for word in (line.strip() for line in f) if word]


# Prepare the text data
def content_handler(content, Vectorizer):
    """Segment a single text and turn it into a feature vector.

    Args:
        content: Raw text to classify.
        Vectorizer: Object whose ``transform`` method accepts a list of
            space-separated token strings.

    Returns:
        Whatever ``Vectorizer.transform`` returns for the one-document list.
    """
    segmented = chinese_word_cut(content)
    # transform is given a list of documents, so wrap the single text.
    return Vectorizer.transform([segmented])


# Use the saved model
def useModel(model_name, feature_name, content):
    """Load a saved model and vectorise a text with its saved vocabulary.

    Args:
        model_name: File name of the joblib model inside ``testModel``.
        feature_name: File name of the pickled vocabulary inside
            ``testFeature``.
        content: Raw text to vectorise.

    Returns:
        A ``(model, vector)`` tuple: the object loaded by ``joblib.load``
        and the result of ``content_handler`` for ``content``.
    """
    model_path = 'testModel' + '/' + model_name
    nb = joblib.load(model_path)

    feature_path = 'testFeature' + '/' + feature_name
    # NOTE(security): joblib.load and pickle.load can execute arbitrary code;
    # only load files produced by the training script.
    with open(feature_path, "rb") as fr:
        vocabulary = pickle.load(fr)
    Vectorizer = CountVectorizer(decode_error="replace", vocabulary=vocabulary)
    return nb, content_handler(content, Vectorizer)


if __name__ == '__main__':
    content = '开心有时也很容易啊，比如刚到车站车就来了，随机播放正好是最近喜欢的歌，还有今天的风真舒服。'
    nb, content_vec = useModel('nb.pkl', 'nb.pkl', content)
    result = nb.predict(content_vec)
    print(result)
    print('___________________')
    print("朴素贝叶斯")
    # A single document was vectorised, so index row 0 and column 1
    # explicitly instead of calling float() on a 1-element array slice.
    print(float(nb.predict_proba(content_vec)[0, 1]))

python中文文本情感分析相关推荐

python 文本分析库_Python有趣|中文文本情感分析
前言前文给大家说了python机器学习的路径,这光说不练假把式,这次,罗罗攀就带大家完成一个中文文本情感分析的机器学习项目,今天的流程如下: 数据情况和处理数据情况这里的数据为大众点评上的评论数 ...
NLP之情感分析：基于python编程(jieba库)实现中文文本情感分析(得到的是情感评分)之全部代码
NLP之情感分析:基于python编程(jieba库)实现中文文本情感分析(得到的是情感评分)之全部代码目录全部代码相关文章 NLP之情感分析:基于python编程(jieba库)实现中文文本情 ...
python情感分析模型_Python有趣|中文文本情感分析
前言前文给大家说了python机器学习的路径,这光说不练假把式,这次,罗罗攀就带大家完成一个中文文本情感分析的机器学习项目,今天的流程如下: 数据情况和处理数据情况这里的数据为大众点评上的评论数 ...
python情感分析中文_Python有趣|中文文本情感分析
前言前文给大家说了python机器学习的路径,这光说不练假把式,这次,罗罗攀就带大家完成一个中文文本情感分析的机器学习项目,今天的流程如下: 数据情况和处理数据情况这里的数据为大众点评上的评论数 ...
python中文文本分析_Python有趣|中文文本情感分析
前言前文给大家说了python机器学习的路径,这光说不练假把式,这次,罗罗攀就带大家完成一个中文文本情感分析的机器学习项目,今天的流程如下: 数据情况和处理数据情况这里的数据为大众点评上的评论数 ...
NLP之TEA：基于python编程(jieba库)实现中文文本情感分析(得到的是情感评分)之全部代码
NLP之TEA:基于python编程(jieba库)实现中文文本情感分析(得到的是情感评分)之全部代码目录全部代码相关文章 NLP之TEA:基于python编程(jieba库)实现中文文本情感分 ...
NLP之TEA：基于python编程(jieba库)实现中文文本情感分析(得到的是情感评分)
NLP之TEA:基于python编程(jieba库)实现中文文本情感分析(得到的是情感评分) 目录输出结果设计思路相关资料 1.关于代码 2.关于数据集关于留言 1.留言内容的注意事项 2.如 ...
Python：snownlp中文文本情感分析
hello,大家好,我是wangzirui32,今天来教大家如何使用snownlp的中文文本情感分析功能,开始学习吧! 1. pip 安装命令: pip install snownlp -i htt ...
snownlp中文文本情感分析详细教程
hello,大家好,我是wangzirui32,今天来教大家如何使用snownlp的中文文本情感分析功能,开始学习吧! 1. pip 安装命令: pip install snownlp -i htt ...

python中文文本情感分析

目录

python中文文本情感分析

导语

训练模型保存并测试正确率

使用保存的模型

python中文文本情感分析相关推荐

最新文章

热门文章