BosonNLP情感词典评论情感分析

from snownlp import SnowNLP
import pandas as pd
from collections import defaultdict
import os
import re
import jieba
import codecs
'''
#读取评论内容的.txt文件
txt = open('C:/Users/24224/Desktop/1.txt',encoding='utf-8')
text = txt.readlines()
print(text)#确认读取文件成功，并关闭文件节省资源
print('读入成功')
txt.close()#遍历每一条评论，得到每条评论是positive文本的概率，每条评论计算完成后输出ok确认执行成功
comments = []
comments_score = []
for i in text:a1 = SnowNLP(i)a2 = a1.sentimentscomments.append(i)comments_score.append(a2)print('ok')#将结果数据框存为.xlsx表格，查看结果及分布
table = pd.DataFrame(comments, comments_score)
print(table)
table.to_excel('C:/Users/24224/Desktop/emotion_analyse.xlsx', sheet_name='result')
#打分范围是[0-1]，此次定义[0,0.5]为负向评论，(0.5,1]为正向评论，观察其分布。#基于波森情感词典计算情感值
def getscore(text):df = pd.read_table(r"BosonNLP_sentiment_score\BosonNLP_sentiment_score.txt", sep=" ", names=['key', 'score'])key = df['key'].values.tolist()score = df['score'].values.tolist()# jieba分词segs = jieba.lcut(text,cut_all = False) #返回list# 计算得分score_list = [score[key.index(x)] for x in segs if(x in key)]return sum(score_list)#读取文件
def read_txt(filename):with open(filename,'r',encoding='utf-8')as f:txt = f.read()return txt
#写入文件
def write_data(filename,data):with open(filename,'a',encoding='utf-8')as f:f.write(data)if __name__=='__main__':text = read_txt('C:/Users/24224/Desktop/1.txt')lists  = text.split('\n')i = 0for list in lists:if list  != '':sentiments = round(getscore(list),2)#情感值为正数，表示积极；为负数表示消极print(list)print("情感值：",sentiments)if sentiments > 0:print("机器标注情感倾向：积极\n")s = "机器判断情感倾向：积极\n"else:print('机器标注情感倾向：消极\n')s = "机器判断情感倾向：消极"+'\n'sentiment = '情感值：'+str(sentiments)+'\n'#文件写入filename = 'BosonNLP情感分析结果.txt'write_data(filename,'情感分析文本：')write_data(filename,list+'\n') #写入待处理文本write_data(filename,sentiment) #写入情感值#write_data(filename,al_sentiment) #写入机器判断情感倾向write_data(filename,s+'\n') #写入人工标注情感i = i+1
'''
# Build the working stop-word table. Negation words and degree adverbs must
# survive stop-word filtering, so they are removed from the raw stop-word list
# before it is written back out as 'stopwords.txt'.
stopwords = set()
with open('停用词.txt', 'r', encoding='utf-8') as fr:
    for word in fr:
        # strip() drops surrounding whitespace, including the trailing newline.
        stopwords.add(word.strip())

# Read the negation-word file (one word per line).
with open('否定词.txt', 'r', encoding='utf-8') as not_word_file:
    not_word_list = [w.strip() for w in not_word_file]

# Read the degree-adverb file and keep only the text before the first comma.
# The extra strip() keeps a line that has no comma from retaining its newline,
# which would otherwise prevent it from ever matching a stop word.
with open('程度副词.txt', 'r', encoding='utf-8') as degree_file:
    degree_list = [item.split(',')[0].strip() for item in degree_file]

# Write the new stop-word table: every raw stop word that is neither a
# negation word nor a degree adverb. A set makes each membership test O(1).
_protected_words = set(not_word_list) | set(degree_list)
with open('stopwords.txt', 'w', encoding='utf-8') as f:
    for word in stopwords:
        if word not in _protected_words:
            f.write(word + '\n')
# Tokenise with jieba, then drop stop words.
def seg_word(sentence):
    """Split *sentence* into tokens with jieba and remove stop words.

    The stop-word list is read from 'stopwords.txt' (one word per line) on
    every call.

    Args:
        sentence: The text to tokenise.

    Returns:
        A list of the tokens produced by ``jieba.cut`` that are not in the
        stop-word file, in their original order.
    """
    with open('stopwords.txt', 'r', encoding='utf-8') as fr:
        stopwords = {line.strip() for line in fr}
    return [token for token in jieba.cut(sentence) if token not in stopwords]


# Find the sentiment words, negation words and degree adverbs in the text.
def classify_words(word_list):
    """Classify each token as a sentiment word, negation word or degree adverb.

    Three resource files are read on every call:

    * the BosonNLP lexicon, where a line is used only if splitting it on a
      single space yields exactly two fields (word, score);
    * '否定词.txt', one negation word per line;
    * '程度副词.txt', comma-separated lines whose first field is the adverb.
      The second field is taken as its weight (assumed numeric, because
      score_sentiment passes it to float() -- confirm against the data file).
      Lines without a comma are skipped.

    A token is matched with this priority: degree adverb, then negation word,
    then sentiment word. Tokens matching none of them are ignored.

    Args:
        word_list: Sequence of tokens, e.g. the result of seg_word().

    Returns:
        A tuple ``(sen_word, not_word, degree_word)`` of dicts keyed by the
        token's position in *word_list*. ``sen_word`` maps to the score string
        from the lexicon, ``not_word`` maps to ``-1`` and ``degree_word`` maps
        to the weight string from the degree-adverb file.
    """
    # Raw string: the path contains a backslash followed by 'B', which is an
    # invalid escape sequence in a normal literal. The runtime value is the
    # same as before.
    sen_dict = {}
    with open(r'BosonNLP_sentiment_score\BosonNLP_sentiment_score.txt', 'r',
              encoding='utf-8') as sen_file:
        for line in sen_file:
            fields = line.split(' ')
            if len(fields) == 2:
                sen_dict[fields[0]] = fields[1].strip()

    # Strip each line; with the trailing newline left on, no token could ever
    # compare equal to a negation word.
    with open('否定词.txt', 'r', encoding='utf-8') as not_word_file:
        not_words = {line.strip() for line in not_word_file}
    not_words.discard('')

    # Store the weight (second field) as the value, not the adverb itself, so
    # that the scorer can convert it with float().
    degree_dict = {}
    with open('程度副词.txt', 'r', encoding='utf-8') as degree_file:
        for line in degree_file:
            fields = line.strip().split(',')
            if len(fields) >= 2:
                degree_dict[fields[0].strip()] = fields[1].strip()

    sen_word = {}
    not_word = {}
    degree_word = {}
    for index, word in enumerate(word_list):
        if word in degree_dict:
            degree_word[index] = degree_dict[word]
        elif word in not_words:
            not_word[index] = -1
        elif word in sen_dict:
            sen_word[index] = sen_dict[word]
    return sen_word, not_word, degree_word


# Compute the score of the sentiment words.
def score_sentiment(sen_word, not_word, degree_word, seg_result):
    """Sum the weighted scores of the sentiment words in a tokenised sentence.

    Each sentiment word's score is multiplied by a weight built from the
    modifiers that lie between the previous sentiment word (or the start of
    the sentence) and the word itself: every negation word flips the sign and
    every degree adverb multiplies by its weight. The weight starts again at 1
    for each sentiment word.

    This differs from the previous implementation in two ways. Modifiers in
    front of the first sentiment word are now applied, so "not good" no longer
    scores positive. A modifier also no longer carries over to every later
    sentiment word in the sentence.

    Args:
        sen_word: Dict mapping token position to sentiment score (anything
            float() accepts).
        not_word: Dict whose keys are the positions of negation words.
        degree_word: Dict mapping token position to degree weight (anything
            float() accepts).
        seg_result: The token list; only its length is used, to bound the
            positions that are visited.

    Returns:
        The total score. This is the int ``0`` when no position in
        *seg_result* is a sentiment word, otherwise a float.

    Raises:
        ValueError: If a score or degree weight cannot be converted by float().
    """
    score = 0
    # First position not yet consumed by an earlier sentiment word.
    window_start = 0
    for position in range(len(seg_result)):
        if position not in sen_word:
            continue
        weight = 1
        for j in range(window_start, position):
            if j in not_word:
                weight *= -1
            elif j in degree_word:
                weight *= float(degree_word[j])
        score += weight * float(sen_word[position])
        window_start = position + 1
    return score


# Compute the score.
def sentiment_score(sentence):
    """Return the sentiment score of *sentence*.

    The sentence is tokenised with seg_word(), the tokens are sorted into
    sentiment words, negation words and degree adverbs by classify_words(),
    and the result is scored by score_sentiment().
    """
    tokens = seg_word(sentence)
    # classify_words returns (sen_word, not_word, degree_word), which are the
    # first three positional arguments of score_sentiment.
    return score_sentiment(*classify_words(tokens), tokens)
# Read a file.
def read_txt(filename):
    """Return the entire contents of the UTF-8 text file *filename*."""
    with open(filename, mode='r', encoding='utf-8') as source:
        return source.read()
def write_data(filename, data):
    """Append the string *data* to *filename*, encoded as UTF-8.

    The file is opened in append mode, so it is created if it does not exist
    and existing content is kept.
    """
    with open(filename, mode='a', encoding='utf-8') as sink:
        sink.write(data)
# Compute sentiment values with the BosonNLP sentiment lexicon.
text = read_txt('C:/Users/24224/Desktop/1.txt')
lists = text.split('\n')
i = 0  # number of non-empty comments processed so far
for l in lists:
    if l == '':
        continue

    # A positive value is reported as positive sentiment; anything else as
    # negative.
    sentiments = sentiment_score(l)
    print("情感值：", sentiments)
    print(l)
    if sentiments > 0:
        console_label = "机器标注情感倾向：积极\n"
        s = "机器判断情感倾向：积极\n"
    else:
        console_label = '机器标注情感倾向：消极\n'
        s = "机器判断情感倾向：消极\n"
    print(console_label)
    sentiment = '情感值：' + str(sentiments) + '\n'

    # Append one record to the result file: a header, the analysed text, its
    # sentiment value, and the machine-judged polarity followed by a blank
    # line.
    filename = 'BosonNLP情感分析结果.txt'
    for piece in ('情感分析文本：', l + '\n', sentiment, s + '\n'):
        write_data(filename, piece)
    i += 1

BosonNLP情感词典评论情感分析相关推荐

基于情感词典的情感分析
思路以及代码都来源于下面两篇文章: 一个不知死活的胖子:Python做文本情感分析之情感极性分析 Ran Fengzheng 的博客:基于情感词典的文本情感极性分析相关代码基于情感词典的情感分析应该 ...
[Python人工智能] 二十二.基于大连理工情感词典的情感分析和情绪计算
从本专栏开始,作者正式研究Python深度学习.神经网络及人工智能相关知识.前一篇文章分享了CNN实现中文文本分类的过程,并与贝叶斯.决策树.逻辑回归.随机森林.KNN.SVM等分类算法进行对比.这篇 ...
python 基于情感词典的情感分析之乐，惧，惊，哀，恶，怒和未知七种情感分析
背景情感分析是通过计算技术对文本内容的主观客观性.情绪等挖掘分析,对文本的情感偏向做出判断.目的是识别出文本中的具体情感分类,之前做文本分类都是通过深度学习或者机器学习进行文本分类,但是需要进行数据 ...
python情感词典计算得分_基于情感词典的情感打分
原理我就不讲了,请移步下面这篇论文,包括情感词典的构建(各位读者可以根据自己的需求稍作简化),以及打分策略(程序对原论文稍有改动). 论文在这里下载:基于情感词典的中文微博情感倾向性研究 (大家可以上 ...
基于情感词典的情感打分
原理我就不讲了,请移步下面这篇论文,包括情感词典的构建(各位读者可以根据自己的需求稍作简化),以及打分策略(程序对原论文稍有改动). 论文在这里下载:基于情感词典的中文微博情感倾向性研究 (大家可以 ...
基于情感词典的情感值分析
#utils.py import reclass ToolGeneral():"""Tool function"""def is_odd(s ...
实体词典情感词典_人工智能技术落地：情感分析概述
从自然语言处理技术的角度来看,情感分析的任务是从评论的文本中提取出评论的实体,以及评论者对该实体所表达的情感倾向,自然语言所有的核心技术问题.因此,情感分析被认为是一个自然语言处理的子任务. 情感分析 ...
基于情感词典的python情感分析！它居然比我还懂我女友！
近期老师给我们安排了一个大作业,要求根据情感词典对微博语料进行情感分析.于是在网上狂找资料,看相关书籍,终于搞出了这个任务.现在做做笔记,总结一下本次的任务,同时也给遇到有同样需求的人,提供一点帮助. ...
基于python的情感分析案例-基于情感词典的python情感分析
Python Python开发 Python语言基于情感词典的python情感分析近期老师给我们安排了一个大作业,要求根据情感词典对微博语料进行情感分析.于是在网上狂找资料,看相关书籍,终于搞出了 ...
Selenium爬取京东商品评价，并进行基于情感词典的文本情感极性分析
Selenium爬取京东商品评价,并进行基于情感词典的文本情感极性分析 1. 介绍及开发环境 2. 爬虫实现 2.1 请求构造 2.2 提取信息 2.3 数据存储 2.4 运行结果 3. 文本情感分析 ...

BosonNLP情感词典评论情感分析

BosonNLP情感词典评论情感分析相关推荐

最新文章

热门文章

BosonNLP情感词典 评论情感分析

BosonNLP情感词典 评论情感分析相关推荐

最新文章

热门文章

BosonNLP情感词典评论情感分析

BosonNLP情感词典评论情感分析相关推荐