文本分析用GUI界面显示

请结合面向对象的概念，

结合wordcloud将《红楼梦》、《水浒传》、《三国演义》分别绘制主要人物的词云图（按照人物出现的频率）

分别统计《红楼梦》、《水浒传》、《三国演义》前20个主要人物的出场次数，并绘制出场次数的统计图

结合networkx绘制《红楼梦》、《水浒传》、《三国演义》主要人物的社交关系网络图

学校的一道实验作业，这里直接给代码了

# -*- coding: utf-8 -*-
"""
Created on Sat Dec  5 14:24:49 2020
@author: 散修涵
"""
import os,sys
import re
import time
from tkinter import *
from tkinter import ttk

import jieba
import jieba.posseg as psg
from jieba import analyse
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import wordcloud
from imageio import imread
from PIL import Image, ImageTk
from PIL.ImageTk import PhotoImage
"""
文本分析类
"""
class Txtanalysiz:
    """Word-frequency analysis and word-cloud rendering for one text file.

    Constructing an instance immediately runs the whole pipeline (see
    ``main``): read the text, write its 20 most frequent words to
    ``<pathtxt without extension>_词频.txt`` and render a word cloud from
    that frequency file.
    """

    # Default resource locations; override them on a subclass when the
    # project lives somewhere else.
    STOPWORDS_PATH = r"F:\实验\文本分析_python实验\停用表.txt"
    MASK_PATH = r"F:\实验\文本分析_python实验\star.jpg"
    FONT_PATH = r'C:\Windows\Fonts\simhei.ttf'

    def __init__(self, pathtxt):
        """Store the path of the text to analyse and run the pipeline.

        Args:
            pathtxt: Path of the UTF-8 encoded text file.
        """
        self.pathtxt = pathtxt
        self.main()

    def getText(self, path):
        """Return the full content of the UTF-8 text file at ``path``."""
        with open(path, "r", encoding='utf-8') as f:
            return f.read()

    def stopwordslist(self, path1):
        """Return the stop words stored one per line in ``path1``.

        Every line is stripped of surrounding whitespace; blank lines
        therefore yield empty strings in the returned list.
        """
        with open(path1, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f]

    def wordFrep(self, path, text, topn):
        """Write the ``topn`` most frequent words of ``text`` to disk.

        The text is segmented with jieba; single-character tokens and stop
        words are ignored. The result is written as ``word<TAB>count``
        lines to ``<path without extension>_词频.txt``.

        Args:
            path: Path of the analysed text; only used to derive the name
                of the output file.
            text: The text to analyse.
            topn: Maximum number of words to write. Fewer are written when
                the text contains fewer distinct words.
        """
        # A set makes the per-token stop-word test O(1).
        stopwords = set(self.stopwordslist(self.STOPWORDS_PATH))
        counts = {}
        for word in jieba.lcut(text.strip()):
            if len(word) == 1 or word in stopwords:
                continue
            counts[word] = counts.get(word, 0) + 1
        items = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        # Explicit encoding so the file can be read back on any locale.
        with open(self._stem(path) + '_词频.txt', "w", encoding='utf-8') as f:
            f.writelines("{}\t{}\n".format(word, count)
                         for word, count in items[:topn])

    def express(self, path2, path3):
        """Render, save and show a word cloud.

        Args:
            path2: Frequency file produced by ``wordFrep``.
            path3: Image used as the mask (shape) of the cloud.

        The picture is saved as ``<pathtxt without extension>cloud_star.png``
        and then shown in a matplotlib window.
        """
        bg_pic = imread(path3)
        # Feed the recorded counts to the cloud so that word size follows
        # frequency; generating from the raw file text would see every word
        # exactly once.
        frequencies = self._read_frequencies(path2)
        wcloud = wordcloud.WordCloud(
            font_path=self.FONT_PATH,
            background_color="white",
            width=1000,
            max_words=500,
            mask=bg_pic,
            height=860,
            margin=2,
        ).generate_from_frequencies(frequencies)
        wcloud.to_file("{}cloud_star.png".format(self._stem(self.pathtxt)))
        plt.imshow(wcloud)
        plt.axis('off')
        plt.show()

    def main(self):
        """Run the pipeline: read text, count words, draw the word cloud."""
        text = self.getText(self.pathtxt)
        self.wordFrep(self.pathtxt, text, 20)
        self.express(self._stem(self.pathtxt) + '_词频.txt', self.MASK_PATH)

    @staticmethod
    def _stem(path):
        """Return ``path`` without its file extension."""
        return os.path.splitext(path)[0]

    @staticmethod
    def _read_frequencies(path):
        """Parse a ``word<TAB>count`` file into a ``{word: count}`` dict.

        Lines without a tab, without a word or with a non-integer count are
        skipped.
        """
        frequencies = {}
        with open(path, "r", encoding='utf-8') as f:
            for line in f:
                word, sep, count = line.rstrip("\r\n").rpartition("\t")
                if not sep or not word:
                    continue
                try:
                    frequencies[word] = int(count)
                except ValueError:
                    continue
        return frequencies
'''
人物出场统计类
'''
class Tongji:
    """Per-chapter appearance statistics for the characters of a novel.

    Constructing an instance immediately reads the novel, splits it into
    chapters and saves one line chart per character (see ``divide``).
    """

    # Path templates; override them on a subclass when the project lives
    # somewhere else. TEXT_PATH takes the novel name, FIGURE_PATH takes the
    # novel name and the character name.
    TEXT_PATH = r'F:\实验\文本分析_python实验\{}.txt'
    FIGURE_PATH = r'F:\实验\文本分析_python实验\{}人物出场统计图\{}.jpg'

    def __init__(self, textname, peoplelist):
        """Store the inputs and produce the charts.

        Args:
            textname: Name of the novel; selects the text file to read.
            peoplelist: Character names to count.
        """
        self.textname = textname
        self.peoplelist = peoplelist
        self.divide()

    def divide(self):
        """Read the novel, locate its chapters and plot every character."""
        with open(self.TEXT_PATH.format(self.textname), 'r',
                  encoding='utf-8') as f:
            s = f.read()
        self.Tongjitu(self.peoplelist, self._chapter_spans(s), s)

    def Tongjitu(self, people, lst_chapterindex, s,
                 max_people=20, max_chapters=120):
        """Save one "appearances per chapter" line chart per character.

        Args:
            people: Character names.
            lst_chapterindex: ``(start, end)`` index pairs into ``s``, one
                per chapter.
            s: Full text of the novel.
            max_people: Only the first ``max_people`` names are plotted.
            max_chapters: Only the first ``max_chapters`` chapters are used.

        Shorter inputs are handled by plotting whatever is available.
        """
        plt.rcParams['font.sans-serif'] = ['SimHei']
        spans = lst_chapterindex[:max_chapters]
        for name in people[:max_people]:
            counts = self._count_per_chapter(s, spans, str(name))
            fig = plt.figure(figsize=(18, 4))
            plt.plot(counts, label='{}出场次数'.format(name))
            plt.xlabel("章节数", fontproperties='SimHei')
            plt.ylabel("出场次数数", fontproperties='SimHei')
            plt.legend()
            plt.title("《{}》——{}出场统计图".format(self.textname, name),
                      fontproperties='SimHei')
            target = self.FIGURE_PATH.format(self.textname, name)
            folder = os.path.dirname(target)
            if folder:
                os.makedirs(folder, exist_ok=True)
            plt.savefig(target)
            # Release the figure; otherwise one stays open per character.
            plt.close(fig)

    @staticmethod
    def _chapter_spans(s):
        """Return ``(start, end)`` index pairs of the chapters found in ``s``.

        A chapter heading is a match of "第…回" that is at most 7 characters
        long. Each distinct heading is located at its first occurrence, and
        a chapter ends where the next heading starts (the last one ends at
        the end of the text).
        """
        headings = []
        seen = set()
        for heading in re.findall("第[\u4E00-\u9FA5]+回", s):
            if heading not in seen and len(heading) <= 7:
                seen.add(heading)
                headings.append(heading)
        starts = [s.index(heading) for heading in headings]
        ends = starts[1:] + [len(s)]
        return list(zip(starts, ends))

    @staticmethod
    def _count_per_chapter(s, spans, name):
        """Return how often ``name`` occurs in each ``(start, end)`` span."""
        return [s[start:end].count(name) for start, end in spans]
'''
统计人物社交关系类
'''
class RaletionPeople:
    """Co-occurrence ("social") network of the characters of a novel.

    Two characters are related whenever both names occur in the same line
    of the text. Constructing an instance immediately reads the novel,
    counts the relations and saves the network picture (see ``divide``).
    """

    # Path templates taking the novel name; override them on a subclass
    # when the project lives somewhere else.
    TEXT_PATH = r'F:\实验\文本分析_python实验\{}.txt'
    FIGURE_PATH = r'F:\实验\文本分析_python实验\社交关系网络图\{}社交关系网络图.jpg'

    def __init__(self, textname, peoplelist):
        """Store the inputs and produce the network picture.

        Args:
            textname: Name of the novel; selects the text file to read.
            peoplelist: Character names to relate to each other.
        """
        self.textname = textname
        self.peoplelist = peoplelist
        self.divide()

    def divide(self):
        """Count co-occurrences, scale them to (0, 1] and draw the graph.

        Raises:
            ValueError: If no two characters ever share a line.
        """
        with open(self.TEXT_PATH.format(self.textname), 'r',
                  encoding='utf-8') as f:
            s = f.read()
        relations = self._count_relations(s.split('\n'), self.peoplelist)
        if not relations:
            raise ValueError(
                "no co-occurring characters found in {!r}".format(self.textname))
        self.MakePic(self._normalise(relations))

    def MakePic(self, relations):
        """Draw and save the relation graph.

        Args:
            relations: ``{(name1, name2): weight}`` mapping. Edges are drawn
                in three styles depending on the weight: above 0.6, above
                0.3 up to 0.6, and 0.3 or below.
        """
        matplotlib.rcParams['font.sans-serif'] = ['SimHei']
        fig = plt.figure(figsize=(15, 15))
        G = nx.Graph()
        for (name1, name2), weight in relations.items():
            G.add_edge(name1, name2, weight=weight)

        elarge, emidle, esmall = [], [], []
        for u, v, d in G.edges(data=True):
            if d['weight'] > 0.6:
                elarge.append((u, v))
            elif d['weight'] > 0.3:
                emidle.append((u, v))
            else:
                esmall.append((u, v))

        pos = nx.circular_layout(G)
        nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=800)
        nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2.5,
                               alpha=0.9, edge_color='g')
        nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5,
                               alpha=0.6, edge_color='y')
        nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1,
                               alpha=0.4, edge_color='b', style='dashed')
        nx.draw_networkx_labels(G, pos, font_size=12)
        plt.axis('off')
        plt.title("《{}》主要人物社交关系网络图".format(self.textname))
        target = self.FIGURE_PATH.format(self.textname)
        folder = os.path.dirname(target)
        if folder:
            os.makedirs(folder, exist_ok=True)
        plt.savefig(target)
        # Release the figure once it is on disk.
        plt.close(fig)

    @staticmethod
    def _count_relations(paragraphs, names):
        """Count in how many paragraphs each pair of names occurs together.

        Returns a ``{(name1, name2): count}`` dict. A pair is stored under
        one key only: the reversed key is never created once the pair
        exists.
        """
        relations = {}
        for text in paragraphs:
            # Search each name once per paragraph instead of once per pair.
            present = [name for name in names if name in text]
            for name1 in present:
                for name2 in present:
                    if name1 != name2 and (name2, name1) not in relations:
                        key = (name1, name2)
                        relations[key] = relations.get(key, 0) + 1
        return relations

    @staticmethod
    def _normalise(relations):
        """Divide every count by the largest count; ``{}`` stays ``{}``."""
        if not relations:
            return {}
        largest = max(relations.values())
        return {pair: count / largest for pair, count in relations.items()}
'''
GUI界面类
'''
class GUI:
    """Tkinter viewer for the analysis pictures of three classic novels.

    The window only displays image files that already exist on disk (word
    cloud, per-character appearance charts, relation network); it does not
    run the analysis itself. Constructing an instance builds the window and
    enters the Tk main loop, so the constructor returns only after the
    window is closed.
    """

    # Image path templates. CLOUD_IMAGE and RELATION_IMAGE take the novel
    # name; PEOPLE_IMAGE takes the novel name and the character name.
    CLOUD_IMAGE = r'F:/实验/文本分析_python实验/{}cloud_star.png'
    PEOPLE_IMAGE = r'F:/实验/文本分析_python实验/{}人物出场统计图/{}.jpg'
    RELATION_IMAGE = r'F:/实验/文本分析_python实验/社交关系网络图/{}社交关系网络图.jpg'

    def __init__(self):
        self.root = Tk()
        self.root.title('文本分析系统                 @author: 散修涵')
        self.root.geometry('300x400')
        self.main()
        mainloop()

    def main(self):
        """Build the start page with one button per novel."""
        self.page = Frame(self.root)
        self.page.pack(side=TOP)
        Label(self.page, text='选择要分析的名著', font=('粗体', 20)).pack()
        Button(self.page, text='红楼梦', command=self.Hongloumeng,
               width=10, height=2).pack(side=RIGHT)
        Button(self.page, text='水浒传', command=self.Shuihuzhuang,
               width=10, height=2).pack(side=RIGHT)
        Button(self.page, text='三国演义', command=self.Sanguoyanyi,
               width=10, height=2).pack(side=RIGHT)

    def Hongloumeng(self):
        """Select 《红楼梦》 and open the result menu."""
        self._open_book('红楼梦', [
            "宝玉", '贾母', '凤姐', '王夫人', '老太太', '贾琏', '平儿', '袭人',
            '宝钗', '黛玉', '凤姐儿', '薛姨妈', '探春', '二爷', '贾政', '晴雯',
            '湘云', '刘姥姥', '小丫头', '邢夫人'])

    def Shuihuzhuang(self):
        """Select 《水浒传》 and open the result menu."""
        self._open_book('水浒传', [
            '宋江', '李逵', '武松', '林冲', '吴用', '卢俊义', '柴进', '鲁智深',
            '戴宗', '公孙胜', '花荣', '朱仝', '燕青', '秦明', '李俊', '史进',
            '晁盖', '杨志', '高太尉', '石秀'])

    def Sanguoyanyi(self):
        """Select 《三国演义》 and open the result menu."""
        self._open_book('三国演义', [
            '曹操', '孔明', '关公', '张飞', '吕布', '刘备', '孙权', '赵云',
            '司马懿', '周瑜', '袁绍', '马超', '魏延', '黄忠', '姜维', '马岱',
            '庞德', '孟获', '刘表', '夏侯惇'])

    def _open_book(self, name, people):
        """Remember the chosen novel and its characters, then show the menu."""
        # Set the state first so every page built afterwards can rely on it.
        self.name = name
        self.people = people
        self.gongneng()

    def gongneng(self):
        """Replace the start page with the result menu."""
        self.page.pack_forget()
        self.page3 = Frame(self.root)
        self.page3.pack()
        self.root.geometry('1100x1100')
        Label(self.page3, text='分析结果', fg='red',
              font=('宋体', 25)).pack(side=TOP, fill='x')
        entries = (
            ("主要人物词云图", self.imgCiyun),
            ("主要人物出场次数", self.checkDataView),
            ("社交关系网络图", self.imgRale),
            ("返回", self.backMain),
        )
        for text, command in entries:
            Button(self.page3, width=20, height=2, text=text, bg='gray',
                   font=("宋", 12), relief='raise',
                   command=command).pack(padx=20, pady=20)

    def imgCiyun(self):
        """Show the word cloud of the selected novel."""
        self._show_image(self.CLOUD_IMAGE.format(self.name), '人物词云图',
                         600, 700, self.page3, self.backFirst)

    def checkDataView(self):
        """Show the character list; selecting a row opens that chart."""
        self.page3.pack_forget()
        self.pagePeople = Frame(self.root)
        self.pagePeople.pack()
        self.root.geometry('600x360')
        Label(self.pagePeople, text='主要人物列表', fg='black',
              font=('宋体', 25)).pack(side=TOP, fill='x')
        self.checkDate = ttk.Treeview(self.pagePeople, columns=('name',))
        self.checkDate.heading('#0', text='序号')
        self.checkDate.heading('name', text='人名')
        self.checkDate.column('name', width=200, anchor="center")
        self.checkDate.tag_configure("evenColor", background="LightBlue")
        for number, person in enumerate(self.people, start=1):
            # A one-element tuple keeps a name containing spaces in one cell.
            options = {'text': number, 'values': (person,)}
            if number % 2:
                options['tags'] = ("evenColor",)
            self.checkDate.insert("", 'end', **options)

        def show(*_event):
            selected = self.checkDate.selection()
            if not selected:
                return
            # Row position + 1 is the 1-based number imgPeople expects.
            self.imgPeople(self.checkDate.index(selected[0]) + 1)

        self.checkDate.bind('<<TreeviewSelect>>', show)
        yscrollbar = Scrollbar(self.pagePeople, orient=VERTICAL,
                               command=self.checkDate.yview)
        self.checkDate.configure(yscrollcommand=yscrollbar.set)
        yscrollbar.pack(side=RIGHT, fill=Y)
        self.checkDate.pack(expand=1, fill=BOTH)
        Button(self.pagePeople, width=20, height=2, text="返回", bg='gray',
               font=("宋", 12), relief='raise',
               command=self.backFri).pack(padx=20, pady=20)

    def imgPeople(self, i):
        """Show the appearance chart of the ``i``-th character (1-based)."""
        person = self.people[i - 1]
        # The list page uses a 600x360 window, too small for the chart.
        self._show_image(self.PEOPLE_IMAGE.format(self.name, person),
                         '{}出场统计图'.format(person), 1296, 500,
                         self.pagePeople, self.backPeoList,
                         geometry='1320x640')

    def imgRale(self):
        """Show the relation network of the selected novel."""
        self._show_image(self.RELATION_IMAGE.format(self.name),
                         '{}人物关系图'.format(self.name), 600, 600,
                         self.page3, self.backFirst)

    def backPeoList(self):
        """Return from a character chart to the character list."""
        self.page2.destroy()
        self.root.geometry('600x360')
        self.pagePeople.pack()

    def backFri(self):
        """Return from the character list to the result menu."""
        # The list page is rebuilt on every visit, so drop the old one.
        self.pagePeople.destroy()
        self.root.geometry('1100x1100')
        self.page3.pack()

    def backFirst(self):
        """Return from an image page to the result menu."""
        self.page2.destroy()
        self.page3.pack()

    def backMain(self):
        """Return from the result menu to the start page."""
        self.root.geometry('900x600')
        self.page3.destroy()
        self.page.pack()

    def _show_image(self, path, title, w_box, h_box, current_page,
                    back_command, geometry=None):
        """Replace ``current_page`` with a page showing the image at ``path``.

        The image is scaled to fit a ``w_box`` x ``h_box`` area while
        keeping its aspect ratio. If the file cannot be opened an error
        dialog is shown and the current page stays visible.
        """
        try:
            with Image.open(path) as pil_image:
                size = self._fit_size(w_box, h_box, *pil_image.size)
                resized = pil_image.resize(size, Image.LANCZOS)
        except OSError as err:
            from tkinter import messagebox
            messagebox.showerror('文本分析系统',
                                 '无法打开图片：{}\n{}'.format(path, err))
            return
        current_page.pack_forget()
        if geometry is not None:
            self.root.geometry(geometry)
        self.page2 = Frame(self.root)
        self.page2.pack()
        Label(self.page2, text=title, font=('粗体', 20)).pack(side=TOP)
        tk_image = ImageTk.PhotoImage(resized)
        label = Label(self.page2, image=tk_image, width=w_box, height=h_box)
        # Tk does not keep the PhotoImage alive by itself; hold a reference
        # on the widget so the picture survives after this method returns.
        label.image = tk_image
        label.pack(side=TOP)
        Button(self.page2, width=18, height=2, text="返回", bg='gray',
               font=("宋", 12), relief='raise',
               command=back_command).pack(padx=5, pady=5)

    @staticmethod
    def _fit_size(w_box, h_box, w, h):
        """Return ``(width, height)`` of a ``w`` x ``h`` image scaled to fit
        inside ``w_box`` x ``h_box`` with its aspect ratio preserved."""
        factor = min(w_box / w, h_box / h)
        return max(1, round(w * factor)), max(1, round(h * factor))
if __name__ == "__main__":
    # Creating the GUI object builds the window and runs the Tk main loop.
    gui = GUI()

文本分析用GUI界面显示相关推荐

linux文本分析利器awk
转快速理解linux文本分析利器awk 原文链接杜亦舒性能与架构 awk是什么如果工作中需要操作linux比较多,那么awk是非常值得学习的 awk是一个极其强大的文本分析工具,把文件逐行的读 ...
utf8 txt文本转ansi vbs_数据分析之路-文本分析
含义：文本分析是指对文本的表示及其特征项的选取；文本分析是文本挖掘、信息检索的一个基本问题，它把从文本中抽取出的特征词进行量化来表示文本信息。分析过程：1. 搭建语料库（即要分析文章的集合）。知识点：...
特朗普推文的文本分析
导言通常不会发布关于政治的信息(我对投票不是特别精明,这是数据科学对政治产生最大影响的地方),但本周末我看到了一个关于唐纳德特朗普的推特账户的假设,这个假设只是被要求调查有数据: 当特朗普祝奥运队好 ...
不需编程/无需写代码的中文英文文本分析教程，免费傻瓜式，多图预警
这里讲解如何使用KH Coder.整个过程不需要编程知识. 有两种导入数据的方式,第一种是将所有的文字放到一个txt文件中:第二种是将文字放到多个txt文件中,这些文件放到一个文件夹下.因为第二种更加 ...
免费的文本分析文本挖掘软件工具
这里针对非编程人员,介绍几个免费文本分析和文本挖掘软件,可以直接导入中文文本,得出一些统计和可视化信息.另外如果是编程人员,推荐使用一下文本挖掘的工具包.下面是两个可选的工具. KH Coder 链接 ...
python中文文本分析_python--文本分析
一. 导读文本分析主要用来分词分析,情感分析以及主题分析,参考知乎用户的文章,他从方法代码上讲解了中英文分词(wordcloud,jieba),中英文情感分析(textblob,snownlp), ...
linux文本分析工具awk解读
awk是一个强大的文本分析工具，相对于grep的查找、sed的编辑，awk在对数据进行分析并生成报告时显得尤为强大。awk把文件逐行读入，以空格为默认分隔符将每行切片，切开的部分再进行各种分析处理。...
python可视化文本分析(1)—分析QQ班群聊天记录宏观
公众号文章链接前一段时间就想做简单的可视化文本分析玩,今天就花点时间先对整体班级的QQ群聊天信息做一个简单的分析. 打算分两步做,本文是最简单的第一步过程 1:分析整个聊天记录的时间分配.并且用ma ...
python爬虫自学笔记分析解密_python爬虫学习笔记——1 各种文本分析工具简介之汇总...
此处只简单汇总一下各种文本工具名目,他们的详细使用参见具体帖子.本文主要参考<8种目前Python使用率最高的文本处理工具>一文0.SnowNLP包用于中文文本的处理 1.Jieba 2 ...
文本分析：吉利不裁员不降薪背后的真相是什么
之前推荐了一些数据入门的资料,这里不妨以一个小小的案例讲解如何进行简单的文本分析.主要的步骤包括以下: 从知乎相关问题爬下所有回答将答案分词.去掉停用词.维护语料库查看分词结果,并制作词云我 ...

文本分析用GUI界面显示

文本分析用GUI界面显示相关推荐

最新文章

热门文章