Python Text Analysis of Romance of the Three Kingdoms (《三国演义》): Source Code
For the accompanying write-up, see 拂羽's two-part analysis "Python之三国演义" (上 / 下) on zhuanlan.zhihu.com.
#!/usr/bin/env python
# coding: utf-8
# In[47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import jieba  # requires installation: pip install jieba
from pandas import read_csv
from scipy.cluster.hierarchy import dendrogram,ward
from scipy.spatial.distance import pdist,squareform
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.manifold import MDS
from sklearn.decomposition import PCA
import nltk
from nltk.cluster.kmeans import KMeansClusterer
# In[2]:
## Set up the font and the pandas display options
## (the font file must actually exist on your machine)
##font = FontProperties(fname = "C:/Windows/Fonts/Hiragino Sans GB W3.otf", size=14)
font = FontProperties(fname = r"C:\Windows\Fonts\STFANGSO.TTF", size=14)
pd.set_option("display.max_rows", 8)
pd.options.mode.chained_assignment = None  # default='warn'
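# Optional sanity check (a small sketch, not in the original post): the font file
# must exist on this machine or Chinese labels will render as boxes; the path below
# is the same Windows path used above - adjust it on Linux/macOS.
import os
if not os.path.exists(r"C:\Windows\Fonts\STFANGSO.TTF"):
    print("Font file not found; matplotlib will fall back to its default font")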
# In[3]:
## Read the stop-word list and the custom dictionary
stopword = read_csv(r"E:\bigdata\sanguoTest2\my_stop_words.txt",header=None,names = ["Stopwords"])
mydict = read_csv(r"E:\bigdata\sanguoTest2\red_dictionary.txt",header=None, names=["Dictionary"])
print(stopword)
print("---------------------------------")
print(mydict)
## Read the novel itself (the RedDream/Reddream names were apparently carried over
## from an earlier Dream of the Red Chamber script; the identifiers are kept as-is)
RedDream = read_csv(r"E:\bigdata\sanguoTest2\sanguo.txt",header=None,names = ["Reddream"])
RedDream
# In[4]:
# Remove blank lines and unwanted rows, then reset the index
### check whether the data contains any null rows
np.sum(pd.isnull(RedDream))
# In[5]:
### Drop the volume-level "正文" marker lines using a regular expression
#### index of the rows containing the keyword
indexjuan = RedDream.Reddream.str.contains("^正文+")
# In[6]:
#### drop the unwanted rows and reset the index
RedDream = RedDream[~indexjuan].reset_index(drop=True)
RedDream
# In[7]:
#### index of the rows containing the "分节阅读" pagination markers
indexjuan = RedDream.Reddream.str.contains("^分节阅读+")
# In[8]:
#### drop the unwanted rows and reset the index
RedDream = RedDream[~indexjuan].reset_index(drop=True)
RedDream
# In[9]:
## Find the start and end row index of every chapter
## chapter titles
indexhui = RedDream.Reddream.str.match("^第+.+回")
chapnames = RedDream.Reddream[indexhui].reset_index(drop=True)
print(chapnames)
print("--------------------------------------")
# In[10]:
## Split each chapter title on the space character
chapnamesplit = chapnames.str.split(" ").reset_index(drop=True)
chapnamesplit
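# A minimal illustration of what the split produces (the heading format is assumed
# from the novel's standard chapter titles; the actual file may also leave a
# trailing empty field, which is why a fourth "null" column is allowed for below):
sample_title = "第一回 宴桃园豪杰三结义 斩黄巾英雄首立功"
print(sample_title.split(" "))
# -> ['第一回', '宴桃园豪杰三结义', '斩黄巾英雄首立功']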
# In[15]:
## Build a DataFrame to hold the chapter information
Red_df=pd.DataFrame(list(chapnamesplit),columns=["Chapter","Leftname","Rightname","null"])
Red_df
# In[16]:
## Add derived columns
Red_df["Chapter2"] = np.arange(1,121)
Red_df["ChapName"] = Red_df.Leftname+","+Red_df.Rightname
## start row (paragraph) index of each chapter
Red_df["StartCid"] = indexhui[indexhui == True].index
## end row index of each chapter
Red_df["endCid"] = Red_df["StartCid"][1:len(Red_df["StartCid"])].reset_index(drop = True) - 1
Red_df["endCid"][[len(Red_df["endCid"])-1]] = RedDream.index[-1]
## number of paragraphs in each chapter
Red_df["Lengthchaps"] = Red_df.endCid - Red_df.StartCid
Red_df["Artical"] = "Artical"
# In[17]:
## Assemble the text of each chapter
for ii in Red_df.index:
    ## rows belonging to this chapter
    chapid = np.arange(Red_df.StartCid[ii]+1, int(Red_df.endCid[ii]))
    ## join the paragraphs and strip the full-width spaces
    Red_df["Artical"][ii] = "".join(list(RedDream.Reddream[chapid])).replace("\u3000","")
## number of characters in each chapter
Red_df["lenzi"] = Red_df.Artical.apply(len)
Red_df
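# Optional sanity check (not in the original post): confirm that every chapter was
# segmented and that the paragraph and character counts look plausible.
print(Red_df[["Chapter", "StartCid", "endCid", "Lengthchaps", "lenzi"]].head())
print("chapters:", Red_df.shape[0])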
# In[20]:
#### Scatter plots ##########
## scatter plot 1: paragraphs per chapter vs characters per chapter
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']   # default font so Chinese labels render
mpl.rcParams['axes.unicode_minus'] = False     # keep the minus sign from rendering as a box
plt.figure(figsize=(10,6))
plt.scatter(Red_df.Lengthchaps, Red_df.lenzi)
for ii in Red_df.index:
    plt.text(Red_df.Lengthchaps[ii]+1, Red_df.lenzi[ii], Red_df.Chapter2[ii])
plt.xlabel("章节段数")
plt.ylabel("章节字数")
plt.title("《三国演义》120回")
plt.show()
# In[21]:
## scatter plot 2: same data, labelled with the chapter titles
plt.figure(figsize=(10,6))
plt.scatter(Red_df.Lengthchaps, Red_df.lenzi)
for ii in Red_df.index:
    plt.text(Red_df.Lengthchaps[ii]-2, Red_df.lenzi[ii]+100, Red_df.Chapter[ii], size=7)
plt.xlabel("章节段数")
plt.ylabel("章节字数")
plt.title("《三国演义》120回")
plt.show()
# In[26]:
plt.figure(figsize=(16,12))
plt.subplot(2,1,1)
plt.plot(Red_df.Chapter2,Red_df.Lengthchaps,"ro-",label="段落")
plt.ylabel("章节段数",Fontproperties=font)
plt.title("《三国演义》120回",Fontproperties=font)
##添加平均值
plt.hlines(np.mean(Red_df.Lengthchaps),-5,125,"b")
plt.xlim((-5,125))
plt.subplot(2,1,2)
plt.plot(Red_df.Chapter2,Red_df.lenzi,"ro-",label="段落")
plt.xlabel("章节",Fontproperties=font)
plt.ylabel("章节字数",Fontproperties=font)
##添加平均值
plt.hlines(np.mean(Red_df.lenzi),-5,125,"b")
plt.xlim((-5,125))
plt.show()
# In[28]:
## Segment the full text of the novel with jieba
## number of rows in the table
row, col = Red_df.shape
## pre-allocate the column
Red_df["cutword"] = "cutword"
for ii in np.arange(row):
    ## word segmentation
    cutwords = list(jieba.cut(Red_df.Artical[ii], cut_all=True))
    ## drop single-character tokens
    cutwords = pd.Series(cutwords)[pd.Series(cutwords).apply(len) > 1]
    ## drop stop words (compare against the word column, not the whole DataFrame)
    cutwords = cutwords[~cutwords.isin(stopword.Stopwords.values)]
    Red_df.cutword[ii] = cutwords.values
## segmentation result of the last chapter
print(cutwords)
print(cutwords.values)
## segmentation result of the whole book
Red_df.cutword
# In[29]:
## concatenate the per-chapter word lists
words = np.concatenate(Red_df.cutword)
## count word frequencies
word_df = pd.DataFrame({"Word":words})
word_stat = word_df.groupby(by=["Word"])["Word"].agg({"number":np.size})
word_stat = word_stat.reset_index().sort_values(by="number",ascending=False)
word_stat["wordlen"] = word_stat.Word.apply(len)
word_stat
# drop words of length 5 or more
print(np.where(word_stat.Word.apply(len)<5))
word_stat = word_stat.loc[word_stat.Word.apply(len)<5,:]
word_stat = word_stat.sort_values(by="number",ascending=False)
word_stat
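# Hedged aside (not in the original post): the dict-renaming form of .agg() used
# above only works on older pandas (it was deprecated and later removed, around
# pandas 1.0).  On a recent pandas the same frequency table can be built with
# value_counts() - a minimal equivalent sketch:
word_stat_alt = (pd.Series(words)
                 .value_counts()
                 .rename_axis("Word")
                 .reset_index(name="number"))
word_stat_alt["wordlen"] = word_stat_alt.Word.apply(len)
word_stat_alt = word_stat_alt[word_stat_alt.wordlen < 5]
print(word_stat_alt.head())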
# In[38]:
### Word cloud
from wordcloud import WordCloud
## join the words of the whole book with "/"
"/".join(np.concatenate(Red_df.cutword))
## width=2200, height=1600
wlred = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF", margin=5, width=1800, height=1800).generate("/".join(np.concatenate(Red_df.cutword)))
plt.figure(figsize=(10,10))
plt.imshow(wlred)
plt.axis("off")
plt.show()
# In[40]:
## build a {word: count} dictionary from the frequency table
worddict = {}
for key, value in zip(word_stat.Word, word_stat.number):
    worddict[key] = value
## peek at the first ten entries
for ii, myword in zip(range(10), worddict.items()):
    print(ii)
    print(myword)
redcold = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                    margin=5,
                    width=1800, height=1800).generate("/".join(np.concatenate(Red_df.cutword)))
# worddict = worddict.items()
# worddict = tuple(worddict)
# redcold.generate_from_frequencies(frequencies=worddict)
plt.figure(figsize=(10,10))
plt.imshow(redcold)
plt.axis("off")
plt.show()
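# The commented-out lines above hint at building the cloud from precomputed counts.
# A minimal sketch (assuming a recent wordcloud release, whose
# generate_from_frequencies() accepts a dict directly; older releases expected a
# list of (word, count) tuples), reusing the worddict built above:
freq_wc = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                    margin=5, width=1800, height=1800)
freq_wc.generate_from_frequencies(frequencies=worddict)
plt.figure(figsize=(10,10))
plt.imshow(freq_wc)
plt.axis("off")
plt.show()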
# In[52]:
from wordcloud import ImageColorGenerator
from matplotlib.pyplot import imread
back_image = imread(r"E:\bigdata\sanguoTest2\img2.jpg")
red_wc = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                   margin=5, width=1800, height=1800,
                   background_color="white",
                   max_words=2000,
                   mask=back_image,
                   random_state=42,
                   ).generate("/".join(np.concatenate(Red_df.cutword)))
# derive the colour scheme from the background (mask) image
image_colors = ImageColorGenerator(back_image)
plt.figure(figsize=(10,10))
plt.imshow(red_wc.recolor(color_func=image_colors))
plt.axis("off")
plt.show()
# In[53]:
## Bar chart of word frequencies
# keep words that appear more than 500 times
newdata = word_stat.loc[word_stat.number>500]
## plot
newdata.plot(kind="bar", x="Word", y="number", figsize=(10,7))
plt.xticks(fontproperties=font, size=10)
plt.xlabel("关键词", fontproperties=font)
plt.ylabel("频数", fontproperties=font)
plt.title("《三国演义》", fontproperties=font)
# In[55]:
# keep words that appear more than 250 times
newdata = word_stat.loc[word_stat.number>250]
## plot
newdata.plot(kind="bar", x="Word", y="number", figsize=(16,7))
plt.xticks(fontproperties=font, size=10)
plt.xlabel("关键词", fontproperties=font)
plt.ylabel("频数", fontproperties=font)
plt.title("《三国演义》", fontproperties=font)
plt.show()
# In[56]:
def plotwordcould(wordlist, title, figsize=(6,6)):
    """
    Draw a word cloud for one list of words.
    wordlist: a list of words
    title   : title of the figure
    """
    ## count word frequencies
    words = wordlist
    name = title
    word_df = pd.DataFrame({"Word":words})
    word_stat = word_df.groupby(by=["Word"])["Word"].agg({"number":np.size})
    word_stat = word_stat.reset_index().sort_values(by="number",ascending=False)
    word_stat["wordlen"] = word_stat.Word.apply(len)
    ## build a {word: count} dictionary
    worddict = {}
    for key, value in zip(word_stat.Word, word_stat.number):
        worddict[key] = value
    # generate the cloud from the precomputed frequencies
    red_wc = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                       margin=5, width=1800, height=1800,
                       background_color="black",
                       max_words=800,
                       max_font_size=400,
                       random_state=42,
                       ).generate_from_frequencies(frequencies=worddict)
    # plot the cloud
    plt.figure(figsize=figsize)
    plt.imshow(red_wc)
    plt.axis("off")
    plt.title(name, fontproperties=font, size=12)
    plt.show()
# In[57]:
print("plot all red deram wordcould")
t0 = time.time()
for ii in np.arange(12):
ii = ii * 10
name = Red_df.Chapter[ii] +":"+ Red_df.Leftname[ii] +","+ Red_df.Rightname[ii]
words = Red_df.cutword[ii]
plotwordcould(words,name,figsize=(6,6))
print("Plot all wordcolud use %.2fs"%(time.time()-t0))
# In[60]:
def plotredmanfre(wordlist, title, figsize=(12,6)):
    """
    Plot the frequency of character names in one list of words.
    wordlist: a list of words
    title   : title of the figure
    """
    ## count word frequencies
    words = wordlist
    name = title
    word_df = pd.DataFrame({"Word":words})
    word_stat = word_df.groupby(by=["Word"])["Word"].agg({"number":np.size})
    word_stat = word_stat.reset_index().sort_values(by="number",ascending=False)
    ## as written this filter keeps every word; a list of character names
    ## (e.g. the mydict dictionary loaded earlier) was presumably intended here
    wordname = word_stat.loc[word_stat.Word.isin(word_stat.iloc[:,0].values)].reset_index(drop=True)
    ## bar chart
    size = np.min([np.max([6, np.ceil(300 / wordname.shape[0])]), 12])
    wordname.plot(kind="bar", x="Word", y="number", figsize=(16,6))
    plt.xticks(fontproperties=font, size=size)
    plt.xlabel("人名", fontproperties=font)
    plt.ylabel("频数", fontproperties=font)
    plt.title(name, fontproperties=font)
    plt.show()
# In[61]:
import time
print("plot 所有章节的人物词频")
t0 = time.time()
for ii in np.arange(120):
name = Red_df.Chapter[ii] +":"+ Red_df.Leftname[ii] +","+ Red_df.Rightname[ii]
words = Red_df.cutword[ii]
plotredmanfre(words,name,figsize=(16,6))
print("Plot 所有章节的人物词频 use %.2fs"%(time.time()-t0))
# In[62]:
## Prepare the data in the form CountVectorizer()/TfidfVectorizer() expect:
## join each chapter's segmented words with spaces, one string per chapter
articals = []
for cutword in Red_df.cutword:
    articals.append(" ".join(cutword))
## build the corpus and compute the document-term TF-IDF matrix
vectorizer = CountVectorizer()
transformer = TfidfVectorizer()
tfidf = transformer.fit_transform(articals)
print(tfidf)
# In[63]:
## Convert tfidf to a dense array: the document-term matrix
dtm = tfidf.toarray()
dtm
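# Optional check (not in the original post): the highest-weighted TF-IDF terms of
# the first chapter, to make sure the vectorisation looks sensible.  Note that on
# scikit-learn 1.2+ get_feature_names() has become get_feature_names_out().
feature_names = np.array(transformer.get_feature_names())
top_idx = dtm[0].argsort()[::-1][:10]
print(list(zip(feature_names[top_idx], np.round(dtm[0, top_idx], 3))))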
# In[64]:
'''
vectorizer.fit_transform(articals)
print(vectorizer.get_feature_names()[1:10])
print(len(vectorizer.get_feature_names()))
'''
# In[65]:
'''
print(cosine_distance(dtm[1,:], dtm[1,:]))
print(cosine_distance(dtm[2,:], dtm[3,:]))
'''
# In[69]:
from nltk.cluster.util import cosine_distance
from nltk.cluster.kmeans import KMeansClusterer
## k-means clustering using cosine distance
kmeans = KMeansClusterer(num_means=3,              # number of clusters
                         distance=cosine_distance, # cosine distance
                         )
kmeans.cluster(dtm)
## predicted cluster label for every chapter
labpre = [kmeans.classify(i) for i in dtm]
kmeanlab = Red_df[["ChapName","Chapter"]]
kmeanlab["cosd_pre"] = labpre
kmeanlab
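# Optional cross-check (a sketch, not part of the original post): on L2-normalised
# TF-IDF vectors Euclidean k-means approximates cosine k-means, so scikit-learn's
# much faster KMeans can be used to sanity-check the nltk clustering above.
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
km_sk = KMeans(n_clusters=3, random_state=123).fit(normalize(dtm))
print(pd.Series(km_sk.labels_).value_counts())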
# In[70]:
## number of chapters in each cluster
count = kmeanlab.groupby("cosd_pre").count()
count
# In[71]:
## Visualise how many chapters fall in each cluster
count.plot(kind='barh', figsize=(6,5))
for xx, yy, s in zip(count.index, count.ChapName, count.ChapName):
    plt.text(y=xx-0.1, x=yy+0.5, s=s)
plt.ylabel("cluster label")
plt.xlabel("number")
plt.show()
# In[72]:
## Reduce the TF-IDF matrix to two dimensions with MDS
mds = MDS(n_components=2,random_state=123)
coord = mds.fit_transform(dtm)
print(coord.shape)
# In[73]:
## Plot the 2-D MDS embedding, coloured by cluster
plt.figure(figsize=(8,8))
plt.scatter(coord[:,0], coord[:,1], c=kmeanlab.cosd_pre)
for ii in np.arange(120):
    plt.text(coord[ii,0]+0.02, coord[ii,1], s=Red_df.Chapter2[ii])
plt.xlabel("X")
plt.ylabel("Y")
plt.title("K-means MDS")
plt.show()
# In[74]:
pca = PCA(n_components=2)
pca.fit(dtm)
print(pca.explained_variance_ratio_)
coord = pca.fit_transform(dtm)
print(coord.shape)
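# Optional check (a sketch, not in the original post): cumulative variance kept by
# the first ten principal components, to judge how lossy the 2-D picture is.
pca_chk = PCA(n_components=10).fit(dtm)
print(np.cumsum(pca_chk.explained_variance_ratio_).round(3))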
# In[75]:
plt.figure(figsize=(8,8))
plt.scatter(coord[:,0],coord[:,1],c=kmeanlab.cosd_pre)
for ii in np.arange(120):
    plt.text(coord[ii,0]+0.02, coord[ii,1], s=Red_df.Chapter2[ii])
plt.xlabel("主成分1", fontproperties=font)
plt.ylabel("主成分2", fontproperties=font)
plt.title("K-means PCA")
plt.show()
# In[76]:
## Hierarchical clustering
labels = Red_df.Chapter.values
cosin_matrix = squareform(pdist(dtm,'cosine'))
ling = ward(cosin_matrix)
fig, ax = plt.subplots(figsize=(10, 15))
ax = dendrogram(ling, orientation='right', labels=labels);
plt.yticks(fontproperties=font, size=8)   # text of the y-axis tick labels
plt.title("《三国演义》各章节层次聚类", fontproperties=font)
plt.tight_layout()
plt.show()
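# Hedged aside (not in the original post): ward() above is fed the square cosine
# distance matrix as if it were an observation matrix, a common tutorial shortcut.
# SciPy's linkage() can instead work directly on the condensed distances - a
# minimal alternative sketch using average linkage:
from scipy.cluster.hierarchy import linkage
ling_alt = linkage(pdist(dtm, 'cosine'), method='average')
fig, ax = plt.subplots(figsize=(10, 15))
dendrogram(ling_alt, orientation='right', labels=labels)
plt.yticks(fontproperties=font, size=8)
plt.title("《三国演义》各章节层次聚类(average linkage)", fontproperties=font)
plt.tight_layout()
plt.show()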
# In[79]:
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer
from sklearn.manifold import TSNE
## Prepare the data in the form CountVectorizer() expects:
## join each chapter's segmented words with spaces, one string per chapter
articals = []
for cutword in Red_df.cutword:
    cutword = [s for s in cutword if len(s) < 5]
    cutword = " ".join(cutword)
    articals.append(cutword)
## max_features keeps only the most frequent terms
vectorizer = CountVectorizer(max_features=10000)
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(articals))
## reduce to three dimensions with t-SNE
X = tfidf.toarray()
tsne = TSNE(n_components=3, metric='cosine', init='random', random_state=1233)
X_tsne = tsne.fit_transform(X)
## 3-D visualisation
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(1,1,1, projection="3d")
ax.scatter(X_tsne[:,0], X_tsne[:,1], X_tsne[:,2], c="red")
ax.view_init(30,45)
plt.xlabel("t-SNE 维度1", fontproperties=font)
plt.ylabel("t-SNE 维度2", fontproperties=font)
plt.title("《三国演义》-t-SNE", fontproperties=font)
plt.show()
# In[80]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
articals = []
for cutword in Red_df.cutword:
    cutword = [s for s in cutword if len(s) < 5]
    cutword = " ".join(cutword)
    articals.append(cutword)
tf_vectorizer = CountVectorizer(max_features=10000)
tf = tf_vectorizer.fit_transform(articals)
## inspect part of the vocabulary and of the count matrix
print(tf_vectorizer.get_feature_names()[400:420])
tf.toarray()[20:50,200:800]
# In[81]:
## number of topics
n_topics = 3
lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=25, learning_method='online', learning_offset=50, random_state=0)
# fit the model to the count matrix
lda.fit(tf)
## probability of each chapter belonging to each topic
chapter_top = pd.DataFrame(lda.transform(tf),
                           index=Red_df.Chapter,
                           columns=np.arange(n_topics)+1)
chapter_top
## row sums (each should be 1)
chapter_top.apply(sum, axis=1).values
## maximum probability of each row
chapter_top.apply(max, axis=1).values
## indices where the probability reaches that maximum
np.where(chapter_top >= np.min(chapter_top.apply(max, axis=1).values))
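# Hedged aside (not from the original post): newer scikit-learn releases renamed
# LatentDirichletAllocation's n_topics argument to n_components (the old name was
# removed around sklearn 0.21).  The equivalent constructor on a recent sklearn
# would be the following; fitting it is identical to the cell above.
lda_new_api = LatentDirichletAllocation(n_components=n_topics, max_iter=25,
                                        learning_method='online',
                                        learning_offset=50, random_state=0)
# lda_new_api.fit(tf)   # same fit as above, only the argument name differs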
# In[82]:
mpl.rcParams['font.sans-serif'] = ['SimHei']   # default font for Chinese labels
mpl.rcParams['axes.unicode_minus'] = False     # keep the minus sign from rendering as a box
n_top_words = 40
tf_feature_names = tf_vectorizer.get_feature_names()
for topic_id, topic in enumerate(lda.components_):
    topword = pd.DataFrame(
        {"word": [tf_feature_names[i] for i in topic.argsort()[:-n_top_words - 1:-1]],
         "componets": topic[topic.argsort()[:-n_top_words - 1:-1]]})
    topword.sort_values(by="componets").plot(kind="barh",
                                             x="word",
                                             y="componets",
                                             figsize=(6,8),
                                             legend=False)
    plt.yticks(fontproperties=font, size=10)
    plt.ylabel("")
    plt.legend("")
    plt.title("Topic %d" % (topic_id+1))
    plt.show()
# In[83]:
def print_top_words(model, feature_names, n_top_words):
    for topic_id, topic in enumerate(model.components_):
        print('\nTopic Nr.%d:' % int(topic_id + 1))
        print(''.join([feature_names[i] + ' ' + str(round(topic[i], 2))
                       + ' | ' for i in topic.argsort()[:-n_top_words - 1:-1]]))
n_top_words = 10
tf_feature_names = tf_vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)
# In[98]:
TEXT_PATH = r'E:\bigdata\sanguoTest2\sanguo.txt'                   # path to the novel
DICT_PATH = r'E:\bigdata\sanguoTest2\dict.txt'                     # path to the character-name dictionary
SYNONYMOUS_DICT_PATH = r'E:\bigdata\sanguoTest2\sanguo_dict.txt'   # path to the alias/synonym file
SAVE_NODE_PATH = r'E:\bigdata\sanguoTest2\node.csv'
SAVE_EDGE_PATH = r'E:\bigdata\sanguoTest2\edge.csv'
'''
person_counter counts how often each character appears, e.g. {'a': 1, 'b': 2}
person_per_paragraph lists the characters seen in each paragraph, e.g. [['a','b'], []]
relationships stores the relations between characters: the key is character A and
the value is a dict mapping character B to a co-occurrence weight.
'''
import codecs
from collections import defaultdict
person_counter = defaultdict(int)   # counter of character appearances
person_per_paragraph = []
relationships = {}
synonymous_dict = {}
def count_person(self):
    '''
    Count character appearances and record the characters in each paragraph.
    :return:
    '''
    paragraphs = self.get_clean_paragraphs()
    synonymous = self.synonymous_names()
    print('start process node')
    with codecs.open(self._dict_path, 'r', 'utf-8') as f:
        name_list = f.read().split(' 10 nr\r\n')   # get a clean name_list
    for p in paragraphs:
        jieba.load_userdict(self._dict_path)
        # segment the paragraph and start a fresh name list for it
        poss = jieba.cut(p)
        self._person_per_paragraph.append([])
        for w in poss:
            # keep only words in the name dictionary, mapping aliases to one name
            if w not in name_list:
                continue
            if synonymous.get(w):
                w = synonymous[w]
            # record the character for this paragraph
            self._person_per_paragraph[-1].append(w)
            # initialise the relationship dict and update the counter
            if self._person_counter.get(w) is None:
                self._relationships[w] = {}
            self._person_counter[w] += 1
    return self._person_counter
def calc_relationship(self):
    '''
    Accumulate co-occurrence weights between characters.
    :return:
    '''
    print("start to process edge")
    # for every paragraph, count each ordered pair of characters (Cartesian product)
    for p in self._person_per_paragraph:
        for name1 in p:
            for name2 in p:
                if name1 == name2:
                    continue
                if self._relationships[name1].get(name2) is None:
                    self._relationships[name1][name2] = 1
                else:
                    self._relationships[name1][name2] += 1
    return self._relationships
def save_node_and_edge(self):
    '''
    Save the nodes and edges as CSV files in the format Gephi expects.
    :return:
    '''
    with codecs.open(SAVE_NODE_PATH, "a+", "utf-8") as f:
        f.write("Id,Label,Weight\r\n")
        for name, times in self._person_counter.items():
            f.write(name + "," + name + "," + str(times) + "\r\n")
    with codecs.open(SAVE_EDGE_PATH, "a+", "utf-8") as f:
        f.write("Source,Target,Weight\r\n")
        for name, edges in self._relationships.items():
            for v, w in edges.items():
                if w > 3:
                    f.write(name + "," + v + "," + str(w) + "\r\n")
    print('save file successful!')
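# The three functions above take `self` and reference attributes such as
# self._dict_path and helpers such as get_clean_paragraphs() that are not defined
# in this post - they appear to be methods lifted out of a class in the original
# project.  A minimal skeleton (an assumption, not the author's code) showing how
# they could be wired back together:
class RelationshipCounter:
    def __init__(self, text_path=TEXT_PATH, dict_path=DICT_PATH,
                 synonymous_dict_path=SYNONYMOUS_DICT_PATH):
        self._text_path = text_path
        self._dict_path = dict_path
        self._synonymous_dict_path = synonymous_dict_path
        self._person_counter = defaultdict(int)   # character appearance counts
        self._person_per_paragraph = []           # characters seen in each paragraph
        self._relationships = {}                  # {A: {B: co-occurrence weight}}

    def get_clean_paragraphs(self):
        # assumed helper: read the novel and return its non-empty lines as paragraphs
        with codecs.open(self._text_path, "r", "utf-8") as f:
            return [line.strip() for line in f if line.strip()]

    def synonymous_names(self):
        # assumed helper: should return {alias: canonical name}; the format of the
        # synonym file is not shown in the post, so an empty mapping is returned here
        return {}

    # attach the module-level functions defined above as methods
    count_person = count_person
    calc_relationship = calc_relationship
    save_node_and_edge = save_node_and_edge

# Typical usage of the skeleton:
# rc = RelationshipCounter()
# rc.count_person()
# rc.calc_relationship()
# rc.save_node_and_edge()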
# In[108]:
from pandas import read_csv
Red_df = pd.read_csv(r'E:\bigdata\sanguoTest2\society.csv',encoding="gbk")
Red_df.head()
# In[109]:
import networkx as nx
Red_df["weight"] = Red_df.Id / 120
Red_df2 = Red_df[Red_df.weight >0.025].reset_index(drop = True)
plt.figure(figsize=(15,15))
G=nx.Graph()
for ii in Red_df2.index:
    G.add_edge(Red_df2.First[ii], Red_df2.Second[ii], weight=Red_df2.weight[ii])
elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.2]
emidle = [(u,v) for (u,v,d) in G.edges(data=True) if (d['weight'] >0.1) & (d['weight'] <= 0.2)]
esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.1]
pos=nx.spring_layout(G)
nx.draw_networkx_nodes(G,pos,alpha=0.8,node_size= 350)
nx.draw_networkx_edges(G,pos,edgelist=elarge,width=2,alpha=0.9,edge_color='g')
nx.draw_networkx_edges(G,pos,edgelist=emidle,width=1.5,alpha=0.6,edge_color='y')
nx.draw_networkx_edges(G,pos,edgelist=esmall,width=1,alpha=0.4,edge_color='b',style='dashed')
nx.draw_networkx_labels(G,pos,font_size= 8)
plt.axis('off')
plt.title("《三国演义》社交网络")
plt.show()
# In[112]:
## Compute the degree of every node
Gdegree = nx.degree(G)
Gdegree = dict(Gdegree)
Gdegree = pd.DataFrame({"name": list(Gdegree.keys()), "degree": list(Gdegree.values())})
Gdegree.sort_values(by="degree", ascending=False).plot(
    x="name",
    y="degree",
    kind="bar",
    figsize=(12,6),
    legend=False)
plt.xticks(fontproperties=font, size=10)
plt.ylabel("degree")
plt.show()
# In[113]:
plt.figure(figsize=(13,13))
Red_df2 = Red_df[Red_df.weight > 0.1].reset_index(drop=True)   # controls how many characters appear on the circle
G = nx.Graph()
for ii in Red_df2.index:
    G.add_edge(Red_df2.First[ii], Red_df2.Second[ii], weight=Red_df2.weight[ii])
elarge = [(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] > 0.30]
emidle = [(u,v) for (u,v,d) in G.edges(data=True) if (d['weight'] > 0.2) & (d['weight'] <= 0.30)]
esmall = [(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <= 0.2]
# layout
pos = nx.circular_layout(G)
# node size scales with degree; note that Gdegree was computed from the earlier,
# larger graph, so its length may not match this smaller node set exactly
nx.draw_networkx_nodes(G, pos, alpha=0.6, node_size=20 + Gdegree.degree * 5)
nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2, alpha=0.9, edge_color='g')   # alpha is transparency, width the line width
nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.2, edge_color='b', style='dashed')
nx.draw_networkx_labels(G, pos, font_size=10)
# nx.draw_networkx_labels(G, pos, font_size=10, font_family="cmb10")  # use a font bundled with matplotlib (…\mpl-data\fonts\ttf)
plt.axis('off')
plt.title("《三国演义》社交网络")
plt.savefig(r'E:\bigdata\sanguoTest2\社交网络图\节点图.png')   # save the figure
plt.show()