For the accompanying write-up, see: 拂羽: Python之三国演义(上) (zhuanlan.zhihu.com) and 拂羽: Python之三国演义(下) (zhuanlan.zhihu.com).

#!/usr/bin/env python

# coding: utf-8

# In[47]:

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from matplotlib.font_manager import FontProperties

import jieba  # needs to be installed first: pip install jieba

from pandas import read_csv

from scipy.cluster.hierarchy import dendrogram,ward

from scipy.spatial.distance import pdist,squareform

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

from sklearn.manifold import MDS

from sklearn.decomposition import PCA

import nltk

from nltk.cluster.kmeans import KMeansClusterer

# In[2]:

## Set the font and the pandas display options
## font = FontProperties(fname="C:/Windows/Fonts/Hiragino Sans GB W3.otf", size=14)
## (the font file must actually exist on your own machine)
font = FontProperties(fname=r"C:\Windows\Fonts\STFANGSO.TTF", size=14)

pd.set_option("display.max_rows", 8)
pd.options.mode.chained_assignment = None  # default='warn'

# In[3]:

## 读取停用词和需要的词典

stopword = read_csv(r"E:\bigdata\sanguoTest2\my_stop_words.txt",header=None,names = ["Stopwords"])

mydict = read_csv(r"E:\bigdata\sanguoTest2\red_dictionary.txt",header=None, names=["Dictionary"])

print(stopword)

print("---------------------------------")

print(mydict)

RedDream = read_csv(r"E:\bigdata\sanguoTest2\sanguo.txt",header=None,names = ["Reddream"])

RedDream

# In[4]:

# Drop blank rows and unwanted paragraphs, then reset the index
### Check whether the data contains any missing rows
np.sum(pd.isnull(RedDream))

# In[5]:

### Remove the volume header lines using a regular expression
#### Index of the rows that contain the keyword
indexjuan = RedDream.Reddream.str.contains("^正文+")

# In[6]:

#### Drop the unwanted rows and reset the index
RedDream = RedDream[~indexjuan].reset_index(drop=True)
RedDream

# In[7]:

#### Index of the rows that contain the keyword
indexjuan = RedDream.Reddream.str.contains("^分节阅读+")

# In[8]:

#### Drop the unwanted rows and reset the index
RedDream = RedDream[~indexjuan].reset_index(drop=True)
RedDream

# In[9]:

## Find the start and end row index of every chapter
## Chapter title lines
indexhui = RedDream.Reddream.str.match("^第+.+回")
chapnames = RedDream.Reddream[indexhui].reset_index(drop=True)
print(chapnames)
print("--------------------------------------")

# In[10]:

## Process the chapter titles: split each one on spaces
chapnamesplit = chapnames.str.split(" ").reset_index(drop=True)
chapnamesplit

# In[15]:

## Build a data frame to hold the chapter-level data
Red_df = pd.DataFrame(list(chapnamesplit), columns=["Chapter", "Leftname", "Rightname", "null"])
Red_df

# In[16]:

## Add new variables
Red_df["Chapter2"] = np.arange(1, 121)
Red_df["ChapName"] = Red_df.Leftname + "," + Red_df.Rightname

## Starting row (paragraph) index of each chapter
Red_df["StartCid"] = indexhui[indexhui == True].index

## Ending row index of each chapter
Red_df["endCid"] = Red_df["StartCid"][1:len(Red_df["StartCid"])].reset_index(drop=True) - 1
Red_df["endCid"][[len(Red_df["endCid"]) - 1]] = RedDream.index[-1]

## Number of paragraphs in each chapter
Red_df["Lengthchaps"] = Red_df.endCid - Red_df.StartCid
Red_df["Artical"] = "Artical"

# In[17]:

## Extract the text of each chapter
for ii in Red_df.index:
    ## Join the paragraphs of the chapter with ""
    chapid = np.arange(Red_df.StartCid[ii] + 1, int(Red_df.endCid[ii]))
    ## Strip the full-width spaces from the chapter text
    Red_df["Artical"][ii] = "".join(list(RedDream.Reddream[chapid])).replace("\u3000", "")

## Number of characters in each chapter
Red_df["lenzi"] = Red_df.Artical.apply(len)
Red_df

# In[20]:

#### Scatter plots ####
## Scatter plot 1: paragraph count vs. character count
from pylab import *

mpl.rcParams['font.sans-serif'] = ['SimHei']   # default font for Chinese labels
mpl.rcParams['axes.unicode_minus'] = False     # keep the minus sign from rendering as a box

plt.figure(figsize=(10, 6))
plt.scatter(Red_df.Lengthchaps, Red_df.lenzi)
for ii in Red_df.index:
    plt.text(Red_df.Lengthchaps[ii] + 1, Red_df.lenzi[ii], Red_df.Chapter2[ii])
plt.xlabel("章节段数")
plt.ylabel("章节字数")
plt.title("《三国演义》120回")
plt.show()

# In[21]:

## Scatter plot 2: paragraph count vs. character count, labelled with chapter names
plt.figure(figsize=(10, 6))
plt.scatter(Red_df.Lengthchaps, Red_df.lenzi)
for ii in Red_df.index:
    plt.text(Red_df.Lengthchaps[ii] - 2, Red_df.lenzi[ii] + 100, Red_df.Chapter[ii], size=7)
plt.xlabel("章节段数")
plt.ylabel("章节字数")
plt.title("《三国演义》120回")
plt.show()

# In[26]:

plt.figure(figsize=(16, 12))
plt.subplot(2, 1, 1)
plt.plot(Red_df.Chapter2, Red_df.Lengthchaps, "ro-", label="段落")
plt.ylabel("章节段数", fontproperties=font)
plt.title("《三国演义》120回", fontproperties=font)
## Add the mean as a horizontal line
plt.hlines(np.mean(Red_df.Lengthchaps), -5, 125, "b")
plt.xlim((-5, 125))

plt.subplot(2, 1, 2)
plt.plot(Red_df.Chapter2, Red_df.lenzi, "ro-", label="段落")
plt.xlabel("章节", fontproperties=font)
plt.ylabel("章节字数", fontproperties=font)
## Add the mean as a horizontal line
plt.hlines(np.mean(Red_df.lenzi), -5, 125, "b")
plt.xlim((-5, 125))
plt.show()

# In[28]:

## Word-segment the full text of the novel with jieba
## Number of rows in the data frame
row, col = Red_df.shape
## Pre-allocate the column
Red_df["cutword"] = "cutword"
for ii in np.arange(row):
    ## Segment the chapter text
    cutwords = list(jieba.cut(Red_df.Artical[ii], cut_all=True))
    ## Drop single-character tokens
    cutwords = pd.Series(cutwords)[pd.Series(cutwords).apply(len) > 1]
    ## Drop stop words (compare against the Stopwords column, not the DataFrame itself)
    cutwords = cutwords[~cutwords.isin(stopword.Stopwords.values)]
    Red_df.cutword[ii] = cutwords.values

## Inspect the segmentation of the last chapter
print(cutwords)
print(cutwords.values)
## Segmentation result for the whole book
Red_df.cutword

# In[29]:

## Concatenate the per-chapter token lists
words = np.concatenate(Red_df.cutword)
## Count word frequencies
word_df = pd.DataFrame({"Word": words})
word_stat = word_df.groupby("Word").size().reset_index(name="number")
word_stat = word_stat.sort_values(by="number", ascending=False)
word_stat["wordlen"] = word_stat.Word.apply(len)
word_stat

# Keep only words shorter than 5 characters
print(np.where(word_stat.Word.apply(len) < 5))
word_stat = word_stat.loc[word_stat.Word.apply(len) < 5, :]
word_stat = word_stat.sort_values(by="number", ascending=False)
word_stat

# In[38]:

### Word cloud for the whole book
from wordcloud import WordCloud

## Join all tokens of the book into one string
"/".join(np.concatenate(Red_df.cutword))

## width=2200, height=1600 also works
wlred = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF", margin=5,
                  width=1800, height=1800).generate("/".join(np.concatenate(Red_df.cutword)))
plt.figure(figsize=(10, 10))
plt.imshow(wlred)
plt.axis("off")
plt.show()

# In[40]:

## Build a word -> frequency dictionary
worddict = {}
for key, value in zip(word_stat.Word, word_stat.number):
    worddict[key] = value

## Peek at the first ten entries
for ii, myword in zip(range(10), worddict.items()):
    print(ii)
    print(myword)

redcold = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                    margin=5,
                    width=1800, height=1800).generate("/".join(np.concatenate(Red_df.cutword)))
# worddict = worddict.items()
# worddict = tuple(worddict)
# redcold.generate_from_frequencies(frequencies=worddict)
plt.figure(figsize=(10, 10))
plt.imshow(redcold)
plt.axis("off")
plt.show()

# In[52]:

## Word cloud shaped and coloured by a background image
from wordcloud import ImageColorGenerator
from matplotlib.pyplot import imread

back_image = imread(r"E:\bigdata\sanguoTest2\img2.jpg")
red_wc = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                   margin=5, width=1800, height=1800,
                   background_color="white",
                   max_words=2000,
                   mask=back_image,
                   random_state=42,
                   ).generate("/".join(np.concatenate(Red_df.cutword)))
# Generate colours from the background image
image_colors = ImageColorGenerator(back_image)
plt.figure(figsize=(10, 10))
plt.imshow(red_wc.recolor(color_func=image_colors))
plt.axis("off")
plt.show()

# In[53]:

## Bar chart of word frequencies
# Keep words that appear more than 500 times
newdata = word_stat.loc[word_stat.number > 500]
## Plot the bar chart
newdata.plot(kind="bar", x="Word", y="number", figsize=(10, 7))
plt.xticks(fontproperties=font, size=10)
plt.xlabel("关键词", fontproperties=font)
plt.ylabel("频数", fontproperties=font)
plt.title("《三国演义》", fontproperties=font)
plt.show()

# In[55]:

# Keep words that appear more than 250 times
newdata = word_stat.loc[word_stat.number > 250]
## Plot the bar chart
newdata.plot(kind="bar", x="Word", y="number", figsize=(16, 7))
plt.xticks(fontproperties=font, size=10)
plt.xlabel("关键词", fontproperties=font)
plt.ylabel("频数", fontproperties=font)
plt.title("《三国演义》", fontproperties=font)
plt.show()

# In[56]:

def plotwordcould(wordlist, title, figsize=(6, 6)):
    """
    Draw a word cloud for one list of words.
    wordlist: a list of words
    title:    title of the figure
    """
    ## Count word frequencies
    words = wordlist
    name = title
    word_df = pd.DataFrame({"Word": words})
    word_stat = word_df.groupby("Word").size().reset_index(name="number")
    word_stat = word_stat.sort_values(by="number", ascending=False)
    word_stat["wordlen"] = word_stat.Word.apply(len)

    ## Put the words and their frequencies into a dictionary
    worddict = {}
    for key, value in zip(word_stat.Word, word_stat.number):
        worddict[key] = value

    # Generate the word cloud from the pre-computed frequencies
    red_wc = WordCloud(font_path=r"C:\Windows\Fonts\STFANGSO.TTF",
                       margin=5, width=1800, height=1800,
                       background_color="black",
                       max_words=800,
                       max_font_size=400,
                       random_state=42,
                       ).generate_from_frequencies(frequencies=worddict)
    # Draw it
    plt.figure(figsize=figsize)
    plt.imshow(red_wc)
    plt.axis("off")
    plt.title(name, fontproperties=font, size=12)
    plt.show()

# In[57]:

print("plot all red deram wordcould")

t0 = time.time()

for ii in np.arange(12):

ii = ii * 10

name = Red_df.Chapter[ii] +":"+ Red_df.Leftname[ii] +","+ Red_df.Rightname[ii]

words = Red_df.cutword[ii]

plotwordcould(words,name,figsize=(6,6))

print("Plot all wordcolud use %.2fs"%(time.time()-t0))

# In[60]:

def plotredmanfre(wordlist, title, figsize=(12, 6)):
    """
    Plot the character-name frequencies for one list of words.
    wordlist: a list of words
    title:    title of the figure
    """
    ## Count word frequencies
    words = wordlist
    name = title
    word_df = pd.DataFrame({"Word": words})
    word_stat = word_df.groupby("Word").size().reset_index(name="number")
    word_stat = word_stat.sort_values(by="number", ascending=False)
    # NOTE: as written this filter keeps every word; a character-name list was
    # presumably intended as the isin() argument.
    wordname = word_stat.loc[word_stat.Word.isin(word_stat.iloc[:, 0].values)].reset_index(drop=True)

    ## Plot the bar chart
    size = np.min([np.max([6, np.ceil(300 / wordname.shape[0])]), 12])
    wordname.plot(kind="bar", x="Word", y="number", figsize=(16, 6))
    plt.xticks(fontproperties=font, size=size)
    plt.xlabel("人名", fontproperties=font)
    plt.ylabel("频数", fontproperties=font)
    plt.title(name, fontproperties=font)
    plt.show()

# In[61]:

import time

print("plot the character frequencies of every chapter")
t0 = time.time()
for ii in np.arange(120):
    name = Red_df.Chapter[ii] + ":" + Red_df.Leftname[ii] + "," + Red_df.Rightname[ii]
    words = Red_df.cutword[ii]
    plotredmanfre(words, name, figsize=(16, 6))
print("Plotting the character frequencies of all chapters took %.2fs" % (time.time() - t0))

# In[62]:

## Preparation: put the segmented chapters into a form CountVectorizer() can use,
## i.e. join the tokens of each chapter with spaces and collect the strings in a list
articals = []
for cutword in Red_df.cutword:
    articals.append(" ".join(cutword))

## Build the corpus and compute the document-term TF-IDF matrix
vectorizer = CountVectorizer()
transformer = TfidfVectorizer()
tfidf = transformer.fit_transform(articals)
print(tfidf)
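
## Not from the original notebook: a small sketch that maps the sparse TF-IDF row of the
## first chapter back to words, to check what the vectoriser picked up (uses the same
## `transformer` and `tfidf` objects; on scikit-learn >= 1.0 use get_feature_names_out()).
feature_names = np.array(transformer.get_feature_names())
row0 = tfidf[0].toarray().ravel()
top10 = row0.argsort()[::-1][:10]          # indices of the ten largest TF-IDF weights
for word, weight in zip(feature_names[top10], row0[top10]):
    print(word, round(float(weight), 3))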

# In[63]:

## Convert the sparse TF-IDF matrix to a dense document-term array
dtm = tfidf.toarray()
dtm

# In[64]:

'''

vectorizer.fit_transform(articals)

print(vectorizer.get_feature_names()[1:10])

print(len(vectorizer.get_feature_names()))

'''

# In[65]:

'''

print(cosine_distance(dtm[1,:], dtm[1,:]))

print(cosine_distance(dtm[2,:], dtm[3,:]))

'''

# In[69]:

from nltk.cluster.util import cosine_distance
from nltk.cluster.kmeans import KMeansClusterer

## K-means clustering with cosine distance
kmeans = KMeansClusterer(num_means=3,               # number of clusters
                         distance=cosine_distance,  # cosine distance
                         )
kmeans.cluster(dtm)

## Cluster label of each chapter
labpre = [kmeans.classify(i) for i in dtm]
kmeanlab = Red_df[["ChapName", "Chapter"]]
kmeanlab["cosd_pre"] = labpre
kmeanlab
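
## Not part of the original analysis: for comparison, scikit-learn's KMeans on L2-normalised
## TF-IDF rows approximates cosine-distance k-means ("spherical" k-means) and runs much faster.
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize

dtm_norm = normalize(dtm)   # unit-length rows, so Euclidean distance behaves like cosine distance
km = KMeans(n_clusters=3, n_init=10, random_state=123).fit(dtm_norm)
print(pd.Series(km.labels_).value_counts())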

# In[70]:

## Number of chapters in each cluster
count = kmeanlab.groupby("cosd_pre").count()
count

# In[71]:

## Visualise the cluster sizes
count.plot(kind='barh', figsize=(6, 5))
for xx, yy, s in zip(count.index, count.ChapName, count.ChapName):
    plt.text(y=xx - 0.1, x=yy + 0.5, s=s)
plt.ylabel("cluster label")
plt.xlabel("number")
plt.show()

# In[72]:

## Reduce the data to two dimensions with MDS
mds = MDS(n_components=2, random_state=123)
coord = mds.fit_transform(dtm)
print(coord.shape)

# In[73]:

## Plot the MDS embedding, coloured by cluster label
plt.figure(figsize=(8, 8))
plt.scatter(coord[:, 0], coord[:, 1], c=kmeanlab.cosd_pre)
for ii in np.arange(120):
    plt.text(coord[ii, 0] + 0.02, coord[ii, 1], s=Red_df.Chapter2[ii])
plt.xlabel("X")
plt.ylabel("Y")
plt.title("K-means MDS")
plt.show()

# In[74]:

pca = PCA(n_components=2)

pca.fit(dtm)

print(pca.explained_variance_ratio_)

coord = pca.fit_transform(dtm)

print(coord.shape)

# In[75]:

plt.figure(figsize=(8, 8))
plt.scatter(coord[:, 0], coord[:, 1], c=kmeanlab.cosd_pre)
for ii in np.arange(120):
    plt.text(coord[ii, 0] + 0.02, coord[ii, 1], s=Red_df.Chapter2[ii])
plt.xlabel("主成分1", fontproperties=font)
plt.ylabel("主成分2", fontproperties=font)
plt.title("K-means PCA")
plt.show()

# In[76]:

## Hierarchical clustering
labels = Red_df.Chapter.values
cosin_matrix = squareform(pdist(dtm, 'cosine'))
ling = ward(cosin_matrix)
fig, ax = plt.subplots(figsize=(10, 15))
ax = dendrogram(ling, orientation='right', labels=labels)
plt.yticks(fontproperties=font, size=8)  # chapter labels on the y axis
plt.title("《三国演义》各章节层次聚类", fontproperties=font)
plt.tight_layout()
plt.show()

# In[79]:

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.manifold import TSNE

## Preparation: put the segmented chapters into a form CountVectorizer() can use;
## join the tokens of each chapter with spaces and collect the strings in a list
articals = []
for cutword in Red_df.cutword:
    cutword = [s for s in cutword if len(s) < 5]
    cutword = " ".join(cutword)
    articals.append(cutword)

## max_features keeps only the most frequent terms, up to the given number
vectorizer = CountVectorizer(max_features=10000)
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(articals))

## Reduce to three dimensions with t-SNE
X = tfidf.toarray()
tsne = TSNE(n_components=3, metric='cosine', init='random', random_state=1233)
X_tsne = tsne.fit_transform(X)

## Visualise the embedding
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(1, 1, 1, projection="3d")
ax.scatter(X_tsne[:, 0], X_tsne[:, 1], X_tsne[:, 2], c="red")
ax.view_init(30, 45)
plt.xlabel("t-SNE 1", fontproperties=font)
plt.ylabel("t-SNE 2", fontproperties=font)
plt.title("《三国演义》-t-SNE", fontproperties=font)
plt.show()

# In[80]:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

articals = []
for cutword in Red_df.cutword:
    cutword = [s for s in cutword if len(s) < 5]
    cutword = " ".join(cutword)
    articals.append(cutword)

tf_vectorizer = CountVectorizer(max_features=10000)
tf = tf_vectorizer.fit_transform(articals)

## Inspect the result
print(tf_vectorizer.get_feature_names()[400:420])
tf.toarray()[20:50, 200:800]

# In[81]:

## Number of topics
n_topics = 3
# newer scikit-learn uses n_components (the old n_topics keyword was removed)
lda = LatentDirichletAllocation(n_components=n_topics, max_iter=25,
                                learning_method='online',
                                learning_offset=50, random_state=0)
# Fit the model to the document-term counts
lda.fit(tf)

## Probability of each chapter belonging to each topic
chapter_top = pd.DataFrame(lda.transform(tf),
                           index=Red_df.Chapter,
                           columns=np.arange(n_topics) + 1)
chapter_top

## Row sums (should all be 1)
chapter_top.apply(sum, axis=1).values
## Maximum probability in each row
chapter_top.apply(max, axis=1).values
## Indices where the probability reaches that level
np.where(chapter_top >= np.min(chapter_top.apply(max, axis=1).values))
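
## Not from the original notebook: the dominant topic of each chapter can be read off
## directly from the same `chapter_top` frame with idxmax/max.
dominant = pd.DataFrame({"topic": chapter_top.idxmax(axis=1),
                         "prob": chapter_top.max(axis=1)})
print(dominant.head(10))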

# In[82]:

mpl.rcParams['font.sans-serif'] = ['SimHei']   # default font for Chinese labels
mpl.rcParams['axes.unicode_minus'] = False     # keep the minus sign from rendering as a box

n_top_words = 40
tf_feature_names = tf_vectorizer.get_feature_names()
for topic_id, topic in enumerate(lda.components_):
    topword = pd.DataFrame(
        {"word": [tf_feature_names[i] for i in topic.argsort()[:-n_top_words - 1:-1]],
         "componets": topic[topic.argsort()[:-n_top_words - 1:-1]]})
    topword.sort_values(by="componets").plot(kind="barh",
                                             x="word",
                                             y="componets",
                                             figsize=(6, 8),
                                             legend=False)
    plt.yticks(fontproperties=font, size=10)
    plt.ylabel("")
    plt.legend("")
    plt.title("Topic %d" % (topic_id + 1))
    plt.show()

# In[83]:

def print_top_words(model, feature_names, n_top_words):
    for topic_id, topic in enumerate(model.components_):
        print('\nTopic Nr.%d:' % int(topic_id + 1))
        print(''.join([feature_names[i] + ' ' + str(round(topic[i], 2)) + ' | '
                       for i in topic.argsort()[:-n_top_words - 1:-1]]))

n_top_words = 10
tf_feature_names = tf_vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)

# In[98]:

import codecs
from collections import defaultdict

TEXT_PATH = r'E:\bigdata\sanguoTest2\sanguo.txt'                  # path to the text
DICT_PATH = r'E:\bigdata\sanguoTest2\dict.txt'                    # path to the character-name dictionary
SYNONYMOUS_DICT_PATH = r'E:\bigdata\sanguoTest2\sanguo_dict.txt'  # path to the synonym list
SAVE_NODE_PATH = r'E:\bigdata\sanguoTest2\node.csv'
SAVE_EDGE_PATH = r'E:\bigdata\sanguoTest2\edge.csv'

'''
person_counter is a counter of how many times each character appears, e.g. {'a': 1, 'b': 2}.
person_per_paragraph holds the characters found in each paragraph, e.g. [['a', 'b'], []].
relationships stores the relations between characters: the key is character A and the
value is a dict mapping character B to the edge weight.
'''

person_counter = defaultdict(int)  # counts how often each character appears
person_per_paragraph = []
relationships = {}
synonymous_dict = {}

## The following three functions reference `self` attributes (self._dict_path,
## self._person_counter, ...): they are methods excerpted from a larger
## relationship-extraction class and are shown here as-is.

def count_person(self):
    '''
    Count character appearances and record the characters of each paragraph.
    :return:
    '''
    paragraphs = self.get_clean_paragraphs()
    synonymous = self.synonymous_names()
    print('start process node')
    with codecs.open(self._dict_path, 'r', 'utf-8') as f:
        name_list = f.read().split(' 10 nr\r\n')  # clean list of character names
    for p in paragraphs:
        jieba.load_userdict(self._dict_path)
        # Segment the paragraph and start a fresh name list for it
        poss = jieba.cut(p)
        self._person_per_paragraph.append([])
        for w in poss:
            # Skip words not in the name dictionary; map synonyms to a canonical name
            if w not in name_list:
                continue
            if synonymous.get(w):
                w = synonymous[w]
            # Record the character for this paragraph
            self._person_per_paragraph[-1].append(w)
            # Initialise the relationship dict and update the counter
            if self._person_counter.get(w) is None:
                self._relationships[w] = {}
            self._person_counter[w] += 1
    return self._person_counter

def calc_relationship(self):
    '''
    Accumulate the relationship weights between characters.
    :return:
    '''
    print("start to process edge")
    # For every paragraph, count each ordered pair of co-occurring characters
    for p in self._person_per_paragraph:
        for name1 in p:
            for name2 in p:
                if name1 == name2:
                    continue
                if self._relationships[name1].get(name2) is None:
                    self._relationships[name1][name2] = 1
                else:
                    self._relationships[name1][name2] += 1
    return self._relationships

def save_node_and_edge(self):
    '''
    Save the nodes and edges as CSV files in the format Gephi expects.
    :return:
    '''
    with codecs.open(SAVE_NODE_PATH, "a+", "utf-8") as f:
        f.write("Id,Label,Weight\r\n")
        for name, times in self._person_counter.items():
            f.write(name + "," + name + "," + str(times) + "\r\n")
    with codecs.open(SAVE_EDGE_PATH, "a+", "utf-8") as f:
        f.write("Source,Target,Weight\r\n")
        for name, edges in self._relationships.items():
            for v, w in edges.items():
                if w > 3:
                    f.write(name + "," + v + "," + str(w) + "\r\n")
    print('save file successful!')
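
## Not from the original notebook: a tiny self-contained sketch, with made-up data, of the
## same counting idea used above — per-paragraph name lists feed a character counter and a
## co-occurrence (relationship) dictionary.
from collections import defaultdict

toy_paragraphs = [["刘备", "关羽", "张飞"], ["曹操", "刘备"]]   # toy paragraphs, already reduced to names
toy_counter = defaultdict(int)
toy_relationships = defaultdict(lambda: defaultdict(int))
for names in toy_paragraphs:
    for name in names:
        toy_counter[name] += 1
    for a in names:
        for b in names:
            if a != b:
                toy_relationships[a][b] += 1
print(dict(toy_counter))
print({a: dict(b) for a, b in toy_relationships.items()})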

# In[108]:

from pandas import read_csv

Red_df = pd.read_csv(r'E:\bigdata\sanguoTest2\society.csv',encoding="gbk")

Red_df.head()

# In[109]:

import networkx as nx

## Normalise the co-occurrence counts into edge weights
Red_df["weight"] = Red_df.Id / 120
Red_df2 = Red_df[Red_df.weight > 0.025].reset_index(drop=True)

plt.figure(figsize=(15, 15))
G = nx.Graph()
for ii in Red_df2.index:
    G.add_edge(Red_df2.First[ii], Red_df2.Second[ii], weight=Red_df2.weight[ii])

## Split the edges into three weight bands
elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 0.2]
emidle = [(u, v) for (u, v, d) in G.edges(data=True) if (d['weight'] > 0.1) & (d['weight'] <= 0.2)]
esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 0.1]

pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=350)
nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2, alpha=0.9, edge_color='g')
nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.4, edge_color='b', style='dashed')
nx.draw_networkx_labels(G, pos, font_size=8)
plt.axis('off')
plt.title("《三国演义》社交网络")
plt.show()

# In[112]:

## Degree of every node
Gdegree = nx.degree(G)
Gdegree = dict(Gdegree)
Gdegree = pd.DataFrame({"name": list(Gdegree.keys()), "degree": list(Gdegree.values())})
Gdegree.sort_values(by="degree", ascending=False).plot(
    x="name",
    y="degree",
    kind="bar",
    figsize=(12, 6),
    legend=False)
plt.xticks(fontproperties=font, size=10)
plt.ylabel("degree")
plt.show()
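
## Not from the original notebook: besides raw degree, networkx can also rank the characters
## by weighted degree or betweenness centrality on the same graph G.
wdeg = dict(G.degree(weight="weight"))               # degree weighted by co-occurrence strength
btw = nx.betweenness_centrality(G, weight="weight")  # who sits "between" the most pairs
print(sorted(btw.items(), key=lambda kv: kv[1], reverse=True)[:10])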

# In[113]:

plt.figure(figsize=(13, 13))
Red_df2 = Red_df[Red_df.weight > 0.1].reset_index(drop=True)  # controls how many people appear on the circle
G = nx.Graph()
for ii in Red_df2.index:
    G.add_edge(Red_df2.First[ii], Red_df2.Second[ii], weight=Red_df2.weight[ii])

elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 0.30]
emidle = [(u, v) for (u, v, d) in G.edges(data=True) if (d['weight'] > 0.2) & (d['weight'] <= 0.30)]
esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 0.2]

# Layout model
pos = nx.circular_layout(G)
nx.draw_networkx_nodes(G, pos, alpha=0.6, node_size=20 + Gdegree.degree * 5)  # node size scales with node degree
nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2, alpha=0.9, edge_color='g')  # alpha is transparency, width is line width
nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.2, edge_color='b', style='dashed')
nx.draw_networkx_labels(G, pos, font_size=10)
# nx.draw_networkx_labels(G, pos, font_size=10, font_family="cmb10")  # only fonts available under C:\Users\yubg\Anaconda3\Lib\site-packages\matplotlib\mpl-data\fonts\ttf can be used here
plt.axis('off')
plt.title("《三国演义》社交网络")
plt.savefig(r'E:\bigdata\sanguoTest2\社交网络图\节点图.png')  # save the figure
plt.show()
