重叠社区发现算法LFM算法python源码含数据集

LFM算法来源于论文《Detecting the overlapping and hierarchical community structure in complex networks》。本文包含该算法的Python源码以及用到的数据集，仅供大家学习参考。

使用networkx工具包实现了LFM社区发现算法，并提供了模块度评价方法和可视化的方法。数据分析结果和可视化图片都已保存在文件中，代码可直接运行。

# -*- coding: utf-8 -*-
"""
Created on Mon Mar 11 17:13:54 2019

@author: suibian
"""
import random
import networkx as nx
import matplotlib.pyplot as plt
import zipfile
#import urllib.request as urllib
class Community():
    """A single community grown by the LFM local-fitness procedure.

    The community keeps its member set together with two running totals:
    ``_k_in`` (twice the number of edges with both endpoints inside the
    community) and ``_k_out`` (the number of edges with exactly one endpoint
    inside).  The fitness is ``k_in / (k_in + k_out) ** alpha``.  Set
    operations on neighbor sets keep every update local to one node.

    Parameters:
        G: graph object exposing ``neighbors(node)``.
        alpha: exponent of the fitness function (resolution parameter).
    """

    def __init__(self, G, alpha=1.0):
        self.G = G
        self.alpha = alpha
        self._nodes = set()
        self._k_in = 0
        self._k_out = 0

    def _fitness(self, k_in, k_out):
        """Return the fitness for the given totals; 0.0 when both are zero."""
        total = k_in + k_out
        if total == 0:
            # Empty community or isolated members: avoid dividing by zero.
            return 0.0
        return float(k_in) / (total ** self.alpha)

    def _node_degrees(self, node):
        """Return ``(k_in, k_out)`` of ``node`` relative to the community."""
        neighbors = set(self.G.neighbors(node))
        k_in = len(neighbors & self._nodes)
        return k_in, len(neighbors) - k_in

    def add_node(self, node):
        """Add ``node`` and update the totals; a node already present is ignored."""
        if node in self._nodes:
            # Re-adding would count the node's edges twice.
            return
        node_k_in, node_k_out = self._node_degrees(node)
        self._nodes.add(node)
        # Each edge into the community becomes internal (counted from both
        # endpoints) and stops being a boundary edge.
        self._k_in += 2 * node_k_in
        self._k_out += node_k_out - node_k_in

    def remove_node(self, node):
        """Remove ``node`` and update the totals.

        Raises:
            KeyError: if ``node`` is not a member of the community.
        """
        node_k_in, node_k_out = self._node_degrees(node)
        self._nodes.remove(node)
        self._k_in -= 2 * node_k_in
        self._k_out += node_k_in - node_k_out

    def cal_add_fitness(self, node):
        """Return the fitness change that adding ``node`` would cause."""
        node_k_in, node_k_out = self._node_degrees(node)
        with_node = self._fitness(self._k_in + 2 * node_k_in,
                                  self._k_out + node_k_out - node_k_in)
        return with_node - self._fitness(self._k_in, self._k_out)

    def cal_remove_fitness(self, node):
        """Return fitness(with ``node``) - fitness(without ``node``).

        A negative value means the community is fitter without the node.
        """
        node_k_in, node_k_out = self._node_degrees(node)
        without_node = self._fitness(self._k_in - 2 * node_k_in,
                                     self._k_out - node_k_out + node_k_in)
        return self._fitness(self._k_in, self._k_out) - without_node

    def recalculate(self):
        """Return a member whose removal raises the fitness, or None."""
        for vid in self._nodes:
            if self.cal_remove_fitness(vid) < 0.0:
                return vid
        return None

    def get_neighbors(self):
        """Return the set of non-member nodes adjacent to any member."""
        neighbors = set()
        for node in self._nodes:
            neighbors.update(set(self.G.neighbors(node)) - self._nodes)
        return neighbors

    def get_fitness(self):
        """Return the current fitness of the community."""
        # The original read the nonexistent attribute ``self._alpha``.
        return self._fitness(self._k_in, self._k_out)


class LFM():
    """LFM overlapping community detection driver.

    Parameters:
        G: graph object exposing ``nodes()`` and ``neighbors(node)``.
        alpha: fitness exponent handed to every ``Community``.
    """

    def __init__(self, G, alpha):
        self.G = G
        self.alpha = alpha

    def execute(self):
        """Grow communities from random seeds until every node was covered or seeded.

        Returns:
            list of sets of nodes, one set per community found; communities
            may overlap.
        """
        communities = []
        # ``G.node`` was removed in NetworkX 2.4; ``G.nodes()`` works in all versions.
        node_not_include = list(self.G.nodes())
        while node_not_include:
            c = Community(self.G, self.alpha)
            # randomly select a seed node
            seed = random.choice(node_not_include)
            c.add_node(seed)
            to_be_examined = c.get_neighbors()
            while to_be_examined:
                # pick the neighbor with the largest fitness gain
                best_node, best_gain = max(
                    ((node, c.cal_add_fitness(node)) for node in to_be_examined),
                    key=lambda item: item[1],
                )
                # stop condition: no neighbor improves the fitness
                if best_gain < 0.0:
                    break
                c.add_node(best_node)
                # prune members that now lower the fitness
                to_be_remove = c.recalculate()
                while to_be_remove is not None:
                    c.remove_node(to_be_remove)
                    to_be_remove = c.recalculate()
                to_be_examined = c.get_neighbors()
            # Drop the covered nodes, and the seed as well: a seed pruned from
            # its own community would otherwise stay in the pool and could be
            # drawn forever.
            node_not_include = [n for n in node_not_include
                                if n != seed and n not in c._nodes]
            communities.append(c._nodes)
        return communities


def main():
    """Load the football network, draw it and print the LFM communities."""
    # Built-in alternatives (one edge set and one node set each):
    # nx.karate_club_graph(), nx.florentine_families_graph()
    with zipfile.ZipFile('football.zip') as zf:  # closed on exit
        txt = zf.read('football.txt').decode()  # read info file
        gml = zf.read('football.gml').decode()  # read gml data
    # throw away bogus first line with # from mejn files
    gml = gml.split('\n')[1:]
    G = nx.parse_gml(gml)  # parse gml data
    print(txt)
    # print degree for each team - number of games
    for n, d in G.degree():
        print('%s %d' % (n, d))
    # Compact drawing style; pass as nx.draw(G, **options) to use it instead
    # of labels.  'edge_color' replaces the original 'line_color', which is
    # not a NetworkX drawing keyword.
    options = {
        'node_color': 'red',
        'node_size': 50,
        'edge_color': 'grey',
        'linewidths': 0,
        'width': 0.1,
    }
    nx.draw(G, with_labels=True)
    plt.savefig('fig.png', bbox_inches='tight')
    plt.show()
    algorithm = LFM(G, 0.8)
    communities = algorithm.execute()
    for c in communities:
        print(len(c), sorted(c))


if __name__ == "__main__":
    main()

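与上面的代码相比，下面的版本注释掉了原文件第147行的 nx.draw(G, with_labels=True)，并取消了第145行 nx.draw(G, **options) 的注释，即改用 options 中的绘图参数、不显示节点标签：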

# -*- coding: utf-8 -*-
"""
Created on Mon Mar 11 17:13:54 2019

@author: suibian
"""
import random
import networkx as nx
import matplotlib.pyplot as plt
import zipfile
#import urllib.request as urllib
class Community():
    """A single community grown by the LFM local-fitness procedure.

    The community keeps its member set together with two running totals:
    ``_k_in`` (twice the number of edges with both endpoints inside the
    community) and ``_k_out`` (the number of edges with exactly one endpoint
    inside).  The fitness is ``k_in / (k_in + k_out) ** alpha``.  Set
    operations on neighbor sets keep every update local to one node.

    Parameters:
        G: graph object exposing ``neighbors(node)``.
        alpha: exponent of the fitness function (resolution parameter).
    """

    def __init__(self, G, alpha=1.0):
        self.G = G
        self.alpha = alpha
        self._nodes = set()
        self._k_in = 0
        self._k_out = 0

    def _fitness(self, k_in, k_out):
        """Return the fitness for the given totals; 0.0 when both are zero."""
        total = k_in + k_out
        if total == 0:
            # Empty community or isolated members: avoid dividing by zero.
            return 0.0
        return float(k_in) / (total ** self.alpha)

    def _node_degrees(self, node):
        """Return ``(k_in, k_out)`` of ``node`` relative to the community."""
        neighbors = set(self.G.neighbors(node))
        k_in = len(neighbors & self._nodes)
        return k_in, len(neighbors) - k_in

    def add_node(self, node):
        """Add ``node`` and update the totals; a node already present is ignored."""
        if node in self._nodes:
            # Re-adding would count the node's edges twice.
            return
        node_k_in, node_k_out = self._node_degrees(node)
        self._nodes.add(node)
        # Each edge into the community becomes internal (counted from both
        # endpoints) and stops being a boundary edge.
        self._k_in += 2 * node_k_in
        self._k_out += node_k_out - node_k_in

    def remove_node(self, node):
        """Remove ``node`` and update the totals.

        Raises:
            KeyError: if ``node`` is not a member of the community.
        """
        node_k_in, node_k_out = self._node_degrees(node)
        self._nodes.remove(node)
        self._k_in -= 2 * node_k_in
        self._k_out += node_k_in - node_k_out

    def cal_add_fitness(self, node):
        """Return the fitness change that adding ``node`` would cause."""
        node_k_in, node_k_out = self._node_degrees(node)
        with_node = self._fitness(self._k_in + 2 * node_k_in,
                                  self._k_out + node_k_out - node_k_in)
        return with_node - self._fitness(self._k_in, self._k_out)

    def cal_remove_fitness(self, node):
        """Return fitness(with ``node``) - fitness(without ``node``).

        A negative value means the community is fitter without the node.
        """
        node_k_in, node_k_out = self._node_degrees(node)
        without_node = self._fitness(self._k_in - 2 * node_k_in,
                                     self._k_out - node_k_out + node_k_in)
        return self._fitness(self._k_in, self._k_out) - without_node

    def recalculate(self):
        """Return a member whose removal raises the fitness, or None."""
        for vid in self._nodes:
            if self.cal_remove_fitness(vid) < 0.0:
                return vid
        return None

    def get_neighbors(self):
        """Return the set of non-member nodes adjacent to any member."""
        neighbors = set()
        for node in self._nodes:
            neighbors.update(set(self.G.neighbors(node)) - self._nodes)
        return neighbors

    def get_fitness(self):
        """Return the current fitness of the community."""
        # The original read the nonexistent attribute ``self._alpha``.
        return self._fitness(self._k_in, self._k_out)


class LFM():
    """LFM overlapping community detection driver.

    Parameters:
        G: graph object exposing ``nodes()`` and ``neighbors(node)``.
        alpha: fitness exponent handed to every ``Community``.
    """

    def __init__(self, G, alpha):
        self.G = G
        self.alpha = alpha

    def execute(self):
        """Grow communities from random seeds until every node was covered or seeded.

        Returns:
            list of sets of nodes, one set per community found; communities
            may overlap.
        """
        communities = []
        # ``G.node`` was removed in NetworkX 2.4; ``G.nodes()`` works in all versions.
        node_not_include = list(self.G.nodes())
        while node_not_include:
            c = Community(self.G, self.alpha)
            # randomly select a seed node
            seed = random.choice(node_not_include)
            c.add_node(seed)
            to_be_examined = c.get_neighbors()
            while to_be_examined:
                # pick the neighbor with the largest fitness gain
                best_node, best_gain = max(
                    ((node, c.cal_add_fitness(node)) for node in to_be_examined),
                    key=lambda item: item[1],
                )
                # stop condition: no neighbor improves the fitness
                if best_gain < 0.0:
                    break
                c.add_node(best_node)
                # prune members that now lower the fitness
                to_be_remove = c.recalculate()
                while to_be_remove is not None:
                    c.remove_node(to_be_remove)
                    to_be_remove = c.recalculate()
                to_be_examined = c.get_neighbors()
            # Drop the covered nodes, and the seed as well: a seed pruned from
            # its own community would otherwise stay in the pool and could be
            # drawn forever.
            node_not_include = [n for n in node_not_include
                                if n != seed and n not in c._nodes]
            communities.append(c._nodes)
        return communities


def main():
    """Load the football network, draw it and print the LFM communities."""
    # Built-in alternatives (one edge set and one node set each):
    # nx.karate_club_graph(), nx.florentine_families_graph()
    with zipfile.ZipFile('football.zip') as zf:  # closed on exit
        txt = zf.read('football.txt').decode()  # read info file
        gml = zf.read('football.gml').decode()  # read gml data
    # throw away bogus first line with # from mejn files
    gml = gml.split('\n')[1:]
    G = nx.parse_gml(gml)  # parse gml data
    print(txt)
    # print degree for each team - number of games
    for n, d in G.degree():
        print('%s %d' % (n, d))
    # Compact drawing style.  'edge_color' replaces the original 'line_color',
    # which is not a NetworkX drawing keyword.
    options = {
        'node_color': 'red',
        'node_size': 50,
        'edge_color': 'grey',
        'linewidths': 0,
        'width': 0.1,
    }
    # Use nx.draw(G, with_labels=True) instead to show the node labels.
    nx.draw(G, **options)
    plt.savefig('fig.png', bbox_inches='tight')
    plt.show()
    algorithm = LFM(G, 0.8)
    communities = algorithm.execute()
    for c in communities:
        print(len(c), sorted(c))


if __name__ == "__main__":
    main()

数据集及源码下载链接，运行的话可以采用上述代码，数据集可以使用网盘链接里面的数据集：

链接：https://pan.baidu.com/s/1F56jLvSpoyUDUE6pV5e-VA
提取码：v3fh

重叠社区发现算法LFM算法python源码含数据集相关推荐

机器学习中常见的六种分类算法（附Python源码+数据集）
今天和大家学习一下机器学习中常见的六种分类算法,如K近邻.决策树.朴素贝叶斯.逻辑回归.支持向量机.随机森林除了介绍这六种不同分类算法外,还附上对应的Python代码案例,并分析各自的优缺点. 01 ...
十分钟简明易懂搞定堆排序算法，附Python源码
什么是堆(heap) 堆是一个数组,它可以被看成一个近似的完全二叉树.树上的每一个节点对应数组中的一个元素,而且是从左向右填充. 从上述描述,我们可以做出总结: 从存储的角度来看,堆是一个数组从结构 ...
国内下载 Python 源码特别慢
开始接触python,有些坑要踩的. 事件背景是使用pyenv 安装python 版本, root@HIH-L-4823:/# pyenv install 3.7.5 pyenv: /root/.py ...
社区发现算法——LFM算法
LFM算法 LFM算法是来源于论文<Detecting the overlapping and hierarchical community structure in complex netw ...
基于链接密度聚类的重叠社区发现算法
基于链接密度聚类的重叠社区发现算法文章发表时间:2013年12月 1. 传统社区算法的局限性传统社区算法是将网络划分为若干各互不相连的社区,每个节点都必须隶属与唯一的社区,然而,在实际的社交网络中 ...
多目标遗传优化算法nsga2求解复杂约束问题【python源码实现，实数编码】
效果图如下: 提示:专栏解锁后,可以查看该专栏所有文章. 文章目录一.必备知识二.nsga2(遗传算法为实数编码) 2.1 有规则的例子 2.2 没有规则的例子总结一.必备知识十分钟了解完多 ...
多目标遗传优化算法nsga2求解复杂约束问题【python源码实现，二进制编码】
前言本文讲解多个输入情况下,多目标复杂约束问题.示例问题.Pareto解分布.代码量见下图提示:专栏解锁后,可以查看该专栏所有文章.划算. 文章目录前言一.必备知识二.多目标遗传优化算法求解 ...
社区发现算法 python_社区发现(Community Detection)算法（转）
作者: peghoty 社区发现(Community Detection)算法用来发现网络中的社区结构,也可以看做是一种聚类算法. 以下是我的一个 PPT 报告,分享给大家. 从上述定义可以看出:社区 ...
RS（255,223）纠错算法原理与项目源码
RS(255,223)纠错算法原理与项目源码 ##1.背景数据在网络传输.存储过程中由于信道噪声条件,硬件设备等问题数据产生了差错,这时候应该如何处理呢?特别是现在企业对海量大数据的传输,存储的重视 ...

重叠社区发现算法LFM算法python源码含数据集

重叠社区发现算法LFM算法python源码含数据集相关推荐

最新文章

热门文章