python apriori_python apriori算法代码怎么实现

展开全部

class Apriori(object):
    """Mine frequent itemsets and association rules from a T/F table file.

    The input file is read as whitespace-separated text. Its first line
    holds the column names; every following non-blank line is one
    transaction. Columns ``item_start`` .. ``item_end`` (1-based, inclusive)
    are the item columns, and an item is present in a transaction when its
    cell is exactly ``'T'``.

    Itemsets are represented as sorted lists of 0-based item-column
    positions (position 0 is column ``item_start``).

    Constructing an instance runs the whole pipeline and prints progress
    and the resulting rules to standard output.
    """

    def __init__(self, filename, min_support, item_start, item_end,
                 min_confidence=50):
        """Run the mining pipeline on ``filename``.

        Args:
            filename: Path of the transaction file.
            min_support: Minimum support, as a percentage of transactions.
            item_start: 1-based index of the first item column.
            item_end: 1-based index of the last item column (inclusive).
            min_confidence: Rules are reported only when their confidence
                (a percentage) is strictly greater than this value.
        """
        self.filename = filename
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.line_num = 0  # number of transactions, set by sut()
        self.item_start = item_start
        self.item_end = item_end
        # Level 1: one single-item set per item column.
        self.location = [[i] for i in range(self.item_end - self.item_start + 1)]
        self.support = self.sut(self.location)
        # Distinct item positions that occur in the current itemsets.
        self.num = sorted(set(j for i in self.location for j in i))
        # State of the previous level, used to compute confidences.
        self.pre_support = []
        self.pre_location = []
        self.pre_num = []
        self.item_name = []  # item column names from the header line
        self.rules = []  # filled by confidence_sup()
        self.find_item_name()
        self.loop()
        self.confidence_sup()

    def deal_line(self, line):
        """Return the item columns of one line of the file.

        The line is split on any run of whitespace, so trailing spaces or
        tabs never produce empty tokens.
        """
        return line.split()[self.item_start - 1:self.item_end]

    def find_item_name(self):
        """Read the item names from the first line of the file."""
        with open(self.filename, 'r') as handle:
            # Only the header is needed; do not read the whole file.
            self.item_name = self.deal_line(handle.readline())

    def sut(self, location):
        """Count the support of each itemset in ``location``.

        Args:
            location: List of itemsets, e.g. ``[[0, 1], [1, 2]]``.

        Returns:
            A list with, for each itemset, the number of transactions in
            which every item of the set is ``'T'``.

        Side effect: ``self.line_num`` is set to the number of transactions
        read (the header and rows without item columns are not counted).
        """
        support = [0] * len(location)
        transactions = 0
        with open(self.filename, 'r') as handle:
            next(handle, None)  # skip the header line
            for line in handle:
                row = self.deal_line(line)
                if not row:
                    # Blank line (or no item columns): not a transaction.
                    continue
                transactions += 1
                for position, itemset in enumerate(location):
                    if all(row[column] == 'T' for column in itemset):
                        support[position] += 1
        self.line_num = transactions
        return support

    def select(self, c):
        """Build the candidate itemsets for level ``c``.

        Every current itemset is extended by each known item it does not
        contain yet; itemsets that already have ``c`` items are carried
        over unchanged.

        Returns:
            The distinct candidates, each sorted, in ascending order.
        """
        candidates = set()
        for itemset in self.location:
            for item in self.num:
                if item not in itemset:
                    candidates.add(tuple(sorted([item] + itemset)))
                elif len(itemset) == c:
                    candidates.add(tuple(sorted(itemset)))
        return [list(candidate) for candidate in sorted(candidates)]

    def del_location(self, support, location):
        """Drop the candidates that cannot be frequent.

        A candidate is kept only when its support is non-zero, reaches the
        minimum support, and every subset obtained by removing one item is
        among the current itemsets (``self.location``).

        Args:
            support: Support counts, parallel to ``location``.
            location: Candidate itemsets.

        Returns:
            ``(support, location)`` restricted to the kept candidates. The
            input lists are left untouched.
        """
        # 100.0 forces true division: with Python 2 integers the threshold
        # would be floored and let infrequent itemsets through.
        threshold = self.line_num * self.min_support / 100.0
        known = set(tuple(itemset) for itemset in self.location)
        kept_support = []
        kept_location = []
        for count, candidate in zip(support, location):
            if count == 0 or count < threshold:
                continue
            subsets = (candidate[:k] + candidate[k + 1:]
                       for k in range(len(candidate)))
            if all(tuple(subset) in known for subset in subsets):
                kept_support.append(count)
                kept_location.append(candidate)
        return kept_support, kept_location

    def loop(self):
        """Grow the itemsets level by level until no candidate survives.

        On exit ``self.location`` / ``self.support`` / ``self.num`` describe
        the last non-empty level and the ``pre_*`` attributes the level
        before it.
        """
        s = 2
        while True:
            print('-' * 80)
            print('The %d loop' % (s - 1))
            print('location %s' % (self.location,))
            print('support %s' % (self.support,))
            print('num %s' % (self.num,))
            print('-' * 80)
            # Generate and filter the next level of candidates.
            location = self.select(s)
            support = self.sut(location)
            support, location = self.del_location(support, location)
            num = sorted(set(j for i in location for j in i))
            s += 1
            if not (location and support and num):
                break
            self.pre_num = self.num
            self.pre_location = self.location
            self.pre_support = self.support
            self.num = num
            self.location = location
            self.support = support

    def confidence_sup(self):
        """Print the association rules derived from the final itemsets.

        For each final itemset, each item in turn is taken as the
        consequent and the remaining items as the antecedent. The rule is
        reported when the antecedent is an itemset of the previous level
        and the confidence exceeds ``self.min_confidence``.

        Each reported rule is also appended to ``self.rules`` as
        ``(antecedent_names, consequent_name, support, confidence)`` with
        support and confidence expressed as percentages.
        """
        self.rules = []
        if sum(self.pre_support) == 0:
            # No second level was ever reached.
            print('min_support error')
            return
        for itemset, count in zip(self.location, self.support):
            support = float(count) / self.line_num * 100
            # Keep the removed item paired with its antecedent so the
            # consequent stays right even if some antecedent is skipped.
            for position, consequent in enumerate(itemset):
                antecedent = itemset[:position] + itemset[position + 1:]
                try:
                    where = self.pre_location.index(antecedent)
                except ValueError:
                    continue  # antecedent is not a previous-level itemset
                antecedent_support = self.pre_support[where]
                if not antecedent_support:
                    continue
                confidence = float(count) / antecedent_support * 100
                if confidence > self.min_confidence:
                    names = [name for idx, name in enumerate(self.item_name)
                             if idx in antecedent]
                    consequent_name = self.item_name[consequent]
                    self.rules.append(
                        (names, consequent_name, support, confidence))
                    print(' '.join([
                        ','.join(names), '->>', consequent_name,
                        ' min_support: ', str(support) + '%',
                        ' min_confidence:', str(confidence) + '%',
                    ]))

def main():
    """Run the Apriori miner on the two sample data files.

    Each call does all of its work, including printing the results, inside
    the constructor, so the instances themselves are not kept.
    """
    Apriori('basket.txt', 14, 3, 13)
    Apriori('simple.txt', 50, 2, 6)

# Run the sample analysis only when executed as a script, not on import.
if __name__ == '__main__':
    main()

Apriori(filename, min_support, item_start, item_end)

参数说明

filename:(路径)文件名

min_support:最小支持度

item_start:item起始位置

item_end:item结束位置

import apriori

c = apriori.Apriori('basket.txt', 11, 3, 13)

输出：

python apriori_python apriori算法代码怎么实现相关推荐

python实现rsa加密解密代码_使用python实现rsa算法代码
RSA算法是一种非对称加密算法,是现在广泛使用的公钥加密算法,主要应用是加密信息和数字签名. 维基百科给出的RSA算法简介如下: 假设Alice想要通过一个不可靠的媒体接收Bob的一条私人讯息.她可以 ...
apriori算法代码python_Apriori算法原理及Python代码
一.Apriori算法原理参考:Python --深入浅出Apriori关联分析算法(一)www.cnblogs.com 二.在Python中使用Apriori算法查看Apriori算法的帮助文 ...
apriori算法代码_资源 | 《机器学习实战》及代码（基于Python3）
〇.<机器学习实战> 今天推荐给大家的是<机器学习实战>这本书. 机器学习作为人工智能研究领域中一个极其重要的研究方向(一文章看懂人工智能.机器学习和深度学习),在当下极其热门 ...
Python使用Apriori算法查找关系密切的演员组合
Apriori算法基本概念: 关联规则:可以表示为一个蕴含式R:X==>Y,其中X&Y为空集.关联规则的含义是,如果X发生,那么Y很可能也会发生. 关联分析或者关联规则学习:从大规模数据 ...
apriori算法代码_sklearn(九)apriori 关联规则算法,以及FP-growth 算法
是什么: apriori算法是第一个关联规则挖掘算法,利用逐层搜索的迭代方法找出数据库中的项集(项的集合)的关系,以形成规则,其过程由连接(类矩阵运算)与剪枝(去掉没必要的中间结果)组成.是一种挖掘关 ...
python实现Apriori算法
★ 关联分析: 从大规模数据集中寻找物品间的隐含关系被称作关联分析.而寻找物品的不同组合是一项十分耗时的任务,所需的计算代价很高.Apriori算法正是来解决这一问题. 物品之间的关系一般可以有两种形 ...
python关联规则apriori算法_Python --深入浅出Apriori关联分析算法（二） Apriori关联规则实战...
上一篇我们讲了关联分析的几个概念,支持度,置信度,提升度.以及如何利用Apriori算法高效地根据物品的支持度找出所有物品的频繁项集. 这次呢,我们会在上次的基础上,讲讲如何分析物品的关联规则得出关联 ...
apriori算法代码python_Apriori算法的Python实现
输入数据格式 25 52 164 240 274 328 368 448 538 561 630 687 730 775 825 834 39 120 124 205 401 581 704 814 ...
基于MATLAB与Python的DBSCAN算法代码
接上文,我们详细介绍了DBSCAN与几种常见聚类算法的对比与流程,DBSCAN聚类算法最为特殊,它是一种基于密度的聚类方法,聚类前不需要预先指定聚类的个数,接下来将DBSCAN分析代码分享 Pytho ...

python apriori_python apriori算法代码怎么实现

python apriori_python apriori算法代码怎么实现相关推荐

最新文章

热门文章