实验结果

在最小支持度为0.001的条件下可以得到准确结果，耗时仅1.6s
最低可以支持到最小支持度为0.0003的计算

完整代码

package com.company;import java.io.*;
import java.util.*;

/**
 * A node of an FP-tree.
 *
 * <p>Besides the usual parent/children links, nodes that carry the same item
 * name are chained together through {@link #brother}; the chain starts at a
 * sentinel node stored in {@code FPTree.headTable}.
 */
class FPNode {
    String name;
    int count = 0;
    FPNode brother = null, parent = null;
    ArrayList<FPNode> children = new ArrayList<>();

    /** Creates a detached node with a count of zero (used for the root and head-table sentinels). */
    FPNode(String name) {
        this.name = name;
    }

    /** Creates a node with the given parent and initial count; the caller attaches it to the parent's children. */
    FPNode(String name, FPNode parent, int count) {
        this.name = name;
        this.parent = parent;
        this.count = count;
    }
}

/** An item name paired with the support it has in the database being processed. */
class Item {
    String name;
    int support;

    Item(String name, int support) {
        this.name = name;
        this.support = support;
    }
}

/**
 * One transaction reduced to its frequent items, ordered for insertion into an FP-tree.
 */
class Trade {
    ArrayList<Item> items = new ArrayList<>();

    /**
     * Keeps only the items present in {@code frequentItems} and orders them by
     * descending support.
     *
     * <p>Ties are broken by item name. Without a tie-break, items of equal
     * support would keep their per-transaction input order, so two
     * transactions with the same items could be inserted along different tree
     * paths and co-occurrences would be missed when prefix paths are collected.
     *
     * @param items         raw item names of one transaction
     * @param frequentItems support of every frequent item, keyed by item name
     */
    public Trade(List<String> items, Map<String, Integer> frequentItems) {
        for (String item : items) {
            Integer support = frequentItems.get(item);
            if (support != null) {
                this.items.add(new Item(item, support));
            }
        }
        // Sort by support, highest first; equal supports fall back to the item name.
        this.items.sort((l, r) -> {
            int bySupport = Integer.compare(r.support, l.support);
            return bySupport != 0 ? bySupport : l.name.compareTo(r.name);
        });
    }
}

/**
 * FP-tree built from a transaction database and an absolute minimum support count.
 *
 * <p>Each transaction is expected to list an item at most once; repeated items
 * within one transaction are counted once per occurrence.
 */
class FPTree {
    int minSupport;
    FPNode root = new FPNode("root");
    Map<String, Integer> frequentItems = new HashMap<>();
    // Sentinel node per frequent item; its brother chain links every tree node of that item.
    Map<String, FPNode> headTable = new HashMap<>();
    // Last node linked so far for each item, i.e. the tail of that item's brother chain.
    Map<String, FPNode> currentPosition = new HashMap<>();
    List<List<String>> database;

    /**
     * Counts supports, builds the head table and inserts every transaction.
     *
     * @param database   transactions, each a list of item names
     * @param minSupport minimum number of occurrences for an item to be kept
     */
    public FPTree(List<List<String>> database, int minSupport) {
        this.minSupport = minSupport;
        this.database = database;
        getFrequentItems();
        buildHeadTable();
        buildTree();
    }

    /** Computes the support of every item and records those reaching {@code minSupport}. */
    public void getFrequentItems() {
        Map<String, Integer> supportCount = new HashMap<>();
        for (List<String> line : database) {
            for (String item : line) {
                supportCount.merge(item, 1, Integer::sum);
            }
        }
        for (Map.Entry<String, Integer> entry : supportCount.entrySet()) {
            if (entry.getValue() >= minSupport) {
                frequentItems.put(entry.getKey(), entry.getValue());
            }
        }
    }

    /** Builds the head table: one sentinel node per frequent item, which is also the initial chain tail. */
    public void buildHeadTable() {
        for (String frequentItem : frequentItems.keySet()) {
            FPNode sentinel = new FPNode(frequentItem);
            headTable.put(frequentItem, sentinel);
            currentPosition.put(frequentItem, sentinel);
        }
    }

    /** Builds the FP-tree by inserting every transaction below the root. */
    public void buildTree() {
        for (List<String> items : database) {
            insertTree(new Trade(items, frequentItems), root);
        }
    }

    /**
     * Inserts the items of {@code trade} as a path starting below {@code fpNode}.
     *
     * <p>Existing children are reused and their count incremented; missing ones
     * are created with count 1 and appended to the item's brother chain. The
     * walk is a plain loop rather than repeated {@code remove(0)} plus
     * recursion, which was quadratic in the transaction length.
     *
     * @param trade  transaction to insert; its item list is empty afterwards
     * @param fpNode node under which the path starts
     * @return always 0
     */
    public int insertTree(Trade trade, FPNode fpNode) {
        FPNode current = fpNode;
        for (Item item : trade.items) {
            // Look for an existing child carrying this item.
            FPNode next = null;
            for (FPNode child : current.children) {
                if (child.name.equals(item.name)) {
                    next = child;
                    break;
                }
            }
            if (next != null) {
                next.count++;
            } else {
                next = new FPNode(item.name, current, 1);
                current.children.add(next);
                currentPosition.get(item.name).brother = next;
                currentPosition.put(item.name, next);
            }
            current = next;
        }
        // The previous implementation consumed the transaction; keep that post-condition.
        trade.items.clear();
        return 0;
    }
}

/** A frequent itemset together with its support count. */
class FrequentItem {
    List<String> items;
    int support;

    public FrequentItem(List<String> items, int support) {
        this.items = items;
        this.support = support;
    }
}

/**
 * Mines all frequent itemsets from an {@link FPTree} by recursively building
 * conditional FP-trees.
 */
class FPGrowth {
    int minSupport;
    List<FrequentItem> patternList = new ArrayList<>();

    /**
     * Runs the mining immediately; results are available through {@link #getPatternList()}.
     *
     * @param fpTree     tree built over the full database
     * @param minSupport absolute minimum support count used for the conditional trees
     */
    public FPGrowth(FPTree fpTree, int minSupport) {
        this.minSupport = minSupport;
        fpGrowth(fpTree, null);
    }

    /**
     * Emits one pattern per frequent item of {@code fpTree} (item followed by
     * {@code suffix}) and recurses into that item's conditional tree.
     *
     * @param fpTree tree to mine
     * @param suffix items already fixed by outer recursion levels, or {@code null} at the top level
     */
    private void fpGrowth(FPTree fpTree, List<String> suffix) {
        if (fpTree.root.children.isEmpty()) {
            return;
        }
        for (Map.Entry<String, Integer> entry : fpTree.frequentItems.entrySet()) {
            String frequentItem = entry.getKey();
            List<String> newSuffix = new ArrayList<>();
            newSuffix.add(frequentItem);
            if (suffix != null && !suffix.isEmpty()) {
                newSuffix.addAll(suffix);
            }
            patternList.add(new FrequentItem(newSuffix, entry.getValue()));
            // Build the conditional pattern base for this item.
            List<List<String>> conditionalPatternDatabase =
                    generateConditionalPatternDatabase(fpTree, frequentItem);
            // Build the conditional FP-tree and mine it with the extended suffix.
            FPTree conditionalFPTree = new FPTree(conditionalPatternDatabase, minSupport);
            fpGrowth(conditionalFPTree, newSuffix);
        }
    }

    /**
     * Collects the prefix path of every tree node carrying {@code frequentItem}.
     *
     * <p>Each path lists the ancestors from the node's parent up to, but not
     * including, the root, and is added once per unit of the node's count. The
     * same list instance is added repeatedly; the lists are only read afterwards.
     */
    private List<List<String>> generateConditionalPatternDatabase(FPTree fpTree, String frequentItem) {
        List<List<String>> conditionalPatternDatabase = new ArrayList<>();
        FPNode headNode = fpTree.headTable.get(frequentItem);
        for (FPNode fpNode = headNode.brother; fpNode != null; fpNode = fpNode.brother) {
            // Build the prefix path; the root is recognised by its null parent.
            List<String> prefixPath = new ArrayList<>();
            for (FPNode ancestor = fpNode.parent; ancestor.parent != null; ancestor = ancestor.parent) {
                prefixPath.add(ancestor.name);
            }
            for (int i = 0; i < fpNode.count; i++) {
                conditionalPatternDatabase.add(prefixPath);
            }
        }
        return conditionalPatternDatabase;
    }

    /** Returns the mined patterns sorted in place by descending support. */
    public List<FrequentItem> getPatternList() {
        patternList.sort((l, r) -> Integer.compare(r.support, l.support));
        return patternList;
    }
}

/**
 * Command-line entry point: reads a relative minimum support from standard
 * input, mines {@code retail.dat} and prints every frequent itemset.
 */
public class Main {
    private static final List<List<String>> database = new ArrayList<>();

    public static void main(String[] args) throws IOException {
        Scanner scanner = new Scanner(System.in);
        double minSupport = scanner.nextDouble();
        long startTime = System.currentTimeMillis();
        loadData();
        // Convert the relative threshold to an absolute count once and reuse it.
        int minSupportCount = (int) Math.ceil(minSupport * database.size());
        FPTree fpTree = new FPTree(database, minSupportCount);
        FPGrowth fpGrowth = new FPGrowth(fpTree, minSupportCount);
        int count = 0;
        for (FrequentItem frequentItem : fpGrowth.getPatternList()) {
            System.out.println(frequentItem.items + ": " + frequentItem.support);
            count++;
        }
        System.out.println("总数: " + count);
        long endTime = System.currentTimeMillis();
        System.out.println("程序运行时间：" + (endTime - startTime) + "ms");
    }

    /**
     * Reads {@code retail.dat}, one whitespace-separated transaction per line.
     *
     * <p>Empty tokens (blank lines, repeated separators) are dropped and an
     * item repeated on a line is kept once, so supports count transactions
     * rather than occurrences. Every line still yields one transaction, so
     * the database size used for the relative threshold is unchanged.
     */
    private static void loadData() throws IOException {
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader("retail.dat"))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                Set<String> uniqueItems = new LinkedHashSet<>();
                for (String token : line.trim().split("\\s+")) {
                    if (!token.isEmpty()) {
                        uniqueItems.add(token);
                    }
                }
                database.add(new ArrayList<>(uniqueItems));
            }
        }
    }
}

数据挖掘FPGrowth算法JAVA实现相关推荐

关联规则FpGrowth算法 Java实现
关联规则算法有Apriori和FpGrowth,与Apriori相比,FpGrowth扫描数据库的次数更少,效率大大提高,FpGrowth算法通过构造一个树结构来压缩数据记录,使得挖掘频繁项集只需要扫 ...
数据挖掘各种算法JAVA的实现方法
数据挖掘-关联分析频繁模式挖掘Apriori、FP-Growth及Eclat算法的JAVA及C++实现: 网址:http://blog.csdn.net/yangliuy/article/detail ...
数据挖掘原理与算法_技术分享|大数据挖掘算法之FPGrowth算法
程一舰数据技术处我们常说我们生活在信息时代,实际上,我们更多的还是生活在数据时代.因为从过去到现在累积了大量的数据,对数据的挖掘和分析也仅是从最近几年大数据和人工智能技术的发展而兴起.我们对现有数 ...
数据挖掘算法之关联规则挖掘（二）FPGrowth算法
之前介绍的apriori算法中因为存在许多的缺陷,例如进行大量的全表扫描和计算量巨大的自然连接,所以现在几乎已经不再使用在mahout的算法库中使用的是PFP算法,该算法是FPGrowth算法的分布 ...
【机器学习】数据挖掘算法——关联规则（三），FP-growth算法
前言上一篇文章介绍了用来挖掘发现强关联规则的Apriori算法.同时也知道了Apriori算法在实现过程中由于需要频繁的扫描数据集导致效率较低. FP-growth算法基于Apriori构建 ...
数据挖掘算法之FP-Growth算法介绍及Spark代码实现
FP-Growth算法概述阶段1:FP树构建步骤1:清洁和分类步骤2:构造FP树,带有已清理项目集的头表阶段2:开采主要树和条件FP树步骤1:将主要FP树划分为条件FP树步骤2:递归地挖 ...
数据挖掘：FP-Growth算法（Python实现）
目录介绍代码实现与解释感谢 pyfpgrowth 1.0 版本漏掉频繁项集分析介绍 item_sets = [['f', 'a', 'c', 'd', 'g', 'i', 'm', 'p'] ...
【数据挖掘】4、关联分析：Apriori、FP-Growth 算法、买面包是否也爱买啤酒
文章目录一.概念 1.1 支持度 1.2 置信度 1.3 提升度二.Apriori 算法 2.1 频繁项集的定义 2.2 手动推导 2.3 SDK 实战 2.3.1 超市购物 2.3.2 挑选演员 ...
【数据挖掘】FPgrowth算法笔记
1. 提出问题对于Apriori算法来说,仍然受到两种非平凡开销的影响: 仍然需要产生大量的候选集.例如,如果有10^4个频繁1项集,则需要产生10^7个候选频繁2项集. 可能需要重复地扫描整个数据 ...
Spark机器学习(9)：FPGrowth算法
关联规则挖掘最典型的例子是购物篮分析,通过分析可以知道哪些商品经常被一起购买,从而可以改进商品货架的布局. 1. 基本概念首先,介绍一些基本概念. (1) 关联规则:用于表示数据内隐含的关联性,一般 ...

数据挖掘FPGrowth算法JAVA实现

实验结果

相关阅读

完整代码

数据挖掘FPGrowth算法JAVA实现相关推荐

最新文章

热门文章