层次汇合聚类(Hierarchical Agglomerative Clustering,HAC)



// HAC_learning.cpp: 定义控制台应用程序的入口点。
//#include "stdafx.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <vector>
using namespace std;
// Number of input points; every point starts out as its own cluster.
const int iniClusNum = 12;
// Merging stops once this many clusters remain.
const int stopNum = 3;

/// A 2-D point that doubles as a cluster centroid.
class Point
{
public:
    double x;
    double y;
    /// Index into ManagerC::ClusterLast of the cluster this centroid
    /// represents, or -1 while the point has never been merged.
    int NumPBelong;

    Point() : x(0), y(0), NumPBelong(-1) {}
    Point(double x1, double y1, int f = -1) : x(x1), y(y1), NumPBelong(f) {}
    // Copying is plain member-wise, so the compiler-generated copy
    // operations are used instead of a hand-written operator=.
};

/// Stateless geometry helpers for Point.
class ManagerP
{
public:
    /// Euclidean distance between p1 and p2.
    double getDistance(const Point& p1, const Point& p2) const
    {
        const double dx = p1.x - p2.x;
        const double dy = p1.y - p2.y;
        return std::sqrt(dx * dx + dy * dy);
    }

    /// Unweighted midpoint of p1 and p2; the result has NumPBelong == -1.
    Point getMean(const Point& p1, const Point& p2) const
    {
        return Point((p1.x + p2.x) / 2, (p1.y + p2.y) / 2);
    }
};

/// Holds the points/centroids and runs the agglomerative clustering.
class ManagerC
{
public:
    /// Entry i is either the original point i or, after merges, the centroid
    /// that currently lives in position i.
    Point Cluster[iniClusNum];
    /// ClusterLast[k] lists the original point indices assigned to cluster k.
    std::vector<int> ClusterLast[iniClusNum];
    /// true  => entry i is closed (absorbed into another entry) and is skipped
    ///          by the closest-pair search.
    bool isIndexClose[iniClusNum];
    /// true  => original point i has been recorded in some ClusterLast slot.
    bool isIndexClose2[iniClusNum];

    /// Starts with every flag cleared so ClusterAlgo() never reads
    /// uninitialised flags, even if initIndexClose() is not called.
    ManagerC()
    {
        initIndexClose();
    }

    /// Loads up to iniClusNum whitespace-separated "x y" pairs from
    /// "point.txt" into Cluster.
    ///
    /// Reading stops at the first pair that cannot be parsed or once
    /// iniClusNum points are stored, so a trailing newline or extra data can
    /// no longer write past the end of Cluster. Entries that are not read keep
    /// their default value (0, 0).
    void initCluster()
    {
        std::ifstream myfile("point.txt");
        if (!myfile)
        {
            std::cout << "cannot open file.";
            return;
        }

        double px = 0;
        double py = 0;
        int count = 0;
        // Test the extraction itself rather than eof(): eof() only becomes
        // true after a read has already failed.
        while (count < iniClusNum && (myfile >> px >> py))
        {
            Cluster[count] = Point(px, py);
            ++count;
        }
        // The stream closes itself when it goes out of scope.
    }

    /// Marks every entry as open and every point as not yet recorded.
    void initIndexClose()
    {
        for (int i = 0; i < iniClusNum; ++i)
        {
            isIndexClose[i] = false;
            isIndexClose2[i] = false;
        }
    }

    /// Prints every non-empty cluster as "cluster <k+1>" followed by its
    /// tab-separated member indices.
    void print() const
    {
        for (int i = 0; i < iniClusNum; ++i)
        {
            const std::vector<int>& members = ClusterLast[i];
            if (members.empty())
            {
                continue;
            }
            std::cout << "cluster " << i + 1 << '\n';
            for (std::size_t k = 0; k < members.size(); ++k)
            {
                std::cout << members[k] << "\t";
            }
            std::cout << '\n';
        }
        std::cout << std::endl;
    }

    /// Repeatedly merges the two closest open entries until stopNum clusters
    /// remain, recording membership in ClusterLast.
    ///
    /// Each merge replaces one entry with the midpoint of the pair and closes
    /// the other. A trace tag ("a0".."a3") is printed per merge:
    ///   a0 - neither entry belonged to a cluster: a new cluster is opened;
    ///   a1 - only y was a centroid: x joins y's cluster;
    ///   a2 - only x was a centroid: y joins x's cluster;
    ///   a3 - both were centroids: y's cluster is folded into x's.
    ///
    /// TODO: the closest-pair search is a full scan per merge; a min-heap
    /// would make it cheaper.
    void ClusterAlgo()
    {
        ManagerP geometry;
        int openClusters = iniClusNum; // every point starts as its own cluster
        int nextSlot = 0;              // next unused index into ClusterLast

        while (openClusters > stopNum)
        {
            int x = -1;
            int y = -1;
            if (!findClosestOpenPair(geometry, x, y))
            {
                break; // fewer than two open entries are left
            }

            // x < y is guaranteed by the search order.
            Point merged = geometry.getMean(Cluster[x], Cluster[y]);
            const int slotX = Cluster[x].NumPBelong;
            const int slotY = Cluster[y].NumPBelong;

            if (slotX == -1 && slotY == -1)
            {
                std::cout << "a0" << '\n';
                ClusterLast[nextSlot].push_back(x);
                ClusterLast[nextSlot].push_back(y);
                merged.NumPBelong = nextSlot;
                Cluster[x] = merged;    // x stays open and becomes the centroid
                isIndexClose[y] = true; // y is closed
                isIndexClose2[x] = true;
                isIndexClose2[y] = true;
                ++nextSlot;
            }
            else if (slotX == -1)
            {
                std::cout << "a1" << '\n';
                ClusterLast[slotY].push_back(x);
                merged.NumPBelong = slotY;
                Cluster[y] = merged;    // y stays open as the centroid
                isIndexClose[x] = true; // x is closed
                isIndexClose2[x] = true;
            }
            else if (slotY == -1)
            {
                std::cout << "a2" << '\n';
                ClusterLast[slotX].push_back(y);
                merged.NumPBelong = slotX;
                Cluster[x] = merged;    // x stays open as the centroid
                isIndexClose[y] = true; // y is closed
                isIndexClose2[y] = true;
            }
            else
            {
                std::cout << "a3" << '\n';
                // Move every member of y's cluster into x's cluster.
                std::vector<int>& absorbed = ClusterLast[slotY];
                ClusterLast[slotX].insert(ClusterLast[slotX].end(),
                                          absorbed.begin(), absorbed.end());
                absorbed.clear();
                merged.NumPBelong = slotX;
                Cluster[x] = merged;    // x stays open as the centroid
                isIndexClose[y] = true; // y is closed
            }

            --openClusters;
        }

        // Points that never took part in a merge are handed out over the
        // first stopNum slots, starting from the last one. The index wraps so
        // it can never run below zero.
        // NOTE(review): after an "a3" merge the surviving clusters are not
        // necessarily stored in slots 0..stopNum-1, so a leftover point may
        // land in an emptied slot or in an unrelated cluster - confirm the
        // intended policy.
        const int targetSlots = (stopNum < iniClusNum) ? stopNum : iniClusNum;
        if (targetSlots > 0)
        {
            int leftover = 0;
            for (int i = 0; i < iniClusNum; ++i)
            {
                if (!isIndexClose2[i])
                {
                    ClusterLast[targetSlots - 1 - (leftover % targetSlots)].push_back(i);
                    ++leftover;
                }
            }
        }
    }

private:
    /// Scans all open entries for the pair with the smallest distance.
    /// On success stores the pair in first/second (first < second) and
    /// returns true; returns false when no pair of open entries exists.
    /// Ties keep the pair found first in index order.
    bool findClosestOpenPair(const ManagerP& geometry, int& first, int& second) const
    {
        // Infinity instead of INT_MAX so arbitrarily distant points still match.
        double best = std::numeric_limits<double>::infinity();
        first = -1;
        second = -1;
        for (int i = 0; i < iniClusNum; ++i)
        {
            if (isIndexClose[i])
            {
                continue;
            }
            for (int j = i + 1; j < iniClusNum; ++j)
            {
                if (isIndexClose[j])
                {
                    continue;
                }
                const double d = geometry.getDistance(Cluster[i], Cluster[j]);
                if (d < best)
                {
                    best = d;
                    first = i;
                    second = j;
                }
            }
        }
        return first != -1;
    }
};

/// Loads the points, clusters them and prints the resulting clusters.
int main()
{
    ManagerC M;
    M.initCluster();
    M.initIndexClose();
    M.ClusterAlgo();
    M.print();
    std::system("pause"); // keeps the console window open (Windows shell command)
    return 0;
}


4 10
4 8
7 10
6 8
3 4
2 2
5 2
9 3
10 5
11 4
12 3
12 6





算法流程说明(对应 ClusterAlgo):

1. 合并循环(clus_index 初值为 0;当前类数大于 stopNum 时重复执行):
   - 在 isIndexClose 值不为 true(未关闭)的点中(即该点仍是某类的中心点,或尚未被合并过),找出距离最小的两个点 x、y(x < y),并记下它们的距离 min。
   - 如果找不到这样的一对点(所有的点的 isIndexClose 值都为 true,即都已关闭),跳出循环。
   - 计算 x、y 的中点 p。
   - a0:如果 x、y 都不是某类的中心点:
       将 x、y 分入第 clus_index 类;
       关闭 y(isIndexClose = true,y 不再是某类中心点);
       把 x、y 的中点设为 x,并令这个新的 x 为此类的中心点;
       打开 x(isIndexClose = false,x 为此类中心点);
       标记 x、y(isIndexClose2 = true);
       clus_index 加 1。
   - a1:如果 x 不是某类中心点,y 是某类中心点:
       将 x 放入 y 所在的类别中;关闭 x;把 x、y 的中点设为 y;标记 x。
   - a2:如果 x 是某类中心点,y 不是某类中心点:
       将 y 放入 x 所在的类别中;关闭 y;把 x、y 的中点设为 x;标记 y。
   - a3:如果 x、y 都是中心点:
       把 y 所在类别的所有点移入 x 所在类别中;关闭 y;把 x、y 的中点设为 x。

2. 收尾:对每一个点进行检查,将所有未标记的点(isIndexClose2 == false)平均分给各个类。


