
# NOTE(review): this line looks like a stray copy of the shuffle step further
# down; `data`, `row_idx` and `col_idx` are not defined yet at this point --
# TODO confirm whether it should be removed.
data = data[row_idx][:, col_idx]

这个例子演示如何生成一个数据集,并用谱共聚类算法(Spectral Co-Clustering algorithm)对它进行双向聚类。所谓“双向聚类”(biclustering),是指对数据矩阵的行(实例)和列(变量)同时进行聚类。

# Third-party dependencies for the biclustering demo.
import numpy as np
from matplotlib import pyplot as plt

# SpectralCoclustering is imported from the public `sklearn.cluster` namespace;
# the `sklearn.cluster.bicluster` submodule path is deprecated and unavailable
# in newer scikit-learn releases.
from sklearn.cluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

调用函数 make_biclusters,生成一个 300 行、300 列的数据数组,该数组具有用于双向聚类的固定块对角结构:预置 5 个双向簇(biclusters),并加入标准差为 5 的高斯噪声。然后作图显示生成的数据集。

# Generate a 300x300 array containing 5 planted biclusters with Gaussian noise.
# shuffle=False keeps rows/columns in their generated order for the first plot;
# random_state=0 makes the data reproducible.
data, rows, columns = make_biclusters(
    shape=(300, 300), n_clusters=5, noise=5, shuffle=False, random_state=0
)

# Show the dataset before any shuffling.
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# Shuffle rows and columns with a fixed seed so the run is reproducible.
rng = np.random.RandomState(0)
row_idx = rng.permutation(data.shape[0])
col_idx = rng.permutation(data.shape[1])
# Apply the row permutation first, then the column permutation.
data = data[row_idx][:, col_idx]

# Show the dataset after shuffling.
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

# Fit the co-clustering model on the shuffled data. The fit call is required:
# biclusters_, row_labels_ and column_labels_ only exist on a fitted model.
model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)

# Compare the recovered biclusters with the ground truth; the ground-truth
# indicator arrays are permuted with the same indices used to shuffle the data.
score = consensus_score(
    model.biclusters_, (rows[:, row_idx], columns[:, col_idx])
)
print("consensus score: {:.3f}".format(score))

# Reorder rows and columns by their assigned cluster label so that members of
# the same bicluster become contiguous in the plot.
fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")

plt.show()

consensus score: 1.000

