





1. 聚类分配:根据每个数据点距聚类质心的距离,为其分配一个聚类。

2. 移动质心:计算聚类所有点的平均值,并将聚类质心重定位到平均位置。





import argparse
import json
import math
from collections import Counter

import matplotlib.colors as color
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import webcolors
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image
from sklearn.cluster import KMeans


# Command-line configuration.
#   --clusters   number of K-Means clusters (colours) to extract
#   --imagepath  path of the image to analyse
parser = argparse.ArgumentParser()
# type=int: without it a user-supplied value arrives as a string, which
# KMeans(n_clusters=...) does not accept.
parser.add_argument("--clusters", type=int, help="No. of clusters")
parser.add_argument("--imagepath", help="Path to input image")
args = parser.parse_args()

# Fall back to the defaults when an option is omitted.
IMG_PATH = args.imagepath if args.imagepath else "images/poster.jpg"
CLUSTERS = args.clusters if args.clusters else 5

# Size, in pixels, that the longer image side is scaled to before clustering.
WIDTH = 128
HEIGHT = 128
HEIGHT = 128


http://chir.ag/projects/ntc/ntc.js(JavaScript 文件)

http://chir.ag/projects/ntc/(链接到创建者的网站)


# Load the colour-name table. closest_colour() reads its 'color_names'
# mapping of hex code (without the leading '#') -> colour name.
# JSON is UTF-8 by specification, so do not rely on the platform's locale
# encoding when reading it.
with open('colors.json', encoding='utf-8') as clr:
    color_dict = json.load(clr)


def TrainKMeans(img):
    """Cluster the pixel colours of ``img`` with K-Means.

    The image is downscaled with ``calculate_new_size`` and flattened to one
    RGB row per pixel. Those rows are clustered into ``CLUSTERS`` groups.

    Args:
        img: an opened PIL image.

    Returns:
        A ``(cluster_map, kmeans)`` tuple. ``cluster_map`` is a DataFrame with
        one row per pixel and the columns ``position`` (``[r, g, b]`` list),
        ``cluster`` (label), ``x``/``y``/``z`` (the R/G/B components),
        ``color`` (hex code of the pixel's cluster centre) and ``color_name``
        (name of that centre colour). ``kmeans`` is the fitted estimator.

    Side effects:
        Prints ``cluster_map`` to stdout.
    """
    new_width, new_height = calculate_new_size(img)

    # convert("RGB") guarantees exactly three channels, so the reshape below
    # also works for RGBA, palette and greyscale inputs.
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it
    # used to alias.
    image = img.convert("RGB").resize((new_width, new_height), Image.LANCZOS)
    img_vector = np.array(image).reshape(-1, 3)

    # ---------- Training the K-Means clustering algorithm ----------
    # int(): CLUSTERS may arrive as a string when taken from the command line.
    kmeans = KMeans(n_clusters=int(CLUSTERS), random_state=0)
    labels = kmeans.fit_predict(img_vector)

    hex_colors = [rgb_to_hex(center) for center in kmeans.cluster_centers_]
    color_name = {}
    for center in kmeans.cluster_centers_:
        hex_code, name = findColorName(center)
        color_name[hex_code] = name

    # One row per pixel: its RGB coordinates plus the cluster it fell into.
    cluster_map = pd.DataFrame()
    cluster_map['position'] = [[*pixel] for pixel in img_vector]
    cluster_map['cluster'] = labels
    cluster_map['x'] = img_vector[:, 0]
    cluster_map['y'] = img_vector[:, 1]
    cluster_map['z'] = img_vector[:, 2]
    cluster_map['color'] = [hex_colors[label] for label in labels]
    cluster_map['color_name'] = [color_name[code] for code in cluster_map['color']]

    print(cluster_map)
    return cluster_map, kmeans

如大家所见,上面的函数“TrainKMeans”接受一个已打开的图像对象作为参数。在第一步中,我们将图像调整为之前在程序中定义的尺寸,新的尺寸由下面的自定义函数计算得到。

def calculate_new_size(image, target_width=None, target_height=None):
    """Return a downscaled ``(width, height)`` that keeps the aspect ratio.

    The longer side is set to the target size and the other side is scaled
    by the same factor. When width and height are equal, the width branch
    is used.

    Args:
        image: object exposing integer ``width`` and ``height`` attributes
            (e.g. a PIL image).
        target_width: size for the width when it is the longer side;
            defaults to the module-level ``WIDTH``.
        target_height: size for the height when it is the longer side;
            defaults to the module-level ``HEIGHT``.

    Returns:
        ``(new_width, new_height)`` as ints, each at least 1.
    """
    if target_width is None:
        target_width = WIDTH
    if target_height is None:
        target_height = HEIGHT

    if image.width >= image.height:
        scale = target_width / float(image.width)
        # max(1, ...): a very wide image would otherwise truncate to a
        # 0-pixel height, which cannot be resized to.
        return target_width, max(1, int(float(image.height) * scale))

    scale = target_height / float(image.height)
    return max(1, int(float(image.width) * scale)), target_height



def rgb_to_hex(rgb):
    """Convert an RGB triple to its hex colour code.

    Each of the first three components is truncated to an int and scaled
    by 1/255 before being handed to ``color.to_hex``.

    Args:
        rgb: indexable with at least three numeric components.

    Returns:
        The hex code produced by ``color.to_hex``.

    Side effects:
        Prints the hex code to stdout.
    """
    scaled = [int(rgb[channel]) / 255 for channel in range(3)]
    hex_code = color.to_hex(scaled)
    print(hex_code)
    return hex_code



def findColorName(rgb):
    """Return ``(hex_code, name)`` for an RGB triple.

    ``name`` is the exact colour name when ``get_colour_name`` finds one,
    otherwise the nearest colour name. Components are truncated to ints
    before the lookup and before building the hex code.
    """
    red, green, blue = int(rgb[0]), int(rgb[1]), int(rgb[2])
    exact_name, nearest_name = get_colour_name((red, green, blue))
    hex_code = color.to_hex([red / 255, green / 255, blue / 255])
    return hex_code, (nearest_name if exact_name is None else exact_name)


def closest_colour(requested_colour):
    """Return the name in ``color_dict['color_names']`` nearest to the input.

    Nearness is the Euclidean distance in RGB space between
    ``requested_colour`` and each table entry (keys are hex codes without
    the leading ``#``). When several entries are equally near, the one that
    appears last in the table is returned.
    """
    def distance_to(entry):
        r_c, g_c, b_c = webcolors.hex_to_rgb("#" + entry[0])
        return math.sqrt(
            (r_c - requested_colour[0]) ** 2
            + (g_c - requested_colour[1]) ** 2
            + (b_c - requested_colour[2]) ** 2
        )

    entries = list(color_dict['color_names'].items())
    # min() keeps the first minimum it meets; walking the table backwards
    # therefore makes the last entry win a tie.
    _, nearest = min(reversed(entries), key=distance_to)
    return nearest


def get_colour_name(requested_colour):
    """Return ``(actual_name, closest_name)`` for an RGB triple.

    The exact name is looked up with ``webcolors.rgb_to_name``. If that
    raises ``ValueError``, ``actual_name`` is ``None`` and ``closest_name``
    comes from ``closest_colour``. Otherwise both elements are the exact
    name.
    """
    try:
        exact = webcolors.rgb_to_name(requested_colour)
    except ValueError:
        return None, closest_colour(requested_colour)
    return exact, exact




def plotColorClusters(img):
    """Cluster the colours of ``img`` and visualise the result.

    Shows two figures in turn:
      1. a 3-D scatter of every pixel in RGB space, coloured by the hex code
         of its cluster centre;
      2. the image next to a pie chart of each cluster colour's share.

    Args:
        img: an opened PIL image, passed on to ``TrainKMeans``.

    Side effects:
        Prints the per-colour summary table and opens plot windows.
    """
    cluster_map, _ = TrainKMeans(img)

    fig = plt.figure()
    # Axes3D(fig) no longer attaches itself to the figure on current
    # Matplotlib releases, which leaves the plot empty; add_subplot does.
    ax = fig.add_subplot(111, projection='3d')

    # Group by hex code and colour name to count the pixels (data points)
    # in each cluster.
    mydf = (
        cluster_map.groupby(['color', 'color_name'])
        .agg({'position': 'count'})
        .reset_index()
        .rename(columns={"position": "count"})
    )
    mydf['Percentage'] = round((mydf['count'] / mydf['count'].sum()) * 100, 1)
    print(mydf)

    # Scatter plot of every pixel, coloured by its cluster colour.
    ax.scatter(cluster_map['x'], cluster_map['y'], cluster_map['z'],
               color=cluster_map['color'])
    plt.show()

    # Image alongside a pie chart of each identified colour's share.
    plt.figure(figsize=(14, 8))
    plt.subplot(221)
    plt.imshow(img)
    plt.axis('off')
    plt.subplot(222)
    plt.pie(mydf['count'], labels=mydf['color_name'], colors=mydf['color'],
            autopct='%1.1f%%', startangle=90)
    plt.axis('equal')
    plt.show()


def main():
    """Open the image at ``IMG_PATH`` and plot its colour clusters."""
    # The context manager closes the underlying file once plotting is done.
    with Image.open(IMG_PATH) as img:
        plotColorClusters(img)




