Heavily inspired by this post

import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import numpy as np
import pandas as pd
from matplotlib.pyplot import cm

# create dataset
X, y, centers = make_blobs(
   n_samples=150, n_features=2,
   centers=3, cluster_std=0.8,
   shuffle=True, random_state=0, return_centers=True,
)

data = pd.DataFrame(data=X,columns=['feature_1','feature_2'])
data['cluster'] = y

# plot
data.plot(x='feature_1', y='feature_2', style='.');
from sklearn.cluster import KMeans

km = KMeans(
    n_clusters=3, init='random',
    n_init=10, max_iter=300, 
    tol=1e-04, random_state=0
)
y_km = km.fit_predict(X)
data['cluster_pred']=y_km
y_km
array([2, 1, 1, 1, 2, 1, 1, 2, 0, 1, 2, 2, 0, 1, 1, 0, 0, 2, 0, 2, 1, 2,
       1, 1, 0, 2, 2, 1, 0, 2, 0, 0, 0, 0, 1, 2, 2, 2, 1, 1, 0, 0, 1, 2,
       2, 2, 0, 2, 0, 1, 2, 1, 1, 2, 2, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0, 0, 1, 2, 2, 1, 1, 2, 2, 2, 0,
       0, 2, 2, 1, 2, 1, 2, 1, 0, 0, 2, 2, 2, 2, 0, 2, 2, 1, 0, 1, 1, 1,
       0, 1, 2, 0, 1, 0, 1, 1, 0, 0, 1, 2, 1, 1, 2, 2, 0, 2, 0, 0, 0, 0,
       2, 0, 0, 0, 1, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 0, 2, 2], dtype=int32)

#collapse
def plot_cluster(data, centers, key='cluster_pred'):
    fig,ax=plt.subplots()
    color=iter(cm.rainbow(np.linspace(0,1,len(data[key].unique()))))
    
    for cluster_pred, cluster in data.groupby(by=key, sort=True):
        
        x_centre = centers[cluster_pred, 0]
        y_centre = centers[cluster_pred, 1]
        
        c=next(color)
        for index,row in cluster.iterrows():
            xs = [x_centre,row['feature_1']]
            ys = [y_centre,row['feature_2']]
            
            ax.plot(xs,ys,'o-', color=c, label='predicted cluster %i' % cluster_pred)

Real:

plot_cluster(data=data, centers=centers, key='cluster')

Predicted

plot_cluster(data=data, centers=km.cluster_centers_, key='cluster_pred')
# calculate distortion for a range of number of cluster
distortions = []
for i in range(1, 11):
    km = KMeans(
        n_clusters=i, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0
    )
    km.fit(X)
    distortions.append(km.inertia_)

# plot
plt.plot(range(1, 11), distortions, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')
plt.show()
import sklearn.metrics as metrics
metrics.fowlkes_mallows_score(labels_true=y, labels_pred=y_km)
0.946853765553474
scores = []
clusters=range(1, 11)
for i in clusters:
    km = KMeans(
        n_clusters=i, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0
    )
    km.fit(X)
    scores.append(metrics.fowlkes_mallows_score(labels_true=y, labels_pred=km.predict(X)))

# plot
plt.plot(clusters, scores, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('Score');