vumichien committed on
Commit
aa78c1f
1 Parent(s): e4f5752

Create app.py

Files changed (1)
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
+ import gradio as gr
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from sklearn.datasets import make_blobs
+ import time
+ from sklearn.cluster import KMeans, MiniBatchKMeans
+ from sklearn.metrics.pairwise import pairwise_distances_argmin
+
+ theme = gr.themes.Monochrome(
+     primary_hue="indigo",
+     secondary_hue="blue",
+     neutral_hue="slate",
+ )
+ model_card = f"""
+ ## Description
+
+ This demo compares the performance of **MiniBatchKMeans** and **KMeans**: MiniBatchKMeans is faster, but gives slightly different results.
+ The points that are labelled differently by the two algorithms are also plotted.
+ You can vary the ``number of samples`` and the ``mini-batch size`` to see their effect.
+
+ ## Dataset
+
+ Simulated dataset generated with ``make_blobs``
+ """
+
+
+ def do_train(n_samples, batch_size):
+
+     np.random.seed(0)
+
+     centers = np.random.rand(3, 2)
+     n_clusters = len(centers)
+     X, labels_true = make_blobs(n_samples=n_samples, centers=centers, cluster_std=0.7)
+
+     k_means = KMeans(init="k-means++", n_clusters=n_clusters, n_init=10)
+     t0 = time.time()
+     k_means.fit(X)
+     t_batch = time.time() - t0
+
+
+     mbk = MiniBatchKMeans(
+         init="k-means++",
+         n_clusters=n_clusters,
+         batch_size=batch_size,
+         n_init=10,
+         max_no_improvement=10,
+         verbose=0,
+     )
+     t0 = time.time()
+     mbk.fit(X)
+     t_mini_batch = time.time() - t0
+
+
+     k_means_cluster_centers = k_means.cluster_centers_
+     order = pairwise_distances_argmin(k_means.cluster_centers_, mbk.cluster_centers_)
+     mbk_means_cluster_centers = mbk.cluster_centers_[order]
+
+     k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)
+     mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers)
+
+
+     colors = ["#4EACC5", "#FF9C34", "#4E9A06"]
+
+     # KMeans
+     fig1, axes1 = plt.subplots()
+     for k, col in zip(range(n_clusters), colors):
+         my_members = k_means_labels == k
+         cluster_center = k_means_cluster_centers[k]
+         axes1.plot(X[my_members, 0], X[my_members, 1], "w", markerfacecolor=col, marker=".", markersize=15)
+         axes1.plot(
+             cluster_center[0],
+             cluster_center[1],
+             "o",
+             markerfacecolor=col,
+             markeredgecolor="k",
+             markersize=12,
+         )
+     axes1.set_title("KMeans")
+     axes1.set_xticks(())
+     axes1.set_yticks(())
+
+     # MiniBatchKMeans
+     fig2, axes2 = plt.subplots()
+     for k, col in zip(range(n_clusters), colors):
+         my_members = mbk_means_labels == k
+         cluster_center = mbk_means_cluster_centers[k]
+         axes2.plot(X[my_members, 0], X[my_members, 1], "w", markerfacecolor=col, marker=".", markersize=15)
+         axes2.plot(
+             cluster_center[0],
+             cluster_center[1],
+             "o",
+             markerfacecolor=col,
+             markeredgecolor="k",
+             markersize=12,
+         )
+     axes2.set_title("MiniBatchKMeans")
+     axes2.set_xticks(())
+     axes2.set_yticks(())
+
+     # Initialize the "different" array to all False
+     different = mbk_means_labels == 4
+     fig3, axes3 = plt.subplots()
+
+     for k in range(n_clusters):
+         different += (k_means_labels == k) != (mbk_means_labels == k)
+
+     identic = np.logical_not(different)
+     axes3.plot(X[identic, 0], X[identic, 1], "w", markerfacecolor="#bbbbbb", marker=".", markersize=15)
+     axes3.plot(X[different, 0], X[different, 1], "w", markerfacecolor="m", marker=".", markersize=15)
+     axes3.set_title("Difference")
+     axes3.set_xticks(())
+     axes3.set_yticks(())
+     text = f"KMeans Train time: {t_batch:.2f}s Inertia: {k_means.inertia_:.4f}. MiniBatchKMeans Train time: {t_mini_batch:.2f}s Inertia: {mbk.inertia_:.4f}"
+     plt.close()
+     return fig1, fig2, fig3, text
+
+
+
+ with gr.Blocks(theme=theme) as demo:
+     gr.Markdown('''
+         <div>
+             <h1 style='text-align: center'>Comparison of the K-Means and MiniBatchKMeans clustering algorithms</h1>
+         </div>
+     ''')
+     gr.Markdown(model_card)
+     gr.Markdown("Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the example from <a href=\"https://scikit-learn.org/stable/auto_examples/cluster/plot_mini_batch_kmeans.html#sphx-glr-auto-examples-cluster-plot-mini-batch-kmeans-py\">scikit-learn</a>")
+     n_samples = gr.Slider(minimum=500, maximum=5000, step=500, value=500, label="Number of samples")
+     batch_size = gr.Slider(minimum=50, maximum=500, step=50, value=50, label="Size of the mini batches")
+     with gr.Row():
+         with gr.Column():
+             plot1 = gr.Plot(label="KMeans")
+         with gr.Column():
+             plot2 = gr.Plot(label="MiniBatchKMeans")
+         with gr.Column():
+             plot3 = gr.Plot(label="Difference")
+     with gr.Row():
+         results = gr.Textbox(label="Results")
+
+     n_samples.change(fn=do_train, inputs=[n_samples, batch_size], outputs=[plot1, plot2, plot3, results])
+     batch_size.change(fn=do_train, inputs=[n_samples, batch_size], outputs=[plot1, plot2, plot3, results])
+
+ demo.launch()
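Note: the do_train helper added above can also be exercised on its own, for example in a Python session after the function has been defined, without launching the Gradio UI. A minimal sketch (the parameter values below are illustrative and not part of the commit):

    # Illustrative values only: run the comparison once and inspect the summary text.
    fig_kmeans, fig_mbk, fig_diff, summary = do_train(n_samples=1500, batch_size=100)
    print(summary)  # timing and inertia for KMeans vs. MiniBatchKMeans
    fig_diff.savefig("difference.png")  # each returned plot is a matplotlib Figure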