Spaces:

sklearn-docs
/

Incremental-PCA

Sleeping

App Files Files Community

Incremental-PCA / app.py

vumichien

Update app.py

c27c944 about 2 years ago

raw

history blame contribute delete

3.8 kB

	import gradio as gr
	import numpy as np
	import time
	import matplotlib.pyplot as plt

	from sklearn.datasets import load_iris
	from sklearn.decomposition import PCA, IncrementalPCA


	# Monochrome Gradio theme with an indigo / blue / slate palette, passed to gr.Blocks below.
	theme = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate")
	# Static Markdown description rendered near the top of the demo page.
	# It is a plain (non-f) string: the text contains no placeholders to interpolate.
	model_card = """
	## Description

	Incremental principal component analysis (IPCA) is a suitable alternative to Principal component analysis (PCA) when the dataset to be analyzed is too large to fit in memory.
	IPCA generates a low-rank representation of the input data utilizing a fixed amount of memory that is not reliant on the number of input data samples.

	In this demo, you can play around with different ``number of components`` and ``number of samples`` to explore the performance of IPCA and PCA, including a comparison of their respective outputs and running times.
	Note: Incremental PCA is comparatively slower to regular PCA, as it processes partial data sets sequentially.


	## Dataset

	Iris dataset
	"""
	# Load the iris dataset once at import time; every callback reuses these module-level arrays.
	# X is the data array that gets projected, y the per-sample class index used to colour the plots.
	iris = load_iris()
	X, y = iris.data, iris.target

	def _scatter_projection(projected, title):
	    """Scatter the first two columns of ``projected``, one colour per iris class.

	    Args:
	        projected: 2-D array of transformed samples, row-aligned with the
	            module-level ``y`` labels; only columns 0 and 1 are drawn.
	        title: Title placed above the axes.

	    Returns:
	        The matplotlib figure holding the scatter plot.
	    """
	    colors = ["navy", "turquoise", "darkorange"]
	    fig, ax = plt.subplots()
	    for color, i, target_name in zip(colors, [0, 1, 2], iris.target_names):
	        ax.scatter(
	            projected[y == i, 0],
	            projected[y == i, 1],
	            color=color,
	            lw=2,
	            label=target_name,
	        )
	    ax.set_title(title)
	    ax.axis([-4, 4, -1.5, 1.5])
	    ax.legend(loc="best", shadow=False, scatterpoints=1)
	    # pyplot keeps a reference to every figure it creates until it is closed.
	    # Without this, each slider event would leave figures behind. The Figure
	    # object itself stays usable for rendering after being closed.
	    plt.close(fig)
	    return fig


	def plot_pca(n_components, batch_size):
	    """Fit IncrementalPCA and PCA on the iris data and plot both projections.

	    Args:
	        n_components: Number of components to keep for both models.
	        batch_size: Number of samples IncrementalPCA uses for each batch.

	    Returns:
	        A tuple ``(ipca_figure, pca_figure, text)``: the two scatter plots and
	        a summary string with both running times and the mean absolute
	        difference between the two projections.
	    """
	    # Slider values may arrive as floats depending on the Gradio version;
	    # coerce them so the estimators always receive integers.
	    n_components = int(n_components)
	    batch_size = int(batch_size)

	    # perf_counter is a monotonic, high-resolution clock, suited to timing
	    # fits that finish in well under a millisecond.
	    ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
	    start = time.perf_counter()
	    X_ipca = ipca.fit_transform(X)
	    ipca_time = time.perf_counter() - start

	    pca = PCA(n_components=n_components)
	    start = time.perf_counter()
	    X_pca = pca.fit_transform(X)
	    pca_time = time.perf_counter() - start

	    # Compare magnitudes only (presumably because the sign of a component may
	    # differ between the two fits).
	    err = np.abs(np.abs(X_pca) - np.abs(X_ipca)).mean()

	    fig1 = _scatter_projection(X_ipca, "Incremental PCA of iris dataset")
	    fig2 = _scatter_projection(X_pca, "PCA of iris dataset")

	    text = f"PCA runing time: {pca_time:.6f} seconds. Incremental PCA runing time: {ipca_time:.6f} seconds. Mean absolute unsigned error: {err*100:.6f}%"

	    return fig1, fig2, text



	# Build the demo page: header, description, two sliders, two plots side by
	# side, and a text box for the timing/error summary.
	with gr.Blocks(theme=theme) as demo:
	    gr.Markdown('''
	<div>
	<h1 style='text-align: center'>Incremental PCA</h1>
	</div>
	''')
	    gr.Markdown(model_card)
	    gr.Markdown("Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the example from <a href=\"https://scikit-learn.org/stable/auto_examples/decomposition/plot_incremental_pca.html#sphx-glr-auto-examples-decomposition-plot-incremental-pca-py\">scikit-learn</a>")
	    n_components = gr.Slider(minimum=2, maximum=4, step=1, value=2, label="Number of components to keep")
	    batch_size = gr.Slider(minimum=10, maximum=50, step=10, value=10, label="The number of samples to use for each batch")

	    with gr.Row():
	        with gr.Column():
	            plot_1 = gr.Plot(label="Incremental PCA")
	        with gr.Column():
	            plot_2 = gr.Plot(label="PCA")
	    with gr.Row():
	        results = gr.Textbox(label="Results")

	    controls = [n_components, batch_size]
	    outputs = [plot_1, plot_2, results]

	    # Recompute everything whenever either slider moves.
	    for control in controls:
	        control.change(fn=plot_pca, inputs=controls, outputs=outputs)

	    # Also run once when the page loads, using the sliders' default values,
	    # so the plots and the result box are not empty before the first interaction.
	    demo.load(fn=plot_pca, inputs=controls, outputs=outputs)

	demo.launch()