Spaces:

sklearn-docs
/

F-Test-vs-Mutual-Info-Regression

Sleeping

App Files Files Community

F-Test-vs-Mutual-Info-Regression / app.py

jucamohedano

add noise and variance sliders under feature1's sliders

533e635 about 2 years ago

raw

history blame contribute delete

10.4 kB

	import gradio as gr
	import numpy as np
	import matplotlib.pyplot as plt

	from sklearn.feature_selection import f_regression, mutual_info_regression
	from functools import partial

	def default(
	    n_samples,
	    noise_var,
	    noise_bias,
	    feat2_freq,
	    feat1_scale,
	    feat1_power,
	    feat2_shift,
	    feat2_scale,
	    feat2_func,
	    counter,
	    func_name,
	):
	    """Forward the "default" tab's input values to ``train_models``.

	    The leading positional parameters follow the order of the input
	    components that ``plot_func`` is given for this tab; ``counter`` (the
	    feature index to plot) and ``func_name`` are bound by ``plot_func``
	    through ``functools.partial``.

	    Returns whatever ``train_models`` returns (the matplotlib figure).
	    """
	    settings = {
	        "n_samples": n_samples,
	        "noise_var": noise_var,
	        "noise_bias": noise_bias,
	        "feat2_freq": feat2_freq,
	        "feat1_scale": feat1_scale,
	        "feat1_power": feat1_power,
	        "feat2_shift": feat2_shift,
	        "feat2_scale": feat2_scale,
	        "feat2_func": feat2_func,
	    }
	    return train_models(func_name, counter, **settings)

	def gaussian(
	    n_samples,
	    gaussian_center,
	    gaussian_width,
	    gaussian_scaling,
	    counter,
	    func_name,
	):
	    """Forward the "Gaussian" tab's input values to ``train_models``.

	    The leading positional parameters follow the order of the input
	    components that ``plot_func`` is given for this tab; ``counter`` (the
	    feature index to plot) and ``func_name`` are bound by ``plot_func``
	    through ``functools.partial``.

	    Returns whatever ``train_models`` returns (the matplotlib figure).
	    """
	    settings = {
	        "n_samples": n_samples,
	        "gaussian_center": gaussian_center,
	        "gaussian_width": gaussian_width,
	        "gaussian_scaling": gaussian_scaling,
	    }
	    return train_models(func_name, counter, **settings)

	def piecewise(
	    n_samples,
	    piecewise_thres,
	    piecewise_scale,
	    counter,
	    func_name,
	):
	    """Forward the "piecewise" tab's input values to ``train_models``.

	    The leading positional parameters follow the order of the input
	    components that ``plot_func`` is given for this tab; ``counter`` (the
	    feature index to plot) and ``func_name`` are bound by ``plot_func``
	    through ``functools.partial``.

	    Returns whatever ``train_models`` returns (the matplotlib figure).
	    """
	    settings = {
	        "n_samples": n_samples,
	        "piecewise_thres": piecewise_thres,
	        "piecewise_scale": piecewise_scale,
	    }
	    return train_models(func_name, counter, **settings)


	def train_models(func_name, counter, **kwargs):
	    """Build a synthetic regression target and plot one feature against it.

	    Three features are drawn with ``np.random.rand`` under a fixed seed, the
	    target ``y`` is computed from them according to ``func_name``, and the
	    univariate F-test and mutual-information scores (each divided by its
	    own maximum) for the selected feature are shown in the plot title.

	    Parameters
	    ----------
	    func_name : str
	        One of ``"default"``, ``"Gaussian"`` or ``"piecewise"``.
	    counter : int
	        Column index of the feature to put on the x axis.
	    **kwargs
	        ``n_samples`` is always required. The remaining keys depend on
	        ``func_name``:

	        * ``"default"``: ``feat1_scale``, ``feat1_power``, ``feat2_scale``,
	          ``feat2_freq``, ``feat2_shift``, ``feat2_func``, ``noise_var``,
	          ``noise_bias``.
	        * ``"Gaussian"``: ``gaussian_center``, ``gaussian_width``,
	          ``gaussian_scaling``.
	        * ``"piecewise"``: ``piecewise_thres``, ``piecewise_scale``.

	    Returns
	    -------
	    matplotlib.figure.Figure
	        Scatter plot of the selected feature against ``y``.

	    Raises
	    ------
	    KeyError
	        If ``func_name`` or ``kwargs["feat2_func"]`` is not recognised, or
	        a required keyword is missing.
	    """
	    # Re-seed on every call so that moving a slider changes only the
	    # formula, not the underlying random sample.
	    np.random.seed(0)
	    n_samples = int(kwargs["n_samples"])
	    X = np.random.rand(n_samples, 3)
	    # Drawn right after X, i.e. in the same position of the random stream
	    # for every formula.
	    noise = np.random.randn(n_samples)

	    if func_name == "default":
	        feat2_funcs = {
	            "Use sine function for feature 2": np.sin,
	            "Use cosine function for feature 2": np.cos,
	        }
	        feat2_func = feat2_funcs[kwargs["feat2_func"]]
	        # noise_var and noise_bias are multiplied by 0.1 before use;
	        # noise_var scales the standard-normal draw directly.
	        y = (
	            kwargs["feat1_scale"] * X[:, 0] ** kwargs["feat1_power"]
	            + kwargs["feat2_scale"]
	            * feat2_func(kwargs["feat2_freq"] * np.pi * X[:, 1] + kwargs["feat2_shift"])
	            + (kwargs["noise_var"] * 0.1) * noise
	            + (kwargs["noise_bias"] * 0.1)
	        )
	    elif func_name == "Gaussian":
	        # Centre, width and noise scaling are each multiplied by 0.1.
	        center = kwargs["gaussian_center"] * 0.1
	        width = kwargs["gaussian_width"] * 0.1
	        y = (
	            X[:, 0]
	            + np.exp(-(X[:, 1] - center) ** 2 / (2 * width ** 2))
	            + (kwargs["gaussian_scaling"] * 0.1) * noise
	        )
	    elif func_name == "piecewise":
	        # The sign of the x_1 term flips where x_2 is at or above the
	        # threshold (threshold value multiplied by 0.1).
	        mask = X[:, 1] < (kwargs["piecewise_thres"] * 0.1)
	        scale = kwargs["piecewise_scale"]
	        y = np.where(mask, scale * X[:, 0], scale * -X[:, 0]) + 0.1 * noise
	    else:
	        raise KeyError(func_name)

	    # Normalise both scores to a maximum of 1 so they are comparable.
	    f_test, _ = f_regression(X, y)
	    f_test /= np.max(f_test)

	    mi = mutual_info_regression(X, y)
	    mi /= np.max(mi)

	    # NOTE(review): the figure is created through pyplot and never closed
	    # here; confirm whether the caller releases it.
	    fig, ax = plt.subplots()

	    i = counter
	    ax.scatter(X[:, i], y, edgecolor="black", s=20)
	    ax.set_xlabel("$x_{}$".format(i + 1), fontsize=14)
	    ax.set_ylabel("$y$", fontsize=14)
	    ax.set_title("F-test={:.2f}, MI={:.2f}".format(f_test[i], mi[i]), fontsize=16)

	    return fig


	def iter_grid(n_rows, n_cols):
	    """Yield once per cell of an ``n_rows`` x ``n_cols`` gradio layout.

	    Each ``yield`` happens while a ``gr.Column`` nested in a ``gr.Row`` is
	    open, so whatever the consumer does in its loop body runs inside those
	    two context managers.
	    """
	    for _row in range(n_rows):
	        with gr.Row():
	            for _col in range(n_cols):
	                with gr.Column():
	                    yield
	def plot_func(input_model, args):
	    """Create one plot per feature and refresh it when any input changes.

	    Lays out a single row of three ``gr.Plot`` components (one per feature
	    column) and registers a ``change`` listener on every component in
	    ``args`` so that each plot is recomputed from the current values of all
	    of them.

	    Parameters
	    ----------
	    input_model : str
	        ``"default"``, ``"Gaussian"`` or ``"piecewise"``; selects the
	        callback and is used as the plot label.
	    args : list
	        Input components, in the positional order expected by the selected
	        callback. They serve both as the listener targets and as the
	        ``inputs`` of each listener.

	    Raises
	    ------
	    KeyError
	        If ``input_model`` is not one of the known names.
	    """
	    input_models = {
	        "default": default,
	        "Gaussian": gaussian,
	        "piecewise": piecewise,
	    }
	    model_fn = input_models[input_model]

	    for feature_idx, _ in enumerate(iter_grid(1, 3)):
	        # Bind the feature index now; the remaining positional arguments
	        # are filled in from ``args`` when a listener fires.
	        fn = partial(model_fn, counter=feature_idx, func_name=input_model)
	        plot = gr.Plot(label=input_model)

	        # Wire the listeners from ``args`` itself rather than from
	        # module-level component names.
	        for component in args:
	            component.change(fn=fn, inputs=args, outputs=plot)

	# Build the Gradio UI: a shared sample-size slider plus one tab per target
	# function, each with its own parameter inputs and a row of three plots.
	title = "Comparison of F-test and mutual information"
	with gr.Blocks(title=title) as demo:
	    gr.Markdown(f"## {title}")
	    # Adjacent string literals are used instead of backslash continuations
	    # so that source indentation does not end up inside the rendered text.
	    gr.Markdown(
	        "This example illustrates the differences between univariate "
	        "F-test statistics and mutual information. "
	        "The plots below show the dependency of `y` against individual `x_i` and normalized "
	        "values of univariate F-test statistics and mutual information. "
	        "In general, the F-test evaluates linear dependencies and tends to prioritize "
	        "features with linear relationships, while mutual information assesses any type "
	        "of dependency between variables and tends to identify features with strong "
	        "relationships. In these examples, the most discriminative features identified "
	        "by each approach may vary."
	    )
	    gr.Markdown(
	        "In the following examples, we introduce parameterization to enable interaction "
	        "with various parameters of the equation."
	    )

	    # Shared by all three tabs.
	    n_samples = gr.Slider(minimum=500, maximum=1500, value=1000, step=100,
	                          label="Number of Samples")

	    with gr.Tab("Default Example function"):
	        gr.Markdown(
	            "We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, "
	            "the target depends on them as follows:"
	        )
	        gr.Markdown("- `y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1)`")
	        gr.Markdown("that is the third feature is completely irrelevant.")

	        gr.Markdown("Parametrized equation:")
	        gr.Markdown(
	            "`y = f1_scale * x_1 ** f1_power + f2_scale * f2_func(f2_freq * np.pi * x_2 + f2_shift) "
	            "+ variance * random(samples) + bias`"
	        )

	        with gr.Row():
	            with gr.Column():
	                feat1_scale = gr.Slider(minimum=1, maximum=10, step=1,
	                                        label="Scale feature 1")

	                feat1_power = gr.Slider(minimum=1, maximum=4, step=1,
	                                        label="Raised feature 1 to the power")

	                noise_var = gr.Slider(minimum=0, maximum=10, step=1,
	                                      label="Noise variance")

	                noise_bias = gr.Slider(minimum=0, maximum=10, step=1,
	                                       label="Noise bias")

	            with gr.Column():
	                feat2_freq = gr.Slider(minimum=1, maximum=10, step=1, value=6,
	                                       label="Feature 2 frequency")

	                feat2_shift = gr.Slider(minimum=1, maximum=5, step=1,
	                                        label="Shift feature 2")

	                feat2_scale = gr.Slider(minimum=1, maximum=4, step=1,
	                                        label="Scale feature 2")

	                feat2_func = gr.Radio(choices=["Use sine function for feature 2",
	                                               "Use cosine function for feature 2"],
	                                      value="Use sine function for feature 2")

	        # The list order must match the positional parameters of `default`.
	        plot_func("default", [
	            n_samples,
	            noise_var,
	            noise_bias,
	            feat2_freq,
	            feat1_scale,
	            feat1_power,
	            feat2_shift,
	            feat2_scale,
	            feat2_func,
	        ])

	    with gr.Tab("Gaussian function"):
	        gr.Markdown(
	            "We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, "
	            "the target depends on them as follows:"
	        )
	        gr.Markdown("- `y = x_1 + np.exp(-(x_2 - 0.5)**2 / (2 * 0.1**2)) + 0.1 * N(0, 1)`")
	        gr.Markdown("that is the third feature is completely irrelevant.")

	        gr.Markdown("Parametrized equation:")
	        gr.Markdown(
	            "`y = x_1 + exponential(-(x_2 - center)**2 / (2 * width**2)) + scaling * random(samples)`"
	        )

	        gaussian_center = gr.Slider(minimum=0, maximum=10, value=5, step=1,
	                                    label="Gaussian center")

	        gaussian_width = gr.Slider(minimum=1, maximum=10, value=1, step=1,
	                                   label="Gaussian width")

	        gaussian_scaling = gr.Slider(minimum=1, maximum=5, value=1, step=1,
	                                     label="Gaussian scaling")

	        # The list order must match the positional parameters of `gaussian`.
	        plot_func("Gaussian", [
	            n_samples,
	            gaussian_center,
	            gaussian_width,
	            gaussian_scaling,
	        ])

	    with gr.Tab("Piecewise function"):
	        gr.Markdown(
	            "We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, "
	            "the target depends on them as follows:"
	        )
	        gr.Markdown("- `mask = x_2 < 0.5`")
	        gr.Markdown("- `y = x_1` if `mask` is True")
	        gr.Markdown("- `y = -x_1` if `mask` is False")
	        gr.Markdown("that is the third feature is completely irrelevant.")

	        gr.Markdown("Parametrized equation:")
	        gr.Markdown("- `mask = x_2 < threshold`")
	        gr.Markdown("- `y = scaling*x_1` if `mask` is True")
	        gr.Markdown("- `y = scaling*-x_1` if `mask` is False")

	        piecewise_thres = gr.Slider(minimum=1, maximum=10, value=5, step=1,
	                                    label="Piecewise threshold")

	        piecewise_scale = gr.Slider(minimum=1, maximum=10, value=1, step=1,
	                                    label="Piecewise scaling")

	        # The list order must match the positional parameters of `piecewise`.
	        plot_func("piecewise", [
	            n_samples,
	            piecewise_thres,
	            piecewise_scale,
	        ])


	demo.launch()