|
import gradio as gr |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
|
|
from sklearn.feature_selection import f_regression, mutual_info_regression |
|
from functools import partial |
|
|
|
def default(n_samples,
            noise_var,
            noise_bias,
            feat2_freq,
            feat1_scale,
            feat1_power,
            feat2_shift,
            feat2_scale,
            feat2_func,
            counter,
            func_name):
    """Forward the "default" tab's control values to ``train_models``.

    The first nine parameters arrive positionally, in the order of the
    input components registered for this callback; ``counter`` and
    ``func_name`` are pre-bound by ``plot_func`` through ``functools.partial``.

    Returns whatever ``train_models`` returns (the figure to display).
    """
    # Collect the equation parameters under the keyword names that
    # train_models reads from its **kwargs.
    settings = {
        "n_samples": n_samples,
        "noise_var": noise_var,
        "noise_bias": noise_bias,
        "feat2_freq": feat2_freq,
        "feat1_scale": feat1_scale,
        "feat1_power": feat1_power,
        "feat2_shift": feat2_shift,
        "feat2_scale": feat2_scale,
        "feat2_func": feat2_func,
    }
    return train_models(func_name, counter, **settings)
|
|
|
def gaussian(n_samples,
             gaussian_center,
             gaussian_width,
             gaussian_scaling,
             counter,
             func_name):
    """Forward the "Gaussian" tab's control values to ``train_models``.

    The first four parameters arrive positionally, in the order of the
    input components registered for this callback; ``counter`` and
    ``func_name`` are pre-bound by ``plot_func`` through ``functools.partial``.

    Returns whatever ``train_models`` returns (the figure to display).
    """
    # Keyword names must match the keys train_models looks up in **kwargs.
    settings = {
        "n_samples": n_samples,
        "gaussian_center": gaussian_center,
        "gaussian_width": gaussian_width,
        "gaussian_scaling": gaussian_scaling,
    }
    return train_models(func_name, counter, **settings)
|
|
|
def piecewise(n_samples,
              piecewise_thres,
              piecewise_scale,
              counter,
              func_name):
    """Forward the "piecewise" tab's control values to ``train_models``.

    The first three parameters arrive positionally, in the order of the
    input components registered for this callback; ``counter`` and
    ``func_name`` are pre-bound by ``plot_func`` through ``functools.partial``.

    Returns whatever ``train_models`` returns (the figure to display).
    """
    # Keyword names must match the keys train_models looks up in **kwargs.
    settings = {
        "n_samples": n_samples,
        "piecewise_thres": piecewise_thres,
        "piecewise_scale": piecewise_scale,
    }
    return train_models(func_name, counter, **settings)
|
|
|
|
|
def train_models(func_name, counter, **kwargs):
    """Build a synthetic dataset and plot the target against one feature.

    Three features are drawn uniformly from ``[0, 1)`` with a fixed seed,
    the target ``y`` is computed from the equation selected by
    ``func_name``, and every feature is scored with a univariate F-test
    and with mutual information (each normalized by its maximum). The
    returned figure is a scatter plot of feature ``counter`` against ``y``
    with both scores for that feature in the title.

    Parameters
    ----------
    func_name : str
        Which target equation to use: ``"default"``, ``"Gaussian"`` or
        ``"piecewise"``.
    counter : int
        Zero-based index of the feature column to plot (0, 1 or 2).
    **kwargs
        ``n_samples`` (always required) plus the parameters of the chosen
        equation:

        * ``"default"``: ``feat1_scale``, ``feat1_power``, ``feat2_scale``,
          ``feat2_freq``, ``feat2_shift``, ``feat2_func`` (one of the two
          radio labels), ``noise_var``, ``noise_bias``.
        * ``"Gaussian"``: ``gaussian_center``, ``gaussian_width``,
          ``gaussian_scaling``.
        * ``"piecewise"``: ``piecewise_thres``, ``piecewise_scale``.

        ``noise_var``, ``noise_bias``, ``gaussian_*`` and
        ``piecewise_thres`` are multiplied by 0.1 before use.

    Returns
    -------
    matplotlib.figure.Figure
        The scatter plot for the requested feature.

    Raises
    ------
    KeyError
        If ``func_name`` is not a known equation or a required keyword
        argument is missing.
    """
    # Fixed seed: the same slider settings always produce the same data.
    np.random.seed(0)
    n_samples = kwargs["n_samples"]
    X = np.random.rand(n_samples, 3)

    # Only the selected equation is evaluated, so exactly one block of
    # noise is drawn from the seeded generator after X.
    if func_name == "default":
        feat2_func_list = {
            "Use sine function for feature 2": np.sin,
            "Use cosine function for feature 2": np.cos,
        }
        feat2_func = feat2_func_list[kwargs["feat2_func"]]
        y = (
            kwargs["feat1_scale"] * X[:, 0] ** kwargs["feat1_power"]
            + kwargs["feat2_scale"]
            * feat2_func(kwargs["feat2_freq"] * np.pi * X[:, 1] + kwargs["feat2_shift"])
            + (kwargs["noise_var"] * 0.1) * np.random.randn(n_samples)
            + (kwargs["noise_bias"] * 0.1)
        )
    elif func_name == "Gaussian":
        center = kwargs["gaussian_center"] * 0.1
        width = kwargs["gaussian_width"] * 0.1
        y = (
            X[:, 0]
            + np.exp(-(X[:, 1] - center) ** 2 / (2 * width ** 2))
            + (kwargs["gaussian_scaling"] * 0.1) * np.random.randn(n_samples)
        )
    elif func_name == "piecewise":
        # The sign of the x_1 term flips depending on which side of the
        # threshold x_2 falls.
        mask = X[:, 1] < (kwargs["piecewise_thres"] * 0.1)
        scale = kwargs["piecewise_scale"]
        y = (
            np.where(mask, scale * X[:, 0], scale * -X[:, 0])
            + 0.1 * np.random.randn(n_samples)
        )
    else:
        raise KeyError(func_name)

    # Normalize both scores so the best feature gets 1.0.
    f_test, _ = f_regression(X, y)
    f_test /= np.max(f_test)

    mi = mutual_info_regression(X, y)
    mi /= np.max(mi)

    fig, ax = plt.subplots()

    i = counter
    ax.scatter(X[:, i], y, edgecolor="black", s=20)
    ax.set_xlabel("$x_{}$".format(i + 1), fontsize=14)
    ax.set_ylabel("$y$", fontsize=14)
    ax.set_title("F-test={:.2f}, MI={:.2f}".format(f_test[i], mi[i]), fontsize=16)

    # pyplot keeps every figure it creates registered until it is closed.
    # This function runs on every slider change, so unregister the figure
    # here to stop them piling up; the Figure object itself stays usable
    # by the caller.
    plt.close(fig)

    return fig
|
|
|
|
|
def iter_grid(n_rows, n_cols):
    """Yield once for every cell of an ``n_rows`` x ``n_cols`` layout grid.

    Each yield happens while a ``gr.Column`` nested inside a ``gr.Row`` is
    open, so whatever the consumer does in its loop body runs inside that
    row/column context. The yielded value itself is ``None``.
    """
    for _row_index in range(n_rows):
        with gr.Row():
            for _col_index in range(n_cols):
                with gr.Column():
                    yield
|
def plot_func(input_model, args):
    """Create one plot per feature and redraw it when any input changes.

    Lays out three ``gr.Plot`` components in a single row (one per feature
    column) and registers a ``change`` listener on every component in
    ``args`` so that each plot is recomputed from the current values of
    all of them.

    Parameters
    ----------
    input_model : str
        ``"default"``, ``"Gaussian"`` or ``"piecewise"``; selects the
        callback and is also used as the plot label and as the
        ``func_name`` passed to the callback.
    args : list
        The input components of this model, in the positional order the
        selected callback expects. They serve both as the callback inputs
        and as the components whose changes trigger a redraw.

    Raises
    ------
    KeyError
        If ``input_model`` is not one of the known model names.
    """
    input_models = {"default": default,
                    "Gaussian": gaussian,
                    "piecewise": piecewise}
    callback = input_models[input_model]

    # The dataset built by train_models has three feature columns; one
    # grid cell (and one plot) per column.
    n_features = 3

    for counter, _ in enumerate(iter_grid(1, n_features)):
        # Bind the feature index and model name; the remaining positional
        # parameters are filled from the values of `args` on each event.
        fn = partial(callback, counter=counter, func_name=input_model)

        plot = gr.Plot(label=input_model)

        # Listen on exactly the components that feed the callback, rather
        # than on module-level names that would have to mirror `args`.
        for component in args:
            component.change(fn=fn, inputs=args, outputs=plot)
|
|
|
title = "Comparison of F-test and mutual information"

# Page layout: an introduction, a sample-count slider shared by every tab,
# and one tab per target equation. Each tab declares its own controls and
# then calls plot_func to create the plots and wire them to those controls.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown("This example illustrates the differences between univariate \
        F-test statistics and mutual information. \
        The plots below show the dependency of `y` against individual `x_i` and normalized \
        values of univariate F-tests statistics and mutual information.\
        In general, the F-test evaluates linear dependencies and tends to prioritize \
        features with linear relationships, while mutual information assesses any type \
        of dependency between variables and tends to identify features with strong \
        relationships. In these examples, the most discriminative features identified \
        by each approach may vary.")
    gr.Markdown("In the following examples, we introduce parameterization to enable interaction \
        with various parameters of the equation.")

    # Shared by all three tabs.
    n_samples = gr.Slider(minimum=500, maximum=1500, value=1000, step=100,
                          label = "Number of Samples")

    with gr.Tab("Default Example function"):
        gr.Markdown("We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, \
            the target depends on them as follows:")
        gr.Markdown("- `y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1)`")
        gr.Markdown("that is the third feature is completely irrelevant.")

        gr.Markdown("Parametrized equation:")
        # Mirrors the "default" equation in train_models: the noise is an
        # additive term, and variance and bias are scaled by 0.1.
        gr.Markdown("`y = f1_scale * x_1 ** f1_power + f2_scale * f2_func(f2_freq * np.pi * x_2 + f2_shift) + 0.1 * variance * random(samples) + 0.1 * bias`")

        with gr.Row():
            with gr.Column():
                feat1_scale = gr.Slider(minimum=1, maximum=10, step=1,
                                        label = "Scale feature 1")

                feat1_power = gr.Slider(minimum=1, maximum=4, step=1,
                                        label = "Raised feature 1 to the power")

                noise_var = gr.Slider(minimum=0, maximum=10, step=1,
                                      label = "Noise variance")

                noise_bias = gr.Slider(minimum=0, maximum=10, step=1,
                                       label = "Noise bias")

            with gr.Column():
                feat2_freq = gr.Slider(minimum=1, maximum=10, step=1, value=6,
                                       label = "Feature 2 frequency")

                feat2_shift = gr.Slider(minimum=1, maximum=5, step=1,
                                        label = "Shift feature 2")

                feat2_scale = gr.Slider(minimum=1, maximum=4, step=1,
                                        label = "Scale feature 2")

                # The choice strings are the lookup keys used by
                # train_models to pick np.sin or np.cos.
                feat2_func = gr.Radio(choices=["Use sine function for feature 2",
                                               "Use cosine function for feature 2"],
                                      value="Use sine function for feature 2")

        # Order must match the positional parameters of `default`.
        plot_func("default", [n_samples,
                              noise_var,
                              noise_bias,
                              feat2_freq,
                              feat1_scale,
                              feat1_power,
                              feat2_shift,
                              feat2_scale,
                              feat2_func,
                              ])

    with gr.Tab("Gaussian function"):
        gr.Markdown("We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, \
            the target depends on them as follows:")
        gr.Markdown("- `y = x_1 + np.exp(-(x_2-0.5)**2 / (2 * 0.1**2)) + 0.1 * N(0, 1)`")
        gr.Markdown("that is the third feature is completely irrelevant.")

        gr.Markdown("Parametrized equation:")
        # Mirrors the "Gaussian" equation in train_models: the denominator
        # is 2 * width**2, and every slider value is scaled by 0.1.
        gr.Markdown("`y = x_1 + exponential(-(x_2 - 0.1 * center)**2 / (2 * (0.1 * width)**2)) + 0.1 * scaling * random(samples)`")

        gaussian_center = gr.Slider(minimum=0, maximum=10, value=5, step=1,
                                    label = "Gaussian center")

        gaussian_width = gr.Slider(minimum=1, maximum=10, value=1, step=1,
                                   label = "Gaussian width")

        gaussian_scaling = gr.Slider(minimum=1, maximum=5, value=1, step=1,
                                     label = "Gaussian scaling")

        # Order must match the positional parameters of `gaussian`.
        plot_func("Gaussian", [n_samples,
                               gaussian_center,
                               gaussian_width,
                               gaussian_scaling
                               ])

    with gr.Tab("Piecewise function"):
        gr.Markdown("We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, \
            the target depends on them as follows:")
        gr.Markdown("- `mask = x_2 < 0.5`")
        gr.Markdown("- `y = x_1` if `mask` is True")
        gr.Markdown("- `y = -x_1` if `mask` is False")
        gr.Markdown("that is the third feature is completely irrelevant.")

        gr.Markdown("Parametrized equation:")
        # Mirrors the "piecewise" equation in train_models: the threshold
        # slider is scaled by 0.1 and the negated branch applies where the
        # mask is False.
        gr.Markdown("- `mask = x_2 < 0.1 * threshold`")
        gr.Markdown("- `y = scaling*x_1` if `mask` is True")
        gr.Markdown("- `y = scaling*-x_1` if `mask` is False")
        piecewise_thres = gr.Slider(minimum=1, maximum=10, value=5, step=1,
                                    label = "Piecewise threshold")

        piecewise_scale = gr.Slider(minimum=1, maximum=10, value=1, step=1,
                                    label = "Piecewise scaling")

        # Order must match the positional parameters of `piecewise`.
        plot_func("piecewise", [n_samples, piecewise_thres,
                                piecewise_scale
                                ])


demo.launch()
|
|