Spaces:

sklearn-docs
/

F-Test-vs-Mutual-Info-Regression

Sleeping

App Files Files Community

jucamohedano committed on Jun 1, 2023

Commit

e9735b2

1 Parent(s): 5cb8c11

Add application and requirements.txt

Browse files

Files changed (2) hide show

app.py +266 -0
requirements.text +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,266 @@

+import gradio as gr
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.feature_selection import f_regression, mutual_info_regression
+from functools import partial
+def default(n_samples,
+            noise_var,
+            noise_bias,
+            feat2_freq,
+            feat1_scale,
+            feat1_power,
+            feat2_shift,
+            feat2_scale,
+            feat2_func,
+            counter,
+            func_name):
+    return train_models(
+            func_name,
+            counter,
+            n_samples= n_samples,
+            noise_var= noise_var,
+            noise_bias= noise_bias,
+            feat2_freq= feat2_freq,
+            feat1_scale= feat1_scale,
+            feat1_power= feat1_power,
+            feat2_shift= feat2_shift,
+            feat2_scale= feat2_scale,
+            feat2_func= feat2_func,
+            )
+def gaussian(n_samples,
+            gaussian_center,
+            gaussian_width,
+            gaussian_scaling,
+            counter,
+            func_name):
+    return train_models(
+            func_name,
+            counter,
+            n_samples= n_samples,
+            gaussian_center= gaussian_center,
+            gaussian_width= gaussian_width,
+            gaussian_scaling= gaussian_scaling,
+            )
+def piecewise(n_samples,
+            piecewise_thres,
+            piecewise_scale,
+            counter,
+            func_name):
+    return train_models(
+            func_name,
+            counter,
+            n_samples= n_samples,
+            piecewise_thres= piecewise_thres,
+            piecewise_scale= piecewise_scale,
+            )
+def train_models(func_name, counter, **kwargs):
+    functions = dict()
+    if func_name == "default":
+        feat2_func_list = {
+            "Use sine function for feature 2": np.sin,
+            "Use cosine function for feature 2": np.cos,
+        }
+        functions.update({"feat2_func":feat2_func_list[kwargs["feat2_func"]]})
+    np.random.seed(0)
+    n_samples = kwargs["n_samples"]
+    X = np.random.rand(n_samples, 3)
+    if func_name == "piecewise":
+        mask = X[:, 1] < (kwargs["piecewise_thres"]*0.1)
+    functions.update ({
+    "default":
+    lambda X: (kwargs["feat1_scale"]* X[:, 0] ** kwargs["feat1_power"] +
+                kwargs["feat2_scale"] * functions["feat2_func"](kwargs["feat2_freq"] * np.pi * X[:, 1] + kwargs["feat2_shift"]) +
+                (kwargs["noise_var"]*0.1) * np.random.randn(n_samples) + (kwargs["noise_bias"]*0.1)),
+    "Gaussian":
+    lambda X: (X[:, 0] + np.exp(-(X[:, 1] - (kwargs["gaussian_center"]*0.1))**2 / (2 * (kwargs["gaussian_width"]*0.1)**2)) +
+                (kwargs["gaussian_scaling"]*0.1) * np.random.randn(n_samples)),
+    "piecewise":
+    lambda X: (np.where(mask, kwargs["piecewise_scale"] * X[:, 0], kwargs["piecewise_scale"] * -X[:, 0]) +
+                0.1 * np.random.randn(n_samples))
+    })
+    y = functions[func_name](X)
+    f_test, _ = f_regression(X, y)
+    f_test /= np.max(f_test)
+    mi = mutual_info_regression(X, y)
+    mi /= np.max(mi)
+    fig, ax = plt.subplots()
+    i = counter
+    ax.scatter(X[:, i], y, edgecolor="black", s=20)
+    ax.set_xlabel("$x_{}$".format(i + 1), fontsize=14)
+    ax.set_ylabel("$y$", fontsize=14)
+    ax.set_title("F-test={:.2f}, MI={:.2f}".format(f_test[i], mi[i]), fontsize=16)
+    return fig
+def iter_grid(n_rows, n_cols):
+    # create a grid using gradio Block
+    for _ in range(n_rows):
+        with gr.Row():
+            for _ in range(n_cols):
+                with gr.Column():
+                    yield
+def plot_func(input_model, args):
+    input_models = {"default": default,
+                    "Gaussian": gaussian,
+                    "piecewise": piecewise}
+    counter = 0
+    for _ in iter_grid(1,3):
+        fn = partial(input_models[input_model], counter=counter, func_name=input_model)
+        if counter >= len(input_models):
+            break
+        plot = gr.Plot(label=input_model)
+        n_samples.change(fn=fn, inputs=args, outputs=plot)
+        if input_model == "default":
+            noise_var.change(fn=fn, inputs=args, outputs=plot)
+            noise_bias.change(fn=fn, inputs=args, outputs=plot)
+            feat2_freq.change(fn=fn, inputs=args, outputs=plot)
+            feat1_scale.change(fn=fn, inputs=args, outputs=plot)
+            feat1_power.change(fn=fn, inputs=args, outputs=plot)
+            feat2_shift.change(fn=fn, inputs=args, outputs=plot)
+            feat2_scale.change(fn=fn, inputs=args, outputs=plot)
+            feat2_func.change(fn=fn, inputs=args, outputs=plot)
+        elif input_model == "Gaussian":
+            gaussian_center.change(fn=fn, inputs=args, outputs=plot)
+            gaussian_width.change(fn=fn, inputs=args, outputs=plot)
+            gaussian_scaling.change(fn=fn, inputs=args, outputs=plot)
+        elif input_model == "piecewise":
+            piecewise_thres.change(fn=fn, inputs=args, outputs=plot)
+            piecewise_scale.change(fn=fn, inputs=args, outputs=plot)
+        counter += 1
+title = "Comparison of F-test and mutual information"
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(f"## {title}")
+    gr.Markdown("This example illustrates the differences between univariate \
+                F-test statistics and mutual information. \
+                The plots below show the dependency of `y` against individual `x_i` and normalized \
+                values of univariate F-tests statistics and mutual information.\
+                In general, the F-test evaluates linear dependencies and tends to prioritize \
+                features with linear relationships, while mutual information assesses any type \
+                of dependency between variables and tends to identify features with strong \
+                relationships. In these examples, the most discriminative features identified \
+                by each approach may vary.")
+    gr.Markdown("In the follwing examples, we introduce parameterization to enable interaction \
+                with various parameters of the equation.")
+    n_samples = gr.Slider(minimum=500, maximum=1500, value=1000, step=100,
+    label = "Number of Samples")
+    with gr.Tab("Default Example function"):
+        gr.Markdown("We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, \
+                the target depends on them as follows:")
+        gr.Markdown("- `y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1)`")
+        gr.Markdown("that is the third feature is completely irrelevant.")
+        gr.Markdown("Parametrized equation:")
+        gr.Markdown("`y = f1_scale * x_1 **f1_power + f2_scale * f2_func(f2_freq * np.pi * x_2 + f2_shift + variance) * random(samples) + bias`")
+        noise_var = gr.Slider(minimum=0, maximum=10, step=1,
+        label = "Noise variance")
+        noise_bias = gr.Slider(minimum=0, maximum=10, step=1,
+        label = "Noise bias")
+        with gr.Row():
+            with gr.Column():
+                    feat1_scale = gr.Slider(minimum=1, maximum=10, step=1,
+                    label = "Scale feature 1")
+                    feat1_power = gr.Slider(minimum=1, maximum=4, step=1,
+                    label = "Raised feature 1 to the power")
+            with gr.Column():
+                feat2_freq = gr.Slider(minimum=1, maximum=10, step=1, value=6,
+                label = "Feature 2 frequency")
+                feat2_shift = gr.Slider(minimum=1, maximum=5, step=1,
+                label = "Shift feature 2")
+                feat2_scale  = gr.Slider(minimum=1, maximum=4, step=1,
+                label = "Scale feature 2")
+                feat2_func  = gr.Radio(choices=["Use sine function for feature 2",
+                                                "Use cosine function for feature 2"],
+                                        value="Use sine function for feature 2")
+        plot_func("default", [n_samples,
+                    noise_var,
+                    noise_bias,
+                    feat2_freq,
+                    feat1_scale,
+                    feat1_power,
+                    feat2_shift,
+                    feat2_scale,
+                    feat2_func,
+                    ])
+    with gr.Tab("Gaussian function"):
+        gr.Markdown("We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, \
+                the target depends on them as follows:")
+        gr.Markdown("- `y = x_1 + np.exp(-(x_2-0.5)**2 / (2 * 0.1**2)) + 0.1 * N(0, 1)`")
+        gr.Markdown("that is the third feature is completely irrelevant.")
+        gr.Markdown("Parametrized equation:")
+        gr.Markdown("`y = x_1 + exponential(-(x_2 - center)**2 / (2 * width)**2) + scaling * random(samples)`")
+        gaussian_center  = gr.Slider(minimum=0, maximum=10, value=5, step=1,
+                                     label = "Gaussian center")
+        gaussian_width  = gr.Slider(minimum=1, maximum=10, value=1, step=1,
+        label = "Gaussian width")
+        gaussian_scaling  = gr.Slider(minimum=1, maximum=5, value=1, step=1,
+        label = "Gaussian scaling")
+        plot_func("Gaussian", [n_samples,
+                    gaussian_center,
+                    gaussian_width,
+                    gaussian_scaling
+                    ])
+    with gr.Tab("Piecewise function"):
+        gr.Markdown("We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, \
+                the target depends on them as follows:")
+        gr.Markdown("- `mask = x_2 < 0.5`")
+        gr.Markdown("- `y = x_1` if `mask` is True")
+        gr.Markdown("- `y = -x_1` if `mask` is True")
+        gr.Markdown("that is the third feature is completely irrelevant.")
+        gr.Markdown("Parametrized equation:")
+        gr.Markdown("- `mask = x_2 < threshold`")
+        gr.Markdown("- `y = scaling*x_1` if `mask` is True")
+        gr.Markdown("- `y = scaling*-x_1` if `mask` is True")
+        piecewise_thres  = gr.Slider(minimum=1, maximum=10, value=5, step=1,
+        label = "Piecewise threshold")
+        piecewise_scale  = gr.Slider(minimum=1, maximum=10, value=1, step=1,
+        label = "Piecewise scaling")
+        plot_func("piecewise", [n_samples, piecewise_thres,
+                   piecewise_scale
+                    ])
+demo.launch()

requirements.text ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ scikit-learn
2	+ matplotlib