jucamohedano committed on
Commit
e9735b2
·
1 Parent(s): 5cb8c11

Add application and requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +266 -0
  2. requirements.text +2 -0
app.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+
5
+ from sklearn.feature_selection import f_regression, mutual_info_regression
6
+ from functools import partial
7
+
8
def default(n_samples, noise_var, noise_bias, feat2_freq, feat1_scale,
            feat1_power, feat2_shift, feat2_scale, feat2_func, counter,
            func_name):
    """Build the plot for one feature of the "default" synthetic model.

    This is a thin adapter: it collects its arguments under their own names
    and forwards them to ``train_models`` as keyword arguments.

    Parameters
    ----------
    n_samples, noise_var, noise_bias, feat2_freq, feat1_scale, feat1_power,
    feat2_shift, feat2_scale, feat2_func
        Values of the corresponding input widgets, forwarded unchanged.
    counter
        Index of the feature column to plot.
    func_name
        Name of the target function that ``train_models`` should evaluate.

    Returns
    -------
    Whatever ``train_models`` returns (a matplotlib figure).
    """
    params = {
        "n_samples": n_samples,
        "noise_var": noise_var,
        "noise_bias": noise_bias,
        "feat2_freq": feat2_freq,
        "feat1_scale": feat1_scale,
        "feat1_power": feat1_power,
        "feat2_shift": feat2_shift,
        "feat2_scale": feat2_scale,
        "feat2_func": feat2_func,
    }
    return train_models(func_name, counter, **params)
32
+
33
def gaussian(n_samples, gaussian_center, gaussian_width, gaussian_scaling,
             counter, func_name):
    """Build the plot for one feature of the Gaussian synthetic model.

    This is a thin adapter: it collects its arguments under their own names
    and forwards them to ``train_models`` as keyword arguments.

    Parameters
    ----------
    n_samples, gaussian_center, gaussian_width, gaussian_scaling
        Values of the corresponding input widgets, forwarded unchanged.
    counter
        Index of the feature column to plot.
    func_name
        Name of the target function that ``train_models`` should evaluate.

    Returns
    -------
    Whatever ``train_models`` returns (a matplotlib figure).
    """
    params = {
        "n_samples": n_samples,
        "gaussian_center": gaussian_center,
        "gaussian_width": gaussian_width,
        "gaussian_scaling": gaussian_scaling,
    }
    return train_models(func_name, counter, **params)
47
+
48
def piecewise(n_samples, piecewise_thres, piecewise_scale, counter,
              func_name):
    """Build the plot for one feature of the piecewise synthetic model.

    This is a thin adapter: it collects its arguments under their own names
    and forwards them to ``train_models`` as keyword arguments.

    Parameters
    ----------
    n_samples, piecewise_thres, piecewise_scale
        Values of the corresponding input widgets, forwarded unchanged.
    counter
        Index of the feature column to plot.
    func_name
        Name of the target function that ``train_models`` should evaluate.

    Returns
    -------
    Whatever ``train_models`` returns (a matplotlib figure).
    """
    params = {
        "n_samples": n_samples,
        "piecewise_thres": piecewise_thres,
        "piecewise_scale": piecewise_scale,
    }
    return train_models(func_name, counter, **params)
60
+
61
+
62
def train_models(func_name, counter, **kwargs):
    """Generate synthetic data and plot the target against one feature.

    Three features are drawn uniformly from ``[0, 1)`` with a fixed seed, the
    target ``y`` is computed from the first two columns according to
    ``func_name``, and the univariate F-test statistic and mutual information
    of every feature with ``y`` are computed and normalized by their maximum.
    The scatter plot of feature ``counter`` against ``y`` is returned with
    both normalized scores in its title.

    Parameters
    ----------
    func_name : str
        One of ``"default"``, ``"Gaussian"`` or ``"piecewise"``.
    counter : int
        Index (0, 1 or 2) of the feature column to plot.
    **kwargs
        ``n_samples`` is always required. The remaining keys depend on the
        model:

        * ``"default"``: ``feat1_scale``, ``feat1_power``, ``feat2_scale``,
          ``feat2_freq``, ``feat2_shift``, ``feat2_func``, ``noise_var``,
          ``noise_bias``
        * ``"Gaussian"``: ``gaussian_center``, ``gaussian_width``,
          ``gaussian_scaling``
        * ``"piecewise"``: ``piecewise_thres``, ``piecewise_scale``

    Returns
    -------
    matplotlib.figure.Figure
        The scatter plot. The figure is already deregistered from pyplot.

    Raises
    ------
    KeyError
        If ``func_name`` is unknown, a required keyword is missing, or
        ``feat2_func`` is not one of the two supported labels.
    """
    # Fixed seed: every callback redraws the same underlying sample, so only
    # the effect of the changed parameter is visible.
    np.random.seed(0)
    # A slider value may arrive as a float; np.random.rand requires integers.
    n_samples = int(kwargs["n_samples"])
    X = np.random.rand(n_samples, 3)

    if func_name == "default":
        feat2_func_list = {
            "Use sine function for feature 2": np.sin,
            "Use cosine function for feature 2": np.cos,
        }
        feat2_func = feat2_func_list[kwargs["feat2_func"]]
        # Noise variance and bias are scaled by 0.1 before use.
        y = (
            kwargs["feat1_scale"] * X[:, 0] ** kwargs["feat1_power"]
            + kwargs["feat2_scale"]
            * feat2_func(kwargs["feat2_freq"] * np.pi * X[:, 1] + kwargs["feat2_shift"])
            + (kwargs["noise_var"] * 0.1) * np.random.randn(n_samples)
            + (kwargs["noise_bias"] * 0.1)
        )
    elif func_name == "Gaussian":
        # Center, width and noise scaling are scaled by 0.1 before use.
        center = kwargs["gaussian_center"] * 0.1
        width = kwargs["gaussian_width"] * 0.1
        y = (
            X[:, 0]
            + np.exp(-(X[:, 1] - center) ** 2 / (2 * width ** 2))
            + (kwargs["gaussian_scaling"] * 0.1) * np.random.randn(n_samples)
        )
    elif func_name == "piecewise":
        # The sign of the x_1 term flips depending on which side of the
        # (0.1-scaled) threshold x_2 falls.
        mask = X[:, 1] < (kwargs["piecewise_thres"] * 0.1)
        scale = kwargs["piecewise_scale"]
        y = (
            np.where(mask, scale * X[:, 0], scale * -X[:, 0])
            + 0.1 * np.random.randn(n_samples)
        )
    else:
        # Same exception type an unknown key produced in the lookup-table form.
        raise KeyError(func_name)

    f_test, _ = f_regression(X, y)
    f_test /= np.max(f_test)

    mi = mutual_info_regression(X, y)
    mi_max = np.max(mi)
    # Mutual information estimates can all be zero; avoid dividing by zero.
    if mi_max > 0:
        mi /= mi_max

    fig, ax = plt.subplots()

    i = counter
    ax.scatter(X[:, i], y, edgecolor="black", s=20)
    ax.set_xlabel("$x_{}$".format(i + 1), fontsize=14)
    ax.set_ylabel("$y$", fontsize=14)
    ax.set_title("F-test={:.2f}, MI={:.2f}".format(f_test[i], mi[i]), fontsize=16)

    # pyplot keeps a reference to every figure it creates. Without closing,
    # each callback would leave another figure alive for the life of the
    # process. The Figure object itself stays usable after this call.
    plt.close(fig)

    return fig
109
+
110
+
111
def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` x ``n_cols`` Gradio layout grid.

    Each yield happens while a ``gr.Row`` and a ``gr.Column`` context are
    open, so the body of the consuming ``for`` loop runs inside that cell's
    contexts. The yielded value itself is ``None``.
    """
    for _row_index in range(n_rows):
        row = gr.Row()
        with row:
            for _col_index in range(n_cols):
                cell = gr.Column()
                with cell:
                    yield None
118
def plot_func(input_model, args):
    """Create one plot per feature for a model and wire it to its inputs.

    A single row of three columns is laid out, one column per feature of the
    synthetic data set. Each column gets a ``gr.Plot`` whose content is
    recomputed whenever any component in ``args`` changes.

    Parameters
    ----------
    input_model : str
        ``"default"``, ``"Gaussian"`` or ``"piecewise"``; selects the callback
        and is used as the plot label.
    args : list
        Input components for this model, in the positional order expected by
        the selected callback. Every component in the list both triggers the
        update and is passed as an input to it.

    Raises
    ------
    KeyError
        If ``input_model`` is not one of the known model names.
    """
    input_models = {
        "default": default,
        "Gaussian": gaussian,
        "piecewise": piecewise,
    }
    model_fn = input_models[input_model]

    # Three columns: one per feature column of the generated data.
    for counter, _ in enumerate(iter_grid(1, 3)):
        # Bind the feature index and model name up front; the event system
        # supplies the remaining positional arguments from ``args``.
        fn = partial(model_fn, counter=counter, func_name=input_model)

        plot = gr.Plot(label=input_model)

        # Register on the components handed in by the caller rather than on
        # module-level globals, so this function only depends on its inputs.
        for component in args:
            component.change(fn=fn, inputs=args, outputs=plot)
150
+
151
# Build the Gradio UI: a shared sample-size slider plus one tab per synthetic
# model. Each tab declares its own parameter widgets and then calls
# ``plot_func`` to create the three per-feature plots and connect them.
title = "Comparison of F-test and mutual information"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(
        "This example illustrates the differences between univariate "
        "F-test statistics and mutual information. "
        "The plots below show the dependency of `y` against individual `x_i` and normalized "
        "values of univariate F-tests statistics and mutual information. "
        "In general, the F-test evaluates linear dependencies and tends to prioritize "
        "features with linear relationships, while mutual information assesses any type "
        "of dependency between variables and tends to identify features with strong "
        "relationships. In these examples, the most discriminative features identified "
        "by each approach may vary."
    )
    gr.Markdown(
        "In the following examples, we introduce parameterization to enable interaction "
        "with various parameters of the equation."
    )

    # Shared by all three tabs.
    n_samples = gr.Slider(minimum=500, maximum=1500, value=1000, step=100,
                          label="Number of Samples")

    with gr.Tab("Default Example function"):
        gr.Markdown(
            "We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, "
            "the target depends on them as follows:"
        )
        gr.Markdown("- `y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1)`")
        gr.Markdown("that is the third feature is completely irrelevant.")

        gr.Markdown("Parametrized equation:")
        # The noise term is additive and sits outside the f2_func argument.
        gr.Markdown(
            "`y = f1_scale * x_1 ** f1_power + "
            "f2_scale * f2_func(f2_freq * np.pi * x_2 + f2_shift) + "
            "variance * random(samples) + bias`"
        )

        noise_var = gr.Slider(minimum=0, maximum=10, step=1,
                              label="Noise variance")

        noise_bias = gr.Slider(minimum=0, maximum=10, step=1,
                               label="Noise bias")

        with gr.Row():
            with gr.Column():
                feat1_scale = gr.Slider(minimum=1, maximum=10, step=1,
                                        label="Scale feature 1")

                feat1_power = gr.Slider(minimum=1, maximum=4, step=1,
                                        label="Raised feature 1 to the power")

            with gr.Column():
                feat2_freq = gr.Slider(minimum=1, maximum=10, step=1, value=6,
                                       label="Feature 2 frequency")

                feat2_shift = gr.Slider(minimum=1, maximum=5, step=1,
                                        label="Shift feature 2")

                feat2_scale = gr.Slider(minimum=1, maximum=4, step=1,
                                        label="Scale feature 2")

        feat2_func = gr.Radio(choices=["Use sine function for feature 2",
                                       "Use cosine function for feature 2"],
                              value="Use sine function for feature 2")
        # Order must match the positional parameters of ``default``.
        plot_func("default", [n_samples,
                              noise_var,
                              noise_bias,
                              feat2_freq,
                              feat1_scale,
                              feat1_power,
                              feat2_shift,
                              feat2_scale,
                              feat2_func,
                              ])

    with gr.Tab("Gaussian function"):
        gr.Markdown(
            "We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, "
            "the target depends on them as follows:"
        )
        gr.Markdown("- `y = x_1 + np.exp(-(x_2-0.5)**2 / (2 * 0.1**2)) + 0.1 * N(0, 1)`")
        gr.Markdown("that is the third feature is completely irrelevant.")

        gr.Markdown("Parametrized equation:")
        # Only the width is squared in the denominator, not the factor 2.
        gr.Markdown(
            "`y = x_1 + exponential(-(x_2 - center)**2 / (2 * width**2)) + "
            "scaling * random(samples)`"
        )

        gaussian_center = gr.Slider(minimum=0, maximum=10, value=5, step=1,
                                    label="Gaussian center")

        gaussian_width = gr.Slider(minimum=1, maximum=10, value=1, step=1,
                                   label="Gaussian width")

        gaussian_scaling = gr.Slider(minimum=1, maximum=5, value=1, step=1,
                                     label="Gaussian scaling")

        # Order must match the positional parameters of ``gaussian``.
        plot_func("Gaussian", [n_samples,
                               gaussian_center,
                               gaussian_width,
                               gaussian_scaling,
                               ])

    with gr.Tab("Piecewise function"):
        gr.Markdown(
            "We consider 3 features `x_1`, `x_2`, `x_3` distributed uniformly over `[0, 1]`, "
            "the target depends on them as follows:"
        )
        gr.Markdown("- `mask = x_2 < 0.5`")
        gr.Markdown("- `y = x_1` if `mask` is True")
        # The negative branch applies where the mask does not hold.
        gr.Markdown("- `y = -x_1` if `mask` is False")
        gr.Markdown("that is the third feature is completely irrelevant.")

        gr.Markdown("Parametrized equation:")
        gr.Markdown("- `mask = x_2 < threshold`")
        gr.Markdown("- `y = scaling*x_1` if `mask` is True")
        gr.Markdown("- `y = scaling*-x_1` if `mask` is False")
        piecewise_thres = gr.Slider(minimum=1, maximum=10, value=5, step=1,
                                    label="Piecewise threshold")

        piecewise_scale = gr.Slider(minimum=1, maximum=10, value=1, step=1,
                                    label="Piecewise scaling")

        # Order must match the positional parameters of ``piecewise``.
        plot_func("piecewise", [n_samples,
                                piecewise_thres,
                                piecewise_scale,
                                ])


demo.launch()
requirements.text ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ scikit-learn
2
+ matplotlib