""" Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html """ from sklearn.svm import SVC from sklearn.datasets import load_digits from sklearn.feature_selection import RFE import matplotlib.pyplot as plt from sklearn.datasets import make_classification import matplotlib.pyplot as plt from sklearn.feature_selection import RFECV from sklearn.model_selection import StratifiedKFold from sklearn.linear_model import LogisticRegression import gradio as gr def create_classification_data(informative, redundant): X, y = make_classification( n_samples=500, n_features=15, n_informative=informative, n_redundant=redundant, n_repeated=0, n_classes=8, n_clusters_per_class=1, class_sep=0.8, random_state=0, ) return X, y def run_rfecv(informative, redundant): X, y = create_classification_data(informative, redundant) min_features_to_select = 1 # Minimum number of features to consider clf = LogisticRegression() cv = StratifiedKFold(5) rfecv = RFECV( estimator=clf, step=1, cv=cv, scoring="accuracy", min_features_to_select=min_features_to_select, n_jobs=2, ) rfecv.fit(X, y) print(f"Optimal number of features: {rfecv.n_features_}") n_scores = len(rfecv.cv_results_["mean_test_score"]) fig = plt.figure() plt.xlabel("Number of features selected") plt.ylabel("Mean test accuracy") plt.errorbar( range(min_features_to_select, n_scores + min_features_to_select), rfecv.cv_results_["mean_test_score"], yerr=rfecv.cv_results_["std_test_score"], ) plt.title("\n Recursive Feature Elimination \nwith correlated features") return plt title = " Recursive feature elimination with cross-validation " with gr.Blocks(title=title) as demo: gr.Markdown(f"# {title}") gr.Markdown( " This example the feature importnace when features have both redundant and useless features using Recursive feature elimination
" " Dataset: A classification set of 500 data points and 15 features in total
" " **Features**

**Informative features** : Number of features that actually having the signal to differentiate between classes.
" " **Redundant features** : Number of feature which are just some random linear combinations of informative features.
" ) gr.Markdown( " **Note** Total features - (informative features + redundant features) are Useless features.
" ) gr.Markdown( " Logistic Regression classifier is used as estimator to rank features.
" ) gr.Markdown( " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**" ) with gr.Row(): informative = gr.Slider( minimum=0, maximum=10, step=1, value=3, label="Number of Informative features in data", ) redundant = gr.Slider( minimum=0, maximum=5, step=1, value=2, label="Number of Redundant features in data", ) btn = gr.Button(value="Submit") btn.click( run_rfecv, inputs=[informative, redundant], outputs=gr.Plot(label="RFE with cross validation"), ) gr.Markdown( " Plot demonstrate mean test accuracy for the corresponding feature selected .
" ) gr.Markdown( " Number of features selected with highest test accuracy will be nearly ~ equal to informative features .
" ) demo.launch()