import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Use a non-interactive backend so figures render in a server context.
plt.switch_backend("agg")


def true_fn(X):
    return np.cos(1.5 * np.pi * X)


def modelData(n_samples: int, degree: int, cv: int) -> "plt.Figure":
    """
    Demonstrates the principle of overfitting vs underfitting by fitting
    a polynomial Linear Regression model to noisy samples of a cosine.

    :param n_samples: the number of samples to generate.
    :param degree: the degree of the polynomial features.
    :param cv: the number of cross-validation folds.
    :returns: the matplotlib figure.
    """
    # Generate noisy samples of the true function.
    X = np.sort(np.random.rand(n_samples))
    y = true_fn(X) + np.random.randn(n_samples) * 0.1

    fig, ax = plt.subplots(1, 1, figsize=(24, 15))

    # Pipeline: expand x into polynomial features, then fit a linear model.
    poly_feats = PolynomialFeatures(degree=degree, include_bias=False)
    model = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_feats", poly_feats),
        ("lr", model),
    ])
    pipeline.fit(X[:, np.newaxis], y)

    # Estimate generalization error via cross-validated MSE.
    scores = cross_val_score(
        pipeline, X[:, np.newaxis], y,
        scoring="neg_mean_squared_error", cv=cv
    )

    X_test = np.linspace(0, 1, 1000)
    ax.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), "--",
            linewidth=2.5, color="#C73E1D", label="Model")
    ax.plot(X_test, true_fn(X_test), linewidth=2.5, color="#2E86AB",
            label="True function")
    ax.scatter(X, y, s=20, alpha=0.75, edgecolors="#3B1F2B", label="Samples")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlim((0, 1))
    ax.set_ylim((-2, 2))
    ax.legend(loc="best")
    ax.set_title(
        f"Degree: {degree}\nMSE: {-scores.mean():.2e} (+/- {scores.std():.2e})"
    )
    return fig


with gr.Blocks() as demo:
    gr.Markdown("""
    # Underfitting vs Overfitting

    This space is a re-implementation of the original scikit-learn example
    [Underfitting vs Overfitting](https://scikit-learn.org/stable/auto_examples/model_selection/plot_underfitting_overfitting.html#sphx-glr-auto-examples-model-selection-plot-underfitting-overfitting-py).

    In this space you can vary the sliders to get a picture of what an **underfitted**
    model looks like and what an **overfitted** model looks like. If you want more
    details, head over to the scikit-learn doc mentioned above.

    Have fun enjoying the tool 🤗
    """)
    n_samples = gr.Slider(30, 10_000, label="n_samples",
                          info="number of samples", step=1, value=100)
    degree = gr.Slider(1, 20, label="degree",
                       info="degree of the polynomial features", step=1, value=4)
    # cross_val_score requires at least 2 folds, so the slider starts at 2.
    cv = gr.Slider(2, 10, label="cv",
                   info="number of cross-validation folds", step=1, value=5)
    output = gr.Plot(label="Plot")
    btn = gr.Button("Show")
    btn.click(fn=modelData, inputs=[n_samples, degree, cv],
              outputs=output, api_name="overfitunderfit")

demo.launch()
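
# --------------------------------------------------------------------------
# Optional offline usage (a sketch, not part of the original Space): since
# modelData returns a plain matplotlib figure, it can also be called
# directly to reproduce the classic degree-1 / degree-4 / degree-15
# under/over-fitting comparison without launching the UI. Uncomment to run;
# the chosen degrees and output file names here are illustrative.
#
# for d in (1, 4, 15):
#     fig = modelData(n_samples=100, degree=d, cv=5)
#     fig.savefig(f"underfit_overfit_degree_{d}.png")
# --------------------------------------------------------------------------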