Spaces:

mlfoundations
/

OpenThoughts_data_explorer

Running

App Files Community

jmercat committed on Jun 2

Commit

b1b519f

1 Parent(s): 9c9517b

added info about correlation measures and p-values

Browse files

Files changed (1) hide show

app.py +25 -13

app.py CHANGED Viewed

@@ -563,7 +563,10 @@ def show_interactive_heatmap(df):
     with col2:
         corr_method = st.selectbox(
             "Correlation Method",
-            ["pearson", "spearman", "kendall"]
         )
     # Compute correlation matrix
@@ -646,25 +649,34 @@ def show_scatter_explorer(df, stderr_df):
                 # Format p-values appropriately
                 def format_pvalue(p):
                     if p < 0.001:
-                        return "p < 0.001"
                     else:
-                        return f"p = {p:.3f}"
                 with col1:
-                    st.metric("Pearson r", f"{pearson_r:.3f}")
-                    st.caption(format_pvalue(pearson_p))
                 with col2:
-                    st.metric("Spearman ρ", f"{spearman_r:.3f}")
-                    st.caption(format_pvalue(spearman_p))
                 with col3:
-                    st.metric("Kendall τ", f"{kendall_r:.3f}")
-                    st.caption(format_pvalue(kendall_p))
-                # Add explanation about p-values
-                st.info("ℹ️ **P-values < 0.001** indicate very strong statistical significance. This is common with benchmark correlations due to reasonable sample sizes and meaningful relationships.")
                 # Show data table
                 st.subheader("Data Points")
                 display_data = common_data.copy()

     with col2:
         corr_method = st.selectbox(
             "Correlation Method",
+            ["pearson", "spearman", "kendall"],
+            help="**Pearson's r** is a parametric measure of linear correlation that is sensitive to outliers and can be less appropriate for ordinal data.\n" +
+                 "**Spearman's rho** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.\n" +
+                 "**Kendall's tau** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data."
         )
     # Compute correlation matrix
                 # Format p-values appropriately
                 def format_pvalue(p):
                     if p < 0.001:
+                        info = "P-values < 0.001 indicate very strong statistical significance. This results from good sample sizes and meaningful relationships."
+                        return "p < 0.001", info
+                    elif p < 0.05:
+                        info = "P-values < 0.05 indicate moderate statistical significance. This results from reasonable sample sizes and meaningful relationships."
+                        return f"p = {p:.3f}", info
+                    elif p < 0.1:
+                        info = "P-values < 0.1 indicate weak statistical significance. This results from low sample sizes and/or weak relationships."
+                        return f"p = {p:.3f}", info
                     else:
+                        info = "P-values > 0.1 indicate very weak statistical significance. This results from insufficient sample sizes and/or weak relationships."
+                        return f"p = {p:.3f}", info
                 with col1:
+                    p_value, info = format_pvalue(pearson_p)
+                    st.metric("Pearson r", f"{pearson_r:.3f}", help="Pearson's r is a parametric measure of linear correlation that is sensitive to outliers and can be less appropriate for ordinal data.")
+                    st.caption(p_value, help=info)
                 with col2:
+                    p_value, info = format_pvalue(spearman_p)
+                    st.metric("Spearman ρ", f"{spearman_r:.3f}", help="Spearman's rho is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.")
+                    st.caption(p_value, help=info)
                 with col3:
+                    p_value, info = format_pvalue(kendall_p)
+                    st.metric("Kendall τ", f"{kendall_r:.3f}", help="Kendall's tau is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.")
+                    st.caption(p_value, help=info)
                 # Show data table
                 st.subheader("Data Points")
                 display_data = common_data.copy()