added info about correlation measures and p-values
Browse files
app.py
CHANGED
@@ -563,7 +563,10 @@ def show_interactive_heatmap(df):
|
|
563 |
with col2:
|
564 |
corr_method = st.selectbox(
|
565 |
"Correlation Method",
|
566 |
-
["pearson", "spearman", "kendall"]
|
|
|
|
|
|
|
567 |
)
|
568 |
|
569 |
# Compute correlation matrix
|
@@ -646,25 +649,34 @@ def show_scatter_explorer(df, stderr_df):
|
|
646 |
# Format p-values appropriately
|
647 |
def format_pvalue(p):
|
648 |
if p < 0.001:
|
649 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
650 |
else:
|
651 |
-
|
|
|
|
|
652 |
|
653 |
with col1:
|
654 |
-
|
655 |
-
st.
|
|
|
656 |
|
657 |
with col2:
|
658 |
-
|
659 |
-
st.
|
|
|
660 |
|
661 |
with col3:
|
662 |
-
|
663 |
-
st.
|
664 |
-
|
665 |
-
|
666 |
-
st.info("ℹ️ **P-values < 0.001** indicate very strong statistical significance. This is common with benchmark correlations due to reasonable sample sizes and meaningful relationships.")
|
667 |
-
|
668 |
# Show data table
|
669 |
st.subheader("Data Points")
|
670 |
display_data = common_data.copy()
|
|
|
563 |
with col2:
|
564 |
corr_method = st.selectbox(
|
565 |
"Correlation Method",
|
566 |
+
["pearson", "spearman", "kendall"],
|
567 |
+
help="**Pearson's r** is a parametric measure of linear correlation that is sensitive to outliers and can be less appropriate for ordinal data.\n" +
|
568 |
+
"**Spearman's rho** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.\n" +
|
569 |
+
"**Kendall's tau** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data."
|
570 |
)
|
571 |
|
572 |
# Compute correlation matrix
|
|
|
649 |
# Format p-values appropriately
|
650 |
def format_pvalue(p):
    """Format a p-value for display and pick a matching explanatory note.

    Args:
        p: The p-value to format. ``None`` and NaN are tolerated and
            reported as unavailable instead of being pushed through the
            numeric thresholds.

    Returns:
        A ``(label, info)`` tuple. ``label`` is the short text to display
        (``"p < 0.001"``, ``"p = 0.xxx"`` or ``"p = n/a"``); ``info`` is a
        sentence describing the significance band the value falls into.
    """
    import math

    # NaN compares False against every threshold, so without this guard it
    # would land in the final branch and be rendered as "p = nan" together
    # with a note claiming the value is >= 0.1.
    if p is None or math.isnan(p):
        info = "The p-value could not be computed for this pair of variables (for example, when one of them is constant or there are too few data points)."
        return "p = n/a", info

    if p < 0.001:
        info = "P-values < 0.001 indicate very strong statistical significance. This results from good sample sizes and meaningful relationships."
        return "p < 0.001", info
    elif p < 0.05:
        info = "P-values < 0.05 indicate moderate statistical significance. This results from reasonable sample sizes and meaningful relationships."
        return f"p = {p:.3f}", info
    elif p < 0.1:
        info = "P-values < 0.1 indicate weak statistical significance. This results from low sample sizes and/or weak relationships."
        return f"p = {p:.3f}", info
    else:
        # This branch also covers p == 0.1 exactly, hence ">=" in the note.
        info = "P-values >= 0.1 indicate very weak statistical significance. This results from insufficient sample sizes and/or weak relationships."
        return f"p = {p:.3f}", info
|
663 |
+
|
664 |
|
665 |
with col1:
|
666 |
+
p_value, info = format_pvalue(pearson_p)
|
667 |
+
st.metric("Pearson r", f"{pearson_r:.3f}", help="Pearson's r is a parametric measure of linear correlation that is sensitive to outliers and can be less appropriate for ordinal data.")
|
668 |
+
st.caption(p_value, help=info)
|
669 |
|
670 |
with col2:
|
671 |
+
p_value, info = format_pvalue(spearman_p)
|
672 |
+
st.metric("Spearman ρ", f"{spearman_r:.3f}", help="Spearman's rho is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.")
|
673 |
+
st.caption(p_value, help=info)
|
674 |
|
675 |
with col3:
|
676 |
+
p_value, info = format_pvalue(kendall_p)
|
677 |
+
st.metric("Kendall τ", f"{kendall_r:.3f}", help="Kendall's tau is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.")
|
678 |
+
st.caption(p_value, help=info)
|
679 |
+
|
|
|
|
|
680 |
# Show data table
|
681 |
st.subheader("Data Points")
|
682 |
display_data = common_data.copy()
|