Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
-
import os
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
import re
|
| 5 |
|
|
@@ -9,21 +13,28 @@ TOKEN = os.getenv("HF_TOKEN", None) # add in Space Settings → Secrets if repo
|
|
| 9 |
|
| 10 |
# ---------- Safe numeric helpers ----------
|
| 11 |
NUM_RE = re.compile(r"[-+]?\d*\.?\d+")
|
|
|
|
| 12 |
def _try_num(x: Any) -> float | None:
|
| 13 |
-
if x is None:
|
|
|
|
| 14 |
s = str(x).strip().lower()
|
| 15 |
mult = 1.0
|
| 16 |
-
if "k" in s and re.search(r"\d", s):
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
s = re.sub(r"[^0-9.\-]", "", s)
|
| 19 |
-
if s in {"","-","."}:
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
except Exception:
|
| 22 |
m = NUM_RE.search(str(x))
|
| 23 |
return float(m.group(0)) if m else None
|
| 24 |
|
| 25 |
def to_float(x: Any, default: float = 0.0) -> float:
|
| 26 |
-
if isinstance(x, (int,float)):
|
|
|
|
| 27 |
v = _try_num(x)
|
| 28 |
return float(v) if v is not None else float(default)
|
| 29 |
|
|
@@ -41,42 +52,101 @@ feat_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="feature_order.jso
|
|
| 41 |
thresh_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="score_thresholds.json", token=TOKEN)
|
| 42 |
|
| 43 |
reg = joblib.load(model_path)
|
| 44 |
-
with open(feat_path) as f:
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def to_label(score: float) -> str:
|
| 48 |
-
low_lt =
|
| 49 |
-
med_lt = THRESH.get("medium_lt", 70)
|
| 50 |
return "low" if score < low_lt else ("medium" if score < med_lt else "high")
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# ---------- Prediction ----------
|
|
|
|
| 53 |
def predict_readiness(savings_amount, monthly_income, entertainment_spending,
|
| 54 |
sales_skills_1_5, independence_1_5, risk_tolerance_1_10, age):
|
| 55 |
|
|
|
|
| 56 |
values = {
|
| 57 |
"savings_amount": max(0.0, to_float(savings_amount)),
|
| 58 |
"monthly_income": max(0.0, to_float(monthly_income)),
|
| 59 |
"entertainment_spending": max(0.0, to_float(entertainment_spending)),
|
| 60 |
"sales_skills_1_5": clip_range(sales_skills_1_5, 1, 5),
|
| 61 |
-
"independence_1_5": clip_range(independence_1_5, 1, 5),
|
| 62 |
"risk_tolerance_1_10": clip_range(risk_tolerance_1_10, 1, 10),
|
| 63 |
"age": int(round(clip_range(age, 13, 100))),
|
| 64 |
}
|
| 65 |
|
| 66 |
-
# Build feature vector in
|
| 67 |
x = [values.get(name, 0.0) for name in FEATURE_ORDER]
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
#
|
| 71 |
-
y_pred = reg.predict(
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
return {
|
| 78 |
-
"readiness_score_0_100": round(score, 2),
|
| 79 |
-
"readiness_label":
|
| 80 |
"thresholds": f"low<{low_lt}, medium {low_lt}β{med_lt-0.01:.2f}, highβ₯{med_lt}",
|
| 81 |
}
|
| 82 |
|
|
@@ -92,7 +162,9 @@ inputs = [
|
|
| 92 |
]
|
| 93 |
|
| 94 |
demo = gr.Interface(
|
| 95 |
-
fn=predict_readiness,
|
|
|
|
|
|
|
| 96 |
title="Entrepreneurial Readiness β Simple Regressor",
|
| 97 |
description="Enter details to estimate a 0β100 readiness score and label (low/medium/high)."
|
| 98 |
)
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import joblib
|
| 4 |
+
import numpy as np
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from typing import Any, Dict, List, Tuple
|
| 7 |
from huggingface_hub import hf_hub_download
|
| 8 |
import re
|
| 9 |
|
|
|
|
| 13 |
|
| 14 |
# ---------- Safe numeric helpers ----------
|
| 15 |
NUM_RE = re.compile(r"[-+]?\d*\.?\d+")
|
| 16 |
+
|
| 17 |
def _try_num(x: Any) -> float | None:
|
| 18 |
+
if x is None:
|
| 19 |
+
return None
|
| 20 |
s = str(x).strip().lower()
|
| 21 |
mult = 1.0
|
| 22 |
+
if "k" in s and re.search(r"\d", s):
|
| 23 |
+
s = s.replace("k", ""); mult = 1000.0
|
| 24 |
+
if "m" in s and re.search(r"\d", s):
|
| 25 |
+
s = s.replace("m", ""); mult = 1_000_000.0
|
| 26 |
s = re.sub(r"[^0-9.\-]", "", s)
|
| 27 |
+
if s in {"", "-", "."}:
|
| 28 |
+
return None
|
| 29 |
+
try:
|
| 30 |
+
return float(s) * mult
|
| 31 |
except Exception:
|
| 32 |
m = NUM_RE.search(str(x))
|
| 33 |
return float(m.group(0)) if m else None
|
| 34 |
|
| 35 |
def to_float(x: Any, default: float = 0.0) -> float:
    """Convert ``x`` to a finite float, returning ``default`` on failure.

    Args:
        x: A Python or NumPy number, or any other value that ``_try_num``
            can parse (typically a string).
        default: Value returned when ``x`` cannot be interpreted as a finite
            number.

    Returns:
        ``x`` as a float, or ``float(default)`` when ``x`` is unparseable,
        NaN, infinite, or too large to represent as a float.
    """
    if isinstance(x, (int, float, np.integer, np.floating)):
        try:
            value = float(x)
        except OverflowError:
            # Arbitrarily large Python ints cannot be represented as a float.
            return float(default)
        # NaN / inf would otherwise flow straight into the feature vector.
        return value if np.isfinite(value) else float(default)
    v = _try_num(x)
    return float(v) if v is not None else float(default)
|
| 40 |
|
|
|
|
| 52 |
thresh_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="score_thresholds.json", token=TOKEN)
|
| 53 |
|
| 54 |
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only point MODEL_REPO_ID at a repository you trust.
reg = joblib.load(model_path)

# JSON is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default.
with open(feat_path, encoding="utf-8") as f:
    # Feature names in the order used to build the model's input vector.
    FEATURE_ORDER: List[str] = json.load(f)
with open(thresh_path, encoding="utf-8") as f:
    # Label cutoffs; the keys "low_lt" and "medium_lt" are read by thresholds().
    THRESH: Dict[str, float] = json.load(f)
|
| 59 |
+
|
| 60 |
+
# ---------- Label helpers ----------
|
| 61 |
+
|
| 62 |
+
def thresholds() -> Tuple[float, float]:
    """Return the ``(low_lt, medium_lt)`` cutoffs from ``THRESH``.

    Missing keys fall back to 40.0 and 70.0 respectively.
    """
    low_cutoff = THRESH.get("low_lt", 40.0)
    medium_cutoff = THRESH.get("medium_lt", 70.0)
    return low_cutoff, medium_cutoff
|
| 64 |
+
|
| 65 |
|
| 66 |
def to_label(score: float) -> str:
    """Map a numeric score to "low", "medium" or "high".

    Scores below the first cutoff are "low", scores below the second cutoff
    are "medium", and everything else is "high".
    """
    low_cutoff, medium_cutoff = thresholds()
    if score < low_cutoff:
        return "low"
    if score < medium_cutoff:
        return "medium"
    return "high"
|
| 69 |
|
| 70 |
+
|
| 71 |
+
def anchors_from_thresholds() -> Dict[str, float]:
    """Return one representative score per label.

    Each anchor is the midpoint of its bucket, where the buckets are
    [0, low), [low, med) and [med, 100]. With cutoffs 40/70 the anchors are
    20, 55 and 85.
    """
    low_cutoff, medium_cutoff = thresholds()
    low_anchor = low_cutoff / 2.0
    medium_anchor = (low_cutoff + medium_cutoff) / 2.0
    high_anchor = (medium_cutoff + 100.0) / 2.0
    return {"low": low_anchor, "medium": medium_anchor, "high": high_anchor}
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def score_from_label_only(label: str) -> float:
    """Return the anchor score for ``label``.

    The label is matched case-insensitively; an unrecognised label yields
    the "medium" anchor.
    """
    anchors = anchors_from_thresholds()
    key = str(label).lower()
    if key in anchors:
        return float(anchors[key])
    return float(anchors["medium"])
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def score_from_proba(model, X: np.ndarray, fallback_label: str | None = None) -> Tuple[float, str]:
    """Derive a score and label from a classifier-style model.

    When the model exposes ``predict_proba``, the score is the
    probability-weighted average of the per-label anchor scores for the first
    row of ``X``. If that is unavailable or fails, the score is the anchor of
    ``fallback_label`` ("medium" when no label was supplied).

    Args:
        model: Estimator; ``predict_proba`` and ``classes_`` are used if present.
        X: Feature matrix; only the first row's probabilities are read.
        fallback_label: Label already predicted by the caller, if any. When
            given, it is returned as the label instead of the arg-max class.

    Returns:
        ``(score, label)`` with the label lower-cased. The score is passed
        through ``clip01_100`` (defined elsewhere in this file).
    """
    # Function-scope import so this block adds no file-level dependency.
    import logging

    anchors = anchors_from_thresholds()
    if hasattr(model, "predict_proba"):
        try:
            proba = model.predict_proba(X)[0]
            classes = getattr(model, "classes_", None)
            if classes is None:
                # No class metadata: assume the order low, medium, high.
                classes = np.array(["low", "medium", "high"], dtype=object)
            classes_l = [str(c).lower() for c in classes]

            # Expected score = sum(p_c * anchor_c); unknown classes count as "medium".
            expected = sum(
                (float(p) * float(anchors.get(c, anchors["medium"]))
                 for c, p in zip(classes_l, proba)),
                0.0,
            )

            if fallback_label is None:
                pred_label = classes_l[int(np.argmax(proba))]
            else:
                # Normalise here too, so both return paths yield a lower-case label.
                pred_label = str(fallback_label).lower()
            return clip01_100(expected), pred_label
        except Exception:
            # Best effort: keep going with the label anchor, but leave a trace
            # instead of hiding the failure completely.
            logging.getLogger(__name__).warning(
                "predict_proba scoring failed; falling back to label anchor",
                exc_info=True,
            )

    # No usable predict_proba: map the label to its anchor.
    if fallback_label is None:
        fallback_label = "medium"
    return clip01_100(score_from_label_only(fallback_label)), str(fallback_label).lower()
|
| 112 |
+
|
| 113 |
# ---------- Prediction ----------
|
| 114 |
+
|
| 115 |
def predict_readiness(savings_amount, monthly_income, entertainment_spending,
                      sales_skills_1_5, independence_1_5, risk_tolerance_1_10, age):
    """Predict a readiness score and label from the raw UI inputs.

    Each argument is the raw value of the input of the same name. Money
    amounts are coerced with ``to_float`` and floored at 0; the rating and
    age inputs go through ``clip_range`` (defined elsewhere in this file;
    presumably it clamps to the given bounds, which is not verified here).

    Returns:
        A dict with keys ``readiness_score_0_100`` (rounded to 2 decimals),
        ``readiness_label`` and ``thresholds`` (a human-readable summary of
        the label cutoffs).
    """
    # Coerce UI values safely.
    values = {
        "savings_amount": max(0.0, to_float(savings_amount)),
        "monthly_income": max(0.0, to_float(monthly_income)),
        "entertainment_spending": max(0.0, to_float(entertainment_spending)),
        "sales_skills_1_5": clip_range(sales_skills_1_5, 1, 5),
        "independence_1_5": clip_range(independence_1_5, 1, 5),
        "risk_tolerance_1_10": clip_range(risk_tolerance_1_10, 1, 10),
        "age": int(round(clip_range(age, 13, 100))),
    }

    # Build the feature vector in FEATURE_ORDER; names missing from `values`
    # are filled with 0.0.
    x = [values.get(name, 0.0) for name in FEATURE_ORDER]
    X = np.asarray([x], dtype=float)

    # Try numeric prediction first.
    y_pred = reg.predict(X)
    y0 = np.asarray(y_pred).ravel()[0]

    if isinstance(y0, (int, float, np.integer, np.floating)):
        # Case A: numeric output; clamp and label via thresholds.
        score = clip01_100(float(y0))
        label = to_label(score)
    else:
        # Case B: string/label output (e.g. 'low'/'medium'/'high').
        label_str = str(y0).lower()
        score, label = score_from_proba(reg, X, fallback_label=label_str)

    low_lt, med_lt = thresholds()
    return {
        "readiness_score_0_100": round(float(score), 2),
        "readiness_label": label,
        "thresholds": f"low<{low_lt}, medium {low_lt}–{med_lt-0.01:.2f}, high≥{med_lt}",
    }
|
| 152 |
|
|
|
|
| 162 |
]
|
| 163 |
|
| 164 |
# Gradio UI: passes the `inputs` components to predict_readiness and renders
# the returned dict as JSON.
demo = gr.Interface(
    fn=predict_readiness,
    inputs=inputs,
    outputs="json",
    title="Entrepreneurial Readiness — Simple Regressor",
    description="Enter details to estimate a 0–100 readiness score and label (low/medium/high).",
)
|