"""Gradio app that estimates an entrepreneurial-readiness score.

At import time the module downloads ``model.joblib``, ``feature_order.json``
and ``score_thresholds.json`` from ``MODEL_REPO_ID`` on the Hugging Face Hub,
then exposes ``demo`` (a ``gr.Interface``) built around ``predict_readiness``.
"""

import json
import logging
import os
import re
from typing import Any, Dict, List, Tuple

import gradio as gr
import joblib
import numpy as np
from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)

# ---------- Config ----------
MODEL_REPO_ID = "kirakayy/entrepreneurial-readiness"  # change if you fork
TOKEN = os.getenv("HF_TOKEN", None)  # add in Space Settings → Secrets if repo is private

# ---------- Safe numeric helpers ----------
NUM_RE = re.compile(r"[-+]?\d*\.?\d+")

# Characters that decorate an amount but carry no numeric meaning.
_NOISE_RE = re.compile(r"[,$€£¥_]")
# Magnitude suffix; only honoured when it directly follows the number, so that
# words which merely contain "k" or "m" ("bucks", "months") are not misread.
_MAGNITUDE_RE = re.compile(r"\s*(k|thousand|m|mil|million)\b")
_MAGNITUDES = {
    "k": 1_000.0,
    "thousand": 1_000.0,
    "m": 1_000_000.0,
    "mil": 1_000_000.0,
    "million": 1_000_000.0,
}


def _try_num(x: Any) -> float | None:
    """Parse a loosely formatted number, returning ``None`` when impossible.

    Accepts plain float syntax ("1e5", "-3.2"), currency symbols and thousands
    separators ("$3,200"), and a magnitude suffix attached to the number
    ("5k", "1.5 m", "2 million"). When the text holds several numbers the
    first one is used. Non-finite results ("nan", "inf") are rejected.
    """
    if x is None:
        return None
    text = _NOISE_RE.sub("", str(x).strip().lower())
    try:
        value = float(text)
    except ValueError:
        found = NUM_RE.search(text)
        if found is None:
            return None
        # NUM_RE only matches text that float() accepts.
        value = float(found.group(0))
        suffix = _MAGNITUDE_RE.match(text, found.end())
        if suffix is not None:
            value *= _MAGNITUDES[suffix.group(1)]
    return value if np.isfinite(value) else None


def to_float(x: Any, default: float = 0.0) -> float:
    """Coerce ``x`` to ``float``; fall back to ``default`` if it is unusable.

    Numeric inputs are converted directly, except NaN, which is treated as a
    missing value. Anything else is parsed with ``_try_num``.
    """
    if isinstance(x, (int, float, np.integer, np.floating)):
        value = float(x)
        # NaN would otherwise survive np.clip and break int(round(...)).
        return float(default) if np.isnan(value) else value
    parsed = _try_num(x)
    return float(default) if parsed is None else float(parsed)


def clip_range(x: Any, lo: float, hi: float) -> float:
    """Coerce ``x`` to float (defaulting to ``lo``) and clamp it to [lo, hi]."""
    v = to_float(x, lo)
    return float(np.clip(v, lo, hi))


def clip01_100(x: Any) -> float:
    """Coerce ``x`` to float (defaulting to 0) and clamp it to [0, 100]."""
    v = to_float(x, 0.0)
    return float(np.clip(v, 0.0, 100.0))


# ---------- Load artifacts ----------
model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="model.joblib", token=TOKEN)
feat_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="feature_order.json", token=TOKEN)
thresh_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="score_thresholds.json", token=TOKEN)

# SECURITY: joblib.load unpickles the file and can execute arbitrary code.
# Only point MODEL_REPO_ID at a repository you trust.
reg = joblib.load(model_path)

with open(feat_path, encoding="utf-8") as f:
    FEATURE_ORDER: List[str] = json.load(f)

with open(thresh_path, encoding="utf-8") as f:
    THRESH: Dict[str, float] = json.load(f)


# ---------- Label helpers ----------
def thresholds() -> Tuple[float, float]:
    """Return ``(low_lt, medium_lt)`` from THRESH, defaulting to 40 and 70."""
    return THRESH.get("low_lt", 40.0), THRESH.get("medium_lt", 70.0)


def to_label(score: float) -> str:
    """Map a numeric score to "low", "medium" or "high" using the thresholds."""
    low_lt, med_lt = thresholds()
    if score < low_lt:
        return "low"
    return "medium" if score < med_lt else "high"


def anchors_from_thresholds() -> Dict[str, float]:
    """Return one representative score per label: the midpoint of its bucket."""
    low_lt, med_lt = thresholds()
    # Midpoints of the buckets: [0, low), [low, med), [med, 100]
    return {
        "low": low_lt / 2.0,                 # e.g., 20 if low_lt=40
        "medium": (low_lt + med_lt) / 2.0,   # e.g., 55 if 40/70
        "high": (med_lt + 100.0) / 2.0,      # e.g., 85 if 70/100
    }


def score_from_label_only(label: str) -> float:
    """Return the anchor score for ``label``; unknown labels map to "medium"."""
    a = anchors_from_thresholds()
    return float(a.get(str(label).lower(), a["medium"]))


def score_from_proba(model, X: np.ndarray, fallback_label: str | None = None) -> Tuple[float, str]:
    """Derive a 0-100 score and a label from a classifier.

    When ``model`` offers ``predict_proba``, the score is the expected anchor
    value ``sum(p_c * anchor_c)`` over the classes of the first row of ``X``,
    and the label is ``fallback_label`` if given, else the most probable class.
    If ``predict_proba`` is missing or fails, the score is the anchor of
    ``fallback_label`` ("medium" when that is ``None``).

    Returns:
        ``(score, label)`` with the score clamped to [0, 100] and the label
        lowercased.
    """
    anchors = anchors_from_thresholds()
    if hasattr(model, "predict_proba"):
        try:
            proba = model.predict_proba(X)[0]
            classes = getattr(model, "classes_", None)
            if classes is None:
                # assume order low, medium, high
                classes = np.array(["low", "medium", "high"], dtype=object)
            # Normalize labels to lowercase strings
            classes_l = [str(c).lower() for c in classes]
            # Expected score = sum(p_c * anchor_c)
            expected = sum(
                float(p) * float(anchors.get(c, anchors["medium"]))
                for c, p in zip(classes_l, proba)
            )
            # Choose label: max-proba if not provided
            if fallback_label is None:
                pred_label = classes_l[int(np.argmax(proba))]
            else:
                pred_label = str(fallback_label).lower()
            return clip01_100(expected), pred_label
        except Exception:
            # Best-effort: keep serving via the label anchor, but leave a trace
            # instead of hiding the failure.
            logger.warning(
                "predict_proba failed; falling back to label anchor", exc_info=True
            )
    # No usable predict_proba → map the label to its anchor
    if fallback_label is None:
        fallback_label = "medium"
    return clip01_100(score_from_label_only(fallback_label)), str(fallback_label).lower()


# ---------- Prediction ----------
def predict_readiness(savings_amount, monthly_income, entertainment_spending,
                      sales_skills_1_5, independence_1_5, risk_tolerance_1_10, age):
    """Score one set of UI inputs with the loaded model.

    Inputs are coerced to numbers and clamped to their valid ranges, arranged
    in FEATURE_ORDER, and passed to ``reg.predict``. A numeric prediction is
    clamped to [0, 100] and labelled via the thresholds; any other prediction
    is treated as a class label and scored with ``score_from_proba``.

    Returns:
        A dict with ``readiness_score_0_100``, ``readiness_label`` and a
        human-readable ``thresholds`` description.
    """
    # Coerce UI values safely
    values = {
        "savings_amount": max(0.0, to_float(savings_amount)),
        "monthly_income": max(0.0, to_float(monthly_income)),
        "entertainment_spending": max(0.0, to_float(entertainment_spending)),
        "sales_skills_1_5": clip_range(sales_skills_1_5, 1, 5),
        "independence_1_5": clip_range(independence_1_5, 1, 5),
        "risk_tolerance_1_10": clip_range(risk_tolerance_1_10, 1, 10),
        "age": int(round(clip_range(age, 13, 100))),
    }

    # Build feature vector in exact training order; features the UI does not
    # collect are filled with 0.0.
    x = [values.get(name, 0.0) for name in FEATURE_ORDER]
    X = np.asarray([x], dtype=float)

    # Try numeric prediction first
    y_pred = reg.predict(X)
    y0 = np.asarray(y_pred).ravel()[0]

    if isinstance(y0, (int, float, np.integer, np.floating)):
        # Case A: numeric output → clamp and label via thresholds
        score = clip01_100(float(y0))
        label = to_label(score)
    else:
        # Case B: string/label output (e.g., 'low'/'medium'/'high')
        label_str = str(y0).lower()
        score, label = score_from_proba(reg, X, fallback_label=label_str)

    low_lt, med_lt = thresholds()
    return {
        "readiness_score_0_100": round(float(score), 2),
        "readiness_label": label,
        "thresholds": f"low<{low_lt}, medium {low_lt}–{med_lt-0.01:.2f}, high≥{med_lt}",
    }


# ---------- Gradio UI ----------
inputs = [
    gr.Number(label="Savings Amount ($)", value=5000),
    gr.Number(label="Monthly Income ($)", value=3200),
    gr.Number(label="Entertainment Spending ($/mo)", value=150),
    gr.Slider(1, 5, step=1, value=4, label="Sales Skills (1–5)"),
    gr.Slider(1, 5, step=1, value=2, label="Independence (1=independent, 5=very dependent)"),
    gr.Slider(1, 10, step=1, value=7, label="Risk Tolerance (1–10)"),
    gr.Slider(13, 100, step=1, value=24, label="Age"),
]

demo = gr.Interface(
    fn=predict_readiness,
    inputs=inputs,
    outputs="json",
    title="Entrepreneurial Readiness — Simple Regressor",
    description="Enter details to estimate a 0–100 readiness score and label (low/medium/high).",
)

if __name__ == "__main__":
    demo.launch()