kirakayy committed on
Commit
69f0258
·
verified ·
1 Parent(s): a24df2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -26
app.py CHANGED
@@ -1,5 +1,9 @@
1
- import os, json, joblib, numpy as np, gradio as gr
2
- from typing import Any, Dict, List
 
 
 
 
3
  from huggingface_hub import hf_hub_download
4
  import re
5
 
@@ -9,21 +13,28 @@ TOKEN = os.getenv("HF_TOKEN", None) # add in Space Settings → Secrets if repo
9
 
10
  # ---------- Safe numeric helpers ----------
11
  NUM_RE = re.compile(r"[-+]?\d*\.?\d+")
 
12
  def _try_num(x: Any) -> float | None:
13
- if x is None: return None
 
14
  s = str(x).strip().lower()
15
  mult = 1.0
16
- if "k" in s and re.search(r"\d", s): s = s.replace("k",""); mult = 1000.0
17
- if "m" in s and re.search(r"\d", s): s = s.replace("m",""); mult = 1_000_000.0
 
 
18
  s = re.sub(r"[^0-9.\-]", "", s)
19
- if s in {"","-","."}: return None
20
- try: return float(s) * mult
 
 
21
  except Exception:
22
  m = NUM_RE.search(str(x))
23
  return float(m.group(0)) if m else None
24
 
25
  def to_float(x: Any, default: float = 0.0) -> float:
26
- if isinstance(x, (int,float)): return float(x)
 
27
  v = _try_num(x)
28
  return float(v) if v is not None else float(default)
29
 
@@ -41,42 +52,101 @@ feat_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="feature_order.jso
41
  thresh_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="score_thresholds.json", token=TOKEN)
42
 
43
  reg = joblib.load(model_path)
44
- with open(feat_path) as f: FEATURE_ORDER: List[str] = json.load(f)
45
- with open(thresh_path) as f: THRESH: Dict[str, float] = json.load(f)
 
 
 
 
 
 
 
 
46
 
47
  def to_label(score: float) -> str:
48
- low_lt = THRESH.get("low_lt", 40)
49
- med_lt = THRESH.get("medium_lt", 70)
50
  return "low" if score < low_lt else ("medium" if score < med_lt else "high")
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # ---------- Prediction ----------
 
53
  def predict_readiness(savings_amount, monthly_income, entertainment_spending,
54
  sales_skills_1_5, independence_1_5, risk_tolerance_1_10, age):
55
 
 
56
  values = {
57
  "savings_amount": max(0.0, to_float(savings_amount)),
58
  "monthly_income": max(0.0, to_float(monthly_income)),
59
  "entertainment_spending": max(0.0, to_float(entertainment_spending)),
60
  "sales_skills_1_5": clip_range(sales_skills_1_5, 1, 5),
61
- "independence_1_5": clip_range(independence_1_5, 1, 5), # <-- keep 'independence'
62
  "risk_tolerance_1_10": clip_range(risk_tolerance_1_10, 1, 10),
63
  "age": int(round(clip_range(age, 13, 100))),
64
  }
65
 
66
- # Build feature vector in EXACT order your model expects
67
  x = [values.get(name, 0.0) for name in FEATURE_ORDER]
68
- x_arr = np.asarray([x], dtype=float) # ensure numeric dtype
69
-
70
- # Predict β†’ coerce β†’ clamp to 0–100
71
- y_pred = reg.predict(x_arr)
72
- raw = float(np.asarray(y_pred).ravel()[0]) if y_pred is not None else 0.0
73
- score = clip01_100(raw)
74
-
75
- low_lt = THRESH.get("low_lt", 40)
76
- med_lt = THRESH.get("medium_lt", 70)
 
 
 
 
 
 
 
77
  return {
78
- "readiness_score_0_100": round(score, 2),
79
- "readiness_label": to_label(score),
80
  "thresholds": f"low<{low_lt}, medium {low_lt}–{med_lt-0.01:.2f}, highβ‰₯{med_lt}",
81
  }
82
 
@@ -92,7 +162,9 @@ inputs = [
92
  ]
93
 
94
  demo = gr.Interface(
95
- fn=predict_readiness, inputs=inputs, outputs="json",
 
 
96
  title="Entrepreneurial Readiness β€” Simple Regressor",
97
  description="Enter details to estimate a 0–100 readiness score and label (low/medium/high)."
98
  )
 
1
+ import os
2
+ import json
3
+ import joblib
4
+ import numpy as np
5
+ import gradio as gr
6
+ from typing import Any, Dict, List, Tuple
7
  from huggingface_hub import hf_hub_download
8
  import re
9
 
 
13
 
14
  # ---------- Safe numeric helpers ----------
15
  NUM_RE = re.compile(r"[-+]?\d*\.?\d+")
16
+
17
  def _try_num(x: Any) -> float | None:
18
+ if x is None:
19
+ return None
20
  s = str(x).strip().lower()
21
  mult = 1.0
22
+ if "k" in s and re.search(r"\d", s):
23
+ s = s.replace("k", ""); mult = 1000.0
24
+ if "m" in s and re.search(r"\d", s):
25
+ s = s.replace("m", ""); mult = 1_000_000.0
26
  s = re.sub(r"[^0-9.\-]", "", s)
27
+ if s in {"", "-", "."}:
28
+ return None
29
+ try:
30
+ return float(s) * mult
31
  except Exception:
32
  m = NUM_RE.search(str(x))
33
  return float(m.group(0)) if m else None
34
 
35
def to_float(x: Any, default: float = 0.0) -> float:
    """Coerce ``x`` to a finite float, falling back to ``default``.

    Python and NumPy numeric scalars are converted directly. Anything else is
    parsed with ``_try_num``.

    Args:
        x: Value to convert.
        default: Returned (as a float) when ``x`` cannot be parsed, is
            NaN/infinite, or is an integer too large for a float.

    Returns:
        A finite float derived from ``x``, or ``float(default)``.
    """
    if isinstance(x, (int, float, np.integer, np.floating)):
        try:
            value = float(x)
        except OverflowError:
            # Integers beyond the float range cannot be represented.
            return float(default)
        # NaN / inf would otherwise flow into the feature vector unchanged.
        return value if np.isfinite(value) else float(default)
    parsed = _try_num(x)
    return float(parsed) if parsed is not None else float(default)
40
 
 
52
thresh_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="score_thresholds.json", token=TOKEN)

# NOTE(review): joblib.load unpickles the artifact, which can execute
# arbitrary code. Only point MODEL_REPO_ID at a repository you trust.
reg = joblib.load(model_path)

# JSON is read as UTF-8 explicitly so parsing does not depend on the
# platform's default locale encoding.
with open(feat_path, encoding="utf-8") as f:
    FEATURE_ORDER: List[str] = json.load(f)
with open(thresh_path, encoding="utf-8") as f:
    THRESH: Dict[str, float] = json.load(f)
59
+
60
+ # ---------- Label helpers ----------
61
+
62
def thresholds() -> Tuple[float, float]:
    """Return the ``(low_lt, medium_lt)`` cut-offs from ``THRESH``.

    Missing keys default to 40.0 and 70.0 respectively.
    """
    low_cutoff = THRESH.get("low_lt", 40.0)
    medium_cutoff = THRESH.get("medium_lt", 70.0)
    return low_cutoff, medium_cutoff
64
+
65
 
66
def to_label(score: float) -> str:
    """Map a score to "low", "medium" or "high" using ``thresholds()``.

    Scores below the first cut-off are "low", scores below the second are
    "medium", and everything else is "high".
    """
    low_cutoff, medium_cutoff = thresholds()
    if score < low_cutoff:
        return "low"
    if score < medium_cutoff:
        return "medium"
    return "high"
69
 
70
+
71
def anchors_from_thresholds() -> Dict[str, float]:
    """Return one representative score per label: the midpoint of its bucket.

    The buckets are [0, low), [low, med) and [med, 100], where ``low`` and
    ``med`` come from ``thresholds()``. With cut-offs 40/70 this yields
    20, 55 and 85.
    """
    low_cutoff, medium_cutoff = thresholds()
    anchors: Dict[str, float] = {}
    anchors["low"] = low_cutoff / 2.0
    anchors["medium"] = (low_cutoff + medium_cutoff) / 2.0
    anchors["high"] = (medium_cutoff + 100.0) / 2.0
    return anchors
79
+
80
+
81
def score_from_label_only(label: str) -> float:
    """Return the anchor score for ``label`` (case-insensitive).

    Labels without an anchor fall back to the "medium" anchor.
    """
    anchors = anchors_from_thresholds()
    key = str(label).lower()
    if key not in anchors:
        key = "medium"
    return float(anchors[key])
84
+
85
+
86
def score_from_proba(model, X: np.ndarray, fallback_label: str | None = None) -> Tuple[float, str]:
    """Derive a score and label from a label-predicting model.

    When ``model`` exposes ``predict_proba``, the score is the expected anchor
    value: the sum over classes of probability times the class's anchor from
    ``anchors_from_thresholds()``. Classes without an anchor use the "medium"
    anchor. If ``predict_proba`` is missing or fails, the score is the anchor
    of ``fallback_label`` ("medium" when none is given).

    Args:
        model: Estimator; ``predict_proba`` and ``classes_`` are used if present.
        X: Feature matrix; only the first row's probabilities are used.
        fallback_label: Label to report instead of the arg-max class.

    Returns:
        ``(score, label)`` where the score has been passed through
        ``clip01_100`` and the label is a lower-cased string.
    """
    import logging

    anchors = anchors_from_thresholds()
    if hasattr(model, "predict_proba"):
        try:
            proba = model.predict_proba(X)[0]
            classes = getattr(model, "classes_", None)
            if classes is None:
                # No class list on the model: assume order low, medium, high.
                classes = ("low", "medium", "high")
            class_names = [str(c).lower() for c in classes]
            expected = sum(
                (
                    float(p) * float(anchors.get(name, anchors["medium"]))
                    for name, p in zip(class_names, proba)
                ),
                0.0,
            )
            if fallback_label is None:
                pred_label = class_names[int(np.argmax(proba))]
            else:
                # Lower-case here too, so both return paths agree.
                pred_label = str(fallback_label).lower()
            return clip01_100(expected), pred_label
        except Exception:
            # Best-effort: keep serving via the anchor fallback below, but
            # leave a trace instead of silently discarding the failure.
            logging.getLogger(__name__).warning(
                "predict_proba scoring failed; using label anchor instead",
                exc_info=True,
            )
    # No usable predict_proba → map the label to its anchor.
    label = "medium" if fallback_label is None else str(fallback_label).lower()
    return clip01_100(score_from_label_only(label)), label
112
+
113
  # ---------- Prediction ----------
114
+
115
def predict_readiness(savings_amount, monthly_income, entertainment_spending,
                      sales_skills_1_5, independence_1_5, risk_tolerance_1_10, age):
    """Score one set of UI inputs with the loaded model ``reg``.

    Monetary inputs are coerced with ``to_float`` and floored at 0. The rating
    inputs and age go through ``clip_range`` with their respective bounds
    (defined elsewhere in the file; presumably a clamp, confirm there).
    Features are arranged in ``FEATURE_ORDER``, and names missing from the
    inputs are filled with 0.0.

    Returns:
        A dict with ``readiness_score_0_100`` (rounded to 2 decimals),
        ``readiness_label`` and a human-readable ``thresholds`` summary.
    """
    # Coerce UI values safely
    values = {
        "savings_amount": max(0.0, to_float(savings_amount)),
        "monthly_income": max(0.0, to_float(monthly_income)),
        "entertainment_spending": max(0.0, to_float(entertainment_spending)),
        "sales_skills_1_5": clip_range(sales_skills_1_5, 1, 5),
        "independence_1_5": clip_range(independence_1_5, 1, 5),
        "risk_tolerance_1_10": clip_range(risk_tolerance_1_10, 1, 10),
        "age": int(round(clip_range(age, 13, 100))),
    }

    # Build feature vector in exact training order
    x = [values.get(name, 0.0) for name in FEATURE_ORDER]
    X = np.asarray([x], dtype=float)

    # Try numeric prediction first
    y_pred = reg.predict(X)
    y0 = np.asarray(y_pred).ravel()[0]

    if isinstance(y0, (int, float, np.integer, np.floating)):
        # Case A: numeric output → clamp and label via thresholds
        score = clip01_100(float(y0))
        label = to_label(score)
    else:
        # Case B: string/label output (e.g., 'low'/'medium'/'high')
        label_str = str(y0).lower()
        score, label = score_from_proba(reg, X, fallback_label=label_str)

    low_lt, med_lt = thresholds()
    return {
        "readiness_score_0_100": round(float(score), 2),
        "readiness_label": label,
        # En dash and ≥ restored; the stored text was mis-encoded.
        "thresholds": f"low<{low_lt}, medium {low_lt}–{med_lt-0.01:.2f}, high≥{med_lt}",
    }
152
 
 
162
  ]
163
 
164
# Gradio UI: wires the input widgets to predict_readiness and renders the
# returned dict as JSON.
demo = gr.Interface(
    fn=predict_readiness,
    inputs=inputs,
    outputs="json",
    # Em dash / en dash restored; the stored text was mis-encoded.
    title="Entrepreneurial Readiness — Simple Regressor",
    description="Enter details to estimate a 0–100 readiness score and label (low/medium/high)."
)