import json
import logging
import os
import re
from typing import Any, Dict, List, Tuple

import gradio as gr
import joblib
import numpy as np
from huggingface_hub import hf_hub_download

# ---------- Config ----------
# Hub repository that hosts the model artifacts; point this at your own repo if you fork.
MODEL_REPO_ID = "kirakayy/entrepreneurial-readiness"
# Optional access token, read from the HF_TOKEN environment variable
# (add it under Space Settings → Secrets when the repo is private).
TOKEN = os.environ.get("HF_TOKEN")

# ---------- Safe numeric helpers ----------
NUM_RE = re.compile(r"[-+]?\d*\.?\d+")

def _try_num(x: Any) -> float | None:
    if x is None:
        return None
    s = str(x).strip().lower()
    mult = 1.0
    if "k" in s and re.search(r"\d", s):
        s = s.replace("k", ""); mult = 1000.0
    if "m" in s and re.search(r"\d", s):
        s = s.replace("m", ""); mult = 1_000_000.0
    s = re.sub(r"[^0-9.\-]", "", s)
    if s in {"", "-", "."}:
        return None
    try:
        return float(s) * mult
    except Exception:
        m = NUM_RE.search(str(x))
        return float(m.group(0)) if m else None

def to_float(x: Any, default: float = 0.0) -> float:
    """Coerce ``x`` to a float, falling back to ``default`` when that is not possible.

    Args:
        x: A Python/NumPy number, or any other value that ``_try_num`` will
            attempt to parse from its string form.
        default: Value returned when ``x`` is NaN or cannot be parsed.

    Returns:
        ``float(x)`` for non-NaN numeric input, the parsed number for other
        input, or ``float(default)`` otherwise.
    """
    if isinstance(x, (int, float, np.integer, np.floating)):
        value = float(x)
        # NaN cannot be clipped or rounded later on, so treat it like a
        # missing value instead of letting it propagate.
        return float(default) if np.isnan(value) else value

    parsed = _try_num(x)
    return float(default) if parsed is None else float(parsed)

def clip_range(x: Any, lo: float, hi: float) -> float:
    """Coerce ``x`` to a float (using ``lo`` when it cannot be coerced) and clamp it to ``[lo, hi]``."""
    coerced = to_float(x, lo)
    clamped = np.clip(coerced, lo, hi)
    return float(clamped)

def clip01_100(x: Any) -> float:
    """Coerce ``x`` to a float (0.0 when it cannot be coerced) and clamp it to ``[0, 100]``."""
    return clip_range(x, 0.0, 100.0)

# ---------- Load artifacts ----------
# Download (or reuse from the local hub cache) the three artifacts the app needs.
model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="model.joblib", token=TOKEN)
feat_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="feature_order.json", token=TOKEN)
thresh_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="score_thresholds.json", token=TOKEN)

# SECURITY NOTE(review): joblib.load unpickles the file, which can execute
# arbitrary code. Only point MODEL_REPO_ID at a repository you trust.
reg = joblib.load(model_path)

# JSON is UTF-8 by specification; be explicit so the read does not depend on
# the platform's default encoding.
with open(feat_path, encoding="utf-8") as f:
    FEATURE_ORDER: List[str] = json.load(f)
with open(thresh_path, encoding="utf-8") as f:
    THRESH: Dict[str, float] = json.load(f)

# ---------- Label helpers ----------

def thresholds() -> Tuple[float, float]:
    """Return the ``(low_lt, medium_lt)`` cut-offs from THRESH, defaulting to 40.0 and 70.0."""
    low_cut = THRESH.get("low_lt", 40.0)
    medium_cut = THRESH.get("medium_lt", 70.0)
    return low_cut, medium_cut


def to_label(score: float) -> str:
    """Map a score to "low", "medium" or "high" using the configured cut-offs."""
    low_cut, medium_cut = thresholds()
    if score < low_cut:
        return "low"
    if score < medium_cut:
        return "medium"
    return "high"


def anchors_from_thresholds() -> Dict[str, float]:
    """Return one representative score per label: the midpoint of each bucket.

    The buckets are [0, low), [low, medium) and [medium, 100]; with the
    default cut-offs of 40/70 the anchors are 20, 55 and 85.
    """
    low_cut, medium_cut = thresholds()
    low_mid = low_cut / 2.0
    medium_mid = (low_cut + medium_cut) / 2.0
    high_mid = (medium_cut + 100.0) / 2.0
    return dict(low=low_mid, medium=medium_mid, high=high_mid)


def score_from_label_only(label: str) -> float:
    """Return the anchor score for ``label`` (case-insensitive); unknown labels get the "medium" anchor."""
    anchor_by_label = anchors_from_thresholds()
    key = str(label).lower()
    if key in anchor_by_label:
        return float(anchor_by_label[key])
    return float(anchor_by_label["medium"])


def score_from_proba(model, X: np.ndarray, fallback_label: str | None = None) -> Tuple[float, str]:
    """Derive a 0-100 score and a label from a classifier-style model.

    When the model exposes ``predict_proba``, the score is the expected
    anchor value ``sum(p_c * anchor_c)`` over the classes of the first row of
    ``X``. If that is unavailable or fails, the score is the anchor of the
    fallback label.

    Args:
        model: Fitted estimator; ``predict_proba`` and ``classes_`` are used
            when present.
        X: Feature matrix; only the first row's probabilities are used.
        fallback_label: Label already predicted by the caller. It is returned
            as the label when given, and is the basis of the score when
            probabilities cannot be used. Defaults to "medium" in that case.

    Returns:
        ``(score, label)`` with the score clamped to [0, 100] and the label
        lower-cased.
    """
    anchors = anchors_from_thresholds()
    # Normalise once so both return paths hand back the same label casing.
    given_label = None if fallback_label is None else str(fallback_label).lower()

    if hasattr(model, "predict_proba"):
        try:
            proba = model.predict_proba(X)[0]
            classes = getattr(model, "classes_", None)
            if classes is None:
                # No class list on the model: assume the order low, medium, high.
                classes = ("low", "medium", "high")
            class_names = [str(c).lower() for c in classes]

            # Classes without a known anchor are weighted with the "medium" anchor.
            expected = sum(
                (float(p) * float(anchors.get(name, anchors["medium"]))
                 for name, p in zip(class_names, proba)),
                0.0,
            )
            if given_label is not None:
                label = given_label
            else:
                label = class_names[int(np.argmax(proba))]
            return clip01_100(expected), label
        except Exception:
            # Best effort: keep serving a label-based score, but leave a trace
            # instead of hiding the failure.
            logging.getLogger(__name__).warning(
                "predict_proba failed; falling back to label anchor", exc_info=True
            )

    # No usable probabilities: map the label to its anchor.
    label = "medium" if given_label is None else given_label
    return clip01_100(score_from_label_only(label)), label

# ---------- Prediction ----------

def predict_readiness(savings_amount, monthly_income, entertainment_spending,
                      sales_skills_1_5, independence_1_5, risk_tolerance_1_10, age):
    """Score one set of UI inputs with the loaded model.

    The raw values are coerced to numbers, arranged in FEATURE_ORDER, and
    passed to ``reg.predict``. A numeric prediction is clamped to [0, 100]
    and labelled via the thresholds; any other prediction is treated as a
    class label and scored through ``score_from_proba``.

    Returns:
        A dict with the rounded score, the label, and a text summary of the
        thresholds in use.
    """

    def _non_negative(raw):
        # Money amounts are floored at zero.
        return max(0.0, to_float(raw))

    features = {
        "savings_amount": _non_negative(savings_amount),
        "monthly_income": _non_negative(monthly_income),
        "entertainment_spending": _non_negative(entertainment_spending),
        "sales_skills_1_5": clip_range(sales_skills_1_5, 1, 5),
        "independence_1_5": clip_range(independence_1_5, 1, 5),
        "risk_tolerance_1_10": clip_range(risk_tolerance_1_10, 1, 10),
        "age": int(round(clip_range(age, 13, 100))),
    }

    # One-row matrix in the column order stored in FEATURE_ORDER; names that
    # are not produced above are filled with 0.0.
    row = [features.get(column, 0.0) for column in FEATURE_ORDER]
    X = np.asarray([row], dtype=float)

    first_pred = np.asarray(reg.predict(X)).ravel()[0]
    numeric_types = (int, float, np.integer, np.floating)

    if not isinstance(first_pred, numeric_types):
        # Label output (e.g. 'low'/'medium'/'high'): derive the score from it.
        score, label = score_from_proba(reg, X, fallback_label=str(first_pred).lower())
    else:
        # Numeric output: clamp, then bucket with the thresholds.
        score = clip01_100(float(first_pred))
        label = to_label(score)

    low_cut, medium_cut = thresholds()
    summary = f"low<{low_cut}, medium {low_cut}–{medium_cut-0.01:.2f}, high≥{medium_cut}"
    return {
        "readiness_score_0_100": round(float(score), 2),
        "readiness_label": label,
        "thresholds": summary,
    }

# ---------- Gradio UI ----------
# Input widgets, listed in the same order as predict_readiness's parameters.
inputs = [
    gr.Number(value=5000, label="Savings Amount ($)"),
    gr.Number(value=3200, label="Monthly Income ($)"),
    gr.Number(value=150, label="Entertainment Spending ($/mo)"),
    gr.Slider(minimum=1, maximum=5, value=4, step=1, label="Sales Skills (1–5)"),
    gr.Slider(
        minimum=1,
        maximum=5,
        value=2,
        step=1,
        label="Independence (1=independent, 5=very dependent)",
    ),
    gr.Slider(minimum=1, maximum=10, value=7, step=1, label="Risk Tolerance (1–10)"),
    gr.Slider(minimum=13, maximum=100, value=24, step=1, label="Age"),
]

# Single-function interface: the widgets in `inputs` feed predict_readiness
# and the dict it returns is rendered as JSON.
demo = gr.Interface(
    predict_readiness,
    inputs,
    "json",
    title="Entrepreneurial Readiness — Simple Regressor",
    description="Enter details to estimate a 0–100 readiness score and label (low/medium/high).",
)

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()