# server.py
# FastAPI backend for the Telecom LLM Leaderboard
# ------------------------------------------------
# Developer:
#   Name: Mohamed SANA
#   Email: docteur.mohamed.sana@gmail.com
#
# Quickstart:
#   pip install fastapi uvicorn "pydantic[email]>=2" huggingface_hub python-dotenv
#   uvicorn server:app --reload --port 8000
#
# Configure (optional) environment variables for real HF queue uploads:
#   HF_TOKEN=
#   QUEUE_REPO=
#   RESULTS_REPO=      # (not used in this fake generator)
#   HF_HOME=           # optional
#   USE_LOCAL_DATA=
#   LOCAL_DATA_FILE=
#
# The frontend expects:
#   GET  /api/datasets
#   GET  /api/results
#   POST /api/submit

from fastapi import FastAPI, Body, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel, EmailStr, Field
from datetime import datetime
from typing import List, Literal, Optional, Dict, Any
import os, json, random

# Optional: Hugging Face Hub for queue upload
HF_AVAILABLE = False
try:
    from huggingface_hub import HfApi
    HF_AVAILABLE = True
except Exception:
    HF_AVAILABLE = False

# ---------------- Config ----------------
HF_TOKEN = os.getenv("HF_TOKEN")
QUEUE_REPO = os.getenv("QUEUE_REPO", "otellm/model-submission")    # e.g., "your-org/telecom-eval-queue"
RESULTS_REPO = os.getenv("RESULTS_REPO", "otellm/model-results")   # future: read results remotely if desired
CACHE_PATH = os.getenv("HF_HOME", ".")                             # cache dir
# Set USE_LOCAL_DATA=false to load data from the results repo instead of the local file.
USE_LOCAL_DATA = os.getenv("USE_LOCAL_DATA", "true").strip().lower() in ("1", "true", "yes")
LOCAL_DATA_FILE = os.getenv("LOCAL_DATA_FILE", "benchmark.json")   # the local data file to load
DATASETS = os.getenv("DATASETS", "3GPP-TSG;NetBench;TeleQna;TeleLogs;TeleMath")

EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "submission-queue")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "results-queue")

# ---------------- App ----------------
app = FastAPI(title="Telecom LLM Leaderboard API", version="1.0.0")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ---------------- Data Models ----------------
MetricType = Literal["raw", "llm-as-judge"]


class Score(BaseModel):
    dataset_name: str
    metric_type: MetricType
    score: float
    energy_consumed: float
    co2_consumed: float


class ModelResult(BaseModel):
    provider: str
    name: str
    repo: str
    updated_at: str
    scores: List[Score]


class SubmitPayload(BaseModel):
    model_provider: str = Field(..., examples=["TelcoAI"])
    model_name: str = Field(..., examples=["T-LLM-7B"])
    hf_repo: str = Field(..., examples=["telcoai/t-llm-7b"])
    contact_email: EmailStr
    notes: Optional[str] = ""


# DATASETS may be provided as a single delimited string; normalize it to a list.
if isinstance(DATASETS, str):
    if ';' in DATASETS:
        DATASETS = DATASETS.split(";")
    elif '|' in DATASETS:
        DATASETS = DATASETS.split("|")
    elif ',' in DATASETS:
        DATASETS = DATASETS.split(",")
    else:
        DATASETS = DATASETS.split(" ")
    DATASETS = [s.strip() for s in DATASETS]
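
# Example of a single entry in the "models" list served by /api/results.
# Field names follow the ModelResult/Score models above; the values are
# illustrative only:
# {
#   "provider": "Qwen",
#   "name": "Qwen3-32B",
#   "repo": "qwen/qwen3-32b",
#   "updated_at": "2025-01-01T00:00:00Z",
#   "scores": [
#     {"dataset_name": "TeleQna", "metric_type": "raw",
#      "score": 55.2, "energy_consumed": 1.2, "co2_consumed": 0.4}
#   ]
# }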

# ---------- fake data for test purpose
FAKE_MODELS = [
    ("Qwen", "Qwen3-32B", "qwen/qwen3-32b"),
    ("Qwen", "QwQ-32B", "qwen/qwq-32b"),
    ("OpenAI", "GPT-OSS-120B", "openai/gpt-oss-120b"),
    ("OpenAI", "GPT-OSS-20B", "openai/gpt-oss-20b"),
    ("DeepSeek", "R1-Distill-Llama-70B", "deepseek/r1-distill-llama-70b"),
    ("ByteDance", "Seed-OSS-36B", "bytedance/seed-oss-36b"),
    ("LLama", "Llama-8B", "llama/llama-8B-instruct"),
]


def _random_score() -> float:
    return round(random.uniform(45.0, 62.0), 2)


def generate_fake_model(provider: str, name: str, repo: str) -> ModelResult:
    metric_types = ["raw", "llm-as-judge"]
    now = datetime.utcnow().isoformat() + "Z"
    scores = []
    for d in DATASETS:
        mt = random.choice(metric_types)
        scores.append(Score(
            dataset_name=d,
            metric_type=mt,
            score=_random_score(),
            energy_consumed=round(random.uniform(0.1, 2.5), 3),
            co2_consumed=round(random.uniform(0.05, 1.0), 3),
        ))
    return ModelResult(provider=provider, name=name, repo=repo, updated_at=now, scores=scores)


def generate_fake_results() -> Dict[str, Any]:
    models = [generate_fake_model(p, n, r) for (p, n, r) in FAKE_MODELS]
    return {"models": [m.model_dump() for m in models]}


def load_benchmark() -> Dict[str, Any]:
    with open(LOCAL_DATA_FILE, 'r') as f:
        return json.load(f)


# ---------------- HF Upload Helper ----------------
def push_submission_to_queue(eval_entry: Dict[str, Any]) -> str:
    """
    Writes a JSON file locally under
    HF_HOME/submission-queue/<model_provider>/<model_name>_submission_<version>.json
    and uploads it to the dataset repo specified by QUEUE_REPO (if configured).
    Returns a submission id string.
    """
    model_provider = eval_entry.get("model_provider", "unknown-provider")
    model_name = eval_entry.get("model_name", "unknown-model")
    version = eval_entry.get("version", "v1")

    OUT_DIR = os.path.join(EVAL_REQUESTS_PATH, model_provider)
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = os.path.join(OUT_DIR, f"{model_name}_submission_{version}.json")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(eval_entry, ensure_ascii=False, indent=2))

    upload_ok = False
    if HF_AVAILABLE and HF_TOKEN and QUEUE_REPO:
        api = HfApi(token=HF_TOKEN)
        # Path in repo: <model_provider>/<model_name>_submission_<version>.json
        if '\\' in out_path:  # normalize Windows separators before splitting
            out_path = out_path.replace('\\', '/')
        path_in_repo = out_path.split("submission-queue/")[1]
        api.upload_file(
            path_or_fileobj=out_path,
            path_in_repo=path_in_repo,
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model_name} to eval queue",
        )
        upload_ok = True

    return f"{model_provider}-{model_name}-{version}{'' if upload_ok else '-local'}"


# ---------------- Routes ----------------
@app.get("/", include_in_schema=False)
async def index():
    return FileResponse("index.html")


@app.get("/api/health")
def health():
    return {"ok": True, "huggingface_available": HF_AVAILABLE, "queue_repo": QUEUE_REPO, "result_repo": RESULTS_REPO}


@app.get("/api/datasets")
def get_datasets():
    return {"datasets": DATASETS}


@app.get("/api/models")
def get_submitted_models():
    models = []
    if HF_AVAILABLE and HF_TOKEN and QUEUE_REPO:
        api = HfApi(token=HF_TOKEN)
        # Mirror the submission queue repo locally, then read its JSON entries.
        api.snapshot_download(
            repo_id=QUEUE_REPO,
            local_dir=EVAL_REQUESTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
        )

        models_filepaths = []
        for root, _, files in os.walk(EVAL_REQUESTS_PATH):
            # We should only have json files in model submissions
            if len(files) == 0 or any([not f.endswith(".json") for f in files]):
                continue
            for file in files:
                models_filepaths.append(os.path.join(root, file))

        for filepath in models_filepaths:
            try:
                with open(filepath, 'r', encoding="utf-8") as f:
                    model: dict = json.load(f)
                # Do not expose submitters' contact emails through the API.
                model.pop("contact_email", None)
                models.append(model)
            except Exception:
                continue

    return {"models": models}


@app.get("/api/results")
def get_results():
    results = []
    if USE_LOCAL_DATA:
        # LOCAL_DATA_FILE is expected to contain the list of model entries directly.
        return {"models": load_benchmark()}

    if HF_AVAILABLE and HF_TOKEN and RESULTS_REPO:
        api = HfApi(token=HF_TOKEN)
        # Mirror the results repo locally, then aggregate its JSON files.
        api.snapshot_download(
            repo_id=RESULTS_REPO,
            local_dir=EVAL_RESULTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
        )

        model_result_filepaths = []
        for root, _, files in os.walk(EVAL_RESULTS_PATH):
            # We should only have json files in model results
            if len(files) == 0 or any([not f.endswith(".json") for f in files]):
                continue
            for file in files:
                model_result_filepaths.append(os.path.join(root, file))

        for filepath in model_result_filepaths:
            try:
                with open(filepath, 'r', encoding="utf-8") as f:
                    results.append(json.load(f))
            except Exception:
                continue

    return {"models": results}
    # To serve generated placeholder data instead of real results, swap the
    # return above for:
    # return generate_fake_results()
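
# Example body the frontend can POST to /api/submit. Values are illustrative,
# taken from the SubmitPayload field examples above; replace them with the
# real model details:
# {
#   "model_provider": "TelcoAI",
#   "model_name": "T-LLM-7B",
#   "hf_repo": "telcoai/t-llm-7b",
#   "contact_email": "contact@example.com",
#   "notes": "optional free-text notes"
# }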

@app.post("/api/submit")
def submit_model(payload: SubmitPayload = Body(...)):
    ts = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
    eval_entry = {
        "model_provider": payload.model_provider,
        "model_name": payload.model_name,
        "hf_repo": payload.hf_repo,
        "contact_email": payload.contact_email,
        "notes": payload.notes or "",
        "status": "pending",
        "version": ts,
        "submitted_at": datetime.utcnow().isoformat() + "Z",
    }

    # Is the model info correctly filled? Look the repo up on the Hub when possible.
    if HF_AVAILABLE:
        try:
            _ = HfApi(token=HF_TOKEN).model_info(repo_id=payload.hf_repo)
        except Exception:
            raise HTTPException(
                status_code=400,
                detail="Could not get your model information. Please fill it in properly.",
            )

    try:
        submission_id = push_submission_to_queue(eval_entry)
        return {"status": "pending", "id": submission_id}
    except Exception as e:
        # If the queue upload fails, the entry is still persisted locally; report the error.
        raise HTTPException(status_code=500, detail=f"Failed to queue submission: {e}")


# ---------------- Optional: run via `python server.py` ----------------
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("server:app", host="0.0.0.0", port=7600, reload=True)
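
# Quick smoke test against a running instance (port 8000 when started with the
# quickstart uvicorn command above, 7600 when run as `python server.py`):
#   curl http://localhost:8000/api/health
#   curl http://localhost:8000/api/datasets
#   curl http://localhost:8000/api/results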