"""W&B callback integration for XGBoost."""

import json
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, cast

import xgboost as xgb
from xgboost import Booster

import wandb
from wandb.sdk.lib import telemetry as wb_telemetry

MINIMIZE_METRICS = [
    "rmse",
    "rmsle",
    "mae",
    "mape",
    "mphe",
    "logloss",
    "error",
    "error@t",
    "merror",
]

MAXIMIZE_METRICS = ["auc", "aucpr", "ndcg", "map", "ndcg@n", "map@n"]

if TYPE_CHECKING:
    from typing import Callable, List, NamedTuple

    class CallbackEnv(NamedTuple):
        evaluation_result_list: List

def wandb_callback() -> "Callable":
    """Old-style callback that will be deprecated in favor of `WandbCallback`.

    Please use `WandbCallback` for more features.
    """
    warnings.warn(
        "wandb_callback will be deprecated in favor of WandbCallback. Please use WandbCallback for more features.",
        UserWarning,
        stacklevel=2,
    )

    with wb_telemetry.context() as tel:
        tel.feature.xgboost_old_wandb_callback = True

    def callback(env: "CallbackEnv") -> None:
        # Log each (metric, value) pair for this round without committing,
        # then commit the step with an empty log call.
        for k, v in env.evaluation_result_list:
            wandb.log({k: v}, commit=False)
        wandb.log({})

    return callback
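
# A minimal usage sketch for the legacy callback above, for older XGBoost
# releases that still accept function-style callbacks. `dtrain` and `dvalid`
# are assumed to be `xgb.DMatrix` objects created by the caller; they are not
# defined in this module:
#
#     wandb.init(project="my-project")
#     booster = xgb.train(
#         {"objective": "reg:squarederror"},
#         dtrain,
#         evals=[(dvalid, "valid")],
#         callbacks=[wandb_callback()],
#     )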


class WandbCallback(xgb.callback.TrainingCallback):
    """`WandbCallback` automatically integrates XGBoost with wandb.

    Args:
        log_model: (boolean) if True, save and upload the model to Weights & Biases Artifacts.
        log_feature_importance: (boolean) if True, log a feature importance bar plot.
        importance_type: (str) one of {weight, gain, cover, total_gain, total_cover} for tree models; weight for linear models.
        define_metric: (boolean) if True (default), capture model performance at the best step, instead of the last step, of training in your `wandb.summary`.

    Passing `WandbCallback` to XGBoost will:

    - log the booster model configuration to Weights & Biases
    - log evaluation metrics collected by XGBoost, such as rmse, accuracy, etc., to Weights & Biases
    - log training metrics collected by XGBoost (if you provide training data to `eval_set`)
    - log the best score and the best iteration
    - save and upload your trained model to Weights & Biases Artifacts (when `log_model=True`)
    - log a feature importance plot (when `log_feature_importance=True`, the default)
    - capture the best eval metric in `wandb.summary` (when `define_metric=True`, the default)

    Example:
        ```python
        bst_params = dict(
            objective="reg:squarederror",
            colsample_bytree=0.3,
            learning_rate=0.1,
            max_depth=5,
            alpha=10,
            n_estimators=10,
            tree_method="hist",
            callbacks=[WandbCallback()],
        )

        xg_reg = xgb.XGBRegressor(**bst_params)
        xg_reg.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
        )
        ```
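
    The same callback also works with the native `xgb.train` API, since it
    subclasses `xgb.callback.TrainingCallback`. A minimal sketch, assuming
    `dtrain` and `dvalid` are `xgb.DMatrix` objects built by the caller (they
    are not part of this example):

    ```python
    booster = xgb.train(
        {"objective": "reg:squarederror", "eval_metric": "rmse"},
        dtrain,
        num_boost_round=10,
        evals=[(dvalid, "valid")],
        callbacks=[WandbCallback(log_model=True)],
    )
    ```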
    """

    def __init__(
        self,
        log_model: bool = False,
        log_feature_importance: bool = True,
        importance_type: str = "gain",
        define_metric: bool = True,
    ):
        self.log_model: bool = log_model
        self.log_feature_importance: bool = log_feature_importance
        self.importance_type: str = importance_type
        self.define_metric: bool = define_metric

        if wandb.run is None:
            raise wandb.Error("You must call wandb.init() before WandbCallback()")

        with wb_telemetry.context() as tel:
            tel.feature.xgboost_wandb_callback = True

    def before_training(self, model: Booster) -> Booster:
        """Run before training starts."""
        # Log the booster configuration to the run config.
        config = model.save_config()
        wandb.config.update(json.loads(config))
        return model

    def after_training(self, model: Booster) -> Booster:
        """Run after training is finished."""
        if self.log_model:
            self._log_model_as_artifact(model)

        if self.log_feature_importance:
            self._log_feature_importance(model)

        if model.attr("best_score") is not None:
            wandb.log(
                {
                    "best_score": float(cast(str, model.attr("best_score"))),
                    "best_iteration": int(cast(str, model.attr("best_iteration"))),
                }
            )

        return model

    def after_iteration(self, model: Booster, epoch: int, evals_log: dict) -> bool:
        """Run after each iteration. Return True when training should stop."""
        for data, metric in evals_log.items():
            for metric_name, log in metric.items():
                if self.define_metric:
                    self._define_metric(data, metric_name)
                wandb.log({f"{data}-{metric_name}": log[-1]}, commit=False)

        wandb.log({"epoch": epoch})

        # Metric summaries only need to be defined once, on the first iteration.
        self.define_metric = False

        return False

    def _log_model_as_artifact(self, model: Booster) -> None:
        model_name = f"{wandb.run.id}_model.json"
        model_path = Path(wandb.run.dir) / model_name
        model.save_model(str(model_path))

        model_artifact = wandb.Artifact(name=model_name, type="model")
        model_artifact.add_file(str(model_path))
        wandb.log_artifact(model_artifact)

    def _log_feature_importance(self, model: Booster) -> None:
        fi = model.get_score(importance_type=self.importance_type)
        fi_data = [[k, fi[k]] for k in fi]
        table = wandb.Table(data=fi_data, columns=["Feature", "Importance"])
        wandb.log(
            {
                "Feature Importance": wandb.plot.bar(
                    table, "Feature", "Importance", title="Feature Importance"
                )
            }
        )

    def _define_metric(self, data: str, metric_name: str) -> None:
        if "loss" in metric_name.lower():
            wandb.define_metric(f"{data}-{metric_name}", summary="min")
        elif metric_name.lower() in MINIMIZE_METRICS:
            wandb.define_metric(f"{data}-{metric_name}", summary="min")
        elif metric_name.lower() in MAXIMIZE_METRICS:
            wandb.define_metric(f"{data}-{metric_name}", summary="max")