Spaces:

jamtur01
/

MMaDA

Runtime error

App Files Files Community

MMaDA / venv /lib /python3.11 /site-packages /wandb /plot /utils.py

jamtur01

Upload folder using huggingface_hub

9c6594c verified about 1 month ago

raw

history blame contribute delete

6.77 kB

	from typing import Iterable, Sequence

	import wandb
	from wandb import util


	def test_missing(**kwargs):
	    """Check named inputs for ``None``, null entries and non-numeric values.

	    Every keyword argument is checked for ``None``. Arguments named ``X`` or
	    ``X_test`` are additionally converted to a numpy array, scanned for null
	    entries (``pd.isnull``) and scanned for elements that are not numbers.

	    Args:
	        **kwargs: Inputs to validate, keyed by their parameter name.

	    Returns:
	        bool: ``True`` when no problem was found, ``False`` otherwise. Each
	        problem is reported via ``wandb.termerror`` or ``wandb.termwarn``.
	    """
	    np = util.get_module("numpy", required="Logging plots requires numpy")
	    pd = util.get_module("pandas", required="Logging dataframes requires pandas")
	    sparse = util.get_module(
	        "scipy.sparse", required="Logging scipy matrices requires scipy"
	    )

	    test_passed = True
	    for k, v in kwargs.items():
	        # Missing/empty params/datapoint arrays
	        if v is None:
	            wandb.termerror(f"{k} is None. Please try again.")
	            test_passed = False
	            # Nothing further can be inspected on a missing value; carrying on
	            # would fail on ``None.ndim`` for the feature-matrix arguments.
	            continue
	        if k not in ("X", "X_test"):
	            continue

	        # Normalise to a dense ndarray so the scans below work uniformly.
	        if sparse.issparse(v):
	            # ``issparse`` covers every scipy sparse format, not only CSR.
	            v = v.toarray()
	        elif isinstance(v, (pd.DataFrame, pd.Series)):
	            v = v.to_numpy()
	        elif not isinstance(v, np.ndarray):
	            # Lists, tuples, scalars and other array-likes.
	            v = np.asarray(v)

	        # Warn the user about missing values
	        missing = np.count_nonzero(pd.isnull(v))
	        if missing > 0:
	            wandb.termwarn(f"{k} contains {missing} missing values. ")
	            test_passed = False

	        # Ensure the dataset contains only numbers
	        if np.issubdtype(v.dtype, np.number):
	            # Every element of a numeric-dtype array is an ``np.number``,
	            # so the element-by-element scan can be skipped.
	            non_nums = 0
	        else:
	            # ``flat`` visits individual elements whatever the number of
	            # dimensions (0-d, 1-d, 2-d, 3-d, ...).
	            non_nums = sum(
	                1
	                for val in v.flat
	                if not isinstance(val, (int, float, complex, np.number))
	            )
	        if non_nums > 0:
	            wandb.termerror(
	                f"{k} contains values that are not numbers. Please vectorize, "
	                f"label encode or one hot encode {k} and call the plotting function again."
	            )
	            test_passed = False
	    return test_passed


	def test_fitted(model):
	    """Report whether ``model`` appears to have been fitted.

	    The model is probed by calling ``model.predict`` on a 7x3 array of zeros.
	    If the model has no usable ``predict`` (the call raises
	    ``AttributeError``), scikit-learn's ``check_is_fitted`` is used instead,
	    looking for any one of a list of fitted-state attributes.

	    Args:
	        model: The estimator to probe.

	    Returns:
	        bool: ``False`` when scikit-learn's ``NotFittedError`` is raised (an
	        error is also printed with ``wandb.termerror``); ``True`` otherwise,
	        including when ``predict`` fails for any other reason.
	    """
	    np = util.get_module("numpy", required="Logging plots requires numpy")
	    _ = util.get_module("pandas", required="Logging dataframes requires pandas")
	    _ = util.get_module("scipy", required="Logging scipy matrices requires scipy")
	    scikit_utils = util.get_module(
	        "sklearn.utils",
	        required="roc requires the scikit utils submodule, install with `pip install scikit-learn`",
	    )
	    scikit_exceptions = util.get_module(
	        "sklearn.exceptions",
	        required="roc requires the scikit exceptions submodule, install with `pip install scikit-learn`",
	    )

	    try:
	        model.predict(np.zeros((7, 3)))
	    except scikit_exceptions.NotFittedError:
	        wandb.termerror("Please fit the model before passing it in.")
	        return False
	    except AttributeError:
	        # Some clustering models (LDA, PCA, Agglomerative) don't implement ``predict``
	        try:
	            scikit_utils.validation.check_is_fitted(
	                model,
	                [
	                    "coef_",
	                    "estimator_",
	                    "labels_",
	                    "n_clusters_",
	                    "children_",
	                    "components_",
	                    "n_components_",
	                    "n_iter_",
	                    "n_batch_iter_",
	                    "explained_variance_",
	                    "singular_values_",
	                    "mean_",
	                ],
	                all_or_any=any,
	            )
	        except scikit_exceptions.NotFittedError:
	            wandb.termerror("Please fit the model before passing it in.")
	            return False
	        return True
	    except Exception:
	        # Assume it's fitted, since ``NotFittedError`` wasn't raised
	        return True

	    # ``predict`` completed without raising, so the model is fitted. Without
	    # this explicit return the function would yield ``None``, which callers
	    # testing truthiness would read as "not fitted".
	    return True


	def encode_labels(df):
	    """Label-encode the non-numeric columns of ``df`` in place.

	    Columns are selected with ``df.select_dtypes`` by excluding a fixed list
	    of int/float dtype names; each selected column is replaced by the output
	    of a scikit-learn ``LabelEncoder.fit_transform`` call.

	    Args:
	        df: The dataframe to modify. It is mutated; nothing is returned.
	    """
	    _ = util.get_module("pandas", required="Logging dataframes requires pandas")
	    preprocessing = util.get_module(
	        "sklearn.preprocessing",
	        "roc requires the scikit preprocessing submodule, install with `pip install scikit-learn`",
	    )

	    encoder = preprocessing.LabelEncoder()

	    def _encode(column):
	        # The single encoder instance is re-fitted for every column.
	        return encoder.fit_transform(column)

	    numeric_dtypes = ["int", "float", "float64", "float32", "int32", "int64"]
	    to_encode = df.select_dtypes(exclude=numeric_dtypes).columns
	    encoded = df[to_encode].apply(_encode)
	    df[to_encode] = encoded


	def test_types(**kwargs):
	    """Validate the types of named plotting inputs.

	    Two independent checks are applied to each keyword argument, selected by
	    the argument's name:

	    * data arguments (``X``, ``X_test``, ``y``, ``y_test``, ``y_true``,
	      ``y_probas``, ``x_labels``, ``y_labels``, ``matrix_values``) must be an
	      instance of one of the accepted container types;
	    * estimator arguments (``model``, ``clf``, ``binary_clf``, ``regressor``,
	      ``clusterer``) must be of the matching estimator kind.

	    Args:
	        **kwargs: Inputs to validate, keyed by their parameter name. Names
	            not listed above are ignored.

	    Returns:
	        bool: ``True`` when every check passed, ``False`` otherwise. Each
	        failure is reported via ``wandb.termerror``.
	    """
	    np = util.get_module("numpy", required="Logging plots requires numpy")
	    pd = util.get_module("pandas", required="Logging dataframes requires pandas")
	    _ = util.get_module("scipy", required="Logging scipy matrices requires scipy")

	    base = util.get_module(
	        "sklearn.base",
	        "roc requires the scikit base submodule, install with `pip install scikit-learn`",
	    )

	    array_arg_names = {
	        "X",
	        "X_test",
	        "y",
	        "y_test",
	        "y_true",
	        "y_probas",
	        "x_labels",
	        "y_labels",
	        "matrix_values",
	    }
	    # FIXME: do this individually
	    accepted_array_types = (
	        Sequence,
	        Iterable,
	        np.ndarray,
	        np.generic,
	        pd.DataFrame,
	        pd.Series,
	        list,
	    )

	    all_ok = True
	    for name, value in kwargs.items():
	        # check for incorrect types
	        if name in array_arg_names and not isinstance(value, accepted_array_types):
	            wandb.termerror(f"{name} is not an array. Please try again.")
	            all_ok = False

	        # check for classifier types
	        if name == "model":
	            if not (base.is_classifier(value) or base.is_regressor(value)):
	                wandb.termerror(
	                    f"{name} is not a classifier or regressor. Please try again."
	                )
	                all_ok = False
	        elif name in ("clf", "binary_clf"):
	            if not base.is_classifier(value):
	                wandb.termerror(f"{name} is not a classifier. Please try again.")
	                all_ok = False
	        elif name == "regressor":
	            if not base.is_regressor(value):
	                wandb.termerror(f"{name} is not a regressor. Please try again.")
	                all_ok = False
	        elif name == "clusterer":
	            if getattr(value, "_estimator_type", None) != "clusterer":
	                wandb.termerror(f"{name} is not a clusterer. Please try again.")
	                all_ok = False
	    return all_ok