from deepspeed.runtime.config_utils import DeepSpeedConfigModel
from .fp16.loss_scaler import (
    INITIAL_LOSS_SCALE,
    SCALE_WINDOW,
    DELAYED_SHIFT,
    CONSECUTIVE_HYSTERESIS,
    MIN_LOSS_SCALE,
)


BFLOAT16_FORMAT = '''
BFLOAT16 parameters should be of the format:
"bf16": {
  "enabled": true,
  "immediate_grad_update": false,
  "check_grad_overflow": false
}
'''
BFLOAT16 = "bf16"
BFLOAT16_OLD = "bfloat16"  # legacy section name, kept for backwards compatibility


def get_bfloat16_config(param_dict):
    bf16_config_dict = param_dict.get(BFLOAT16, None)
    if bf16_config_dict is None:
        # Fall back to the legacy "bfloat16" section name.
        bf16_config_dict = param_dict.get(BFLOAT16_OLD, {})
    return DeepSpeedBF16Config(**bf16_config_dict)


class DeepSpeedBF16Config(DeepSpeedConfigModel):
    """
    For bfloat16 configuration
    """

    enabled: bool = False
    """
    Enable bfloat16 mixed-precision training/inference
    """

    immediate_grad_update: bool = False
    """
    Apply gradient updates immediately rather than deferring them.
    """

    check_grad_overflow: bool = False
    """
    Check for gradient overflows and underflows
    """


FP16_FORMAT = '''
FP16 parameters should be of the format:
"fp16": {
  "enabled": true,
  "auto_cast": false,
  "loss_scale": 0,
  "initial_scale_power": 16,
  "loss_scale_window": 1000,
  "hysteresis": 2,
  "consecutive_hysteresis": false,
  "min_loss_scale": 1
}
'''
FP16 = "fp16"


def get_float16_config(param_dict):
    fp16_config_dict = param_dict.get(FP16, {})
    return DeepSpeedFP16Config(**fp16_config_dict)
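

# Illustrative usage sketch (comments only, not part of the module): an "fp16"
# section with the default loss_scale of 0 selects dynamic loss scaling, and
# initial_dynamic_scale() on the class below derives the starting scale from
# initial_scale_power. The dict below is hypothetical.
#
#     ds_config = {"fp16": {"enabled": True, "initial_scale_power": 12}}
#     fp16_cfg = get_float16_config(ds_config)
#     fp16_cfg.enabled                  # True
#     fp16_cfg.loss_scale               # 0 -> dynamic loss scaling
#     fp16_cfg.initial_dynamic_scale()  # 2**12 == 4096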


class DeepSpeedFP16Config(DeepSpeedConfigModel):
    """
    For float16 configuration
    """

    enabled: bool = False
    """
    Enable fp16 mixed-precision training/inference
    """

    auto_cast: bool = False
    """
    Automatically cast inputs to fp16
    """

    loss_scale: float = 0
    """
    Loss scale value. The default of 0 enables dynamic loss scaling; any other value is used as a static loss scale.
    """

    initial_scale_power: int = 16
    """
    For dynamic loss scaling, the initial loss scale is set to 2^{initial_scale_power}.
    """

    loss_scale_window: int = 1000
    """
    Interval, in iterations, over which the dynamic loss scale is raised or lowered.
    """

    hysteresis: int = 2
    """
    Delay shift in dynamic loss scaling.
    """

    consecutive_hysteresis: bool = False
    """
    Refill the hysteresis counter when an iteration does not overflow/underflow.
    """

    min_loss_scale: int = 1
    """
    Minimum dynamic loss scale value.
    """

    fp16_master_weights_and_grads: bool = False
    """
    Maintain master weights in optimizer state as fp16 instead of fp32 (valid with DeepSpeedCPUAdam only).
    """

    def initial_dynamic_scale(self):
        return 2**self.initial_scale_power

    def dynamic_loss_scale_args(self):
        return {
            INITIAL_LOSS_SCALE: 2**self.initial_scale_power,
            SCALE_WINDOW: self.loss_scale_window,
            DELAYED_SHIFT: self.hysteresis,
            CONSECUTIVE_HYSTERESIS: self.consecutive_hysteresis,
            MIN_LOSS_SCALE: self.min_loss_scale,
        }
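

# Illustrative sketch (comments only, not part of the module): with the default
# field values above, dynamic_loss_scale_args() packages the dynamic
# loss-scaling settings into a dict keyed by the constants imported from
# .fp16.loss_scaler (keys shown below by constant name).
#
#     cfg = DeepSpeedFP16Config()
#     cfg.initial_dynamic_scale()    # 2**16 == 65536
#     cfg.dynamic_loss_scale_args()
#     # {INITIAL_LOSS_SCALE: 65536, SCALE_WINDOW: 1000, DELAYED_SHIFT: 2,
#     #  CONSECUTIVE_HYSTERESIS: False, MIN_LOSS_SCALE: 1}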