"""
Various symbolic constants used for model checkpointing.

Every name below is a plain module-level constant.  Most are strings; the
one exception is ``UNIVERSAL_CHECKPOINT_VERSION_VALUE``, which is a float.

NOTE(review): the section headings in this file are inferred from the
constant names themselves -- confirm them against the code that reads and
writes these keys before relying on the grouping.
"""

#########################################
# Optimizer-related keys (grouping inferred from names)
#########################################
OPTIMIZER_STATE_DICT = 'optimizer_state_dict'
FP32_GROUPS = 'fp32_groups'
FP32_FLAT_GROUPS = 'fp32_flat_groups'

BASE_OPTIMIZER_STATE = 'base_optimizer_state'
BASE_OPTIMIZER_STATE_STEP = 'base_optimizer_state_step'
SINGLE_PARTITION_OF_FP32_GROUPS = 'single_partition_of_fp32_groups'
PARAM_GROUPS = 'param_groups'
GROUP_PADDINGS = 'group_paddings'
PARTITION_COUNT = 'partition_count'
ZERO_STAGE = 'zero_stage'
CLIP_GRAD = 'clip_grad'
FP32_WEIGHT_KEY = 'fp32'
LOSS_SCALER = 'loss_scaler'

#########################################
# Parameter / buffer related keys (grouping inferred from names)
#########################################
PARAM = 'param'
PARAM_SHAPES = 'param_shapes'
BUFFER_NAMES = 'buffer_names'
FROZEN_PARAM_SHAPES = 'frozen_param_shapes'
FROZEN_PARAM_FRAGMENTS = 'frozen_param_fragments'

#########################################
# File-name prefixes and suffixes
#########################################
MODEL_FILE_PREFIX = 'mp_rank_'
ZERO_FILE_PREFIX = 'zero_pp_rank_'
OPTIM_FILE_SUFFIX = '_optim_states.pt'
MODEL_FILE_SUFFIX = '_model_states.pt'
LAYER_FILE_PREFIX = 'layer_'
# The two prefixes below are derived from ZERO_FILE_PREFIX, so they evaluate
# to 'bf16_zero_pp_rank_' and 'fp16_zero_pp_rank_' respectively.
BF16_ZERO_FILE_PREFIX = 'bf16_' + ZERO_FILE_PREFIX
FP16_ZERO_FILE_PREFIX = 'fp16_' + ZERO_FILE_PREFIX

#########################################
# Version key
#########################################
DS_VERSION = 'ds_version'

#########################################
# Universal checkpoint keys
#########################################
UNIVERSAL_CHECKPOINT_INFO = 'universal_checkpoint_info'
UNIVERSAL_CHECKPOINT_VERSION_KEY = 'universal_checkpoint_version'
# This is a float, not a string; comparisons against it must use a numeric
# value (e.g. ``0.2``), not ``'0.2'``.
UNIVERSAL_CHECKPOINT_VERSION_VALUE = 0.2

# Vocabulary-related keys
VOCAB_TENSOR = 'vocab_tensor'
PADDED_VOCAB_SIZE = 'padded_vocab_size'
ORIGINAL_VOCAB_SIZE = 'original_vocab_size'

# Parameter slicing / concatenation keys
PARAM_SLICE_MAPPINGS = 'param_slice_mappings'
CAT_DIM = 'cat_dim'

# Keys for parameters made up of sub-parameters (meaning inferred from the
# names -- TODO confirm against the code that consumes them).
PARAM_N_SUB_PARAMS = 'param_n_sub_params'

# Singular "PARAM": a different constant from SUB_PARAMS_SHAPE further down.
SUB_PARAM_SHAPE = 'sub_param_shape'

# Keys naming groups of parameter patterns
VOCABULARY_PARAMETER_PATTERNS = 'vocabulary_parameter_patterns'
PIPELINE_REPLICATED_PARAMETER_PATTERNS = 'pipeline_replicated_parameter_patterns'
PARAMETER_TO_AVERAGE_PATTERNS = 'parameter_to_average_patterns'
PARAMETER_WITH_ROW_PARALLELISM_PATTERNS = 'parameter_with_row_parallelism_patterns'
TP_REPLICATED_PARAMETER_PATTERNS = 'tp_replicated_parameter_patterns'
PARAMETER_WITH_2_SUB_PARAMS_CAT_DIM_0 = 'parameter_with_2_sub_params_cat_dim_0'
PARAMETER_WITH_SUB_PARAMS = 'parameter_with_sub_params'
# Plural "PARAMS": a different constant from SUB_PARAM_SHAPE above.
SUB_PARAMS_SHAPE = 'sub_params_shape'