File size: 5,702 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Data efficiency library
See sample config at https://www.deepspeed.ai/docs/config-json/data-efficiency
"""
DATA_EFFICIENCY = "data_efficiency"
DATA_EFFICIENCY_ENABLED = "enabled"
DATA_EFFICIENCY_ENABLED_DEFAULT = False
DATA_EFFICIENCY_SEED = "seed"
DATA_EFFICIENCY_SEED_DEFAULT = 1234
#########################################
# Data efficiency - Data Sampling
#########################################
DATA_SAMPLING = "data_sampling"
DATA_SAMPLING_ENABLED = "enabled"
DATA_SAMPLING_ENABLED_DEFAULT = False
DATA_SAMPLING_NUM_EPOCHS = "num_epochs"
DATA_SAMPLING_NUM_EPOCHS_DEFAULT = 1000
DATA_SAMPLING_NUM_WORKERS = "num_workers"
DATA_SAMPLING_NUM_WORKERS_DEFAULT = 0
DATA_SAMPLING_PIN_MEMORY = "pin_memory"
DATA_SAMPLING_PIN_MEMORY_DEFAULT = False
#########################################
# Data efficiency - Data Sampling - Curriculum Learning
#########################################
CURRICULUM_LEARNING = "curriculum_learning"
CURRICULUM_LEARNING_ENABLED = "enabled"
CURRICULUM_LEARNING_ENABLED_DEFAULT = False
CURRICULUM_LEARNING_CLUSTER_PATH = "data_cluster_path"
CURRICULUM_LEARNING_METRICS = "curriculum_metrics"
CURRICULUM_LEARNING_SAMPLE_PATH = "index_to_sample_path"
CURRICULUM_LEARNING_METRIC_PATH = "index_to_metric_path"
CURRICULUM_LEARNING_CLUSTERING_TYPE = "clustering_type"
CURRICULUM_LEARNING_SINGLE_CLUSTER = "single_cluster"
CURRICULUM_LEARNING_CLUSTER_PREFIX = "cluster"
CURRICULUM_LEARNING_DIFFICULTY_TYPE = "difficulty_type"
CURRICULUM_LEARNING_VALUE_BASED = "value"
CURRICULUM_LEARNING_PERCENTILE_BASED = "percentile"
CURRICULUM_LEARNING_MIN_DIFFICULTY = "min_difficulty"
CURRICULUM_LEARNING_MAX_DIFFICULTY = "max_difficulty"
CURRICULUM_LEARNING_SCHEDULE_TYPE = "schedule_type"
CURRICULUM_LEARNING_SCHEDULE_CONFIG = "schedule_config"
CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY = "difficulty"
CURRICULUM_LEARNING_SCHEDULE_MAX_STEP = "max_step"
CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP = "total_curriculum_step"
CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP = "difficulty_step"
CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE = "root_degree"
CURRICULUM_LEARNING_SCHEDULE_FIXED_DISCRETE = "fixed_discrete"
CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT = "fixed_root"
CURRICULUM_LEARNING_SCHEDULE_FIXED_LINEAR = "fixed_linear"
CURRICULUM_LEARNING_SCHEDULE_CUSTOM = "custom"
CURRICULUM_LEARNING_CURRENT_DIFFICULTY = "current_difficulty"
CURRICULUM_LEARNING_BATCH = "batch"
CURRICULUM_LEARNING_CONSUMED_SAMPLES = "consumed_samples"
CURRICULUM_LEARNING_STEP = "curriculum_step"
CURRICULUM_LEARNING_CURRENT_DIFFICULTIES = "current_difficulties"
CURRICULUM_LEARNING_DATA_CLUSTER_PATHS = "data_cluster_paths"
CURRICULUM_LEARNING_DATA_CLUSTER_CURRENT_POSITION = "data_cluster_current_position"
CURRICULUM_LEARNING_NP_RNG_STATE = "np_rng_state"
#########################################
# Data efficiency - Dynamic batching and LR scaling
#########################################
DYNAMIC_BATCHING = "dynamic_batching"
DYNAMIC_BATCHING_ENABLED = "enabled"
DYNAMIC_BATCHING_ENABLED_DEFAULT = False
DYNAMIC_BATCHING_METRICS_PATH = "metrics_path"
DYNAMIC_BATCHING_LR_SCALING_METHOD = "lr_scaling_method" # "linear" / "sqrt" / "none"
DYNAMIC_BATCHING_LR_SCALING_METHOD_DEFAULT = "linear"
DYNAMIC_BATCHING_MIN_BATCH_SIZE = "min_batch_size"
DYNAMIC_BATCHING_MIN_BATCH_SIZE_DEFAULT = 1
DYNAMIC_BATCHING_MAX_BATCH_SIZE = "max_batch_size"
DYNAMIC_BATCHING_MAX_BATCH_SIZE_DEFAULT = None
DYNAMIC_BATCHING_SEQUENCE_PICKING_ORDER = "sequence_picking_order" # "random" / "seqlen" / "dataloader"
DYNAMIC_BATCHING_SEQUENCE_PICKING_ORDER_DEFAULT = "dataloader" # "random" / "seqlen" / "dataloader"
DYNAMIC_BATCHING_MAX_TOKENS = "max_tokens"
DYNAMIC_BATCHING_VERBOSE = "verbose"
#########################################
# Curriculum Learning legacy implementation
#########################################
CURRICULUM_LEARNING_LEGACY = "curriculum_learning"
CURRICULUM_ENABLED_LEGACY = "enabled"
CURRICULUM_ENABLED_DEFAULT_LEGACY = False
#########################################
# Data efficiency - Data Routing
#########################################
DATA_ROUTING = "data_routing"
DATA_ROUTING_ENABLED = "enabled"
DATA_ROUTING_ENABLED_DEFAULT = False
#########################################
# Data efficiency - Data Routing - Random LTD
#########################################
RANDOM_LTD = "random_ltd"
RANDOM_LTD_ENABLED = "enabled"
RANDOM_LTD_ENABLED_DEFAULT = False
RANDOM_LTD_MODEL_MASK_NAME = "model_mask_name"
RANDOM_LTD_MODEL_TYPE = "model_type"
RANDOM_LTD_MICRO_BATCH_SIZE = "micro_batch_size"
RANDOM_LTD_GLOBAL_BATCH_SIZE = "global_batch_size"
RANDOM_LTD_SAMPLE_INDEX = "sample_idx"
RANDOM_LTD_ATTENTION_MASK = "attention_mask"
RANDOM_LTD_HIDDEN_STATE_ORDER = "hidden_state_order"
RANDOM_LTD_LAYER_NUM = "random_ltd_layer_num"
RANDOM_LTD_LAYER_ID = "random_ltd_layer_id"
RANDOM_LTD_TOTAL_LAYER_NUM = "total_layer_num"
RANDOM_LTD_CONSUMED_LAYER_TOKENS = "consumed_layer_tokens"
# scheduler
RANDOM_LTD_SCHEDULER = "random_ltd_schedule"
RANDOM_LTD_MAX_VALUE = "max_value"
RANDOM_LTD_MIN_VALUE = "min_value"
RANDOM_LTD_CURRENT_VALUE = "current_value"
RANDOM_LTD_SCHEDULE_CONFIG = "schedule_config"
RANDOM_LTD_INCREASE_STEP = "seq_per_step"
RANDOM_LTD_REQUIRE_STEP = "require_steps"
RANDOM_LTD_SCHEDULER_TYPE = "schedule_type"
RANDOM_LTD_CURR_STEP = "current_steps"
# learning rate schedulers
RANDOM_LTD_LAYER_TOKEN_LR_SCHEDULE = "layer_token_lr_schedule"
RANDOM_LTD_LAYER_TOKEN_LR_ENABLED = "enabled"
RANDOM_LTD_LAYER_TOKEN_LR_ENABLED_DEFAULT = False
RANDOM_LTD_TOTAL_LAYER_TOKENS = "total_layer_tokens"
RANDOM_LTD_WARMUP_TYPE = "warmup_type"
RANDOM_LTD_WARMUP_LAYER_TOKENS = "warmup_layer_tokens"
|