"""Data efficiency library.

See sample config at https://www.deepspeed.ai/docs/config-json/data-efficiency
"""

#########################################
# Data efficiency (top-level section)
#########################################
# Name of the top-level section, followed by the key names used inside it
# and the default value paired with each key.
DATA_EFFICIENCY = "data_efficiency"
DATA_EFFICIENCY_ENABLED = "enabled"
DATA_EFFICIENCY_ENABLED_DEFAULT = False
DATA_EFFICIENCY_SEED = "seed"
DATA_EFFICIENCY_SEED_DEFAULT = 1234

#########################################
# Data efficiency - Data sampling
#########################################
# Section name, then key names; each *_DEFAULT constant is the default
# associated with the key declared immediately before it.
DATA_SAMPLING = "data_sampling"
DATA_SAMPLING_ENABLED = "enabled"
DATA_SAMPLING_ENABLED_DEFAULT = False
DATA_SAMPLING_NUM_EPOCHS = "num_epochs"
DATA_SAMPLING_NUM_EPOCHS_DEFAULT = 1000
DATA_SAMPLING_NUM_WORKERS = "num_workers"
DATA_SAMPLING_NUM_WORKERS_DEFAULT = 0
DATA_SAMPLING_PIN_MEMORY = "pin_memory"
DATA_SAMPLING_PIN_MEMORY_DEFAULT = False

#########################################
# Data efficiency - Curriculum learning
#########################################
# Section name and its enable switch.
CURRICULUM_LEARNING = "curriculum_learning"
CURRICULUM_LEARNING_ENABLED = "enabled"
CURRICULUM_LEARNING_ENABLED_DEFAULT = False

# Path / metric key names.
CURRICULUM_LEARNING_CLUSTER_PATH = "data_cluster_path"
CURRICULUM_LEARNING_METRICS = "curriculum_metrics"
CURRICULUM_LEARNING_SAMPLE_PATH = "index_to_sample_path"
CURRICULUM_LEARNING_METRIC_PATH = "index_to_metric_path"

# Clustering key name, plus the "single_cluster" and "cluster" string
# constants (by name: a clustering-type value and a cluster prefix).
CURRICULUM_LEARNING_CLUSTERING_TYPE = "clustering_type"
CURRICULUM_LEARNING_SINGLE_CLUSTER = "single_cluster"
CURRICULUM_LEARNING_CLUSTER_PREFIX = "cluster"

# Difficulty key names, plus the "value" / "percentile" string constants.
CURRICULUM_LEARNING_DIFFICULTY_TYPE = "difficulty_type"
CURRICULUM_LEARNING_VALUE_BASED = "value"
CURRICULUM_LEARNING_PERCENTILE_BASED = "percentile"
CURRICULUM_LEARNING_MIN_DIFFICULTY = "min_difficulty"
CURRICULUM_LEARNING_MAX_DIFFICULTY = "max_difficulty"

# Schedule key names, followed by the schedule-kind string constants
# ("fixed_discrete", "fixed_root", "fixed_linear", "custom").
CURRICULUM_LEARNING_SCHEDULE_TYPE = "schedule_type"
CURRICULUM_LEARNING_SCHEDULE_CONFIG = "schedule_config"
CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY = "difficulty"
CURRICULUM_LEARNING_SCHEDULE_MAX_STEP = "max_step"
CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP = "total_curriculum_step"
CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP = "difficulty_step"
CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE = "root_degree"
CURRICULUM_LEARNING_SCHEDULE_FIXED_DISCRETE = "fixed_discrete"
CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT = "fixed_root"
CURRICULUM_LEARNING_SCHEDULE_FIXED_LINEAR = "fixed_linear"
CURRICULUM_LEARNING_SCHEDULE_CUSTOM = "custom"
CURRICULUM_LEARNING_CURRENT_DIFFICULTY = "current_difficulty"

# NOTE(review): the names below look like keys for runtime/bookkeeping
# values (step counters, positions, RNG state) rather than user-facing
# settings -- confirm against the code that reads them.
CURRICULUM_LEARNING_BATCH = "batch"
CURRICULUM_LEARNING_CONSUMED_SAMPLES = "consumed_samples"
CURRICULUM_LEARNING_STEP = "curriculum_step"
CURRICULUM_LEARNING_CURRENT_DIFFICULTIES = "current_difficulties"
CURRICULUM_LEARNING_DATA_CLUSTER_PATHS = "data_cluster_paths"
CURRICULUM_LEARNING_DATA_CLUSTER_CURRENT_POSITION = "data_cluster_current_position"
CURRICULUM_LEARNING_NP_RNG_STATE = "np_rng_state"

#########################################
# Data efficiency - Dynamic batching
#########################################
# Section name, then key names; each *_DEFAULT constant is the default
# associated with the key declared immediately before it.
DYNAMIC_BATCHING = "dynamic_batching"
DYNAMIC_BATCHING_ENABLED = "enabled"
DYNAMIC_BATCHING_ENABLED_DEFAULT = False
DYNAMIC_BATCHING_METRICS_PATH = "metrics_path"
DYNAMIC_BATCHING_LR_SCALING_METHOD = "lr_scaling_method"
DYNAMIC_BATCHING_LR_SCALING_METHOD_DEFAULT = "linear"
DYNAMIC_BATCHING_MIN_BATCH_SIZE = "min_batch_size"
DYNAMIC_BATCHING_MIN_BATCH_SIZE_DEFAULT = 1
DYNAMIC_BATCHING_MAX_BATCH_SIZE = "max_batch_size"
DYNAMIC_BATCHING_MAX_BATCH_SIZE_DEFAULT = None
DYNAMIC_BATCHING_SEQUENCE_PICKING_ORDER = "sequence_picking_order"
DYNAMIC_BATCHING_SEQUENCE_PICKING_ORDER_DEFAULT = "dataloader"
# These two keys have no default constant declared in this module.
DYNAMIC_BATCHING_MAX_TOKENS = "max_tokens"
DYNAMIC_BATCHING_VERBOSE = "verbose"

#########################################
# Curriculum learning (legacy names)
#########################################
# The legacy section name has the same string value as the non-legacy
# curriculum-learning section name ("curriculum_learning").
CURRICULUM_LEARNING_LEGACY = "curriculum_learning"

CURRICULUM_ENABLED_LEGACY = "enabled"
CURRICULUM_ENABLED_DEFAULT_LEGACY = False

#########################################
# Data efficiency - Data routing
#########################################
# Section name and its enable switch with default.
DATA_ROUTING = "data_routing"
DATA_ROUTING_ENABLED = "enabled"
DATA_ROUTING_ENABLED_DEFAULT = False

#########################################
# Data efficiency - Random LTD
#########################################
# Section name and its enable switch with default.
RANDOM_LTD = "random_ltd"
RANDOM_LTD_ENABLED = "enabled"
RANDOM_LTD_ENABLED_DEFAULT = False

# Model / batch / layer key names.
RANDOM_LTD_MODEL_MASK_NAME = "model_mask_name"
RANDOM_LTD_MODEL_TYPE = "model_type"
RANDOM_LTD_MICRO_BATCH_SIZE = "micro_batch_size"
RANDOM_LTD_GLOBAL_BATCH_SIZE = "global_batch_size"
RANDOM_LTD_SAMPLE_INDEX = "sample_idx"
RANDOM_LTD_ATTENTION_MASK = "attention_mask"
RANDOM_LTD_HIDDEN_STATE_ORDER = "hidden_state_order"
RANDOM_LTD_LAYER_NUM = "random_ltd_layer_num"
RANDOM_LTD_LAYER_ID = "random_ltd_layer_id"
RANDOM_LTD_TOTAL_LAYER_NUM = "total_layer_num"
RANDOM_LTD_CONSUMED_LAYER_TOKENS = "consumed_layer_tokens"

# Scheduler key names.
# Note: the constant is named *_SCHEDULER but its value is
# "random_ltd_schedule" (no trailing "r"); the value is kept exactly as is.
RANDOM_LTD_SCHEDULER = "random_ltd_schedule"
RANDOM_LTD_MAX_VALUE = "max_value"
RANDOM_LTD_MIN_VALUE = "min_value"
RANDOM_LTD_CURRENT_VALUE = "current_value"
RANDOM_LTD_SCHEDULE_CONFIG = "schedule_config"
RANDOM_LTD_INCREASE_STEP = "seq_per_step"
RANDOM_LTD_REQUIRE_STEP = "require_steps"
RANDOM_LTD_SCHEDULER_TYPE = "schedule_type"
RANDOM_LTD_CURR_STEP = "current_steps"

# Layer-token learning-rate schedule key names and enable default.
RANDOM_LTD_LAYER_TOKEN_LR_SCHEDULE = "layer_token_lr_schedule"
RANDOM_LTD_LAYER_TOKEN_LR_ENABLED = "enabled"
RANDOM_LTD_LAYER_TOKEN_LR_ENABLED_DEFAULT = False
RANDOM_LTD_TOTAL_LAYER_TOKENS = "total_layer_tokens"
RANDOM_LTD_WARMUP_TYPE = "warmup_type"
RANDOM_LTD_WARMUP_LAYER_TOKENS = "warmup_layer_tokens"