File size: 5,258 Bytes
9c6594c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from typing import Optional

from pydantic import model_validator
from deepspeed.runtime.config_utils import DeepSpeedConfigModel


def get_monitor_config(param_dict):
    """Build a `DeepSpeedMonitorConfig` from the monitor sections of `param_dict`.

    Only the "tensorboard", "wandb", "csv_monitor" and "comet" keys are read;
    a section that is missing from `param_dict` is passed on as an empty dict.
    """
    backend_sections = {}
    for backend in ("tensorboard", "wandb", "csv_monitor", "comet"):
        backend_sections[backend] = param_dict.get(backend, {})
    return DeepSpeedMonitorConfig(**backend_sections)


class TensorBoardConfig(DeepSpeedConfigModel):
    """Configuration of the TensorBoard monitor."""

    enabled: bool = False
    """ Turns TensorBoard logging on or off. The `tensorboard` package must be installed. """

    output_path: str = ""
    """
    Directory that receives the TensorBoard logs. When left empty, the logs
    are placed under the path the training script was launched from.
    """

    job_name: str = "DeepSpeedJobName"
    """ Name of the current job; a directory with this name is created inside `output_path`. """


class WandbConfig(DeepSpeedConfigModel):
    """Configuration of the WandB monitor."""

    enabled: bool = False
    """ Turns WandB logging on or off. The `wandb` package must be installed. """

    group: Optional[str] = None
    """ WandB group name, usable for grouping several runs together. """

    team: Optional[str] = None
    """ WandB team name. """

    project: str = "deepspeed"
    """ WandB project name. """


class CSVConfig(DeepSpeedConfigModel):
    """Configuration of the CSV monitor."""

    enabled: bool = False
    """ Turns logging to local CSV files on or off. """

    output_path: str = ""
    """
    Directory that receives the csv files. When left empty, the files are
    placed under the path the training script was launched from.
    """

    job_name: str = "DeepSpeedJobName"
    """ Name of the current job; a directory with this name is created inside `output_path`. """


class CometConfig(DeepSpeedConfigModel):
    """
    Sets parameters for the Comet monitor. Comet logs data through an
    experiment object.
    https://www.comet.com/docs/v2/api-and-sdk/python-sdk/reference/Experiment/
    """

    enabled: bool = False
    """ Whether logging to Comet is enabled. Requires the `comet_ml` package to be installed. """

    samples_log_interval: int = 100
    """ Metrics will be submitted to Comet after processing every `samples_log_interval` samples. """

    project: Optional[str] = None
    """
    Comet project name. Can be set through the .comet.config file or the environment variable COMET_PROJECT_NAME
    https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#explore-comet-configuration-options
    """

    workspace: Optional[str] = None
    """
    Comet workspace name. Can be set through the .comet.config file or the environment variable COMET_WORKSPACE
    https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#explore-comet-configuration-options
    """

    api_key: Optional[str] = None
    """
    Comet API key. Can be set through the .comet.config file or the environment variable COMET_API_KEY
    https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#explore-comet-configuration-options
    """

    experiment_name: Optional[str] = None
    """
    The name of the Comet experiment to be used for logging.
    Can be set through the .comet.config file or the environment variable COMET_EXPERIMENT_NAME
    https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#explore-comet-configuration-options
    """

    experiment_key: Optional[str] = None
    """
    The key of the Comet experiment to be used for logging. Must be an alphanumeric string whose length is between 32 and 50 characters.
    Can be set through the .comet.config file or the environment variable COMET_EXPERIMENT_KEY
    https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#explore-comet-configuration-options
    """

    online: Optional[bool] = None
    """
    If True, the data will be logged to the Comet server; otherwise it will be stored locally in an offline experiment.
    Defaults to True.
    """

    mode: Optional[str] = None
    """
    Controls how the Comet experiment is started. Three options are possible:
        - "get": Continue logging to an existing experiment identified by the `experiment_key` value.
        - "create": Always create a new experiment, useful for HPO sweeps.
        - "get_or_create" (default): Starts a fresh experiment if required, or persists logging to an existing one.
    """


class DeepSpeedMonitorConfig(DeepSpeedConfigModel):
    """Groups the configuration of every supported monitoring backend."""

    tensorboard: TensorBoardConfig = {}
    """ Settings of the TensorBoard monitor; the `tensorboard` package must be installed. """

    comet: CometConfig = {}
    """ Settings of the Comet monitor; the `comet_ml` package must be installed. """

    wandb: WandbConfig = {}
    """ Settings of the WandB monitor; the `wandb` package must be installed. """

    csv_monitor: CSVConfig = {}
    """ Settings of the monitor that writes data to local CSV files. """

    @model_validator(mode="after")
    def check_enabled(self):
        """Record on the instance whether at least one monitoring backend is enabled."""
        backends = (self.tensorboard, self.wandb, self.csv_monitor, self.comet)
        # `enabled` is not a declared field of this model, so the flag is stored
        # directly in the instance `__dict__` rather than by attribute assignment.
        self.__dict__["enabled"] = any(backend.enabled for backend in backends)
        return self