Safetensors
patchtst
abao commited on
Commit
f38d112
·
verified ·
1 Parent(s): 8ffbcd5

final checkpoint files

Browse files
Files changed (4) hide show
  1. config.json +60 -0
  2. generation_config.json +4 -0
  3. model.safetensors +3 -0
  4. training_info.json +407 -0
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu",
3
+ "architectures": [
4
+ "PatchTSTForPrediction"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bias": true,
8
+ "channel_attention": true,
9
+ "channel_consistent_masking": false,
10
+ "channel_rope": false,
11
+ "context_length": 512,
12
+ "d_model": 512,
13
+ "distribution_output": null,
14
+ "do_mask_input": false,
15
+ "dropout": 0.0,
16
+ "ff_dropout": 0.0,
17
+ "ffn_dim": 512,
18
+ "head_dropout": 0.0,
19
+ "huber_delta": 1.0,
20
+ "init_std": 0.02,
21
+ "loss": "mse",
22
+ "mask_type": "random",
23
+ "mask_value": 0,
24
+ "max_wavelength": 500,
25
+ "mode": "predict",
26
+ "model_type": "patchtst",
27
+ "norm_eps": 1e-05,
28
+ "norm_type": "rmsnorm",
29
+ "num_attention_heads": 8,
30
+ "num_forecast_mask_patches": 3,
31
+ "num_hidden_layers": 8,
32
+ "num_input_channels": 1,
33
+ "num_parallel_samples": 100,
34
+ "num_poly_feats": 120,
35
+ "num_rff": 256,
36
+ "num_targets": 1,
37
+ "output_range": null,
38
+ "patch_length": 16,
39
+ "patch_stride": 16,
40
+ "path_dropout": 0.0,
41
+ "poly_degrees": 2,
42
+ "pooling_type": "mean",
43
+ "positional_dropout": 0.0,
44
+ "positional_encoding_type": "sincos",
45
+ "pre_norm": true,
46
+ "prediction_length": 128,
47
+ "pretrained_encoder_path": null,
48
+ "random_mask_ratio": 0.5,
49
+ "rff_scale": 1.0,
50
+ "rff_trainable": false,
51
+ "rope_percent": 0.75,
52
+ "scaling": "std",
53
+ "share_embedding": true,
54
+ "share_projection": true,
55
+ "torch_dtype": "float32",
56
+ "transformers_version": "4.40.1",
57
+ "unmasked_channel_indices": null,
58
+ "use_cls_token": false,
59
+ "use_dynamics_embedding": true
60
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.40.1"
4
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48ded0b494c8882f772564795938bd43cc93e6d8c3a7e36e4c9ebfbf4b331280
3
+ size 85440752
training_info.json ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "mode": "predict",
4
+ "context_length": 512,
5
+ "prediction_length": 128,
6
+ "distribution_output": null,
7
+ "loss": "mse",
8
+ "huber_delta": 1.0,
9
+ "patch_length": 16,
10
+ "patch_stride": 16,
11
+ "num_hidden_layers": 8,
12
+ "d_model": 512,
13
+ "num_attention_heads": 8,
14
+ "channel_attention": true,
15
+ "ffn_dim": 512,
16
+ "norm_type": "rmsnorm",
17
+ "norm_eps": 1e-05,
18
+ "attention_dropout": 0.0,
19
+ "positional_dropout": 0.0,
20
+ "path_dropout": 0.0,
21
+ "ff_dropout": 0.0,
22
+ "bias": true,
23
+ "activation_function": "gelu",
24
+ "pre_norm": true,
25
+ "use_cls_token": false,
26
+ "init_std": 0.02,
27
+ "scaling": "std",
28
+ "do_mask_input": null,
29
+ "mask_type": "random",
30
+ "random_mask_ratio": 0.5,
31
+ "num_forecast_mask_patches": 3,
32
+ "channel_consistent_masking": false,
33
+ "unmasked_channel_indices": null,
34
+ "mask_value": 0,
35
+ "pooling_type": "mean",
36
+ "head_dropout": 0.0,
37
+ "num_parallel_samples": 100,
38
+ "channel_rope": false,
39
+ "max_wavelength": 500,
40
+ "rope_percent": 0.75,
41
+ "pretrained_encoder_path": null,
42
+ "use_dynamics_embedding": true,
43
+ "num_poly_feats": 120,
44
+ "poly_degrees": 2,
45
+ "rff_trainable": false,
46
+ "rff_scale": 1.0,
47
+ "num_rff": 256
48
+ },
49
+ "train_config": {
50
+ "seed": 99,
51
+ "max_steps": 100000,
52
+ "save_steps": 50000,
53
+ "log_steps": 1000,
54
+ "per_device_train_batch_size": 1024,
55
+ "gradient_accumulation_steps": 1,
56
+ "max_grad_norm": 1.0,
57
+ "dataloader_num_workers": 16,
58
+ "dataloader_prefetch_factor": 2,
59
+ "tf32": false,
60
+ "torch_compile": true,
61
+ "optim": "adamw_torch_fused",
62
+ "learning_rate": 0.001,
63
+ "lr_scheduler_type": "cosine",
64
+ "warmup_ratio": 0.1,
65
+ "weight_decay": 0.0,
66
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
67
+ "ddp_backend": "nccl",
68
+ "ddp_find_unused_parameters": false,
69
+ "remove_unused_columns": false
70
+ },
71
+ "all_config": {
72
+ "run_name": "pft_chattn_emb_w_poly",
73
+ "wandb": {
74
+ "log": true,
75
+ "project_name": "dystformer",
76
+ "entity": "gilpinlab",
77
+ "group_name": "fine-tuning",
78
+ "resume": false,
79
+ "tags": null
80
+ },
81
+ "patchtst": {
82
+ "mode": "predict",
83
+ "context_length": 512,
84
+ "prediction_length": 128,
85
+ "distribution_output": null,
86
+ "loss": "mse",
87
+ "huber_delta": 1.0,
88
+ "patch_length": 16,
89
+ "patch_stride": 16,
90
+ "num_hidden_layers": 8,
91
+ "d_model": 512,
92
+ "num_attention_heads": 8,
93
+ "channel_attention": true,
94
+ "ffn_dim": 512,
95
+ "norm_type": "rmsnorm",
96
+ "norm_eps": 1e-05,
97
+ "attention_dropout": 0.0,
98
+ "positional_dropout": 0.0,
99
+ "path_dropout": 0.0,
100
+ "ff_dropout": 0.0,
101
+ "bias": true,
102
+ "activation_function": "gelu",
103
+ "pre_norm": true,
104
+ "use_cls_token": false,
105
+ "init_std": 0.02,
106
+ "scaling": "std",
107
+ "do_mask_input": null,
108
+ "mask_type": "random",
109
+ "random_mask_ratio": 0.5,
110
+ "num_forecast_mask_patches": 3,
111
+ "channel_consistent_masking": false,
112
+ "unmasked_channel_indices": null,
113
+ "mask_value": 0,
114
+ "pooling_type": "mean",
115
+ "head_dropout": 0.0,
116
+ "num_parallel_samples": 100,
117
+ "channel_rope": false,
118
+ "max_wavelength": 500,
119
+ "rope_percent": 0.75,
120
+ "pretrained_encoder_path": null,
121
+ "use_dynamics_embedding": true,
122
+ "num_poly_feats": 120,
123
+ "poly_degrees": 2,
124
+ "rff_trainable": false,
125
+ "rff_scale": 1.0,
126
+ "num_rff": 256
127
+ },
128
+ "chronos": {
129
+ "model_id": "amazon/chronos-t5-mini",
130
+ "model_type": "seq2seq",
131
+ "random_init": false,
132
+ "tie_embeddings": true,
133
+ "context_length": 512,
134
+ "prediction_length": 64,
135
+ "num_samples": 20,
136
+ "n_tokens": 4096,
137
+ "n_special_tokens": 2,
138
+ "pad_token_id": 0,
139
+ "eos_token_id": 1,
140
+ "use_eos_token": true,
141
+ "tokenizer_class": "MeanScaleUniformBins",
142
+ "tokenizer_kwargs": {
143
+ "low_limit": -15.0,
144
+ "high_limit": 15.0
145
+ },
146
+ "temperature": 1.0,
147
+ "top_k": 50,
148
+ "top_p": 1.0
149
+ },
150
+ "train": {
151
+ "seed": 99,
152
+ "max_steps": 100000,
153
+ "save_steps": 50000,
154
+ "log_steps": 1000,
155
+ "per_device_train_batch_size": 1024,
156
+ "gradient_accumulation_steps": 1,
157
+ "max_grad_norm": 1.0,
158
+ "dataloader_num_workers": 16,
159
+ "dataloader_prefetch_factor": 2,
160
+ "tf32": false,
161
+ "torch_compile": true,
162
+ "optim": "adamw_torch_fused",
163
+ "learning_rate": 0.001,
164
+ "lr_scheduler_type": "cosine",
165
+ "warmup_ratio": 0.1,
166
+ "weight_decay": 0.0,
167
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
168
+ "ddp_backend": "nccl",
169
+ "ddp_find_unused_parameters": false,
170
+ "remove_unused_columns": false
171
+ },
172
+ "scheduler": {
173
+ "enabled": false,
174
+ "schedule_value_name": "noise_scale",
175
+ "schedule_name": "cosine",
176
+ "epoch_stop": 0.5,
177
+ "init_value": 1.0,
178
+ "final_value": 0.0,
179
+ "eps": 0.008,
180
+ "num_steps": 4,
181
+ "decay_rate": 8.0
182
+ },
183
+ "eval": {
184
+ "mode": "predict",
185
+ "data_path": "/stor/work/AMDG_Gilpin_Summer2024/data/test/",
186
+ "checkpoint_path": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints",
187
+ "device": "cuda:7",
188
+ "torch_dtype": "float32",
189
+ "batch_size": 32,
190
+ "num_systems": 10,
191
+ "sliding_context": false,
192
+ "metric_names": [
193
+ "mse",
194
+ "mae",
195
+ "smape",
196
+ "r2_score",
197
+ "spearman"
198
+ ],
199
+ "forecast_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/forecasts",
200
+ "labels_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/labels",
201
+ "completions_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/completions",
202
+ "patch_input_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/patch_input",
203
+ "timestep_masks_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/timestep_masks",
204
+ "metrics_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/metrics",
205
+ "metrics_fname": "metrics.json",
206
+ "overwrite": false,
207
+ "seed": 42,
208
+ "parallel_sample_reduction": "mean",
209
+ "limit_prediction_length": true,
210
+ "prediction_length": 64,
211
+ "num_test_instances": 1,
212
+ "window_style": "sampled",
213
+ "window_stride": 1,
214
+ "split_coords": false,
215
+ "verbose": false,
216
+ "use_channel_sampler": false,
217
+ "channel_sampler": {
218
+ "num_channels": 3,
219
+ "num_samples": 2
220
+ }
221
+ },
222
+ "run_metrics": {
223
+ "wandb_run_id": null,
224
+ "plot_dir": "figs",
225
+ "save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/run_metrics",
226
+ "save_fname": "metrics.json"
227
+ },
228
+ "train_data_dirs": [
229
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train",
230
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train_z5_z10",
231
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train",
232
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train_z5_z10"
233
+ ],
234
+ "extra_train_data_paths": null,
235
+ "probability": null,
236
+ "shuffle_buffer_length": 100000,
237
+ "min_past": 60,
238
+ "max_missing_prop": 0.9,
239
+ "fixed_dim": 3,
240
+ "augmentations": {
241
+ "augmentation_rate": 0.2,
242
+ "probabilities": [
243
+ 0.3333333333333333,
244
+ 0.3333333333333333,
245
+ 0.3333333333333333,
246
+ 0.0,
247
+ 0.0
248
+ ],
249
+ "dim_range": [
250
+ 3,
251
+ 8
252
+ ],
253
+ "lag_range": [
254
+ 1,
255
+ 10
256
+ ],
257
+ "phase_surrogate_cutoff": 1.0,
258
+ "mode_range": [
259
+ 5,
260
+ 15
261
+ ],
262
+ "max_wavenumber": 10.0,
263
+ "max_amp": 10.0
264
+ },
265
+ "sampling": {
266
+ "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/",
267
+ "sys_class": "continuous_no_delay",
268
+ "test_split": 0.3,
269
+ "split_prefix": null,
270
+ "rseed": 999,
271
+ "ic_rseed": 888,
272
+ "num_points": 4096,
273
+ "num_periods": 40,
274
+ "num_periods_min": 20,
275
+ "num_periods_max": 60,
276
+ "num_ics": 1,
277
+ "num_param_perturbations": 4,
278
+ "param_scale": 0.5,
279
+ "split_coords": false,
280
+ "standardize": false,
281
+ "verbose": false,
282
+ "multiprocessing": true,
283
+ "debug_system": null,
284
+ "silence_integration_errors": false,
285
+ "save_params": true,
286
+ "save_traj_stats": false,
287
+ "ignore_probability": 0.0,
288
+ "sign_match_probability": 0.5,
289
+ "atol": 1e-10,
290
+ "rtol": 1e-09,
291
+ "reference_traj": {
292
+ "length": 4096,
293
+ "transient": 0.5,
294
+ "n_periods": 40,
295
+ "atol": 1e-07,
296
+ "rtol": 1e-06
297
+ }
298
+ },
299
+ "validator": {
300
+ "enable": true,
301
+ "verbose": false,
302
+ "transient_time_frac": 0.05,
303
+ "plot_save_dir": null,
304
+ "save_failed_trajs": false,
305
+ "attractor_tests": [
306
+ "check_not_linear",
307
+ "check_boundedness",
308
+ "check_not_fixed_point",
309
+ "check_zero_one_test",
310
+ "check_power_spectrum",
311
+ "check_stationarity"
312
+ ]
313
+ },
314
+ "events": {
315
+ "max_duration": 300,
316
+ "instability_threshold": 10000.0,
317
+ "min_step": 1e-10,
318
+ "verbose": true
319
+ },
320
+ "skew": {
321
+ "num_pairs": 5000,
322
+ "pairs_rseed": 123,
323
+ "sys_idx_low": 0,
324
+ "sys_idx_high": null,
325
+ "normalization_strategy": "flow_rms",
326
+ "randomize_driver_indices": true,
327
+ "transform_scales": true,
328
+ "train_nonskew_path": null,
329
+ "test_nonskew_path": null,
330
+ "coupling_map_type": "additive",
331
+ "coupling_map": {
332
+ "transform_scales": false,
333
+ "randomize_driver_indices": true,
334
+ "normalization_strategy": "flow_rms",
335
+ "random_seed": 0
336
+ }
337
+ },
338
+ "analysis": {
339
+ "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data",
340
+ "split": "copy/final_skew40/train",
341
+ "num_samples": 1,
342
+ "one_dim_target": false,
343
+ "save_dir": "outputs",
344
+ "plots_dir": "figures",
345
+ "compute_quantile_limits": false,
346
+ "compute_max_lyapunov_exponents": false,
347
+ "filter_ensemble": true,
348
+ "filter_json_fname": "failed_samples",
349
+ "verbose": true,
350
+ "attractor_tests": [
351
+ "check_zero_one_test"
352
+ ],
353
+ "check_not_transient": {
354
+ "max_transient_prop": 0.2,
355
+ "atol": 0.001
356
+ },
357
+ "check_stationarity": {
358
+ "p_value": 0.05
359
+ },
360
+ "check_boundedness": {
361
+ "threshold": 10000.0,
362
+ "max_zscore": 5,
363
+ "eps": 1e-10
364
+ },
365
+ "check_zero_one_test": {
366
+ "threshold": 0.2,
367
+ "strategy": "score"
368
+ }
369
+ }
370
+ },
371
+ "job_info": {
372
+ "cuda_available": true,
373
+ "device_count": 4,
374
+ "device_names": {
375
+ "0": "AMD Instinct MI100",
376
+ "1": "AMD Instinct MI100",
377
+ "2": "AMD Instinct MI100",
378
+ "3": "AMD Instinct MI100"
379
+ },
380
+ "mem_info": {
381
+ "0": [
382
+ 4438360064,
383
+ 34342961152
384
+ ],
385
+ "1": [
386
+ 4429185024,
387
+ 34342961152
388
+ ],
389
+ "2": [
390
+ 4456448000,
391
+ 34342961152
392
+ ],
393
+ "3": [
394
+ 4462739456,
395
+ 34342961152
396
+ ]
397
+ },
398
+ "torchelastic_launched": true,
399
+ "world_size": 4,
400
+ "python_version": "3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]",
401
+ "torch_version": "2.2.2+rocm5.7",
402
+ "numpy_version": "1.26.4",
403
+ "gluonts_version": "0.15.1",
404
+ "transformers_version": "4.40.1",
405
+ "accelerate_version": "0.34.2"
406
+ }
407
+ }