final checkpoint files

Browse files

Files changed (4) hide show

config.json +60 -0
generation_config.json +4 -0
model.safetensors +3 -0
training_info.json +407 -0

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "activation_function": "gelu",
+  "architectures": [
+    "PatchTSTForPrediction"
+  ],
+  "attention_dropout": 0.0,
+  "bias": true,
+  "channel_attention": true,
+  "channel_consistent_masking": false,
+  "channel_rope": false,
+  "context_length": 512,
+  "d_model": 512,
+  "distribution_output": null,
+  "do_mask_input": false,
+  "dropout": 0.0,
+  "ff_dropout": 0.0,
+  "ffn_dim": 512,
+  "head_dropout": 0.0,
+  "huber_delta": 1.0,
+  "init_std": 0.02,
+  "loss": "mse",
+  "mask_type": "random",
+  "mask_value": 0,
+  "max_wavelength": 500,
+  "mode": "predict",
+  "model_type": "patchtst",
+  "norm_eps": 1e-05,
+  "norm_type": "rmsnorm",
+  "num_attention_heads": 8,
+  "num_forecast_mask_patches": 3,
+  "num_hidden_layers": 8,
+  "num_input_channels": 1,
+  "num_parallel_samples": 100,
+  "num_poly_feats": 120,
+  "num_rff": 256,
+  "num_targets": 1,
+  "output_range": null,
+  "patch_length": 16,
+  "patch_stride": 16,
+  "path_dropout": 0.0,
+  "poly_degrees": 2,
+  "pooling_type": "mean",
+  "positional_dropout": 0.0,
+  "positional_encoding_type": "sincos",
+  "pre_norm": true,
+  "prediction_length": 128,
+  "pretrained_encoder_path": null,
+  "random_mask_ratio": 0.5,
+  "rff_scale": 1.0,
+  "rff_trainable": false,
+  "rope_percent": 0.75,
+  "scaling": "std",
+  "share_embedding": true,
+  "share_projection": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "unmasked_channel_indices": null,
+  "use_cls_token": false,
+  "use_dynamics_embedding": true
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.40.1"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48ded0b494c8882f772564795938bd43cc93e6d8c3a7e36e4c9ebfbf4b331280
+size 85440752

training_info.json ADDED Viewed

	@@ -0,0 +1,407 @@

+{
+    "model_config": {
+        "mode": "predict",
+        "context_length": 512,
+        "prediction_length": 128,
+        "distribution_output": null,
+        "loss": "mse",
+        "huber_delta": 1.0,
+        "patch_length": 16,
+        "patch_stride": 16,
+        "num_hidden_layers": 8,
+        "d_model": 512,
+        "num_attention_heads": 8,
+        "channel_attention": true,
+        "ffn_dim": 512,
+        "norm_type": "rmsnorm",
+        "norm_eps": 1e-05,
+        "attention_dropout": 0.0,
+        "positional_dropout": 0.0,
+        "path_dropout": 0.0,
+        "ff_dropout": 0.0,
+        "bias": true,
+        "activation_function": "gelu",
+        "pre_norm": true,
+        "use_cls_token": false,
+        "init_std": 0.02,
+        "scaling": "std",
+        "do_mask_input": null,
+        "mask_type": "random",
+        "random_mask_ratio": 0.5,
+        "num_forecast_mask_patches": 3,
+        "channel_consistent_masking": false,
+        "unmasked_channel_indices": null,
+        "mask_value": 0,
+        "pooling_type": "mean",
+        "head_dropout": 0.0,
+        "num_parallel_samples": 100,
+        "channel_rope": false,
+        "max_wavelength": 500,
+        "rope_percent": 0.75,
+        "pretrained_encoder_path": null,
+        "use_dynamics_embedding": true,
+        "num_poly_feats": 120,
+        "poly_degrees": 2,
+        "rff_trainable": false,
+        "rff_scale": 1.0,
+        "num_rff": 256
+    },
+    "train_config": {
+        "seed": 99,
+        "max_steps": 100000,
+        "save_steps": 50000,
+        "log_steps": 1000,
+        "per_device_train_batch_size": 1024,
+        "gradient_accumulation_steps": 1,
+        "max_grad_norm": 1.0,
+        "dataloader_num_workers": 16,
+        "dataloader_prefetch_factor": 2,
+        "tf32": false,
+        "torch_compile": true,
+        "optim": "adamw_torch_fused",
+        "learning_rate": 0.001,
+        "lr_scheduler_type": "cosine",
+        "warmup_ratio": 0.1,
+        "weight_decay": 0.0,
+        "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
+        "ddp_backend": "nccl",
+        "ddp_find_unused_parameters": false,
+        "remove_unused_columns": false
+    },
+    "all_config": {
+        "run_name": "pft_chattn_emb_w_poly",
+        "wandb": {
+            "log": true,
+            "project_name": "dystformer",
+            "entity": "gilpinlab",
+            "group_name": "fine-tuning",
+            "resume": false,
+            "tags": null
+        },
+        "patchtst": {
+            "mode": "predict",
+            "context_length": 512,
+            "prediction_length": 128,
+            "distribution_output": null,
+            "loss": "mse",
+            "huber_delta": 1.0,
+            "patch_length": 16,
+            "patch_stride": 16,
+            "num_hidden_layers": 8,
+            "d_model": 512,
+            "num_attention_heads": 8,
+            "channel_attention": true,
+            "ffn_dim": 512,
+            "norm_type": "rmsnorm",
+            "norm_eps": 1e-05,
+            "attention_dropout": 0.0,
+            "positional_dropout": 0.0,
+            "path_dropout": 0.0,
+            "ff_dropout": 0.0,
+            "bias": true,
+            "activation_function": "gelu",
+            "pre_norm": true,
+            "use_cls_token": false,
+            "init_std": 0.02,
+            "scaling": "std",
+            "do_mask_input": null,
+            "mask_type": "random",
+            "random_mask_ratio": 0.5,
+            "num_forecast_mask_patches": 3,
+            "channel_consistent_masking": false,
+            "unmasked_channel_indices": null,
+            "mask_value": 0,
+            "pooling_type": "mean",
+            "head_dropout": 0.0,
+            "num_parallel_samples": 100,
+            "channel_rope": false,
+            "max_wavelength": 500,
+            "rope_percent": 0.75,
+            "pretrained_encoder_path": null,
+            "use_dynamics_embedding": true,
+            "num_poly_feats": 120,
+            "poly_degrees": 2,
+            "rff_trainable": false,
+            "rff_scale": 1.0,
+            "num_rff": 256
+        },
+        "chronos": {
+            "model_id": "amazon/chronos-t5-mini",
+            "model_type": "seq2seq",
+            "random_init": false,
+            "tie_embeddings": true,
+            "context_length": 512,
+            "prediction_length": 64,
+            "num_samples": 20,
+            "n_tokens": 4096,
+            "n_special_tokens": 2,
+            "pad_token_id": 0,
+            "eos_token_id": 1,
+            "use_eos_token": true,
+            "tokenizer_class": "MeanScaleUniformBins",
+            "tokenizer_kwargs": {
+                "low_limit": -15.0,
+                "high_limit": 15.0
+            },
+            "temperature": 1.0,
+            "top_k": 50,
+            "top_p": 1.0
+        },
+        "train": {
+            "seed": 99,
+            "max_steps": 100000,
+            "save_steps": 50000,
+            "log_steps": 1000,
+            "per_device_train_batch_size": 1024,
+            "gradient_accumulation_steps": 1,
+            "max_grad_norm": 1.0,
+            "dataloader_num_workers": 16,
+            "dataloader_prefetch_factor": 2,
+            "tf32": false,
+            "torch_compile": true,
+            "optim": "adamw_torch_fused",
+            "learning_rate": 0.001,
+            "lr_scheduler_type": "cosine",
+            "warmup_ratio": 0.1,
+            "weight_decay": 0.0,
+            "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
+            "ddp_backend": "nccl",
+            "ddp_find_unused_parameters": false,
+            "remove_unused_columns": false
+        },
+        "scheduler": {
+            "enabled": false,
+            "schedule_value_name": "noise_scale",
+            "schedule_name": "cosine",
+            "epoch_stop": 0.5,
+            "init_value": 1.0,
+            "final_value": 0.0,
+            "eps": 0.008,
+            "num_steps": 4,
+            "decay_rate": 8.0
+        },
+        "eval": {
+            "mode": "predict",
+            "data_path": "/stor/work/AMDG_Gilpin_Summer2024/data/test/",
+            "checkpoint_path": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints",
+            "device": "cuda:7",
+            "torch_dtype": "float32",
+            "batch_size": 32,
+            "num_systems": 10,
+            "sliding_context": false,
+            "metric_names": [
+                "mse",
+                "mae",
+                "smape",
+                "r2_score",
+                "spearman"
+            ],
+            "forecast_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/forecasts",
+            "labels_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/labels",
+            "completions_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/completions",
+            "patch_input_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/patch_input",
+            "timestep_masks_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/timestep_masks",
+            "metrics_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/metrics",
+            "metrics_fname": "metrics.json",
+            "overwrite": false,
+            "seed": 42,
+            "parallel_sample_reduction": "mean",
+            "limit_prediction_length": true,
+            "prediction_length": 64,
+            "num_test_instances": 1,
+            "window_style": "sampled",
+            "window_stride": 1,
+            "split_coords": false,
+            "verbose": false,
+            "use_channel_sampler": false,
+            "channel_sampler": {
+                "num_channels": 3,
+                "num_samples": 2
+            }
+        },
+        "run_metrics": {
+            "wandb_run_id": null,
+            "plot_dir": "figs",
+            "save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/run_metrics",
+            "save_fname": "metrics.json"
+        },
+        "train_data_dirs": [
+            "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train",
+            "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train_z5_z10",
+            "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train",
+            "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train_z5_z10"
+        ],
+        "extra_train_data_paths": null,
+        "probability": null,
+        "shuffle_buffer_length": 100000,
+        "min_past": 60,
+        "max_missing_prop": 0.9,
+        "fixed_dim": 3,
+        "augmentations": {
+            "augmentation_rate": 0.2,
+            "probabilities": [
+                0.3333333333333333,
+                0.3333333333333333,
+                0.3333333333333333,
+                0.0,
+                0.0
+            ],
+            "dim_range": [
+                3,
+                8
+            ],
+            "lag_range": [
+                1,
+                10
+            ],
+            "phase_surrogate_cutoff": 1.0,
+            "mode_range": [
+                5,
+                15
+            ],
+            "max_wavenumber": 10.0,
+            "max_amp": 10.0
+        },
+        "sampling": {
+            "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/",
+            "sys_class": "continuous_no_delay",
+            "test_split": 0.3,
+            "split_prefix": null,
+            "rseed": 999,
+            "ic_rseed": 888,
+            "num_points": 4096,
+            "num_periods": 40,
+            "num_periods_min": 20,
+            "num_periods_max": 60,
+            "num_ics": 1,
+            "num_param_perturbations": 4,
+            "param_scale": 0.5,
+            "split_coords": false,
+            "standardize": false,
+            "verbose": false,
+            "multiprocessing": true,
+            "debug_system": null,
+            "silence_integration_errors": false,
+            "save_params": true,
+            "save_traj_stats": false,
+            "ignore_probability": 0.0,
+            "sign_match_probability": 0.5,
+            "atol": 1e-10,
+            "rtol": 1e-09,
+            "reference_traj": {
+                "length": 4096,
+                "transient": 0.5,
+                "n_periods": 40,
+                "atol": 1e-07,
+                "rtol": 1e-06
+            }
+        },
+        "validator": {
+            "enable": true,
+            "verbose": false,
+            "transient_time_frac": 0.05,
+            "plot_save_dir": null,
+            "save_failed_trajs": false,
+            "attractor_tests": [
+                "check_not_linear",
+                "check_boundedness",
+                "check_not_fixed_point",
+                "check_zero_one_test",
+                "check_power_spectrum",
+                "check_stationarity"
+            ]
+        },
+        "events": {
+            "max_duration": 300,
+            "instability_threshold": 10000.0,
+            "min_step": 1e-10,
+            "verbose": true
+        },
+        "skew": {
+            "num_pairs": 5000,
+            "pairs_rseed": 123,
+            "sys_idx_low": 0,
+            "sys_idx_high": null,
+            "normalization_strategy": "flow_rms",
+            "randomize_driver_indices": true,
+            "transform_scales": true,
+            "train_nonskew_path": null,
+            "test_nonskew_path": null,
+            "coupling_map_type": "additive",
+            "coupling_map": {
+                "transform_scales": false,
+                "randomize_driver_indices": true,
+                "normalization_strategy": "flow_rms",
+                "random_seed": 0
+            }
+        },
+        "analysis": {
+            "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data",
+            "split": "copy/final_skew40/train",
+            "num_samples": 1,
+            "one_dim_target": false,
+            "save_dir": "outputs",
+            "plots_dir": "figures",
+            "compute_quantile_limits": false,
+            "compute_max_lyapunov_exponents": false,
+            "filter_ensemble": true,
+            "filter_json_fname": "failed_samples",
+            "verbose": true,
+            "attractor_tests": [
+                "check_zero_one_test"
+            ],
+            "check_not_transient": {
+                "max_transient_prop": 0.2,
+                "atol": 0.001
+            },
+            "check_stationarity": {
+                "p_value": 0.05
+            },
+            "check_boundedness": {
+                "threshold": 10000.0,
+                "max_zscore": 5,
+                "eps": 1e-10
+            },
+            "check_zero_one_test": {
+                "threshold": 0.2,
+                "strategy": "score"
+            }
+        }
+    },
+    "job_info": {
+        "cuda_available": true,
+        "device_count": 4,
+        "device_names": {
+            "0": "AMD Instinct MI100",
+            "1": "AMD Instinct MI100",
+            "2": "AMD Instinct MI100",
+            "3": "AMD Instinct MI100"
+        },
+        "mem_info": {
+            "0": [
+                4438360064,
+                34342961152
+            ],
+            "1": [
+                4429185024,
+                34342961152
+            ],
+            "2": [
+                4456448000,
+                34342961152
+            ],
+            "3": [
+                4462739456,
+                34342961152
+            ]
+        },
+        "torchelastic_launched": true,
+        "world_size": 4,
+        "python_version": "3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]",
+        "torch_version": "2.2.2+rocm5.7",
+        "numpy_version": "1.26.4",
+        "gluonts_version": "0.15.1",
+        "transformers_version": "4.40.1",
+        "accelerate_version": "0.34.2"
+    }
+}