nimafathi committed on
Commit 289564a · verified · 1 parent: 10f623e

Upload HDLM model with complete HF integration

Files changed (1)
  1. config.json +102 -0
config.json ADDED
@@ -0,0 +1,102 @@
+{
+  "ngpus": 4,
+  "gradient_accumulation_steps": 8,
+  "pretrain_autoregressive_path": "/home/toolkit/research-diffcodegen/exp_local/openwebtext/mdlm-autoregressive/org-DiTAR-absorb-v2/checkpoints-meta/checkpoint.pth",
+  "tokenizer": {
+    "tokens": 50257,
+    "model": "gpt2"
+  },
+  "training": {
+    "batch_size": 512,
+    "accum": 8,
+    "n_iters": 1000000,
+    "snapshot_freq": 100,
+    "log_freq": 10,
+    "eval_freq": 100,
+    "snapshot_freq_for_preemption": 3000,
+    "weight": "standard",
+    "snapshot_sampling": true,
+    "ema": 0.9999,
+    "warmup_iter": -1
+  },
+  "data": {
+    "train": "openwebtext-train",
+    "valid": "wikitext103",
+    "cache_dir": "/home/toolkit/research-diffcodegen/data",
+    "debug": false
+  },
+  "graph": {
+    "type": "QGamma",
+    "gamma": 0.01,
+    "file": "/home/toolkit/research-diffcodegen/data",
+    "report_all": false,
+    "expanded_sigma": true
+  },
+  "noise": {
+    "type": "loglinear",
+    "sigma_min": 0.0001,
+    "sigma_max": 2.0,
+    "ar_diffusion": false,
+    "expanded_sigma": true
+  },
+  "sampling": {
+    "predictor": "analytic",
+    "steps_per_level": 1,
+    "noise_removal": true,
+    "strategy": "direct",
+    "strategy_param": 0.9
+  },
+  "annealing": {
+    "type": "block",
+    "efficient": false,
+    "width": 1024,
+    "tau": 2048,
+    "eval_tau": 512,
+    "steps_per_level": 1,
+    "sampling_method": "SAR",
+    "diffusion_loss_weight": 1.0,
+    "ce_loss_weight": 4.0,
+    "sampling_eps": 0.0001,
+    "attention": {
+      "context_type": "block_causal",
+      "block_type": "full"
+    },
+    "match_inference": true
+  },
+  "eval": {
+    "batch_size": 32,
+    "perplexity": true,
+    "perplexity_batch_size": 16
+  },
+  "optim": {
+    "weight_decay": 0.0,
+    "optimizer": "AdamW",
+    "lr": 0.0003,
+    "beta1": 0.9,
+    "beta2": 0.999,
+    "eps": 1e-08,
+    "warmup": 10000,
+    "grad_clip": 1.0,
+    "scheduler": "lambda"
+  },
+  "experiment": {
+    "name": "QGamma0.01-v2",
+    "wandb_project": "debug-QGamma"
+  },
+  "model": {
+    "name": "gamma_hdlm",
+    "type": "ddit",
+    "hidden_size": 768,
+    "cond_dim": 128,
+    "length": 1024,
+    "n_blocks": 12,
+    "n_heads": 12,
+    "scale_by_sigma": false,
+    "dropout": 0.1,
+    "transformer_sigma_conditioning": true,
+    "hybrid_sigma_embedding": true,
+    "post_process_logits": true,
+    "use_timestep_embedding": true
+  },
+  "model_type": "gamma_hybrid"
+}
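
For reference, below is a minimal sketch of how the uploaded config.json could be fetched and inspected from the Hub. The repo id is a placeholder (this commit page does not show the full repository path), and the closing note about trust_remote_code is an assumption: "gamma_hybrid" is not a built-in transformers model_type, so AutoConfig/AutoModel loading would depend on custom code shipped with the repo.

    # Sketch only: fetch and inspect the config.json added in this commit.
    # The repo id below is a hypothetical placeholder, not taken from this page.
    import json
    from huggingface_hub import hf_hub_download

    repo_id = "nimafathi/hdlm"  # placeholder; substitute the actual repo id

    # Download the raw config.json from the Hub and parse it.
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    with open(config_path) as f:
        config = json.load(f)

    # A few fields from the file above: custom model type, backbone width,
    # and the block-annealing sampling method.
    print(config["model_type"])                    # "gamma_hybrid"
    print(config["model"]["hidden_size"])          # 768
    print(config["annealing"]["sampling_method"])  # "SAR"

    # Assumption, not verified here: since "gamma_hybrid" is a custom model type,
    # loading via the auto classes would require the repo to ship custom
    # configuration/modeling code and the caller to pass trust_remote_code=True:
    # from transformers import AutoConfig
    # config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)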