{
  "ngpus": 4,
  "gradient_accumulation_steps": 8,
  "pretrain_autoregressive_path": "/home/toolkit/research-diffcodegen/exp_local/openwebtext/mdlm-autoregressive/org-DiTAR-absorb-v2/checkpoints-meta/checkpoint.pth",
  "tokenizer": {
    "tokens": 50257,
    "model": "gpt2"
  },
  "training": {
    "batch_size": 512,
    "accum": 8,
    "n_iters": 1000000,
    "snapshot_freq": 500,
    "log_freq": 100,
    "eval_freq": 500,
    "snapshot_freq_for_preemption": 3000,
    "weight": "standard",
    "snapshot_sampling": true,
    "ema": 0.9999,
    "warmup_iter": -1
  },
  "data": {
    "train": "openwebtext-train",
    "valid": "wikitext103",
    "cache_dir": "/home/toolkit/research-diffcodegen/data",
    "debug": false
  },
  "graph": {
    "type": "QGamma",
    "gamma": 0.05,
    "file": "/home/toolkit/research-diffcodegen/data",
    "report_all": false,
    "expanded_sigma": true
  },
  "noise": {
    "type": "loglinear",
    "sigma_min": 0.0001,
    "sigma_max": 2.0,
    "ar_diffusion": false,
    "expanded_sigma": true
  },
  "sampling": {
    "predictor": "analytic",
    "steps_per_level": 1,
    "noise_removal": true,
    "strategy": "direct",
    "strategy_param": 0.9
  },
  "annealing": {
    "type": "block",
    "efficient": false,
    "width": 1024,
    "tau": 2048,
    "eval_tau": 256,
    "steps_per_level": 1,
    "sampling_method": "SAR",
    "diffusion_loss_weight": 1.0,
    "ce_loss_weight": 4.0,
    "sampling_eps": 0.0001,
    "attention": {
      "context_type": "block_causal",
      "block_type": "full"
    },
    "match_inference": true
  },
  "eval": {
    "batch_size": 32,
    "perplexity": true,
    "perplexity_batch_size": 16
  },
  "optim": {
    "weight_decay": 0.0,
    "optimizer": "AdamW",
    "lr": 0.0003,
    "beta1": 0.9,
    "beta2": 0.999,
    "eps": 1e-08,
    "warmup": 10000,
    "grad_clip": 1.0,
    "scheduler": "lambda"
  },
  "experiment": {
    "name": "QGamma0.05-v2",
    "wandb_project": "debug-QGamma"
  },
  "model": {
    "name": "gamma_hdlm",
    "type": "ddit",
    "hidden_size": 768,
    "cond_dim": 128,
    "length": 1024,
    "n_blocks": 12,
    "n_heads": 12,
    "scale_by_sigma": false,
    "dropout": 0.1,
    "transformer_sigma_conditioning": true,
    "hybrid_sigma_embedding": true,
    "post_process_logits": true,
    "use_timestep_embedding": true
  },
  "model_type": "gamma_hybrid"
}
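For reference, a minimal sketch of reading this config and relating the batch-size fields, assuming the file is saved as `config.json` (hypothetical filename) and that the trainer splits the global `training.batch_size` across `training.accum` gradient-accumulation steps and `ngpus` devices; that split is an assumption about how the training code consumes these fields, not something stated by the config itself.

```python
import json

# Assumed filename; adjust to wherever this config actually lives in the repo.
with open("config.json") as f:
    cfg = json.load(f)

# Global batch of 512 with 8 accumulation steps on 4 GPUs implies a per-GPU
# micro-batch of 512 // (8 * 4) = 16, under the split assumption noted above.
micro_batch = cfg["training"]["batch_size"] // (cfg["training"]["accum"] * cfg["ngpus"])
print(f"per-GPU micro-batch: {micro_batch}")  # 16

# A few of the model settings defined above, read back for inspection.
m = cfg["model"]
print(f"model: {m['name']} ({m['n_blocks']} blocks, {m['n_heads']} heads, hidden {m['hidden_size']})")
```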