| { | |
| "__name__": "Config: Transformer config for WanModel_S2V", | |
| "_class_name": "WanModel_S2V", | |
| "_diffusers_version": "0.34.0", | |
| "adain_mode": "attn_norm", | |
| "add_last_motion": true, | |
| "audio_dim": 1024, | |
| "audio_inject_layers": [ | |
| 0, | |
| 4, | |
| 8, | |
| 12, | |
| 16, | |
| 20, | |
| 24, | |
| 27, | |
| 30, | |
| 33, | |
| 36, | |
| 39 | |
| ], | |
| "cond_dim": 16, | |
| "dim": 5120, | |
| "enable_adain": true, | |
| "enable_framepack": true, | |
| "enable_motioner": false, | |
| "enable_tsm": false, | |
| "eps": 1e-06, | |
| "ffn_dim": 13824, | |
| "framepack_drop_mode": "padd", | |
| "freq_dim": 256, | |
| "in_dim": 16, | |
| "model_type": "s2v", | |
| "motion_token_num": 1024, | |
| "num_audio_token": 4, | |
| "num_heads": 40, | |
| "num_layers": 40, | |
| "out_dim": 16, | |
| "text_len": 512, | |
| "trainable_token_pos_emb": false, | |
| "zero_init": true, | |
| "zero_timestep": true | |
| } | |