{
  "_class_name": "ACEStepTransformer2DModel",
  "_diffusers_version": "0.32.2",
  "attention_head_dim": 128,
  "in_channels": 8,
  "inner_dim": 2560,
  "lyric_encoder_vocab_size": 6693,
  "lyric_hidden_size": 1024,
  "max_height": 16,
  "max_position": 32768,
  "max_width": 32768,
  "mlp_ratio": 2.5,
  "num_attention_heads": 20,
  "num_layers": 24,
  "out_channels": 8,
  "patch_size": [
    16,
    1
  ],
  "rope_theta": 1000000.0,
  "speaker_embedding_dim": 512,
  "ssl_encoder_depths": [
    8,
    8
  ],
  "ssl_latent_dims": [
    1024,
    768
  ],
  "ssl_names": [
    "mert",
    "m-hubert"
  ],
  "text_embedding_dim": 768
}