{ "autoencoder": { "hidden_dim": 512, "n_convs": [ 6, 6 ], "strides": [ 1, 2 ], "stride_kernel_sizes": [ null, 7 ], "mels": 100, "n_q": 8, "codebook_size": 512, "codebook_dim": 256, "semantic_dim": 768 }, "flow_matcher": { "sigma_min": 0.0001, "mels": 100, "channels": 256, "n_convs": [ 8, 8, 8, 8 ], "strides": [ 1, 1, 1, 2 ] }, "mel_transform": { "sample_rate": 24000, "n_fft": 1024, "n_mels": 100, "hop_length": 256, "center": true, "power": 1.0 } }