{ "auto_map": { "AutoProcessor": "processor.MuLanProcessor" }, "duration": 10, "fbank_kwargs": { "dither": 0.0, "frame_length": 25.0, "frame_shift": 10.0, "htk_compat": true, "num_mel_bins": 128, "sample_frequency": 16000, "use_energy": false, "window_type": "hanning" }, "feature_extractor_type": "AudioSpectrogramTransformerFeatureExtractor", "freq_mask": { "mask_param": 48 }, "n_frames": 1000, "n_mels": 128, "processor_class": "MuLanProcessor", "sample_rate": 16000, "spec_mean": -4.2677393, "spec_norm": true, "spec_std": 4.5689974, "time_mask": { "mask_param": 192 }, "training": true }