# NOTE(review): the table names suggest that [grid] lists candidate values to
# sweep over and [defaults] holds fixed values - confirm against the consumer.

[grid]
scheduler_type = [
    "huggingface_cosine_with_restarts",
    "huggingface_linear",
    "cosine",
    "onecycle",
]
smoothing_probability = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
# Disabled entries (commented out); uncomment to make them active again.
# mamba_ker_size = [3, 4]
# mamba_layer_number = [3, 4, 5]
# hidden_dim_gated = [128]
# num_transformer_heads = [2, 4, 8]
# tr_layer_number = [1, 2]
# out_features = [128, 256]
# num_graph_heads = [2, 4]
# dropout = [0.0, 0.1, 0.2]
# positional_encoding = [true, false]

[defaults]
hidden_dim = 128
#
# hidden_dim_gated = 128
# num_transformer_heads = 2
# tr_layer_number = 1
# out_features = 128
#
# num_graph_heads = 2
# dropout = 0
# positional_encoding = false