set -xe

# Hyperparameters for the English sparse run.
BS=32          # train batch size
LR=5e-2        # learning rate
HS=32          # hidden size
LEN=32         # max sequence length
INITIAL_F=0    # first data file number
END_F=0        # last data file number
EPOCH=25       # number of training epochs

accelerate launch --config_file brain.yaml train.py \
    --data_dir data \
    --do_train \
    --output_dir en_checkpoints_len${LEN}_sparse \
    --hidden_size $HS \
    --train_batch_size $BS \
    --max_seq_length $LEN \
    --learning_rate $LR \
    --num_train_epochs $EPOCH \
    --num_neg_samples 400 \
    --initial_file_number $INITIAL_F \
    --end_file_number $END_F \
    --num_workers 8 \
    --fp16 \
    --run_name "BS${BS}_LR${LR}_HS${HS}_LEN${LEN}_f${INITIAL_F}_EPOCH${EPOCH}" \
    --vocab_path vocab_wiki_4k_en.json \
    --train_full data \
    --sparse \
    --use_frequency \
    --use_bpe \
    --bpe_tokenizer wiki_bpe_tokenizer_4000_bytelevel.json
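
# Aside (not part of the original script): to vary hyperparameters without
# editing this file, the assignments above could instead use bash
# default-expansion so callers can override them from the environment,
# e.g. `BS=64 LR=1e-2 bash train_en_sparse.sh`. The script name
# `train_en_sparse.sh` is an assumption; a minimal sketch:
#
#   BS=${BS:-32}
#   LR=${LR:-5e-2}
#   HS=${HS:-32}
#   LEN=${LEN:-32}
#   INITIAL_F=${INITIAL_F:-0}
#   END_F=${END_F:-0}
#   EPOCH=${EPOCH:-25}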