#!/bin/bash
#
# Runs a short (10-step) LoRA fine-tune of Lightricks/LTX-Video through
# `accelerate launch` on GPUs 0 and 1, then removes the run's output directory
# and every `*_precomputed` entry under the dataset root.
#
# Required environment:
#   ROOT_DIR  Directory containing train.py, accelerate_configs/ and
#             video-dataset-disney/.
#
# Exit status: the exit status of the training command. Cleanup runs whether
# the training command succeeds or fails.

set -euo pipefail

# Fail fast: with an empty ROOT_DIR every path below would resolve against "/",
# including the `rm -rf` targets used during cleanup.
: "${ROOT_DIR:?ROOT_DIR must be set to the directory containing train.py}"

readonly GPU_IDS="0,1"
readonly DATA_ROOT="$ROOT_DIR/video-dataset-disney"
readonly CAPTION_COLUMN="prompt.txt"
readonly VIDEO_COLUMN="videos.txt"
readonly OUTPUT_DIR="ltx-video"
readonly ID_TOKEN="BW_STYLE"

# Model arguments
model_args=(
  --model_name ltx_video
  --pretrained_model_name_or_path Lightricks/LTX-Video
)

# Dataset arguments
dataset_args=(
  --data_root "$DATA_ROOT"
  --video_column "$VIDEO_COLUMN"
  --caption_column "$CAPTION_COLUMN"
  --id_token "$ID_TOKEN"
  --video_resolution_buckets 49x512x768
  --caption_dropout_p 0.05
)

# Dataloader arguments
dataloader_args=(
  --dataloader_num_workers 0
  --precompute_conditions
)

# Diffusion arguments
diffusion_args=(
  --flow_resolution_shifting
)

# Training arguments
training_args=(
  --training_type lora
  --seed 42
  --batch_size 1
  --train_steps 10
  --rank 128
  --lora_alpha 128
  --target_modules to_q to_k to_v to_out.0
  --gradient_accumulation_steps 1
  --gradient_checkpointing
  --checkpointing_steps 5
  --checkpointing_limit 2
  --enable_slicing
  --enable_tiling
)

# Optimizer arguments
optimizer_args=(
  --optimizer adamw
  --lr 3e-5
  --beta1 0.9
  --beta2 0.95
  --weight_decay 1e-4
  --epsilon 1e-8
  --max_grad_norm 1.0
)

# Validation arguments
# Two prompts, each prefixed with $ID_TOKEN and suffixed with `@@@49x512x768`,
# joined by `:::`. The heredoc delimiter is unquoted so $ID_TOKEN expands.
validation_prompts=$(cat <<EOF
$ID_TOKEN A black and white animated scene unfolds with an anthropomorphic goat surrounded by musical notes and symbols, suggesting a playful environment. Mickey Mouse appears, leaning forward in curiosity as the goat remains still. The goat then engages with Mickey, who bends down to converse or react. The dynamics shift as Mickey grabs the goat, potentially in surprise or playfulness, amidst a minimalistic background. The scene captures the evolving relationship between the two characters in a whimsical, animated setting, emphasizing their interactions and emotions.@@@49x512x768:::$ID_TOKEN A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. 
The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage@@@49x512x768
EOF
)

# Collapse the line break inside the second prompt (and any other run of
# whitespace) into a single space. The previous implementation got this as a
# side effect of running an unquoted `eval $cmd`; doing it explicitly keeps the
# argument that reaches train.py the same now that the command is not re-parsed.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
read -r -d '' -a prompt_words <<<"$validation_prompts" || true
validation_prompts="${prompt_words[*]}"

validation_args=(
  --validation_prompts "$validation_prompts"
  --validation_steps 5
  --num_validation_videos 1
)

# Miscellaneous arguments
miscellaneous_args=(
  --tracker_name finetrainers-ltxv
  --output_dir "$OUTPUT_DIR"
  --nccl_timeout 1800
  --report_to wandb
)

# Full argv for the training run. Built as an array so that every argument
# (in particular the multi-word prompt and any path containing spaces) is
# passed exactly once, without a second round of shell parsing.
cmd=(
  accelerate launch
  --config_file "$ROOT_DIR/accelerate_configs/uncompiled_2.yaml"
  --gpu_ids "$GPU_IDS"
  "$ROOT_DIR/train.py"
  "${model_args[@]}"
  "${dataset_args[@]}"
  "${dataloader_args[@]}"
  "${diffusion_args[@]}"
  "${training_args[@]}"
  "${optimizer_args[@]}"
  "${validation_args[@]}"
  "${miscellaneous_args[@]}"
)

#######################################
# Deletes the artifacts produced by the run.
# Globals:   OUTPUT_DIR (read), DATA_ROOT (read)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
cleanup() {
  echo "Removing $OUTPUT_DIR"
  # `:?` aborts instead of expanding to an empty path; `--` ends option parsing.
  rm -rf -- "${OUTPUT_DIR:?}"

  echo "Removing precomputed stuff in $DATA_ROOT"
  # An unmatched glob stays literal; `rm -f` ignores the non-existent name.
  rm -rf -- "${DATA_ROOT:?}"/*_precomputed
}

#######################################
# Logs and runs the training command, then cleans up.
# Globals:   cmd (read)
# Arguments: none
# Outputs:   the shell-quoted command line on stdout, plus trainer output
# Returns:   exit status of the training command
#######################################
main() {
  local status=0

  # %q prints each argument shell-quoted, so the logged line can be pasted
  # back into a shell to reproduce the run.
  printf 'Running command:'
  printf ' %q' "${cmd[@]}"
  printf '\n'

  # Capture the status instead of letting `set -e` abort, so cleanup still runs.
  "${cmd[@]}" || status=$?

  cleanup
  return "$status"
}

main "$@"