kundaja-green committed on
Commit
0718f3e
·
1 Parent(s): f5d8bca

Fix: Correct paths in dataset TOML file and add git lfs pull to start.sh

Browse files
Files changed (1) hide show
  1. start.sh +11 -9
start.sh CHANGED
@@ -1,5 +1,5 @@
1
  #!/bin/bash
2
- # --- Final Definitive Startup Script (v23.7 - Re-adds necessary LoRA arguments) ---
3
 
4
  set -e
5
  echo "--- Startup Script Initialized ---"
@@ -15,28 +15,30 @@ VAE_PATH="$MODELS_DIR/Wan2.1_VAE.pth"
15
  CLIP_PATH="$MODELS_DIR/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
16
  T5_PATH="$MODELS_DIR/models_t5_umt5-xxl-enc-bf16.pth"
17
 
18
- echo "--- Checking for model files in persistent storage... ---"
19
  if [ ! -f "$DIT_PATH" ]; then
20
- echo "Downloading DiT model..."
21
  huggingface-cli download jujutechnology/WANfortraining wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors --local-dir $MODELS_DIR --local-dir-use-symlinks False
22
  fi
23
  if [ ! -f "$VAE_PATH" ]; then
24
- echo "Downloading VAE model..."
25
  huggingface-cli download jujutechnology/WANfortraining Wan2.1_VAE.pth --local-dir $MODELS_DIR --local-dir-use-symlinks False
26
  fi
27
  if [ ! -f "$CLIP_PATH" ]; then
28
- echo "Downloading CLIP model..."
29
  huggingface-cli download jujutechnology/WANfortraining models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth --local-dir $MODELS_DIR --local-dir-use-symlinks False
30
  fi
31
  if [ ! -f "$T5_PATH" ]; then
32
- echo "Downloading T5 model..."
33
  huggingface-cli download jujutechnology/WANfortraining models_t5_umt5-xxl-enc-bf16.pth --local-dir $MODELS_DIR --local-dir-use-symlinks False
34
  fi
 
35
 
36
- echo "--- All models are present. Starting training... ---"
37
- ls -lh $MODELS_DIR
 
 
 
38
 
39
- # --- Run the training command with CORRECT LoRA arguments ---
 
 
40
  accelerate launch wan_train_network.py \
41
  --task="i2v-14B" \
42
  --dit="$DIT_PATH" \
 
1
  #!/bin/bash
2
+ # --- Final Definitive Startup Script (v24 - Adds git lfs pull to fix num_samples=0) ---
3
 
4
  set -e
5
  echo "--- Startup Script Initialized ---"
 
15
  CLIP_PATH="$MODELS_DIR/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
16
  T5_PATH="$MODELS_DIR/models_t5_umt5-xxl-enc-bf16.pth"
17
 
18
+ echo "--- Checking for model files... ---"
19
  if [ ! -f "$DIT_PATH" ]; then
 
20
  huggingface-cli download jujutechnology/WANfortraining wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors --local-dir $MODELS_DIR --local-dir-use-symlinks False
21
  fi
22
  if [ ! -f "$VAE_PATH" ]; then
 
23
  huggingface-cli download jujutechnology/WANfortraining Wan2.1_VAE.pth --local-dir $MODELS_DIR --local-dir-use-symlinks False
24
  fi
25
  if [ ! -f "$CLIP_PATH" ]; then
 
26
  huggingface-cli download jujutechnology/WANfortraining models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth --local-dir $MODELS_DIR --local-dir-use-symlinks False
27
  fi
28
  if [ ! -f "$T5_PATH" ]; then
 
29
  huggingface-cli download jujutechnology/WANfortraining models_t5_umt5-xxl-enc-bf16.pth --local-dir $MODELS_DIR --local-dir-use-symlinks False
30
  fi
31
+ echo "--- Models are present. ---"
32
 
33
+ # --- CRITICAL STEP: Force checkout of Git LFS files in the repo ---
34
+ echo "--- Ensuring all dataset images are fully downloaded (git lfs pull)... ---"
35
+ git lfs pull
36
+ echo "--- LFS checkout complete. Verifying file sizes: ---"
37
+ ls -lh /code/dataset/ebPhotos-001/ # This will now show megabyte-sized files
38
 
39
+ echo "--- Starting training... ---"
40
+
41
+ # --- Run the training command ---
42
  accelerate launch wan_train_network.py \
43
  --task="i2v-14B" \
44
  --dit="$DIT_PATH" \