kundaja-green committed on
Commit
1f00881
·
1 Parent(s): 747a5ab

update path to DiT

Browse files
Files changed (1) hide show
  1. start.sh +50 -30
start.sh CHANGED
@@ -1,5 +1,6 @@
1
  #!/bin/bash
2
- # --- Final Definitive Startup Script (v18 - Correct Single DiT File) ---
 
3
 
4
  # Exit immediately if a command exits with a non-zero status.
5
  set -e
@@ -8,16 +9,34 @@ echo "--- Startup Script Initialized ---"
8
  echo "--- Models are mounted from two separate repositories. ---"
9
 
10
  # --- Define the mount points for each repository ---
11
- WAN_AI_REPO_DIR="/Wan2.1-I2V-14B-720P"
12
- COMFY_REPO_DIR="/Wan_2.1_ComfyUI_repackaged"
13
  OUTPUT_DIR="/data/output"
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # --- Define the full, correct paths for each model component ---
16
- # Corrected the DiT filename to the exact, non-sharded file from the repo.
17
- DIT_PATH="$COMFY_REPO_DIR/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors"
18
- VAE_PATH="$WAN_AI_REPO_DIR/Wan2.1_VAE.pth"
19
- CLIP_PATH="$WAN_AI_REPO_DIR/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
20
- T5_PATH="$WAN_AI_REPO_DIR/models_t5_umt5-xxl-enc-bf16.pth"
21
 
22
  echo "DiT Path: $DIT_PATH"
23
  echo "VAE Path: $VAE_PATH"
@@ -25,34 +44,35 @@ echo "CLIP Path: $CLIP_PATH"
25
  echo "T5 Path: $T5_PATH"
26
  echo "Output Path: $OUTPUT_DIR"
27
 
28
- # Verify that key files from both repositories exist
29
  if [ ! -f "$DIT_PATH" ]; then
30
- echo "CRITICAL ERROR: DiT model not found. Check README.md linking for 'Comfy-Org/Wan_2.1_ComfyUI_repackaged'."
31
  exit 1
32
  fi
 
33
  if [ ! -f "$T5_PATH" ]; then
34
  echo "CRITICAL ERROR: T5 model not found. Check README.md linking for 'Wan-AI/Wan2.1-I2V-14B-720P'."
35
  exit 1
36
  fi
37
 
38
- echo "All model repositories appear to be linked correctly. Starting training..."
39
- # Run the training command with the correct paths from each repository
40
  accelerate launch wan_train_network.py \
41
- --task "i2v-14B" \
42
- --dit "$DIT_PATH" \
43
- --vae "$VAE_PATH" \
44
- --clip "$CLIP_PATH" \
45
- --t5 "$T5_PATH" \
46
- --dataset_config "dataset/testtoml.toml" \
47
- --output_dir "$OUTPUT_DIR" \
48
- --output_name "My_HF_Lora_v1" \
49
- --save_every_n_epochs "10" \
50
- --max_train_epochs "70" \
51
- --network_module "networks.lora_wan" \
52
- --network_dim "32" \
53
- --network_alpha "4" \
54
- --learning_rate "2e-5" \
55
- --optimizer_type "adamw" \
56
- --mixed_precision "bf16" \
57
- --gradient_checkpointing \
58
- --sdpa
 
1
  #!/bin/bash
2
+
3
+ # --- Final Definitive Startup Script (v19 - Corrects Split 720p DiT File) ---
4
 
5
  # Exit immediately if a command exits with a non-zero status.
6
  set -e
 
9
  echo "--- Models are mounted from two separate repositories. ---"
10
 
# --- Define the mount points for each repository ---
# Each location may be overridden from the environment, so a change in where
# the repositories are mounted does not require editing this script. The
# defaults are the values that were previously hard-coded here.
WAN_AI_REPO_DIR="${WAN_AI_REPO_DIR:-/Wan-AI/Wan2.1-I2V-14B-720P}"
COMFY_REPO_DIR="${COMFY_REPO_DIR:-/Comfy-Org/Wan_2.1_ComfyUI_repackaged}"
OUTPUT_DIR="${OUTPUT_DIR:-/data/output}"
15
 
16
+
# --- Combine the split DiT model files into one ---
# Path of the final combined file and the glob pattern that matches its split
# parts. This targets the 720p model.
# NOTE(review): the previous revision of this script pointed DIT_PATH at a
# single, non-sharded file under split_files/diffusion_models/ — confirm that
# ".safetensors.*" parts really exist in the mounted repository.
COMBINED_DIT_PATH="${COMFY_REPO_DIR}/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors"
SPLIT_FILES_PATTERN="${COMFY_REPO_DIR}/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors.*"

#######################################
# Concatenate every file matching a glob pattern into one destination file.
# Parts are joined in version-sort order, so a ".10" suffix follows ".9"
# instead of ".1". Data is written to a temporary sibling of the destination
# and renamed only on success, so a failed run never leaves an empty or
# truncated file where the combined model is expected.
# Arguments: $1 - glob pattern matching the parts
#            $2 - destination file
# Outputs:   diagnostics on stderr
# Returns:   0 on success; 1 if an argument is empty, nothing matches the
#            pattern, or the combined file cannot be written
#######################################
combine_split_dit() {
  local pattern=$1 dest=$2 tmp
  local -a parts=()
  # Empty IFS: the unquoted expansion below undergoes globbing only, never
  # word splitting, so paths containing spaces stay intact.
  local IFS=

  if [[ -z "$pattern" || -z "$dest" ]]; then
    printf 'combine_split_dit: a glob pattern and a destination are required\n' >&2
    return 1
  fi

  # Let the shell expand the glob instead of parsing 'ls' output.
  # shellcheck disable=SC2206  # deliberate glob expansion, see IFS above
  parts=( $pattern )
  # An unmatched glob stays literal (or vanishes under nullglob); either way
  # the first element does not name an existing file.
  if [[ ! -e "${parts[0]-}" ]]; then
    printf 'No split DiT files match: %s\n' "$pattern" >&2
    return 1
  fi

  tmp="${dest}.part.$$"
  if ! printf '%s\0' "${parts[@]}" | LC_ALL=C sort -zV | xargs -0 cat -- > "$tmp"; then
    rm -f -- "$tmp"
    printf 'Failed to write combined DiT model to: %s\n' "$dest" >&2
    return 1
  fi

  if ! mv -f -- "$tmp" "$dest"; then
    rm -f -- "$tmp"
    printf 'Failed to move combined DiT model into place: %s\n' "$dest" >&2
    return 1
  fi
}

# Rebuild when the combined file is missing OR empty: a zero-byte file can be
# left behind by an earlier failed concatenation and must not count as a model.
if [[ -s "$COMBINED_DIT_PATH" ]]; then
  echo "Combined DiT model already exists. Skipping concatenation."
else
  echo "Combined DiT model not found. Concatenating split files for the 720p model..."
  if combine_split_dit "$SPLIT_FILES_PATTERN" "$COMBINED_DIT_PATH"; then
    echo "DiT model successfully combined at: ${COMBINED_DIT_PATH}"
  else
    echo "WARNING: could not combine the split DiT files." >&2
    # Drop any zero-byte leftover so the existence check that follows in this
    # script reports the missing model instead of accepting an empty file.
    rm -f -- "$COMBINED_DIT_PATH"
  fi
fi
32
+
33
+
34
  # --- Define the full, correct paths for each model component ---
35
+ # Correct the DIT_PATH to point to the newly combined file
36
+ DIT_PATH="${COMBINED_DIT_PATH}"
37
+ VAE_PATH="${WAN_AI_REPO_DIR}/Wan2.1-VAE.pth"
38
+ CLIP_PATH="${WAN_AI_REPO_DIR}/models/clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
39
+ T5_PATH="${WAN_AI_REPO_DIR}/models/t5_umt5-xxl-enc-bf16.pth"
40
 
41
  echo "DiT Path: $DIT_PATH"
42
  echo "VAE Path: $VAE_PATH"
 
44
  echo "T5 Path: $T5_PATH"
45
  echo "Output Path: $OUTPUT_DIR"
46
 
# --- Verify that every model file handed to the trainer exists ---

#######################################
# Abort the script unless the given path is a non-empty regular file.
# A zero-byte file is rejected as well, because an empty file can never be a
# usable model and would otherwise only fail later inside the trainer.
# Arguments: $1 - path to check
#            $2 - message printed to stderr when the check fails
# Returns:   0 if the file is usable; otherwise exits the script with status 1
#######################################
require_model_file() {
  local path=$1 message=$2
  if [[ ! -f "$path" || ! -s "$path" ]]; then
    printf '%s\n' "$message" >&2
    exit 1
  fi
}

require_model_file "$DIT_PATH" \
  "CRITICAL ERROR: DiT model not found after attempting to combine. Check README.md linking for 'Comfy-Org/Wan_2.1_ComfyUI_repackaged'."
require_model_file "$VAE_PATH" \
  "CRITICAL ERROR: VAE model not found. Check README.md linking for 'Wan-AI/Wan2.1-I2V-14B-720P'."
require_model_file "$CLIP_PATH" \
  "CRITICAL ERROR: CLIP model not found. Check README.md linking for 'Wan-AI/Wan2.1-I2V-14B-720P'."
require_model_file "$T5_PATH" \
  "CRITICAL ERROR: T5 model not found. Check README.md linking for 'Wan-AI/Wan2.1-I2V-14B-720P'."
57
 
echo "--- All model repositories appear to be linked correctly. Starting training... ---"

# --- Run the training command with the correct paths from each repository ---
# The options are collected in an array, one per line, and expanded as
# separate words; the resulting argument list is the same as writing them
# inline with backslash continuations.
# NOTE(review): the previous revision of this script passed --task "i2v-14B"
# and --save_every_n_epochs "10", and had no --ckpt_lora_r_encoder option.
# Confirm the values below against the trainer's --help output.
train_args=(
  --task="i2v-lora"
  --dit="$DIT_PATH"
  --vae="$VAE_PATH"
  --clip="$CLIP_PATH"
  --t5="$T5_PATH"
  --dataset_config="dataset/test.toml"
  --output_dir="$OUTPUT_DIR"
  --output_name="my-I2V-Lora"
  --ckpt_lora_r_encoder="8"
  --max_train_epochs="70"
  --network_module="networks.lora_wan"
  --network_dim="32"
  --network_alpha="4"
  --learning_rate="1e-5"
  --optimizer_type="adamw"
  --mixed_precision="bf16"
  --gradient_checkpointing
  --sdpa
)

accelerate launch wan_train_network.py "${train_args[@]}"