Spaces:

jujutechnology
/

wanloratrainer-gui

Runtime error

App Files Files Community

kundaja-green committed 23 days ago

Commit

1f00881

1 Parent(s): 747a5ab

update path to DiT

Browse files

Files changed (1) hide show

start.sh +50 -30

start.sh CHANGED Viewed

@@ -1,5 +1,6 @@
 #!/bin/bash
-# --- Final Definitive Startup Script (v18 - Correct Single DiT File) ---
 # Exit immediately if a command exits with a non-zero status.
 set -e
@@ -8,16 +9,34 @@ echo "--- Startup Script Initialized ---"
 echo "--- Models are mounted from two separate repositories. ---"
 # --- Define the mount points for each repository ---
-WAN_AI_REPO_DIR="/Wan2.1-I2V-14B-720P"
-COMFY_REPO_DIR="/Wan_2.1_ComfyUI_repackaged"
 OUTPUT_DIR="/data/output"
 # --- Define the full, correct paths for each model component ---
-# Corrected the DiT filename to the exact, non-sharded file from the repo.
-DIT_PATH="$COMFY_REPO_DIR/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors"
-VAE_PATH="$WAN_AI_REPO_DIR/Wan2.1_VAE.pth"
-CLIP_PATH="$WAN_AI_REPO_DIR/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
-T5_PATH="$WAN_AI_REPO_DIR/models_t5_umt5-xxl-enc-bf16.pth"
 echo "DiT Path: $DIT_PATH"
 echo "VAE Path: $VAE_PATH"
@@ -25,34 +44,35 @@ echo "CLIP Path: $CLIP_PATH"
 echo "T5 Path: $T5_PATH"
 echo "Output Path: $OUTPUT_DIR"
-# Verify that key files from both repositories exist
 if [ ! -f "$DIT_PATH" ]; then
-    echo "CRITICAL ERROR: DiT model not found. Check README.md linking for 'Comfy-Org/Wan_2.1_ComfyUI_repackaged'."
     exit 1
 fi
 if [ ! -f "$T5_PATH" ]; then
     echo "CRITICAL ERROR: T5 model not found. Check README.md linking for 'Wan-AI/Wan2.1-I2V-14B-720P'."
     exit 1
 fi
-echo "All model repositories appear to be linked correctly. Starting training..."
-# Run the training command with the correct paths from each repository
 accelerate launch wan_train_network.py \
-     --task "i2v-14B" \
-     --dit "$DIT_PATH" \
-     --vae "$VAE_PATH" \
-     --clip "$CLIP_PATH" \
-     --t5 "$T5_PATH" \
-     --dataset_config "dataset/testtoml.toml" \
-     --output_dir "$OUTPUT_DIR" \
-     --output_name "My_HF_Lora_v1" \
-     --save_every_n_epochs "10" \
-     --max_train_epochs "70" \
-     --network_module "networks.lora_wan" \
-     --network_dim "32" \
-     --network_alpha "4" \
-     --learning_rate "2e-5" \
-     --optimizer_type "adamw" \
-     --mixed_precision "bf16" \
-     --gradient_checkpointing \
-     --sdpa

 #!/bin/bash
# --- Startup Script (v19 - combines the split 720p DiT file) ---
# Strict mode:
#   -e           abort as soon as any command fails
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails when ANY stage fails, not only the last one
#                (plain `set -e` silently ignores failures of earlier stages)
set -euo pipefail
echo "--- Models are mounted from two separate repositories. ---"
# --- Mount points of the two model repositories, and the training output dir ---
WAN_AI_REPO_DIR="/Wan-AI/Wan2.1-I2V-14B-720P"
COMFY_REPO_DIR="/Comfy-Org/Wan_2.1_ComfyUI_repackaged"
OUTPUT_DIR="/data/output"
# --- Combine the split DiT model files into one ---
# The trainer takes a single DiT file, so the numbered parts of the 720p model
# ("<name>.safetensors.<n>") are concatenated once and the result is reused on
# later starts.
# NOTE(review): the previous revision of this script pointed directly at an
# unsplit file "split_files/diffusion_models/<name>.safetensors" — confirm
# that numbered parts actually exist in the mounted repository.
COMBINED_DIT_PATH="${COMFY_REPO_DIR}/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors"
SPLIT_FILES_PATTERN="${COMFY_REPO_DIR}/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors.*"

#######################################
# Concatenate all files matching a glob pattern into one destination file.
# Parts are joined in version order, so ".10" comes after ".9".
# The data is written to a temporary file next to the destination and renamed
# only after every part was copied; a failed or interrupted run therefore
# never leaves an empty or truncated file at the destination.
# Arguments: $1 - glob pattern matching the parts
#            $2 - destination path
# Outputs:   diagnostics on stderr
# Returns:   0 on success; 1 if nothing matches or the copy/rename fails
#######################################
combine_dit_parts() {
  local pattern=$1
  local dest=$2
  local tmp="${dest}.partial.$$"
  local -a parts=()

  # Let the shell expand the glob (no `ls` parsing), then order numerically.
  mapfile -t parts < <(compgen -G "${pattern}" | LC_ALL=C sort -V)

  if (( ${#parts[@]} == 0 )); then
    echo "ERROR: no split DiT files match: ${pattern}" >&2
    return 1
  fi

  if ! cat -- "${parts[@]}" > "${tmp}"; then
    rm -f -- "${tmp}"
    echo "ERROR: failed while writing ${tmp}" >&2
    return 1
  fi

  if ! mv -f -- "${tmp}" "${dest}"; then
    rm -f -- "${tmp}"
    echo "ERROR: failed to move combined file to ${dest}" >&2
    return 1
  fi
}

# Build the combined file only when it is not there yet.
if [[ -f "${COMBINED_DIT_PATH}" ]]; then
  echo "Combined DiT model already exists. Skipping concatenation."
else
  echo "Combined DiT model not found. Concatenating split files for the 720p model..."
  if combine_dit_parts "${SPLIT_FILES_PATTERN}" "${COMBINED_DIT_PATH}"; then
    echo "DiT model successfully combined at: ${COMBINED_DIT_PATH}"
  else
    # No file is left behind on failure, so the DiT existence check that
    # follows in this script stops the startup with its CRITICAL ERROR.
    echo "ERROR: DiT model could not be combined." >&2
  fi
fi
 # --- Define the full, correct paths for each model component ---
# The DiT is read from the combined file; VAE, CLIP and T5 are looked up
# inside the Wan-AI repository mount.
# NOTE(review): the previous revision of this script used "Wan2.1_VAE.pth",
# "models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth" and
# "models_t5_umt5-xxl-enc-bf16.pth" directly under the repository root —
# confirm the names below against the mounted repository.
DIT_PATH=$COMBINED_DIT_PATH
VAE_PATH=$WAN_AI_REPO_DIR/Wan2.1-VAE.pth
CLIP_PATH=$WAN_AI_REPO_DIR/models/clip_open-clip-xlm-roberta-large-vit-huge-14.pth
T5_PATH=$WAN_AI_REPO_DIR/models/t5_umt5-xxl-enc-bf16.pth
# Print the resolved locations so a wrong mount shows up in the startup log.
printf 'DiT Path: %s\n' "$DIT_PATH"
printf 'VAE Path: %s\n' "$VAE_PATH"
printf 'T5 Path: %s\n' "$T5_PATH"
printf 'Output Path: %s\n' "$OUTPUT_DIR"
# --- Verify that every model file handed to the trainer exists ---

#######################################
# Abort the script unless the given path is a regular, non-empty file.
# A zero-byte file is rejected as well, because it would pass a plain
# existence test and only fail later inside the trainer.
# Arguments: $1 - human-readable component name (e.g. "DiT")
#            $2 - path to check
#            $3 - repository whose README.md linking should be checked
#            $4 - optional extra context appended after "not found or empty"
# Outputs:   error message on stderr
# Returns:   0 if the file is usable; otherwise exits the script with 1
#######################################
require_model_file() {
  local label=$1
  local path=$2
  local repo=$3
  local context=${4:-}
  if [[ ! -f "${path}" || ! -s "${path}" ]]; then
    echo "CRITICAL ERROR: ${label} model not found or empty${context}: '${path}'. Check README.md linking for '${repo}'." >&2
    exit 1
  fi
}

require_model_file "DiT" "${DIT_PATH}" "Comfy-Org/Wan_2.1_ComfyUI_repackaged" " after attempting to combine"
require_model_file "VAE" "${VAE_PATH}" "Wan-AI/Wan2.1-I2V-14B-720P"
require_model_file "CLIP" "${CLIP_PATH}" "Wan-AI/Wan2.1-I2V-14B-720P"
require_model_file "T5" "${T5_PATH}" "Wan-AI/Wan2.1-I2V-14B-720P"
echo "--- All model repositories appear to be linked correctly. Starting training... ---"
# --- Launch the training run; model paths come from the variables set earlier ---
# The arguments are collected in an array so each one stays a single word.
# NOTE(review): the previous revision passed --task "i2v-14B" and
# --save_every_n_epochs "10" where this one passes --task="i2v-lora" and
# --ckpt_lora_r_encoder="8" — confirm wan_train_network.py accepts both.
train_args=(
  --task="i2v-lora"
  --dit="$DIT_PATH"
  --vae="$VAE_PATH"
  --clip="$CLIP_PATH"
  --t5="$T5_PATH"
  --dataset_config="dataset/test.toml"
  --output_dir="$OUTPUT_DIR"
  --output_name="my-I2V-Lora"
  --ckpt_lora_r_encoder="8"
  --max_train_epochs="70"
  --network_module="networks.lora_wan"
  --network_dim="32"
  --network_alpha="4"
  --learning_rate="1e-5"
  --optimizer_type="adamw"
  --mixed_precision="bf16"
  --gradient_checkpointing
  --sdpa
)
accelerate launch wan_train_network.py "${train_args[@]}"