nunchaku version?

#3
by hugless - opened

Does anyone know of a nunchaku version?

Since it's an NVIDIA thing I can't try it myself, but maybe you can:

https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/discussions/4#68ed1f6bf4401628c7c8260f

import torch
from diffusers import QwenImageEditPlusPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# ----------------------------
# Settings
# ----------------------------
model_name = "Qwen/Qwen-Image-Edit-2509"
save_dir = "./qwen_image_edit_lora_quantized"
dtype = torch.bfloat16

# ----------------------------
# LoRAs to apply
# ----------------------------
# The format is (file_path, weight)
# weight = strength / influence of the LoRA
lora_files = [
    ("./loras/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-fp32.safetensors", 0.1),
    ("./loras/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-fp32.safetensors", 0.5),
    ("./loras/Qwen-Image-Lightning-8steps-V2.0.safetensors", 0.1),
    ("./loras/Qwen-Image-Lightning-4steps-V2.0.safetensors", 0.3),
    ("./loras/qwen_snofs.safetensors", 1.0),
    ("./loras/qwen_image_snapchat.safetensors", 1.0),
    ("./loras/qwen_MCNL_v1.0.safetensors", 0.85),
    ("./loras/Qwen4Play_v1.safetensors", 0.6),
    ("./loras/CockQwen-v3.safetensors", 0.25),
    ("./loras/Skin Fix Qwen.safetensors", 0.2),
]

# ----------------------------
# Quantization config
# ----------------------------
# diffusers applies PipelineQuantizationConfig at load time via
# from_pretrained(..., quantization_config=...); there is no
# pipeline.quantize() method, so the config has to be defined before loading.
quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",    
    quant_kwargs={
        "load_in_4bit": True,
        "bnb_4bit_compute_dtype": dtype,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_quant_storage": "uint8",
        "bnb_4bit_use_double_quant": False,
        "llm_int8_enable_fp32_cpu_offload": False,
        "llm_int8_has_fp16_weight": False,
        "llm_int8_skip_modules": [
            "transformer_blocks.0.img_mod",
            "transformer_blocks.33.img_mod",
            "transformer_blocks.58.attn.to_k",
            "transformer_blocks.59.attn.to_out",
            "time_text_embed",
            "img_in",
            "txt_in",

            "transformer_blocks.0.img_mod.1",
            "transformer_blocks.0.attn.to_q",
            "transformer_blocks.0.attn.to_k",
            "transformer_blocks.0.attn.to_v",
            "transformer_blocks.0.attn.add_k_proj",
            "transformer_blocks.0.attn.add_v_proj",
            "transformer_blocks.0.attn.add_q_proj",
            "transformer_blocks.0.attn.to_out.0",
            "transformer_blocks.0.attn.to_add_out",
            "transformer_blocks.0.img_mlp.net.0.proj",
            "transformer_blocks.0.img_mlp.net.2",
            "transformer_blocks.0.txt_mod.1",
            "transformer_blocks.0.txt_mlp.net.0.proj",
            "transformer_blocks.0.txt_mlp.net.2",
            "transformer_blocks.59.img_mod.1",
            "transformer_blocks.59.attn.to_q",
            "transformer_blocks.59.attn.to_k",
            "transformer_blocks.59.attn.to_v",
            "transformer_blocks.59.attn.add_k_proj",
            "transformer_blocks.59.attn.add_v_proj",
            "transformer_blocks.59.attn.add_q_proj",
            "transformer_blocks.59.attn.to_out.0",
            "transformer_blocks.59.attn.to_add_out",
            "transformer_blocks.59.img_mlp.net.0.proj",
            "transformer_blocks.59.img_mlp.net.2",
            "transformer_blocks.59.txt_mod.1",
            "transformer_blocks.59.txt_mlp.net.0.proj",
            "transformer_blocks.59.txt_mlp.net.2",
            "norm_out.linear",
            "proj_out"
        ],
    },     
    components_to_quantize=["transformer", "text_encoder"]
)

# ----------------------------
# Load the base pipeline, quantized to 4-bit NF4 at load time
# ----------------------------
pipeline = QwenImageEditPlusPipeline.from_pretrained(
    model_name,
    torch_dtype=dtype,
    quantization_config=quant_config,
)
print("Pipeline loaded and quantized.")

# ----------------------------
# Load the LoRAs one by one
# ----------------------------
# load_lora_weights has no alpha argument; register each LoRA under its own
# adapter name, then set the per-adapter strengths with set_adapters
adapter_names, adapter_weights = [], []
for i, (lora_path, weight) in enumerate(lora_files):
    name = f"lora_{i}"
    pipeline.load_lora_weights(lora_path, adapter_name=name)
    adapter_names.append(name)
    adapter_weights.append(weight)
pipeline.set_adapters(adapter_names, adapter_weights)
print("All LoRAs loaded.")

# ----------------------------
# Optional: CPU offload for large models
# ----------------------------
pipeline.enable_model_cpu_offload()

# ----------------------------
# Save the quantized pipeline with LoRAs applied
# ----------------------------
pipeline.save_pretrained(save_dir)
print(f"Pipeline + LoRAs quantized and saved at {save_dir}")

Maybe something like this will work, but I can't test it. It should produce v5; for v5.1 you have to check Pr00t's original safetensors, the actual LoRAs and weights are listed there in clear text.
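
If the save works, reloading it for a quick test could look like this (untested; the input image path, prompt and step count are just placeholders, and with the Lightning LoRAs you would keep the step count low):

import torch
from diffusers import QwenImageEditPlusPipeline
from diffusers.utils import load_image

# reload the quantized pipeline saved by the script above
pipeline = QwenImageEditPlusPipeline.from_pretrained(
    "./qwen_image_edit_lora_quantized",
    torch_dtype=torch.bfloat16,
)
pipeline.enable_model_cpu_offload()

# placeholder input image and prompt
image = load_image("./input.png")
result = pipeline(
    image=[image],
    prompt="replace the background with a beach at sunset",
    num_inference_steps=8,   # Lightning LoRAs target 4-8 steps
).images[0]
result.save("output.png")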

Or just load a Qwen NF4 version and add the LoRAs (a rough diffusers sketch of this follows the two lists below):

v5
<lora:ImageEdit\\Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors:0.1:0>
<lora:ImageEdit\\Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors:0.5:0>
<lora:ImageEdit\\Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors:0.1:0>
<lora:ImageEdit\\Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors:0.3:0>
<lora:ImageEdit\\qwen_snofs.safetensors:1:0>
<lora:ImageEdit\\qwen_image_snapchat.safetensors:1:0>
<lora:ImageEdit\\qwen_MCNL_v1.0.safetensors:0.85:0>
<lora:ImageEdit\\Qwen4Play_v1.safetensors:0.6:0>
<lora:ImageEdit\\CockQwen-v3.safetensors:0.25:0>
<lora:ImageEdit\\Skin Fix Qwen.safetensors:0.2:0>

v5.1
<lora:ImageEdit\\Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors:0.5:0>
<lora:ImageEdit\\Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors:0.5:0>
<lora:ImageEdit\\Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors:0:0>
<lora:ImageEdit\\Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors:0:0>
<lora:ImageEdit\\Qwen-Image-fp8-e4m3fn-Lightning-4steps-V1.0-bf16.safetensors:0:0>
<lora:ImageEdit\\snofs-v11.safetensors:1:0>
<lora:ImageEdit\\qwen_image_snapchat.safetensors:1:0>
<lora:ImageEdit\\qwen_MCNL_v1.0.safetensors:0.85:0>
<lora:ImageEdit\\Qwen4Play_v2.safetensors:0.7:0>
<lora:ImageEdit\\CockQwen-v3.safetensors:0.25:0>
<lora:ImageEdit\\Skin Fix Qwen.safetensors:0:0:0>
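
In diffusers, that route would look roughly like this (untested; the NF4 checkpoint id is a placeholder for whichever prequantized 4-bit Qwen-Image-Edit-2509 you use, the adapter names are made up, and you would repeat the load/weight pattern for the full v5 or v5.1 list):

import torch
from diffusers import QwenImageEditPlusPipeline

# placeholder: any prequantized NF4 Qwen-Image-Edit-2509 checkpoint or local path
pipeline = QwenImageEditPlusPipeline.from_pretrained(
    "some-user/Qwen-Image-Edit-2509-nf4",
    torch_dtype=torch.bfloat16,
)

# apply the LoRAs with the weights from the list above, e.g. for two of them:
pipeline.load_lora_weights(
    "./loras/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors",
    adapter_name="lightning_4steps",
)
pipeline.load_lora_weights("./loras/qwen_snofs.safetensors", adapter_name="snofs")
# ...load the rest the same way, then set all strengths at once
pipeline.set_adapters(["lightning_4steps", "snofs"], [0.5, 1.0])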

I might try with the LoRAs first. There is no official support for them yet, but there is a PR that I managed to make work; the only limitation is that you need to have CPU offload off. Thanks!!
