|
|
|
import torch |
|
import os |
|
import argparse |
|
from tqdm import tqdm |
|
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer |
|
|
|
def main(source_model_id, output_path):
    """Prepare a Qwen2.5 checkpoint as a donor for a Qwen3 merge.

    Loads the source model on CPU in bfloat16, builds a new state dict that
    drops every tensor whose name ends in ``.bias`` and adds all-ones
    ``q_norm.weight`` / ``k_norm.weight`` placeholders for every decoder
    layer, then saves that state dict together with the tokenizer.

    The edited state dict is handed to ``save_pretrained`` explicitly.
    Loading it back into the model with ``strict=False`` would neither
    delete the existing bias parameters nor create the new norm parameters,
    so saving the module's own state dict would silently write the
    unmodified weights.

    Args:
        source_model_id: Model identifier passed to ``from_pretrained``.
        output_path: Directory to save the prepared donor into; created if
            it does not exist.
    """
    print(f"Loading source donor model: {source_model_id}")

    # NOTE(review): trust_remote_code=True lets the checkpoint repository run
    # its own Python code; only use this with sources you trust.
    model = AutoModelForCausalLM.from_pretrained(
        source_model_id,
        torch_dtype=torch.bfloat16,
        device_map="cpu",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(source_model_id, trust_remote_code=True)
    config = model.config

    source_state_dict = model.state_dict()

    print("Removing all '.bias' tensors...")
    new_state_dict = {
        name: tensor
        for name, tensor in tqdm(source_state_dict.items(), desc="Filtering Tensors")
        if not name.endswith(".bias")
    }

    print("Adding placeholder 'q_norm' and 'k_norm' tensors...")

    # Prefer an explicit per-head dimension when the config provides one;
    # otherwise derive it from the hidden size and the number of heads.
    norm_dim = getattr(config, "head_dim", None) or (
        config.hidden_size // config.num_attention_heads
    )
    placeholder_norm = torch.ones(norm_dim, dtype=torch.bfloat16)

    for i in tqdm(range(config.num_hidden_layers), desc="Adding Norm Tensors"):
        attn_prefix = f"model.layers.{i}.self_attn"
        # Each entry gets its own clone so no two saved tensors share storage.
        new_state_dict[f"{attn_prefix}.q_norm.weight"] = placeholder_norm.clone()
        new_state_dict[f"{attn_prefix}.k_norm.weight"] = placeholder_norm.clone()

    print(f"Saving the architecturally aligned model to: {output_path}")
    os.makedirs(output_path, exist_ok=True)
    # Save the edited tensors directly instead of model.state_dict(), which
    # still contains the biases and lacks the q_norm/k_norm placeholders.
    model.save_pretrained(output_path, state_dict=new_state_dict)
    tokenizer.save_pretrained(output_path)

    print("\nDonor preparation complete!")
    print(f"The aligned donor is ready at '{output_path}'.")
|
|
|
if __name__ == "__main__":
    # Command-line entry point: collect the source model ID and the output
    # directory, then hand both to main().
    cli = argparse.ArgumentParser(
        description="Prepare a Qwen2.5 donor model for merging with Qwen3."
    )
    cli.add_argument(
        "--source_model",
        type=str,
        default="Qwen/Qwen2.5-72B-Instruct",
        help="The Hugging Face model ID of the source model.",
    )
    cli.add_argument(
        "--output_path",
        type=str,
        required=True,
        help="The local directory path to save the prepared donor model.",
    )
    cli_args = cli.parse_args()

    main(cli_args.source_model, cli_args.output_path)
|
|