Spaces:
Paused
Paused
# Copyright 2020-2025 The HuggingFace Team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import torch | |
from huggingface_hub import HfApi | |
from trl.import_utils import is_mergekit_available | |
if is_mergekit_available(): | |
from mergekit.config import MergeConfiguration | |
from mergekit.merge import MergeOptions, run_merge | |
def upload_model_to_hf(folder_path: str, repo_id: str):
    """
    Upload the contents of a local folder to a model repository on the Hugging Face Hub.

    The repository is created when it does not exist yet; an already existing repository is reused.

    Args:
        folder_path (`str`): Path of the local folder whose contents are uploaded.
        repo_id (`str`): Identifier of the destination model repository.
    """
    api = HfApi()

    # `exist_ok=True` makes the creation step a no-op when the repository already exists. Without it,
    # `create_repo` raises for an existing repository and the upload below would never run.
    repo = api.create_repo(repo_id, repo_type="model", exist_ok=True)

    # Use the identifiers returned by the Hub rather than the raw `repo_id` argument.
    api.upload_folder(
        folder_path=folder_path,
        repo_id=repo.repo_id,
        repo_type=repo.repo_type,
    )
class MergeConfig:
    r"""
    Configuration class for merging two models using `mergekit`.

    This class provides a structured way to configure and generate merge configurations for various merge methods,
    such as `linear`, `ties`, `dare_ties`, and `slerp`.

    Args:
        method (`str`, *optional*, defaults to `"linear"`):
            Merge method to use. Supported methods include:
            - `"linear"`: Linearly combines two models with specified weights.
            - `"ties"`: Combines two models using the TIES method with density parameters.
            - `"dare_ties"`: Combines two models using DARE (drop-and-rescale) sparsification together with the
              TIES sign-consensus step.
            - `"slerp"`: Combines models using spherical linear interpolation.

    Note:
        For more details about the merge methods and how they are implemented, see the
        [MergeKit GitHub repository](https://github.com/arcee-ai/mergekit?tab=readme-ov-file#merge-methods).

    Attributes:
        method (`str`): The merge method to use.
        policy_model_path (`str` or `None`): Path to the policy model. Must be set before building a configuration.
        target_model_path (`str` or `None`): Path to the target model. Must be set before building a configuration.
        policy_model_weight (`float`): Weight for the policy model (only set for `linear`, `ties` and `dare_ties`).
        target_model_weight (`float`): Weight for the target model (only set for `linear`, `ties` and `dare_ties`).
        policy_model_density (`list[float]`): Density parameters for the policy model (only set for `ties` and
            `dare_ties`).
        target_model_density (`list[float]`): Density parameters for the target model (only set for `ties` and
            `dare_ties`).
        normalize (`float`): Value passed as the `normalize` merge parameter (only set for `ties` and `dare_ties`).
        t_values (`float`): Interpolation factor `t` (only set for `slerp`).
        dtype (`str`): Data type to use for merging, e.g., `"float16"`.

    Raises:
        ImportError: If `mergekit` is not installed.
        ValueError: If `method` is not one of the supported merge methods.
    """

    def __init__(self, method: str = "linear"):
        if not is_mergekit_available():
            raise ImportError("MergeConfig requires the `mergekit` extra. To install, run `pip install mergekit`.")

        self.method = method
        self.policy_model_path = None
        self.target_model_path = None

        # Only the parameters that are meaningful for the selected method are created as attributes.
        if method == "linear":
            self.policy_model_weight = 0.5
            self.target_model_weight = 0.5
        elif method in ("ties", "dare_ties"):
            # Both TIES variants start from the same defaults.
            self.policy_model_weight = 1.0
            self.policy_model_density = [1.0, 0.7, 0.1]
            self.target_model_weight = 1.0
            self.target_model_density = [1.0]
            self.normalize = 1.0
        elif method == "slerp":
            self.t_values = 0.5
        else:
            raise ValueError(f"Unsupported merge method: {method}")

        # Every supported method merges in half precision by default.
        self.dtype = "float16"

    @staticmethod
    def _model_reference(path):
        """Return the expanded model-reference dictionary for the model stored at `path`."""
        return {
            "model": {"path": path, "revision": None},
            "lora": None,
            "override_architecture": None,
        }

    def _create_ties_family_config(self, merge_method: str) -> "MergeConfiguration":
        """Build the configuration shared by `ties` and `dare_ties`, which differ only in `merge_method`."""
        merge_config_dict = {
            "merge_method": merge_method,
            "slices": None,  # Optional slices if needed
            "models": [
                {
                    "model": self._model_reference(self.target_model_path),
                    "parameters": {"density": self.target_model_density, "weight": self.target_model_weight},
                },
                {
                    "model": self._model_reference(self.policy_model_path),
                    "parameters": {"density": self.policy_model_density, "weight": self.policy_model_weight},
                },
            ],
            "parameters": {"normalize": self.normalize},
            # The policy model is used as the base model of the merge.
            "base_model": self._model_reference(self.policy_model_path),
            "dtype": self.dtype,
            "tokenizer_source": None,
            "tokenizer": None,
            "chat_template": None,
            "out_dtype": None,
        }
        return MergeConfiguration.model_validate(merge_config_dict)

    def create_merge_config_linear(self) -> "MergeConfiguration":
        """
        Creates a merge configuration for a linear merge of two models with specified weights.
        """
        merge_config_dict = {
            "dtype": self.dtype,
            "merge_method": "linear",
            "models": [
                {"model": self.policy_model_path, "parameters": {"weight": self.policy_model_weight}},
                {"model": self.target_model_path, "parameters": {"weight": self.target_model_weight}},
            ],
        }
        return MergeConfiguration.model_validate(merge_config_dict)

    def create_merge_config_ties(self) -> "MergeConfiguration":
        """
        Creates a merge configuration for a TIES merge of two models, with specified weights and densities.
        """
        return self._create_ties_family_config("ties")

    def create_merge_config_dare_ties(self) -> "MergeConfiguration":
        """
        Creates a merge configuration for a DARE TIES merge of two models, with specified weights and densities.
        """
        return self._create_ties_family_config("dare_ties")

    def create_merge_config_slerp(self) -> "MergeConfiguration":
        """
        Creates a merge configuration for a SLERP merge of a model with a base model.
        """
        merge_config_dict = {
            "merge_method": "slerp",
            "slices": None,  # Optional slices if needed
            "models": [
                {
                    "model": self._model_reference(self.target_model_path),
                    "parameters": None,  # No specific parameters for SLERP model
                }
            ],
            "parameters": {
                "t": self.t_values  # Set the t values for SLERP
            },
            # The policy model is used as the base model of the merge.
            "base_model": self._model_reference(self.policy_model_path),
            "dtype": self.dtype,
            "tokenizer_source": None,
            "tokenizer": None,
            "chat_template": None,
            "out_dtype": None,
        }
        return MergeConfiguration.model_validate(merge_config_dict)

    def create(self) -> "MergeConfiguration":
        """
        Creates the merge configuration matching `self.method`.

        Returns:
            `MergeConfiguration`: The validated mergekit configuration for the selected method.

        Raises:
            ValueError: If `self.method` is not a supported merge method (e.g. it was changed after construction).
        """
        if self.method == "linear":
            return self.create_merge_config_linear()
        if self.method == "ties":
            return self.create_merge_config_ties()
        if self.method == "dare_ties":
            return self.create_merge_config_dare_ties()
        if self.method == "slerp":
            return self.create_merge_config_slerp()
        # Fail loudly instead of returning `None`, which would only surface later as an obscure error.
        raise ValueError(f"Unsupported merge method: {self.method}")
def merge_models(config: "MergeConfig | MergeConfiguration", out_path: str):
    """
    Merge two models using mergekit and write the result to `out_path`.

    Args:
        config (`MergeConfig` or `mergekit.config.MergeConfiguration`):
            The merge configuration. A `MergeConfig` is converted with `MergeConfig.create()` before merging; a
            `MergeConfiguration` is used as is.
        out_path (`str`): The output path for the merged model.

    Raises:
        ImportError: If `mergekit` is not installed.
    """
    if not is_mergekit_available():
        raise ImportError("merge_models requires the `mergekit` extra. To install, run `pip install mergekit`.")

    # `run_merge` works on mergekit's own `MergeConfiguration`; accept the higher-level `MergeConfig` wrapper
    # as well so that the documented argument type actually works.
    if isinstance(config, MergeConfig):
        config = config.create()

    run_merge(
        config,
        out_path=out_path,
        options=MergeOptions(
            cuda=torch.cuda.is_available(),  # merge on GPU whenever one is available
            copy_tokenizer=True,
            lazy_unpickle=False,
            low_cpu_memory=False,
        ),
    )