# Copyright 2020-2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tempfile
import unittest

import torch
from datasets import load_dataset
from parameterized import parameterized
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.testing_utils import require_peft

from trl import ORPOConfig, ORPOTrainer
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE


class ORPOTrainerTester(unittest.TestCase):
    def setUp(self):
        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
        self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
        self.tokenizer.pad_token = self.tokenizer.eos_token

        # get t5 as seq2seq example:
        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
        self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE

    # Run once per (model, dataset config) pair: the tiny Qwen causal LM and the tiny T5 seq2seq model.
    @parameterized.expand(
        [
            ("qwen", "standard_preference"),
            ("t5", "standard_implicit_prompt_preference"),
            ("qwen", "conversational_preference"),
            ("t5", "conversational_implicit_prompt_preference"),
        ]
    )
    def test_orpo_trainer(self, name, config_name):
        with tempfile.TemporaryDirectory() as tmp_dir:
            training_args = ORPOConfig(
                output_dir=tmp_dir,
                per_device_train_batch_size=2,
                max_steps=3,
                remove_unused_columns=False,
                gradient_accumulation_steps=1,
                learning_rate=9e-1,
                eval_strategy="steps",
                beta=0.1,
                report_to="none",
            )

            dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

            if name == "qwen":
                model = self.model
                tokenizer = self.tokenizer
            elif name == "t5":
                model = self.t5_model
                tokenizer = self.t5_tokenizer
                training_args.is_encoder_decoder = True

            trainer = ORPOTrainer(
                model=model,
                args=training_args,
                processing_class=tokenizer,
                train_dataset=dummy_dataset["train"],
                eval_dataset=dummy_dataset["test"],
            )

            previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()}

            trainer.train()

            self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

            # Check that the parameters have changed
            for n, param in previous_trainable_params.items():
                new_param = trainer.model.get_parameter(n)
                if param.sum() != 0:  # ignore 0 biases
                    self.assertFalse(torch.equal(param, new_param))

    @require_peft
    @parameterized.expand(["standard_preference", "conversational_preference"])
    def test_orpo_trainer_with_lora(self, config_name):
        from peft import LoraConfig

        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM",
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            training_args = ORPOConfig(
                output_dir=tmp_dir,
                per_device_train_batch_size=2,
                max_steps=3,
                remove_unused_columns=False,
                gradient_accumulation_steps=4,
                learning_rate=9e-1,
                eval_strategy="steps",
                beta=0.1,
                report_to="none",
            )

            dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

            trainer = ORPOTrainer(
                model=self.model,
                args=training_args,
                processing_class=self.tokenizer,
                train_dataset=dummy_dataset["train"],
                eval_dataset=dummy_dataset["test"],
                peft_config=lora_config,
            )

            previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()}

            trainer.train()

            self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

            # Check that the LoRA parameters have changed
            for n, param in previous_trainable_params.items():
                if "lora" in n:
                    new_param = trainer.model.get_parameter(n)
                    if param.sum() != 0:  # ignore 0 biases
                        self.assertFalse(torch.equal(param, new_param))

    def test_compute_metrics(self):
        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
        tokenizer.pad_token = tokenizer.eos_token

        dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference")

        def dummy_compute_metrics(*args, **kwargs):
            return {"test": 0.0}

        with tempfile.TemporaryDirectory() as tmp_dir:
            training_args = ORPOConfig(
                output_dir=tmp_dir,
                remove_unused_columns=False,
                per_device_train_batch_size=2,
                do_eval=True,
                eval_strategy="steps",
                eval_steps=1,
                per_device_eval_batch_size=2,
                report_to="none",
            )

            trainer = ORPOTrainer(
                model=model,
                args=training_args,
                processing_class=tokenizer,
                train_dataset=dummy_dataset["train"],
                eval_dataset=dummy_dataset["test"],
                compute_metrics=dummy_compute_metrics,
            )

            trainer.train()

            self.assertEqual(trainer.state.log_history[-2]["eval_test"], 0.0)