# Copyright 2020-2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
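
"""Tests for the activation offloading utilities (``OffloadActivations`` and ``NoOpManager``)."""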

import unittest

import torch
from torch import nn
from transformers import AutoModelForCausalLM
from transformers.testing_utils import require_peft, require_torch_accelerator, torch_device
from transformers.utils import is_peft_available

from trl.models.activation_offloading import NoOpManager, OffloadActivations
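
# As exercised by these tests: OffloadActivations is a context manager that moves saved
# activations off the accelerator during the forward pass and restores them for backward,
# while NoOpManager temporarily disables that offloading within its scope.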

if is_peft_available():
    from peft import LoraConfig, get_peft_model

@require_torch_accelerator
class TestActivationOffloading(unittest.TestCase):
    @require_peft
    def test_offloading_with_peft_models(self) -> None:
        """Test that activation offloading works with PEFT models."""
        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
        model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
        peft_config = LoraConfig(
            lora_alpha=16,
            lora_dropout=0.1,
            r=8,
            bias="none",
            task_type="CAUSAL_LM",
        )
        model = get_peft_model(model, peft_config)

        inp = torch.randint(0, 100, (2, 10), device=torch_device)

        # First forward-backward pass without offloading
        torch.manual_seed(42)
        loss = model(inp, labels=inp).loss
        loss.backward()
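
        # With LoRA, the base model weights are frozen, so only the adapter (trainable)
        # parameters receive gradients here.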
        # Store gradients - only from trainable parameters
        grads_original = []
        for name, param in model.named_parameters():
            if param.requires_grad and param.grad is not None:
                grads_original.append((name, param.grad.clone()))

        # Reset gradients
        for p in model.parameters():
            if p.grad is not None:
                p.grad = None

        # Second forward-backward pass with offloading
        torch.manual_seed(42)
        with OffloadActivations():
            loss_c = model(inp, labels=inp).loss
            loss_c.backward()

        # Compare gradients - only trainable parameters
        for name_orig, grad_orig in grads_original:
            for name_param, param in model.named_parameters():
                if name_param == name_orig and param.requires_grad and param.grad is not None:
                    self.assertTrue(
                        torch.allclose(grad_orig, param.grad, rtol=1e-4, atol=1e-5),
                        f"Gradient mismatch for {name_orig}",
                    )

    def test_noop_manager_with_offloading(self):
        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
        model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
        inp = torch.randint(0, 100, (2, 10), device=torch_device)

        # Run with offloading, but disable it for a specific section
        with OffloadActivations():
            # First forward-backward with normal offloading
            torch.manual_seed(42)
            out1 = model(inp, labels=inp)
            out1.loss.backward()
            grads1 = [p.grad.clone() for p in model.parameters()]

            # Reset grads
            for p in model.parameters():
                p.grad = None
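
            # Inside NoOpManager the offloading hooks are expected to act as no-ops, so this
            # pass should behave like a regular (non-offloaded) forward/backward.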
            # Second forward-backward with NoOpManager
            with NoOpManager():
                torch.manual_seed(42)
                out2 = model(inp, labels=inp)
                out2.loss.backward()

            grads2 = [p.grad.clone() for p in model.parameters()]

        # Gradients should match as NoOpManager should have prevented offloading
        for g1, g2 in zip(grads1, grads2):
            self.assertTrue(torch.allclose(g1, g2, rtol=1e-4, atol=1e-5))

    def test_min_offload_size(self):
        """Test that tensors smaller than min_offload_size aren't offloaded."""
        model = nn.Sequential(
            nn.Linear(5, 5),  # Small layer whose activations shouldn't be offloaded
            nn.Linear(5, 1000),  # Large layer whose activations should be offloaded
        ).to(torch_device)
        inp = torch.randn(2, 5, device=torch_device)
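
        # With min_offload_size=1000, the first layer's small (2, 5) activations should fall
        # below the threshold while the second layer's much larger (2, 1000) output should
        # exceed it, so both the offloaded and non-offloaded paths are exercised.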
        with OffloadActivations(min_offload_size=1000):
            out = model(inp)
            out.sum().backward()

        # The test passes if no errors occur, as we're mainly testing
        # that the logic handles both offloaded and non-offloaded tensors

    def test_real_hf_model(self):
        """Test with an actual HuggingFace model."""
        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
        model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)

        # Create small input
        inp = torch.randint(0, 100, (2, 10), device=torch_device)

        # Baseline without offloading
        torch.manual_seed(42)
        out1 = model(inp, labels=inp).loss
        out1.backward()
        grads1 = [p.grad.clone() for p in model.parameters()]

        # Reset grads
        for p in model.parameters():
            p.grad = None

        # With offloading
        with OffloadActivations():
            torch.manual_seed(42)
            out2 = model(inp, labels=inp).loss
            out2.backward()
        grads2 = [p.grad.clone() for p in model.parameters()]

        # Check outputs and gradients match
        self.assertTrue(torch.allclose(out1, out2, rtol=1e-5))
        for g1, g2 in zip(grads1, grads2):
            self.assertTrue(torch.allclose(g1, g2, rtol=1e-5))