Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
1316 lines
59 KiB
Python
1316 lines
59 KiB
Python
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import gc
|
|
import importlib.metadata
|
|
import json
|
|
import os
|
|
import re
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
from datasets import Dataset, DatasetDict
|
|
from huggingface_hub import hf_hub_download
|
|
from packaging import version
|
|
from torch import nn
|
|
|
|
from transformers import (
|
|
AutoConfig,
|
|
AutoModel,
|
|
AutoModelForCausalLM,
|
|
AutoModelForSequenceClassification,
|
|
AutoTokenizer,
|
|
BitsAndBytesConfig,
|
|
OPTForCausalLM,
|
|
Trainer,
|
|
TrainingArguments,
|
|
logging,
|
|
)
|
|
from transformers.testing_utils import (
|
|
CaptureLogger,
|
|
require_bitsandbytes,
|
|
require_peft,
|
|
require_torch,
|
|
require_torch_accelerator,
|
|
slow,
|
|
torch_device,
|
|
)
|
|
from transformers.utils import ADAPTER_CONFIG_NAME, check_torch_load_is_safe, is_torch_available
|
|
|
|
|
|
if is_torch_available():
|
|
import torch
|
|
|
|
|
|
@require_peft
@require_torch
class PeftTesterMixin:
    """Shared checkpoint ids and model classes that the PEFT integration tests iterate over."""

    # Checkpoints loaded by the tests that expect adapters to be present after `from_pretrained`.
    peft_test_model_ids = ("peft-internal-testing/tiny-OPTForCausalLM-lora",)

    # Base checkpoints the tests attach fresh adapters to.
    transformers_test_model_ids = ("hf-internal-testing/tiny-random-OPTForCausalLM",)

    # Every test loads each checkpoint once per class listed here.
    transformers_test_model_classes = (AutoModelForCausalLM, OPTForCausalLM)
|
|
|
|
|
|
# TODO: run it with CI after PEFT release.
|
|
@slow
|
|
class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin):
|
|
"""
|
|
A testing suite that makes sure that the PeftModel class is correctly integrated into the transformers library.
|
|
"""
|
|
|
|
def _check_lora_correctly_converted(self, model):
    """
    Report whether `model` has adapter layers injected.

    Returns `True` as soon as one submodule yielded by `model.named_modules()` is an instance of
    PEFT's `BaseTunerLayer`, and `False` when none is.
    """
    from peft.tuners.tuners_utils import BaseTunerLayer

    # `any` stops at the first match, like the explicit loop-and-break it replaces.
    return any(isinstance(submodule, BaseTunerLayer) for _, submodule in model.named_modules())
|
|
|
|
def _get_bnb_4bit_config(self):
    """Return a fresh `BitsAndBytesConfig` requesting 4-bit loading with the "nf4" quant type."""
    quantization_kwargs = {"load_in_4bit": True, "bnb_4bit_quant_type": "nf4"}
    return BitsAndBytesConfig(**quantization_kwargs)
|
|
|
|
def _get_bnb_8bit_config(self):
    """Return a fresh `BitsAndBytesConfig` requesting 8-bit loading."""
    quantization_kwargs = {"load_in_8bit": True}
    return BitsAndBytesConfig(**quantization_kwargs)
|
|
|
|
def test_peft_from_pretrained(self):
    """
    Load adapter checkpoints directly through `from_pretrained`.

    For every adapter checkpoint / model class pair, the loaded model must have adapter layers
    injected, must flag `_hf_peft_config_loaded`, must not log key-mismatch warnings and must be able
    to run `generate`.
    """
    peft_logger = logging.get_logger("transformers.integrations.peft")
    prompt_ids = [[0, 1, 2, 3, 4, 5, 6, 7]]

    for checkpoint in self.peft_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            with CaptureLogger(peft_logger) as captured:
                loaded = model_cls.from_pretrained(checkpoint, use_safetensors=False)
                peft_model = loaded.to(torch_device)

            # ensure that under normal circumstances, there are no warnings about keys
            for fragment in ("unexpected keys", "missing keys"):
                self.assertNotIn(fragment, captured.out)

            self.assertTrue(self._check_lora_correctly_converted(peft_model))
            self.assertTrue(peft_model._hf_peft_config_loaded)

            # dummy generation
            generation_input = torch.LongTensor(prompt_ids).to(torch_device)
            _ = peft_model.generate(input_ids=generation_input)
|
|
|
|
def test_peft_state_dict(self):
    """
    Check that every key returned by `get_adapter_state_dict()` contains "lora".
    """
    for checkpoint in self.peft_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            loaded = model_cls.from_pretrained(checkpoint, revision="refs/pr/2")
            peft_model = loaded.to(torch_device)

            adapter_weights = peft_model.get_adapter_state_dict()

            for weight_name in adapter_weights:
                self.assertTrue("lora" in weight_name)
|
|
|
|
def test_peft_save_pretrained(self):
    """
    Check `save_pretrained` on a model that has adapters loaded.

    The output directory must contain the adapter weights and adapter config, must not contain a
    base-model config or base-model weights, and must be loadable again with adapters injected.
    """
    for checkpoint in self.peft_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            peft_model = model_cls.from_pretrained(checkpoint, revision="refs/pr/2").to(torch_device)

            with tempfile.TemporaryDirectory() as tmpdirname:
                peft_model.save_pretrained(tmpdirname)

                # Nothing writes to the directory between the checks, so list it once.
                saved_files = os.listdir(tmpdirname)

                self.assertTrue("adapter_model.safetensors" in saved_files)
                self.assertTrue("adapter_config.json" in saved_files)

                self.assertTrue("config.json" not in saved_files)
                self.assertTrue("pytorch_model.bin" not in saved_files)
                self.assertTrue("model.safetensors" not in saved_files)

                # Reload while the temporary directory still exists.
                peft_model = model_cls.from_pretrained(tmpdirname).to(torch_device)
                self.assertTrue(self._check_lora_correctly_converted(peft_model))
|
|
|
|
def test_peft_enable_disable_adapters(self):
    """
    Check that `disable_adapters` changes the logits and `enable_adapters` restores them.
    """
    from peft import LoraConfig

    dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
    tolerance = {"rtol": 1e-12, "atol": 1e-12}

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            peft_model = model_cls.from_pretrained(checkpoint).to(torch_device)
            peft_model.add_adapter(LoraConfig(init_lora_weights=False))

            # Forward passes: adapter active -> disabled -> enabled again.
            logits_initial = peft_model(dummy_input).logits

            peft_model.disable_adapters()
            logits_disabled = peft_model(dummy_input).logits

            peft_model.enable_adapters()
            logits_reenabled = peft_model(dummy_input).logits

            torch.testing.assert_close(logits_initial, logits_reenabled, **tolerance)
            self.assertFalse(torch.allclose(logits_reenabled, logits_disabled, **tolerance))
|
|
|
|
def test_peft_add_adapter(self):
    """
    Check that `add_adapter` injects adapter layers into a base model and that the model can still
    run `generate` afterwards.
    """
    from peft import LoraConfig

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(checkpoint).to(torch_device)

            lora_config = LoraConfig(init_lora_weights=False)
            model.add_adapter(lora_config)

            self.assertTrue(self._check_lora_correctly_converted(model))

            # dummy generation
            generation_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
            _ = model.generate(input_ids=generation_input)
|
|
|
|
def test_peft_add_adapter_from_pretrained(self):
    """
    Check that a model whose adapter was attached with `add_adapter` can be saved with
    `save_pretrained` and loaded back with `from_pretrained` with the adapter layers still injected.
    """
    from peft import LoraConfig

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(checkpoint).to(torch_device)

            lora_config = LoraConfig(init_lora_weights=False)
            model.add_adapter(lora_config)
            self.assertTrue(self._check_lora_correctly_converted(model))

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                reloaded_model = model_cls.from_pretrained(tmpdirname).to(torch_device)
                self.assertTrue(self._check_lora_correctly_converted(reloaded_model))
|
|
|
|
def test_peft_save_reload_preserves_adapter_weights(self):
    """
    Regression test: LoRA weights must survive a `save_pretrained` + `from_pretrained` roundtrip.

    Every `lora_A` / `lora_B` parameter is filled with a known sentinel value before saving, and the
    reloaded model's LoRA parameters are compared against those sentinels. Runs once on a BERT
    checkpoint through `AutoModel` and once on an OPT checkpoint through `AutoModelForCausalLM`.
    """
    from peft import LoraConfig

    sentinel_a, sentinel_b = 0.0234, 0.0567
    cases = [
        (AutoModel, "hf-internal-testing/tiny-random-BertModel"),
        (AutoModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM"),
    ]

    for auto_class, model_id in cases:
        with self.subTest(model=model_id):
            model = auto_class.from_pretrained(model_id).to(torch_device)
            model.add_adapter(LoraConfig(init_lora_weights=False, r=8))

            # Overwrite the adapter weights in place with recognisable constants.
            with torch.no_grad():
                for param_name, weight in model.named_parameters():
                    if "lora_A" in param_name:
                        weight.fill_(sentinel_a)
                    elif "lora_B" in param_name:
                        weight.fill_(sentinel_b)

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                reloaded = auto_class.from_pretrained(tmpdirname).to(torch_device)

            lora_params = {}
            for param_name, weight in reloaded.named_parameters():
                if "lora_A" in param_name or "lora_B" in param_name:
                    lora_params[param_name] = weight
            self.assertTrue(lora_params, "no LoRA parameters found on reloaded model")

            for name, p in lora_params.items():
                if "lora_A" in name:
                    expected = sentinel_a
                else:
                    expected = sentinel_b
                self.assertTrue(
                    torch.allclose(p, torch.full_like(p, expected)),
                    f"adapter weight {name} was not restored from the checkpoint "
                    f"(expected uniform {expected}, got first values {p.flatten()[:4].tolist()})",
                )
|
|
|
|
def test_peft_load_adapter_non_moe_conversion_mapped_model(self):
    """
    Regression test for a `KeyError` in `_convert_peft_config_moe` when the base model's `model_type`
    appears in `_MODEL_TO_CONVERSION_PATTERN` (used for legacy checkpoint key renaming) but not in
    `_MOE_TARGET_MODULE_MAPPING` (which only has MoE architectures). Affected types include
    `qwen2_5_vl`, `paligemma`, `gemma3`, `internvl`, `aya_vision`, `got_ocr2`, and `rt_detr_v2`.

    The test attaches a LoRA adapter to a tiny Qwen2.5-VL checkpoint, saves it, reloads it and checks
    that the reloaded model has adapter layers injected.
    """
    from peft import LoraConfig

    model_id = "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration"
    lora_config = LoraConfig(
        r=4,
        lora_alpha=4,
        target_modules=["q_proj", "v_proj"],
        task_type="FEATURE_EXTRACTION",
    )

    model = AutoModel.from_pretrained(model_id).to(torch_device)
    model.add_adapter(lora_config)

    with tempfile.TemporaryDirectory() as tmpdirname:
        model.save_pretrained(tmpdirname)
        reloaded = AutoModel.from_pretrained(tmpdirname).to(torch_device)

    self.assertTrue(self._check_lora_correctly_converted(reloaded))
|
|
|
|
def test_peft_add_adapter_modules_to_save(self):
    """
    Check that `add_adapter` works when the config lists `modules_to_save`.

    With `modules_to_save=["lm_head"]` the test expects that:
    - adapter layers are injected into the model,
    - a `ModulesToSaveWrapper` is found under a module name containing "lm_head", and its `default`
      copy has a trainable weight,
    - `get_adapter_state_dict()` contains "lm_head.weight",
    - after a backward pass every parameter with `requires_grad=True` has a gradient.
    """
    from peft import LoraConfig
    from peft.utils import ModulesToSaveWrapper

    for model_id in self.transformers_test_model_ids:
        for transformers_class in self.transformers_test_model_classes:
            dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)

            model = transformers_class.from_pretrained(model_id).to(torch_device)
            peft_config = LoraConfig(init_lora_weights=False, modules_to_save=["lm_head"])
            model.add_adapter(peft_config)
            # The helper only returns a bool; without the assertion the check is a no-op.
            self.assertTrue(self._check_lora_correctly_converted(model))

            _has_modules_to_save_wrapper = False
            for name, module in model.named_modules():
                if isinstance(module, ModulesToSaveWrapper):
                    _has_modules_to_save_wrapper = True
                    self.assertTrue(module.modules_to_save.default.weight.requires_grad)
                    self.assertIn("lm_head", name)
                    # Only the first wrapper encountered is inspected.
                    break

            self.assertTrue(_has_modules_to_save_wrapper)
            state_dict = model.get_adapter_state_dict()

            self.assertIn("lm_head.weight", state_dict)

            logits = model(dummy_input).logits
            loss = logits.mean()
            loss.backward()

            for _, param in model.named_parameters():
                if param.requires_grad:
                    self.assertIsNotNone(param.grad)
|
|
|
|
def test_peft_add_adapter_training_gradient_checkpointing(self):
    """
    Check that a model with an adapter attached through `add_adapter` can run a backward pass with
    gradient checkpointing enabled, and that only LoRA parameters are trainable and receive gradients.
    """
    from peft import LoraConfig

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(checkpoint).to(torch_device)

            lora_config = LoraConfig(init_lora_weights=False)
            model.add_adapter(lora_config)

            self.assertTrue(self._check_lora_correctly_converted(model))

            # When attaching adapters the input embeddings will stay frozen, this will
            # lead to the output embedding having requires_grad=False.
            dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
            embeddings_before = model.get_input_embeddings()(dummy_input)
            self.assertTrue(embeddings_before.requires_grad is False)

            model.gradient_checkpointing_enable()

            # Since here we attached the hook, the input should have requires_grad to set
            # properly
            embeddings_after = model.get_input_embeddings()(dummy_input)
            self.assertTrue(embeddings_after.requires_grad is True)

            # To repro the Trainer issue
            dummy_input.requires_grad = False

            for param_name, param in model.named_parameters():
                if "lora" in param_name.lower():
                    self.assertTrue(param.requires_grad)

            loss = model(dummy_input).logits.mean()
            loss.backward()

            for param_name, param in model.named_parameters():
                if not param.requires_grad:
                    continue
                self.assertTrue("lora" in param_name.lower())
                self.assertTrue(param.grad is not None)
|
|
|
|
def test_peft_add_multi_adapter(self):
    """
    Check `add_adapter` in a multi-adapter setting.

    Two adapters are attached to the same base model. The test checks that adapter layers are
    injected, that `set_adapter` switches `active_adapters()`, that the recorded outputs differ
    between the adapters, the base model and the two-adapter combination, and that saving with
    several active adapters raises `ValueError`.
    """
    from peft import LoraConfig
    from peft.tuners.tuners_utils import BaseTunerLayer

    dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
    tolerance = {"atol": 1e-6, "rtol": 1e-6}

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(checkpoint).to(torch_device)

            base_logits = model(dummy_input).logits

            # The same config object is used for both adapters.
            peft_config = LoraConfig(init_lora_weights=False)

            model.add_adapter(peft_config)
            output_first = model(dummy_input)

            model.add_adapter(peft_config, adapter_name="adapter-2")
            output_second = model(dummy_input)

            has_tuner_layer = any(isinstance(submodule, BaseTunerLayer) for _, submodule in model.named_modules())
            self.assertTrue(has_tuner_layer)

            # dummy generation
            _ = model.generate(input_ids=dummy_input)

            for adapter in ("default", "adapter-2"):
                model.set_adapter(adapter)
                self.assertTrue(model.active_adapters() == [adapter])

            # Logits comparison
            self.assertFalse(torch.allclose(output_first.logits, output_second.logits, **tolerance))
            self.assertFalse(torch.allclose(base_logits, output_second.logits, **tolerance))

            model.set_adapter(["adapter-2", "default"])
            self.assertTrue(model.active_adapters() == ["adapter-2", "default"])

            output_mixed = model(dummy_input)
            self.assertFalse(torch.allclose(output_first.logits, output_mixed.logits, **tolerance))
            self.assertFalse(torch.allclose(output_second.logits, output_mixed.logits, **tolerance))

            # multi active adapter saving not supported
            with self.assertRaises(ValueError), tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
|
|
|
|
def test_delete_adapter(self):
    """
    Exercise `delete_adapter` with several adapters.

    Covers deleting one adapter by name, deleting the last remaining adapter, deleting a list of
    adapters, the errors raised for unknown names (with and without any adapter loaded), an empty
    list, and a list containing the same name twice.
    """
    from peft import LoraConfig

    not_present_pattern = "The following adapter\\(s\\) are not present"

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(checkpoint).to(torch_device)

            # Add multiple adapters
            peft_config_1 = LoraConfig(init_lora_weights=False)
            peft_config_2 = LoraConfig(init_lora_weights=False)
            model.add_adapter(peft_config_1, adapter_name="adapter_1")
            model.add_adapter(peft_config_2, adapter_name="adapter_2")

            # Ensure adapters were added
            for adapter in ("adapter_1", "adapter_2"):
                self.assertIn(adapter, model.peft_config)

            # Delete a single adapter
            model.delete_adapter("adapter_1")
            self.assertNotIn("adapter_1", model.peft_config)
            self.assertIn("adapter_2", model.peft_config)

            # Delete remaining adapter: the model is expected to drop `peft_config` entirely
            model.delete_adapter("adapter_2")
            self.assertFalse(hasattr(model, "peft_config"))
            self.assertFalse(model._hf_peft_config_loaded)

            # Re-add adapters for edge case tests
            model.add_adapter(peft_config_1, adapter_name="adapter_1")
            model.add_adapter(peft_config_2, adapter_name="adapter_2")

            # Attempt to delete multiple adapters at once
            model.delete_adapter(["adapter_1", "adapter_2"])
            self.assertFalse(hasattr(model, "peft_config"))
            self.assertFalse(model._hf_peft_config_loaded)

            # Unknown adapter name while no adapter is loaded at all
            no_adapter_pattern = re.escape("No adapter loaded. Please load an adapter first.")
            with self.assertRaisesRegex(ValueError, no_adapter_pattern):
                model.delete_adapter("nonexistent_adapter")

            model.add_adapter(peft_config_1, adapter_name="adapter_1")

            # Unknown adapter name, alone or mixed with an existing one
            with self.assertRaisesRegex(ValueError, not_present_pattern):
                model.delete_adapter("nonexistent_adapter")

            with self.assertRaisesRegex(ValueError, not_present_pattern):
                model.delete_adapter(["adapter_1", "nonexistent_adapter"])

            # Deleting with an empty list should not raise errors and should change nothing
            model.add_adapter(peft_config_2, adapter_name="adapter_2")
            model.delete_adapter([])  # No-op
            self.assertIn("adapter_1", model.peft_config)
            self.assertIn("adapter_2", model.peft_config)

            # Deleting duplicate adapter names in the list
            model.delete_adapter(["adapter_1", "adapter_1"])
            self.assertNotIn("adapter_1", model.peft_config)
            self.assertIn("adapter_2", model.peft_config)
|
|
|
|
def test_delete_adapter_with_modules_to_save(self):
    """
    Check that deleting an adapter also removes its entry from the `modules_to_save` wrapper.
    """
    from peft import LoraConfig

    # the test assumes a specific model architecture, so only test this one:
    model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    adapter = "adapter_1"

    model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
    lora_config = LoraConfig(init_lora_weights=False, modules_to_save=["lm_head"])
    model.add_adapter(lora_config, adapter_name="adapter_1")

    # sanity checks
    self.assertIn("adapter_1", model.peft_config)
    self.assertNotIsInstance(model.lm_head, nn.Linear)  # a ModulesToSaveWrapper
    self.assertTrue(hasattr(model.lm_head, "modules_to_save"))
    self.assertTrue(adapter in model.lm_head.modules_to_save)

    # now delete the adapter
    model.delete_adapter("adapter_1")
    self.assertFalse(hasattr(model, "peft_config"))
    self.assertFalse(adapter in model.lm_head.modules_to_save)
    self.assertFalse(model.lm_head.modules_to_save)  # i.e. empty ModuleDict
|
|
|
|
@require_torch_accelerator
|
|
@require_bitsandbytes
|
|
def test_peft_from_pretrained_kwargs(self):
    """
    Load adapter checkpoints through `from_pretrained` with additional kwargs (`device_map` and an
    8-bit `quantization_config`) and check that the integration behaves as expected: the inspected
    projection layer is a `Linear8bitLt` and the model can run `generate`.
    """
    for checkpoint in self.peft_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            quantization_config = self._get_bnb_8bit_config()
            peft_model = model_cls.from_pretrained(
                checkpoint,
                device_map="auto",
                quantization_config=quantization_config,
            )

            first_layer = peft_model.model.decoder.layers[0]
            value_projection = first_layer.self_attn.v_proj
            self.assertTrue(value_projection.__class__.__name__ == "Linear8bitLt")

            # dummy generation
            generation_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
            _ = peft_model.generate(input_ids=generation_input)
|
|
|
|
@require_torch_accelerator
|
|
@require_bitsandbytes
|
|
def test_peft_save_quantized(self):
    """
    Check `save_pretrained` on adapter models whose base model is loaded quantized.

    Runs the full checkpoint / model class sweep first with the 4-bit config, then with the 8-bit
    config. Each time the saved directory must contain the adapter weights and config and no
    base-model weight file.
    """
    # (config factory, expected class name of the quantized linear layer), 4-bit first then 8-bit
    quantization_cases = (
        (self._get_bnb_4bit_config, "Linear4bit"),
        (self._get_bnb_8bit_config, "Linear8bitLt"),
    )

    for make_config, expected_layer_name in quantization_cases:
        for checkpoint in self.peft_test_model_ids:
            for model_cls in self.transformers_test_model_classes:
                # A new config object is built for every load.
                quantization_config = make_config()
                peft_model = model_cls.from_pretrained(
                    checkpoint, device_map="auto", quantization_config=quantization_config
                )

                value_projection = peft_model.model.decoder.layers[0].self_attn.v_proj
                self.assertTrue(value_projection.__class__.__name__ == expected_layer_name)

                with tempfile.TemporaryDirectory() as tmpdirname:
                    peft_model.save_pretrained(tmpdirname)

                    saved_files = os.listdir(tmpdirname)
                    self.assertTrue("adapter_model.safetensors" in saved_files)
                    self.assertTrue("adapter_config.json" in saved_files)
                    self.assertTrue("pytorch_model.bin" not in saved_files)
                    self.assertTrue("model.safetensors" not in saved_files)
|
|
|
|
@require_torch_accelerator
|
|
@require_bitsandbytes
|
|
def test_peft_save_quantized_regression(self):
    """
    Check `save_pretrained` on adapter models whose base model is loaded quantized.

    Runs the full checkpoint / model class sweep first with the 4-bit config, then with the 8-bit
    config. Each time the saved directory must contain the adapter weights and config and must not
    contain `model.safetensors`.
    """
    # (config factory, expected class name of the quantized linear layer), 4-bit first then 8-bit
    quantization_cases = (
        (self._get_bnb_4bit_config, "Linear4bit"),
        (self._get_bnb_8bit_config, "Linear8bitLt"),
    )

    for make_config, expected_layer_name in quantization_cases:
        for checkpoint in self.peft_test_model_ids:
            for model_cls in self.transformers_test_model_classes:
                # A new config object is built for every load.
                quantization_config = make_config()
                peft_model = model_cls.from_pretrained(
                    checkpoint, device_map="auto", quantization_config=quantization_config
                )

                value_projection = peft_model.model.decoder.layers[0].self_attn.v_proj
                self.assertTrue(value_projection.__class__.__name__ == expected_layer_name)

                with tempfile.TemporaryDirectory() as tmpdirname:
                    peft_model.save_pretrained(tmpdirname)

                    saved_files = os.listdir(tmpdirname)
                    self.assertTrue("adapter_model.safetensors" in saved_files)
                    self.assertTrue("adapter_config.json" in saved_files)
                    self.assertTrue("model.safetensors" not in saved_files)
|
|
|
|
def test_peft_pipeline(self):
    """
    Check that a "text-generation" pipeline can be built from an adapter checkpoint and run.

    The adapter pipeline's model must have a different number of parameters than the pipeline built
    from the matching base checkpoint.
    """
    from transformers import pipeline

    checkpoint_pairs = zip(self.peft_test_model_ids, self.transformers_test_model_ids)
    for adapter_id, base_model_id in checkpoint_pairs:
        peft_pipe = pipeline("text-generation", adapter_id)
        base_pipe = pipeline("text-generation", base_model_id)

        peft_param_count = len(list(peft_pipe.model.parameters()))
        base_param_count = len(list(base_pipe.model.parameters()))
        self.assertNotEqual(peft_param_count, base_param_count)  # Assert we actually loaded the adapter too

        _ = peft_pipe("Hello", max_new_tokens=20)
|
|
|
|
def test_peft_add_adapter_with_state_dict(self):
    """
    Check that `load_adapter` works when it is given an in-memory state dict.

    For each base checkpoint / adapter checkpoint pair the test checks that:
    - `load_adapter(peft_model_id=None)` with nothing else raises `ValueError`,
    - loading with `adapter_state_dict` plus `peft_config` succeeds and injects adapter layers,
    - loading with `adapter_state_dict` but `peft_config=None` raises `ValueError`,
    - the resulting model can run `generate`.
    """
    from peft import LoraConfig

    dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)

    for model_id, peft_model_id in zip(self.transformers_test_model_ids, self.peft_test_model_ids):
        for transformers_class in self.transformers_test_model_classes:
            model = transformers_class.from_pretrained(model_id).to(torch_device)

            peft_config = LoraConfig(init_lora_weights=False)

            with self.assertRaises(ValueError):
                model.load_adapter(peft_model_id=None)

            state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin")

            check_torch_load_is_safe()
            dummy_state_dict = torch.load(state_dict_path, weights_only=True)

            model.load_adapter(adapter_state_dict=dummy_state_dict, peft_config=peft_config)

            # Call `load_adapter` directly: the previous nested form
            # `load_adapter(load_adapter(...))` also passed when the inner call returned normally,
            # because the outer `load_adapter(None)` raises `ValueError` on its own.
            with self.assertRaises(ValueError):
                model.load_adapter(adapter_state_dict=dummy_state_dict, peft_config=None)
            self.assertTrue(self._check_lora_correctly_converted(model))

            # dummy generation
            _ = model.generate(input_ids=dummy_input)
|
|
|
|
def test_peft_add_adapter_with_state_dict_low_cpu_mem_usage(self):
    """
    Check the usage of low_cpu_mem_usage, which is supported in PEFT >= 0.13.0

    The same state dict is loaded twice, once with `low_cpu_mem_usage=False` and once under a second
    adapter name with `low_cpu_mem_usage=True`; afterwards no parameter may be left on the meta device.
    """
    from peft import LoraConfig

    checkpoint_pairs = zip(self.transformers_test_model_ids, self.peft_test_model_ids)
    for base_checkpoint, adapter_checkpoint in checkpoint_pairs:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(base_checkpoint).to(torch_device)

            lora_config = LoraConfig()
            weights_path = hf_hub_download(adapter_checkpoint, "adapter_model.bin")
            check_torch_load_is_safe()
            adapter_weights = torch.load(weights_path, weights_only=True)

            # this should always work
            model.load_adapter(adapter_state_dict=adapter_weights, peft_config=lora_config, low_cpu_mem_usage=False)

            model.load_adapter(
                adapter_state_dict=adapter_weights,
                adapter_name="other",
                peft_config=lora_config,
                low_cpu_mem_usage=True,
            )

            # after loading, no meta device should be remaining
            leftover_on_meta = any(param.device.type == "meta" for param in model.parameters())
            self.assertFalse(leftover_on_meta)
|
|
|
|
def test_peft_load_adapter_warmup_uses_adapter_expected_keys(self):
    """
    Check that adapter loading only warms up memory for adapter parameters by capturing the device map passed to
    `caching_allocator_warmup`.

    Note: this test depends on `_load_pretrained_model` calling `caching_allocator_warmup`; if that internal
    contract changes, update the test to keep checking the keys used for warmup sizing.
    """
    from peft import LoraConfig

    import transformers.modeling_utils as modeling_utils

    adapter_name = "warmup_test_adapter"
    # A key counts as an adapter key when it contains any of these substrings.
    adapter_key_markers = (adapter_name, "lora")

    def is_adapter_key(key):
        return any(marker in key for marker in adapter_key_markers)

    peft_logger_name = "transformers.integrations.peft"

    for checkpoint in self.transformers_test_model_ids:
        for model_cls in self.transformers_test_model_classes:
            model = model_cls.from_pretrained(checkpoint).to(torch_device)

            peft_config = LoraConfig()

            # A throwaway copy of the model supplies the adapter parameter names and shapes.
            template_model = model_cls.from_pretrained(checkpoint)
            template_model.add_adapter(LoraConfig(), adapter_name=adapter_name)
            dummy_state_dict = {}
            for param_name, param in template_model.named_parameters():
                if is_adapter_key(param_name):
                    dummy_state_dict[param_name] = torch.zeros_like(param)
            del template_model
            self.assertTrue(dummy_state_dict)

            captured_device_maps = []

            def capture_warmup(model, expanded_device_map, hf_quantizer):
                # Record a copy of the device map the loader passes in.
                captured_device_maps.append(dict(expanded_device_map))

            with (
                patch.object(modeling_utils, "caching_allocator_warmup", side_effect=capture_warmup),
                CaptureLogger(logging.get_logger(peft_logger_name)),
            ):
                model.load_adapter(
                    adapter_state_dict=dummy_state_dict,
                    adapter_name=adapter_name,
                    peft_config=peft_config,
                )

            self.assertTrue(captured_device_maps)
            warmed_keys = set()
            for device_map in captured_device_maps:
                warmed_keys.update(device_map.keys())
            self.assertTrue(warmed_keys)

            unexpected_base_keys = [key for key in warmed_keys if not is_adapter_key(key)]
            self.assertEqual(unexpected_base_keys, [])
|
|
|
|
def test_peft_from_pretrained_hub_kwargs(self):
    """
    Exercise `from_pretrained` on a PEFT checkpoint with several combinations of hub kwargs passed via
    `adapter_kwargs`, for both the auto class and the concrete OPT class.
    """
    peft_model_id = "peft-internal-testing/tiny-opt-lora-revision"

    # Loading without any adapter kwargs is expected to fail
    with self.assertRaises(OSError):
        _ = AutoModelForCausalLM.from_pretrained(peft_model_id)

    # Each of these kwargs combinations is expected to load correctly
    adapter_kwargs_variants = (
        {"revision": "test"},
        {"revision": "main", "subfolder": "test_subfolder"},
    )
    for variant in adapter_kwargs_variants:
        for model_class in (AutoModelForCausalLM, OPTForCausalLM):
            # note: hand a fresh dict to every call so the test never relies on previous calls possibly mutating it
            adapter_kwargs = dict(variant)
            model = model_class.from_pretrained(peft_model_id, adapter_kwargs=adapter_kwargs)
            self.assertTrue(self._check_lora_correctly_converted(model))
|
|
|
|
def test_peft_from_pretrained_unexpected_keys_warning(self):
    """
    An adapter state dict containing a key the model does not know must be reported as UNEXPECTED in the logs.
    """
    from peft import LoraConfig

    peft_logger = logging.get_logger("transformers.integrations.peft")
    model_pairs = zip(self.transformers_test_model_ids, self.peft_test_model_ids)

    for model_id, peft_model_id in model_pairs:
        for transformers_class in self.transformers_test_model_classes:
            model = transformers_class.from_pretrained(model_id).to(torch_device)

            peft_config = LoraConfig()
            checkpoint_file = hf_hub_download(peft_model_id, "adapter_model.bin")
            check_torch_load_is_safe()
            adapter_weights = torch.load(checkpoint_file, weights_only=True)

            # inject a key that does not correspond to any parameter, re-using an existing tensor as its value
            some_tensor = next(iter(adapter_weights.values()))
            adapter_weights["foobar"] = some_tensor

            with CaptureLogger(peft_logger) as cl:
                model.load_adapter(adapter_state_dict=adapter_weights, peft_config=peft_config)

            self.assertIn("foobar | UNEXPECTED", cl.out)
|
|
|
|
def test_peft_from_pretrained_missing_keys_warning(self):
    """
    An adapter state dict lacking one of the expected keys must be reported as MISSING in the logs.
    """
    from peft import LoraConfig

    peft_logger = logging.get_logger("transformers.integrations.peft")
    model_pairs = zip(self.transformers_test_model_ids, self.peft_test_model_ids)

    for model_id, peft_model_id in model_pairs:
        for transformers_class in self.transformers_test_model_classes:
            model = transformers_class.from_pretrained(model_id).to(torch_device)

            peft_config = LoraConfig()
            checkpoint_file = hf_hub_download(peft_model_id, "adapter_model.bin")
            check_torch_load_is_safe()
            adapter_weights = torch.load(checkpoint_file, weights_only=True)

            # drop the first key so that the loaded adapter has a missing key
            key = next(iter(adapter_weights.keys()))
            del adapter_weights[key]

            with CaptureLogger(peft_logger) as cl:
                model.load_adapter(
                    adapter_state_dict=adapter_weights,
                    peft_config=peft_config,
                    low_cpu_mem_usage=False,
                    adapter_name="other",
                )

            # The reported name differs from the checkpoint key because of PEFT-specific naming:
            # 1. the PEFT-specific prefix is stripped
            key = key.removeprefix("base_model.model.")
            # 2. the adapter name is inserted in front of the last component; only that tail is matched below
            _, _, suffix = key.rpartition(".")
            key = f".other.{suffix}"
            msg = f"{key} | MISSING |"
            self.assertIn(msg, cl.out)
|
|
|
|
def test_peft_load_adapter_training_inference_mode_true(self):
    """
    By default, when loading an adapter, the whole model should be in eval mode and no parameter should have
    requires_grad=True.
    """
    for model_id in self.peft_test_model_ids:
        for transformers_class in self.transformers_test_model_classes:
            peft_model = transformers_class.from_pretrained(model_id).to(torch_device)

            with tempfile.TemporaryDirectory() as tmpdirname:
                peft_model.save_pretrained(tmpdirname)
                # Reload the base model from the recorded name/path, then attach the saved adapter with defaults.
                model = transformers_class.from_pretrained(peft_model.config._name_or_path)
                model.load_adapter(tmpdirname)

                # Collect names rather than booleans so that a failure reports the offending modules/parameters.
                training_modules = [name for name, module in model.named_modules() if module.training]
                self.assertEqual(training_modules, [])
                trainable_params = [name for name, param in model.named_parameters() if param.requires_grad]
                self.assertEqual(trainable_params, [])
                del model
|
|
|
|
def test_peft_load_adapter_training_inference_mode_false(self):
    """
    With `is_trainable=True`, the LoRA leaf modules should be in training mode with `requires_grad=True` parameters,
    while every other leaf module should be in eval mode with `requires_grad=False` parameters.
    """
    for model_id in self.peft_test_model_ids:
        for transformers_class in self.transformers_test_model_classes:
            peft_model = transformers_class.from_pretrained(model_id, use_safetensors=False).to(torch_device)

            with tempfile.TemporaryDirectory() as tmpdirname:
                peft_model.save_pretrained(tmpdirname)
                model = transformers_class.from_pretrained(peft_model.config._name_or_path)
                model.load_adapter(tmpdirname, is_trainable=True)

                # restrict the check to leaf modules, i.e. modules without children
                leaf_modules = [
                    (name, module) for name, module in model.named_modules() if not list(module.children())
                ]
                for name, module in leaf_modules:
                    is_lora = "lora_" in name
                    assert module.training == is_lora
                    assert all(p.requires_grad == is_lora for p in module.parameters())
|
|
|
|
def test_prefix_tuning_trainer_load_best_model_at_end_error(self):
    # Original issue: https://github.com/huggingface/peft/issues/2256
    # Using load_best_model_at_end=True together with a prompt learning PEFT method can fail: Trainer calls
    # load_adapter under the hood, but some prompt learning methods optimize the saved model by removing parameters
    # that are not needed for inference, which in turn requires a change to the model architecture. load_adapter
    # fails in such cases; users should set load_best_model_at_end=False and use PeftModel.from_pretrained instead.
    # Since this is not obvious, the error is intercepted and a helpful message is added.
    # This test checks that message, and also the "happy path" (i.e. no error) when using LoRA.
    from peft import LoraConfig, PrefixTuningConfig, TaskType, get_peft_model

    # small binary sequence classification dataset: one example per line of the `os` module docstring
    rows = [{"text": line, "label": idx % 2} for idx, line in enumerate(os.__doc__.splitlines())]
    ds_train = Dataset.from_list(rows)
    # the training split doubles as the validation split
    datasets = DatasetDict({"train": ds_train, "val": ds_train})

    # tokenizer for peft-internal-testing/tiny-OPTForCausalLM-lora cannot be loaded, thus using
    # hf-internal-testing/tiny-random-OPTForCausalLM
    model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left", model_type="opt")

    def tokenize_function(examples):
        return tokenizer(examples["text"], max_length=128, truncation=True, padding="max_length")

    tokenized_datasets = datasets.map(tokenize_function, batched=True)

    # lora works, prefix-tuning is expected to raise an error
    prefix_tuning_config = PrefixTuningConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        prefix_projection=True,
        num_virtual_tokens=10,
    )
    peft_configs = {
        "lora": LoraConfig(task_type=TaskType.SEQ_CLS),
        "prefix-tuning": prefix_tuning_config,
    }

    for peft_type, peft_config in peft_configs.items():
        base_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
        base_model.config.pad_token_id = tokenizer.pad_token_id
        peft_model = get_peft_model(base_model, peft_config)

        with tempfile.TemporaryDirectory() as tmpdirname:
            training_args = TrainingArguments(
                output_dir=tmpdirname,
                num_train_epochs=3,
                eval_strategy="epoch",
                save_strategy="epoch",
                load_best_model_at_end=True,
            )
            trainer = Trainer(
                model=peft_model,
                args=training_args,
                train_dataset=tokenized_datasets["train"],
                eval_dataset=tokenized_datasets["val"],
            )

            if peft_type != "lora":
                # prefix tuning does not work, but at least users should get a helpful error message
                msg = "When using prompt learning PEFT methods such as PREFIX_TUNING"
                with self.assertRaisesRegex(RuntimeError, msg):
                    trainer.train()
            else:
                # LoRA works with load_best_model_at_end
                trainer.train()
|
|
|
|
def test_peft_pipeline_no_warning(self):
    """
    Building a text-generation pipeline around a `PeftModel` must not emit the message
    "The model 'PeftModel' is not supported for text-generation"; the check is that nothing is logged at ERROR
    level on the `transformers.pipelines.base` logger while the pipeline is created.
    """
    from peft import PeftModel

    from transformers import pipeline

    adapter_id = "peft-internal-testing/tiny-OPTForCausalLM-lora"
    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"

    # Prompt used for the generation smoke check at the end
    prompt = "Who is a Elon Musk?"

    base_model = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(base_id)
    lora_model = PeftModel.from_pretrained(base_model, adapter_id, device_map="auto")

    # Create the pipeline with the PEFT model and require that the pipeline logger stays silent at ERROR level
    pipeline_logger = logging.get_logger("transformers.pipelines.base")
    with self.assertNoLogs(pipeline_logger, logging.ERROR):
        lora_generator = pipeline(
            task="text-generation",
            model=lora_model,
            tokenizer=tokenizer,
            max_length=10,
        )

    # Generate text to verify the pipeline actually works
    _ = lora_generator(prompt, max_new_tokens=20)
|
|
|
|
def test_non_lora_load_adapter(self):
    """
    Loading a saved non-LoRA adapter through `from_pretrained` must reproduce the PEFT model's logits. LoKr serves
    as the example; not all possible PEFT methods are tested.
    """
    from peft import LoKrConfig, get_peft_model

    inputs = torch.randint(0, 100, (1, 10)).to(torch_device)
    tolerances = {"atol": 1e-4, "rtol": 1e-4}

    for model_id in self.transformers_test_model_ids:
        for transformers_class in self.transformers_test_model_classes:
            model = transformers_class.from_pretrained(model_id).to(torch_device)
            with torch.inference_mode():
                output_base = model(inputs).logits

            # init_weights=False: the sanity check below expects the adapter to change the output right away
            peft_model = get_peft_model(model, LoKrConfig(init_weights=False))
            with torch.inference_mode():
                output_peft = peft_model(inputs).logits

            # sanity check: should be different
            assert not torch.allclose(output_base, output_peft, **tolerances)

            with tempfile.TemporaryDirectory() as tmpdirname:
                peft_model.save_pretrained(tmpdirname)
                del model, peft_model

                # reload from the directory that holds the saved adapter
                model = transformers_class.from_pretrained(tmpdirname).to(torch_device)
                with torch.inference_mode():
                    output_transformers = model(inputs).logits
                self.assertTrue(torch.allclose(output_peft, output_transformers, **tolerances))
|
|
|
|
def test_non_lora_add_adapter(self):
    """
    Adding a non-LoRA adapter with `add_adapter` must change the model output. LoKr serves as the example; not all
    possible PEFT methods are tested.
    """
    from peft import LoKrConfig

    inputs = torch.randint(0, 100, (1, 10)).to(torch_device)
    tolerances = {"atol": 1e-4, "rtol": 1e-4}

    for model_id in self.transformers_test_model_ids:
        for transformers_class in self.transformers_test_model_classes:
            model = transformers_class.from_pretrained(model_id).to(torch_device)
            with torch.inference_mode():
                output_base = model(inputs).logits

            model.add_adapter(LoKrConfig(init_weights=False))
            with torch.inference_mode():
                output_peft = model(inputs).logits

            # the adapter must have altered the logits
            assert not torch.allclose(output_base, output_peft, **tolerances)
|
|
|
|
def test_mixtral_lora_conversion(self):
    """
    Load the LoRA checkpoint `peft-internal-testing/mixtral-pre-v5-lora` onto `hf-internal-testing/Mixtral-tiny` and
    compare the top-left 3x3 slice of the logits with recorded reference values.

    The test is skipped when the installed PEFT version is older than 0.19.0.
    """
    if version.parse(importlib.metadata.version("peft")) < version.parse("0.19.0"):
        self.skipTest("For this test to pass, PEFT 0.19 is required.")

    # Place the inputs on `torch_device` rather than on a hard-coded device index, so that they always live on the
    # same device as the model and the expected logits below.
    inputs = torch.arange(10).view(1, -1).to(torch_device)
    model_name = "hf-internal-testing/Mixtral-tiny"
    adapter_name = "peft-internal-testing/mixtral-pre-v5-lora"

    # original logits were:
    # tensor([[[ 0.2676, 0.3870, 0.2956, ..., 0.4624, 0.1966, 0.2539],
    # [-0.6706, -0.0969, -0.6240, ..., -0.0201, 0.7099, -0.3099],
    # [ 0.0663, 0.1653, 0.7189, ..., 0.5905, 0.0649, 0.5839],
    # ...,
    # [-0.2712, -0.6451, -0.0219, ..., -0.4344, 0.5471, -0.9355],
    # [-0.3607, 0.4526, 0.2750, ..., 0.1082, 0.7179, 0.8487],
    # [ 0.5826, -0.1407, -0.3131, ..., 0.1026, 0.6878, -0.3382]]],
    # device='cuda:0')
    expected_logits_0_to_3 = torch.tensor(
        [
            [0.2676, 0.3870, 0.2956],
            [-0.6706, -0.0969, -0.6240],
            [0.0663, 0.1653, 0.7189],
        ]
    ).to(device=torch_device, dtype=torch.float16)

    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.load_adapter(adapter_name)
    model.to(torch_device)
    model.eval()
    with torch.inference_mode():
        output = model(inputs).logits

    # a little bit of deviation but that's fine
    atol, rtol = 1e-3, 1e-4
    self.assertTrue(torch.allclose(output[0, :3, :3], expected_logits_0_to_3, atol=atol, rtol=rtol))
|
|
|
|
|
|
@require_peft
|
|
@require_torch
|
|
@slow
|
|
class PeftHotswapIntegrationTest(unittest.TestCase):
|
|
def tearDown(self):
|
|
# It is critical that the dynamo cache is reset for each test. Otherwise, if the test re-uses the same model,
|
|
# there will be recompilation errors, as torch caches the model when run in the same process.
|
|
torch.compiler.reset()
|
|
gc.collect()
|
|
|
|
def _check_model_hotswap(self, *, rank1, rank2, do_compile):
    """
    Shared driver for the hotswap tests: checks that adapters can be hotswapped and that the model outputs
    correspond to the respective adapters.

    Two LoRA adapters are created on a tiny OPT model and saved. The first is then loaded into a fresh model and
    the second is hotswapped into the same adapter name; the logits after each step must match those recorded
    from the original adapters.

    Args:
        rank1: LoRA rank of the adapter that is loaded first.
        rank2: LoRA rank of the adapter that is hotswapped in; may differ from `rank1`.
        do_compile: whether to wrap the model with `torch.compile` after the first adapter has been loaded.
    """
    from peft import LoraConfig

    tolerances = {"atol": 1e-6, "rtol": 1e-6}

    torch.manual_seed(0)
    model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
    inputs = torch.randint(0, 100, (1, 10)).to(torch_device)

    def logits_of(net):
        with torch.inference_mode():
            return net(inputs).logits

    base_output = logits_of(model)

    # create 2 adapters
    model.add_adapter(LoraConfig(r=rank1, init_lora_weights=False), adapter_name="adapter_1")
    lora_1_output = logits_of(model)

    # second adapter may have a different rank
    model.add_adapter(LoraConfig(r=rank2, init_lora_weights=False), adapter_name="adapter_2")
    model.set_adapter("adapter_2")
    lora_2_output = logits_of(model)

    # sanity checks: all three outputs must differ from one another
    self.assertFalse(torch.allclose(base_output, lora_1_output, **tolerances))
    self.assertFalse(torch.allclose(base_output, lora_2_output, **tolerances))
    self.assertFalse(torch.allclose(lora_1_output, lora_2_output, **tolerances))

    with tempfile.TemporaryDirectory() as tmpdirname:
        path_1 = os.path.join(tmpdirname, "adapter_1")
        path_2 = os.path.join(tmpdirname, "adapter_2")
        model.set_adapter("adapter_1")
        model.save_pretrained(path_1)
        model.set_adapter("adapter_2")
        model.save_pretrained(path_2)
        del model

        model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
        # calling enable_peft_hotswap is only needed if we want to compile the model or if the ranks are different
        enable_hotswap = do_compile or (rank1 != rank2)
        if enable_hotswap:
            model.enable_peft_hotswap(target_rank=max(rank1, rank2))

        # load the first adapter without hotswap (hotswap requires an existing adapter)
        model.load_adapter(path_1, adapter_name="adapter_1")
        if do_compile:
            # compile the model after loading the first adapter
            model = torch.compile(model, mode="reduce-overhead")

        lora_1_output_loaded = logits_of(model)
        self.assertTrue(torch.allclose(lora_1_output, lora_1_output_loaded, **tolerances))

        # hotswap in adapter_2 under the name "adapter_1"; output should be the same as lora_2_output
        if not enable_hotswap:
            # enable_peft_hotswap was not called, need to explicitly pass hotswap=True
            model.load_adapter(path_2, adapter_name="adapter_1", hotswap=True)
        else:
            # after calling enable_peft_hotswap, hotswap will automatically be enabled
            model.load_adapter(path_2, adapter_name="adapter_1")

        lora_2_output_loaded = logits_of(model)
        self.assertTrue(torch.allclose(lora_2_output, lora_2_output_loaded, **tolerances))
|
|
|
|
def test_hotswap_wrong_peft_type_raises(self):
    """Hotswapping into an existing non-LoRA adapter (IA3 here) must raise a `ValueError`."""
    from peft import IA3Config

    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    lora_checkpoint_id = "peft-internal-testing/tiny-OPTForCausalLM-lora"
    expected_error = "Hotswapping is currently only supported for LoRA"

    model = AutoModelForCausalLM.from_pretrained(base_id).to(torch_device)
    model.add_adapter(IA3Config(feedforward_modules=[]), adapter_name="ia3")

    # only LoRA is supported for now
    with self.assertRaisesRegex(ValueError, expected_error):
        model.load_adapter(lora_checkpoint_id, adapter_name="ia3", hotswap=True)
|
|
|
|
def test_hotswap_without_existing_adapter_raises(self):
    """Hotswapping on a model that has no adapter yet must raise a `ValueError`."""
    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    lora_checkpoint_id = "peft-internal-testing/tiny-OPTForCausalLM-lora"
    expected_error = "To hotswap an adapter, there must already be an existing adapter with the same adapter name"

    model = AutoModelForCausalLM.from_pretrained(base_id).to(torch_device)

    # we can only hotswap if there is already an adapter with the same name
    with self.assertRaisesRegex(ValueError, expected_error):
        model.load_adapter(lora_checkpoint_id, adapter_name="adapter_1", hotswap=True)
|
|
|
|
def test_hotswap_different_adapter_name_raises(self):
    """Hotswapping under an adapter name other than the already loaded one must raise a `ValueError`."""
    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    lora_checkpoint_id = "peft-internal-testing/tiny-OPTForCausalLM-lora"
    expected_error = "To hotswap an adapter, there must already be an existing adapter with the same adapter name"

    model = AutoModelForCausalLM.from_pretrained(base_id).to(torch_device)
    model.load_adapter(lora_checkpoint_id, adapter_name="adapter_1")

    # we can only hotswap if there is already an adapter with the same name
    with self.assertRaisesRegex(ValueError, expected_error):
        model.load_adapter(lora_checkpoint_id, adapter_name="does_not_exist_yet", hotswap=True)
|
|
|
|
def test_enable_peft_hotswap_called_after_adapter_added_raises(self):
    """Calling `enable_peft_hotswap` *after* the first adapter was added must raise a `RuntimeError`."""
    from peft import LoraConfig

    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    # the message contains regex metacharacters, hence the escaping
    expected_error = re.escape("Call `enable_peft_hotswap` before loading the first adapter.")

    model = AutoModelForCausalLM.from_pretrained(base_id).to(torch_device)
    model.add_adapter(LoraConfig())

    with self.assertRaisesRegex(RuntimeError, expected_error):
        model.enable_peft_hotswap(target_rank=32)
|
|
|
|
def test_enable_peft_hotswap_called_after_adapter_added_warns(self):
    """
    Calling `enable_peft_hotswap` *after* the first adapter was added must log a warning when
    `check_compiled="warn"` is passed.
    """
    from peft import LoraConfig

    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    expected_warning = (
        "It is recommended to call `enable_peft_hotswap` before loading the first adapter to avoid recompilation"
    )
    peft_logger = logging.get_logger("transformers.integrations.peft")

    model = AutoModelForCausalLM.from_pretrained(base_id).to(torch_device)
    model.add_adapter(LoraConfig())

    with self.assertLogs(logger=peft_logger, level="WARNING") as cm:
        model.enable_peft_hotswap(target_rank=32, check_compiled="warn")
        assert any(expected_warning in record for record in cm.output)
|
|
|
|
def test_enable_peft_hotswap_called_after_adapter_added_ignored(self):
    """
    Calling `enable_peft_hotswap` *after* the first adapter was added must neither raise nor log a warning when
    `check_compiled="ignore"` is passed. Note that `assertNoLogs` only works with Python 3.10+.
    """
    from peft import LoraConfig

    base_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
    peft_logger = logging.get_logger("transformers.integrations.peft")

    model = AutoModelForCausalLM.from_pretrained(base_id).to(torch_device)
    model.add_adapter(LoraConfig())

    with self.assertNoLogs(peft_logger, level="WARNING"):
        model.enable_peft_hotswap(target_rank=32, check_compiled="ignore")
|
|
|
|
def test_hotswap_without_compile_and_same_ranks_works(self):
    """Hotswap check without compilation, both adapters having rank 8."""
    hotswap_case = {"rank1": 8, "rank2": 8, "do_compile": False}
    self._check_model_hotswap(**hotswap_case)
|
|
|
|
def test_hotswap_without_compile_and_with_lower_rank_works(self):
    """Hotswap check without compilation, swapping a rank 13 adapter for a rank 7 one."""
    hotswap_case = {"rank1": 13, "rank2": 7, "do_compile": False}
    self._check_model_hotswap(**hotswap_case)
|
|
|
|
def test_hotswap_without_compile_and_with_higher_rank_works(self):
    """Hotswap check without compilation, swapping a rank 7 adapter for a rank 13 one."""
    hotswap_case = {"rank1": 7, "rank2": 13, "do_compile": False}
    self._check_model_hotswap(**hotswap_case)
|
|
|
|
def test_hotswap_with_compile_and_same_ranks_works(self):
    """Hotswap check with `torch.compile`, both adapters having rank 8."""
    # It's important to run under this config patch: it turns any recompilation into an error
    with torch._dynamo.config.patch(error_on_recompile=True):
        with torch._inductor.utils.fresh_inductor_cache():
            self._check_model_hotswap(rank1=8, rank2=8, do_compile=True)
|
|
|
|
def test_hotswap_with_compile_and_lower_rank_works(self):
    """Hotswap check with `torch.compile`, swapping a rank 13 adapter for a rank 7 one."""
    # It's important to run under this config patch: it turns any recompilation into an error
    with torch._dynamo.config.patch(error_on_recompile=True):
        with torch._inductor.utils.fresh_inductor_cache():
            self._check_model_hotswap(rank1=13, rank2=7, do_compile=True)
|
|
|
|
def test_hotswap_with_compile_and_higher_rank_works(self):
    """Hotswap check with `torch.compile`, swapping a rank 7 adapter for a rank 13 one."""
    # It's important to run under this config patch: it turns any recompilation into an error
    with torch._dynamo.config.patch(error_on_recompile=True):
        with torch._inductor.utils.fresh_inductor_cache():
            self._check_model_hotswap(rank1=7, rank2=13, do_compile=True)
|
|
|
|
def test_maybe_load_adapters_path_not_overwritten_for_complete_model(self):
    """
    Regression test for issue #43746: only overwrite the pretrained_model_name_or_path with the adapter's base
    model if needed.

    When pretrained_model_name_or_path points to a directory that holds both a base model and an embedded
    adapter (including its adapter config), the path should NOT be replaced by the base_model_name_or_path
    stored in the adapter config.

    The fix is applied in src/transformers/integrations/peft.py in the maybe_load_adapters function.
    """
    peft_test_model = "peft-internal-testing/tiny-OPTForCausalLM-lora"
    transformers_test_model = "hf-internal-testing/tiny-random-OPTForCausalLM"

    # Build a temporary directory containing config, base model and adapter side by side
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_dir = Path(tmp_dir)

        # Fetch everything first, then save it all locally into the same directory
        config = AutoConfig.from_pretrained(transformers_test_model)
        model = AutoModel.from_pretrained(transformers_test_model)
        adapter_model = AutoModel.from_pretrained(peft_test_model)
        config.save_pretrained(model_dir)
        model.save_pretrained(model_dir)
        adapter_model.save_pretrained(model_dir)

        # Point base_model_name_or_path at an invalid value: if the loader followed it, the load would fail
        adapter_config_path = model_dir / ADAPTER_CONFIG_NAME
        adapter_config = json.loads(adapter_config_path.read_text())
        adapter_config["base_model_name_or_path"] = "some/model/that/does/not/exist"
        adapter_config_path.write_text(json.dumps(adapter_config))

        # Loading from the saved directory must succeed despite the invalid path in the adapter config
        AutoModel.from_pretrained(model_dir)
|