Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
3570 lines
161 KiB
Python
3570 lines
161 KiB
Python
# Copyright 2019 HuggingFace Inc.
|
||
#
|
||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
#
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
#
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
import copy
|
||
import glob
|
||
import json
|
||
import os
|
||
import os.path
|
||
import subprocess
|
||
import sys
|
||
import tempfile
|
||
import textwrap
|
||
import threading
|
||
import unittest
|
||
import unittest.mock as mock
|
||
import uuid
|
||
import warnings
|
||
from pathlib import Path
|
||
from unittest.mock import patch
|
||
|
||
import httpx
|
||
import pytest
|
||
from huggingface_hub import HfApi, snapshot_download, split_torch_state_dict_into_shards
|
||
from parameterized import parameterized
|
||
from pytest import mark
|
||
|
||
from transformers import (
|
||
AutoConfig,
|
||
AutoModel,
|
||
AutoModelForImageClassification,
|
||
AutoModelForSequenceClassification,
|
||
BartConfig,
|
||
BartForConditionalGeneration,
|
||
BartModel,
|
||
CLIPTextModelWithProjection,
|
||
DynamicCache,
|
||
GPT2Config,
|
||
GPT2LMHeadModel,
|
||
LlavaConfig,
|
||
LlavaForConditionalGeneration,
|
||
MistralConfig,
|
||
MistralForCausalLM,
|
||
OPTConfig,
|
||
OPTForCausalLM,
|
||
OwlViTForObjectDetection,
|
||
PreTrainedConfig,
|
||
T5Config,
|
||
T5ForConditionalGeneration,
|
||
is_torch_available,
|
||
logging,
|
||
)
|
||
from transformers.modeling_flash_attention_utils import is_flash_attn_available
|
||
from transformers.models.mistral.modeling_mistral import MistralModel
|
||
from transformers.testing_utils import (
|
||
TOKEN,
|
||
CaptureLogger,
|
||
LoggingLevel,
|
||
TemporaryHubRepo,
|
||
TestCasePlus,
|
||
force_serialization_as_bin_files,
|
||
hub_retry,
|
||
is_staging_test,
|
||
require_accelerate,
|
||
require_non_hpu,
|
||
require_torch,
|
||
require_torch_accelerator,
|
||
require_torch_multi_accelerator,
|
||
slow,
|
||
torch_device,
|
||
)
|
||
from transformers.utils import (
|
||
SAFE_WEIGHTS_INDEX_NAME,
|
||
SAFE_WEIGHTS_NAME,
|
||
WEIGHTS_INDEX_NAME,
|
||
WEIGHTS_NAME,
|
||
)
|
||
from transformers.utils.import_utils import (
|
||
PACKAGE_DISTRIBUTION_MAPPING,
|
||
is_flash_attn_2_available,
|
||
is_flash_attn_3_available,
|
||
is_flash_attn_4_available,
|
||
is_kernels_available,
|
||
is_torch_npu_available,
|
||
)
|
||
|
||
from ..test_modeling_common import compare_state_dicts
|
||
|
||
|
||
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
|
||
|
||
from test_module.custom_configuration import CustomConfig
|
||
|
||
|
||
if is_torch_available():
|
||
import torch
|
||
from safetensors.torch import load_file
|
||
from safetensors.torch import save_file as safe_save_file
|
||
from test_module.custom_modeling import CustomModel
|
||
from torch import nn
|
||
|
||
import transformers.initialization as init
|
||
from transformers import (
|
||
AutoModelForCausalLM,
|
||
AutoTokenizer,
|
||
BertConfig,
|
||
BertModel,
|
||
CLIPTextModel,
|
||
GenerationMixin,
|
||
LlamaConfig,
|
||
LlamaForCausalLM,
|
||
MixtralConfig,
|
||
MixtralModel,
|
||
MusicgenConfig,
|
||
MusicgenForConditionalGeneration,
|
||
PreTrainedModel,
|
||
T5Config,
|
||
T5ForConditionalGeneration,
|
||
)
|
||
from transformers.conversion_mapping import MergeModulelist, WeightConverter, get_model_conversion_mapping
|
||
from transformers.modeling_utils import (
|
||
FLASH_ATTN_KERNEL_FALLBACK,
|
||
_find_disjoint,
|
||
_find_identical,
|
||
get_total_byte_count,
|
||
)
|
||
|
||
# Fake pretrained models for tests
|
||
class BaseModel(PreTrainedModel):
    """Fake pretrained model for tests: two 5x5 linear layers applied one after the other."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"

    def __init__(self, config):
        super().__init__(config)
        # Creation order is kept (`linear` first) so parameter registration order is unchanged.
        self.linear = nn.Linear(in_features=5, out_features=5)
        self.linear_2 = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def forward(self, x):
        """Feed `x` through `linear`, then through `linear_2`."""
        hidden = self.linear(x)
        return self.linear_2(hidden)
|
||
|
||
class BaseModelWithUnexpectedKeys(PreTrainedModel):
    """Fake model (two 50x50 linear layers) that sets `_keys_to_ignore_on_load_unexpected` to `^mtp.*`."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"
    _keys_to_ignore_on_load_unexpected = [r"^mtp.*"]

    def __init__(self, config):
        super().__init__(config)
        self.linear = nn.Linear(in_features=50, out_features=50)
        self.linear_2 = nn.Linear(in_features=50, out_features=50)
        self.post_init()

    def forward(self, x):
        """Feed `x` through `linear`, then through `linear_2`."""
        hidden = self.linear(x)
        return self.linear_2(hidden)
|
||
|
||
class BaseModelWithMissingKeys(PreTrainedModel):
    """Fake model (two 50x50 linear layers) that sets `_keys_to_ignore_on_load_missing` to `^linear`."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"
    _keys_to_ignore_on_load_missing = [r"^linear"]

    def __init__(self, config):
        super().__init__(config)
        self.linear = nn.Linear(in_features=50, out_features=50)
        self.linear_2 = nn.Linear(in_features=50, out_features=50)
        self.post_init()

    def forward(self, x):
        """Feed `x` through `linear`, then through `linear_2`."""
        hidden = self.linear(x)
        return self.linear_2(hidden)
|
||
|
||
class BaseModelWithTiedWeights(PreTrainedModel):
    """Fake model (two 5x5 linear layers) declaring `linear_2.weight` as tied to `linear.weight`."""

    _tied_weights_keys = {"linear_2.weight": "linear.weight"}
    config_class = PreTrainedConfig

    def __init__(self, config):
        super().__init__(config)
        self.linear = nn.Linear(in_features=5, out_features=5)
        self.linear_2 = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def forward(self, x):
        """Feed `x` through `linear`, then through `linear_2`."""
        hidden = self.linear(x)
        return self.linear_2(hidden)
|
||
|
||
class BaseModelWithMultipleTiedWeights(PreTrainedModel):
    """Fake model with three 5x5 linear layers; `linear_2` and `linear_3` weights are both mapped to `linear`."""

    _tied_weights_keys = {"linear_2.weight": "linear.weight", "linear_3.weight": "linear.weight"}
    config_class = PreTrainedConfig

    def __init__(self, config):
        super().__init__(config)
        self.linear = nn.Linear(in_features=5, out_features=5)
        self.linear_2 = nn.Linear(in_features=5, out_features=5)
        self.linear_3 = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def forward(self, x):
        """Feed `x` through `linear`, then through `linear_2` (`linear_3` is not part of the forward pass)."""
        hidden = self.linear(x)
        return self.linear_2(hidden)
|
||
|
||
class BaseModelWithMultipleMixedTiedWeights(PreTrainedModel):
    """Fake model with three 5x5 linear layers whose tied-weight mapping is declared as a chain."""

    config_class = PreTrainedConfig
    # Both tied keys ultimately refer to `linear.weight`, but the mapping is inconsistent: `linear_3` points at
    # `linear_2`, which itself points at `linear` (a "circular" dependency).
    _tied_weights_keys = {"linear_2.weight": "linear.weight", "linear_3.weight": "linear_2.weight"}

    def __init__(self, config):
        super().__init__(config)
        self.linear = nn.Linear(in_features=5, out_features=5)
        self.linear_2 = nn.Linear(in_features=5, out_features=5)
        self.linear_3 = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def forward(self, x):
        """Feed `x` through `linear`, then through `linear_2` (`linear_3` is not part of the forward pass)."""
        hidden = self.linear(x)
        return self.linear_2(hidden)
|
||
|
||
class ModelWithHead(PreTrainedModel):
    """Fake model wrapping a `BaseModel` under `base` and adding two 5x5 linear layers on top."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"

    def __init__(self, config):
        super().__init__(config)
        self.base = BaseModel(config)
        # linear is a common name between Base and Head on purpose.
        self.linear = nn.Linear(in_features=5, out_features=5)
        self.linear2 = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def _init_weights(self, module):
        # Intentionally a no-op.
        pass

    def forward(self, x):
        """Run the base model, then `linear`, then `linear2`."""
        base_output = self.base(x)
        hidden = self.linear(base_output)
        return self.linear2(hidden)
|
||
|
||
class ModelWithDirectParam(PreTrainedModel):
    """Fake model owning a parameter directly (`weight`) in addition to a `BaseModel` submodule."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"

    def __init__(self, config):
        super().__init__(config)
        # direct params and submodules is helpful for testing offloading logic
        initial_weight = torch.rand((5, 5))
        self.weight = nn.Parameter(initial_weight)
        self.base = BaseModel(config)
        self.post_init()

    def _init_weights(self, module):
        # Intentionally a no-op.
        pass

    def forward(self, x):
        """Multiply `x` by the transposed direct parameter, then run the base model on the result."""
        projected = x @ self.weight.T
        return self.base(projected)
|
||
|
||
class ModelWithDirectParamSubmodule(PreTrainedModel):
    """Fake model nesting a `ModelWithDirectParam` under `submodule`, followed by one 5x5 linear layer."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"

    def __init__(self, config):
        super().__init__(config)
        self.submodule = ModelWithDirectParam(config)
        # needed so model can have at least one module on accelerator
        self.linear = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def _init_weights(self, module):
        # Intentionally a no-op.
        pass

    def forward(self, x):
        """Run the nested submodule, then `linear`."""
        inner_output = self.submodule(x)
        return self.linear(inner_output)
|
||
|
||
class ModelWithHeadAndTiedWeights(PreTrainedModel):
    """Fake model with a `BaseModel` and a `decoder` layer whose weight is declared tied to `base.linear.weight`."""

    config_class = PreTrainedConfig
    base_model_prefix = "base"
    _tied_weights_keys = {"decoder.weight": "base.linear.weight"}

    def __init__(self, config):
        super().__init__(config)
        self.base = BaseModel(config)
        self.decoder = nn.Linear(in_features=5, out_features=5)
        self.post_init()

    def _init_weights(self, module):
        # Intentionally a no-op.
        pass

    def forward(self, x):
        """Run the base model, then the decoder."""
        base_output = self.base(x)
        return self.decoder(base_output)
|
||
|
||
class VerySimpleLayer(nn.Module):
    """Plain `nn.Module` holding a single 2x2 linear layer named `simple`."""

    def __init__(self):
        super().__init__()
        self.simple = nn.Linear(in_features=2, out_features=2)

    def forward(self, x):
        """Apply the `simple` linear layer to `x`."""
        output = self.simple(x)
        return output
|
||
|
||
class DummyLanguageModel(PreTrainedModel):
    """Fake model: two `VerySimpleLayer` blocks followed by a 2x2 linear layer listed in `_keep_in_fp32_modules`."""

    _no_split_modules = ["VerySimpleLayer"]
    _keep_in_fp32_modules = ["linear"]

    def __init__(self, config):
        super().__init__(config)
        self.linear = nn.Linear(in_features=2, out_features=2)
        self.layers = nn.ModuleList([VerySimpleLayer(), VerySimpleLayer()])
        self.post_init()

    def forward(self, x):
        """Run `layers[0]`, then `layers[1]`, then `linear`."""
        after_first = self.layers[0](x)
        after_second = self.layers[1](after_first)
        return self.linear(after_second)
|
||
|
||
class DummyVisionModel(PreTrainedModel):
    """Fake model with one 2x2 linear layer, `simple`, listed in `_keep_in_fp32_modules_strict`."""

    _keep_in_fp32_modules_strict = ["simple"]

    def __init__(self, config):
        super().__init__(config)
        self.simple = nn.Linear(in_features=2, out_features=2)
        self.post_init()

    def forward(self, x):
        """Apply the `simple` linear layer to `x`."""
        output = self.simple(x)
        return output
|
||
|
||
class MultimodalModel(PreTrainedModel):
    """Fake composite model: a vision model feeding a language model feeding a 2x2 `head` kept in fp32."""

    _keep_in_fp32_modules = ["head"]

    def __init__(self, config):
        super().__init__(config)
        # Sub-models are created in the same order as before: language model, vision model, head.
        self.language_model = DummyLanguageModel(config)
        self.vision_model = DummyVisionModel(config)
        self.head = nn.Linear(in_features=2, out_features=2)
        self.post_init()

    def forward(self, x):
        """Run the vision model, then the language model, then `head`."""
        vision_output = self.vision_model(x)
        language_output = self.language_model(vision_output)
        return self.head(language_output)
|
||
|
||
class TestOffline(unittest.TestCase):
    """Checks `from_pretrained` against an empty and then a populated local cache, without Hub access."""

    def test_offline(self):
        """With `HF_HUB_OFFLINE` patched on, loading fails on an empty cache and works once it is populated."""
        offline_flag = "huggingface_hub.constants.HF_HUB_OFFLINE"
        # TODO: only necessary for read-only cache systems; replace with a shared helper
        with (
            tempfile.TemporaryDirectory() as tmpdir,
            unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmpdir}),
        ):
            # First offline load should fail
            with patch(offline_flag, True), pytest.raises(OSError):
                AutoModelForImageClassification.from_pretrained(TINY_IMAGE_CLASSIF, cache_dir=tmpdir)

            # Enable online mode for download
            with patch(offline_flag, False):
                snapshot_download(TINY_IMAGE_CLASSIF, cache_dir=tmpdir)

            # Load again in offline mode - should work now
            with patch(offline_flag, True):
                AutoModelForImageClassification.from_pretrained(TINY_IMAGE_CLASSIF, cache_dir=tmpdir)

    def test_local_files_only(self):
        """`local_files_only=True` fails on an empty cache and works once the snapshot is downloaded."""
        # TODO: only necessary for read-only cache systems; replace with a shared helper
        with (
            tempfile.TemporaryDirectory() as tmpdir,
            unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmpdir}),
        ):
            load_kwargs = {"cache_dir": tmpdir, "local_files_only": True}

            # Empty cache => fail to load from cache
            with pytest.raises(OSError):
                AutoModelForImageClassification.from_pretrained(TINY_IMAGE_CLASSIF, **load_kwargs)

            # Populate cache
            snapshot_download(TINY_IMAGE_CLASSIF, cache_dir=tmpdir)

            # Load again from cache => success
            AutoModelForImageClassification.from_pretrained(TINY_IMAGE_CLASSIF, **load_kwargs)
|
||
|
||
|
||
# Need to be serializable, which means they cannot be in a test class method
|
||
class TestGammaBetaNorm(torch.nn.Module):
    """Module with two one-element parameters named `gamma` (ones) and `beta` (zeros)."""

    def __init__(self):
        super().__init__()
        # `gamma` is registered before `beta`, as in the original ordering.
        gamma_init = torch.ones(1)
        beta_init = torch.zeros(1)
        self.gamma = torch.nn.Parameter(gamma_init)
        self.beta = torch.nn.Parameter(beta_init)

    def forward(self):
        """Return the sum of all `gamma` entries plus the sum of all `beta` entries."""
        gamma_total = self.gamma.sum()
        beta_total = self.beta.sum()
        return gamma_total + beta_total
|
||
|
||
|
||
class TestModelGammaBeta(PreTrainedModel):
    """Fake pretrained model whose only submodule is a `TestGammaBetaNorm` stored as `LayerNorm`."""

    def __init__(self, config):
        super().__init__(config)
        self.LayerNorm = TestGammaBetaNorm()
        self.post_init()

    def forward(self):
        """Return the output of the `LayerNorm` submodule."""
        norm_output = self.LayerNorm()
        return norm_output
|
||
|
||
|
||
# Hub repository ids of the checkpoints referenced by the tests in this module.
TINY_T5 = "patrickvonplaten/t5-tiny-random"
TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
TINY_MISTRAL = "hf-internal-testing/tiny-random-MistralForCausalLM"
TINY_IMAGE_CLASSIF = "hf-internal-testing/tiny-random-SiglipForImageClassification"
TINY_LLAVA = "hf-internal-testing/tiny-random-LlavaForConditionalGeneration"
|
||
|
||
# Module-level logger, named after this module.
LOG = logging.get_logger(__name__)
|
||
|
||
|
||
def check_models_equal(model1, model2):
    """
    Return `True` if both models hold the same parameters, compared pairwise in `parameters()` order.

    Args:
        model1: First module; only its `parameters()` are read.
        model2: Second module, compared against `model1`.

    Returns:
        `bool`: `False` if the number of parameters differs, or if any pair of parameters differs in shape or in
        at least one element; `True` otherwise.
    """
    params1 = list(model1.parameters())
    params2 = list(model2.parameters())

    # `zip` stops at the shorter sequence, so without this check a model with extra trailing parameters would
    # be reported as equal.
    if len(params1) != len(params2):
        return False

    for param1, param2 in zip(params1, params2):
        # Compare shapes explicitly: `ne` broadcasts, so differently-shaped tensors could otherwise match.
        if param1.shape != param2.shape:
            return False
        if bool(param1.data.ne(param2.data).any()):
            return False

    return True
|
||
|
||
|
||
@require_torch
|
||
class ModelUtilsTest(TestCasePlus):
|
||
def setUp(self):
    """Record torch's current default dtype in `self.old_dtype`, then run the parent `setUp`."""
    current_default = torch.get_default_dtype()
    self.old_dtype = current_default
    super().setUp()
|
||
|
||
def tearDown(self):
    """Reset torch's default dtype to the value stored in `self.old_dtype`, then run the parent `tearDown`."""
    saved_default = self.old_dtype
    torch.set_default_dtype(saved_default)
    super().tearDown()
|
||
|
||
@require_torch
def test_get_total_byte_count_does_not_require_process_group(self):
    """`get_total_byte_count` must not call `get_world_size` when distributed is available but uninitialized."""
    cpu = torch.device("cpu")
    model = BaseModel(PreTrainedConfig())
    model._tp_plan = {"linear.weight": "rowwise"}
    accelerator_device_map = {"linear.weight": cpu}

    # Report torch.distributed as available but not initialized, and spy on `get_world_size`.
    with (
        patch("transformers.modeling_utils.torch.distributed.is_available", return_value=True),
        patch("transformers.modeling_utils.torch.distributed.is_initialized", return_value=False),
        patch("transformers.modeling_utils.torch.distributed.get_world_size") as world_size_spy,
    ):
        byte_counts = get_total_byte_count(model, accelerator_device_map, None)

    world_size_spy.assert_not_called()
    self.assertIn(cpu, byte_counts)
    self.assertGreater(byte_counts[cpu], 0)
|
||
|
||
def test_hub_retry(self):
    """A function wrapped with `hub_retry(max_attempts=2)` returns normally when only its first call fails."""

    @hub_retry(max_attempts=2)
    def test_func():
        # The `attempt` marker is stored on the decorated function object, so it persists between calls.
        already_failed_once = hasattr(test_func, "attempt")
        if already_failed_once:
            # Second attempt will succeed
            return True
        # First attempt will fail with a connection error
        test_func.attempt = 1
        raise httpx.ConnectError("Connection failed")

    outcome = test_func()
    self.assertTrue(outcome)
|
||
|
||
@slow
def test_model_from_pretrained(self):
    """Load `bert-base-uncased` config and model, and check the loading info and config overrides."""
    model_name = "google-bert/bert-base-uncased"

    config = BertConfig.from_pretrained(model_name)
    self.assertIsNotNone(config)
    self.assertIsInstance(config, PreTrainedConfig)

    # Load once without and once with the loading report.
    model = BertModel.from_pretrained(model_name)
    model, loading_info = BertModel.from_pretrained(model_name, output_loading_info=True)
    self.assertIsNotNone(model)
    self.assertIsInstance(model, PreTrainedModel)

    expected_counts = {
        "missing_keys": 0,
        "unexpected_keys": 8,
        "mismatched_keys": 0,
        "error_msgs": 0,
    }
    for info_key, expected_count in expected_counts.items():
        self.assertEqual(len(loading_info[info_key]), expected_count)

    output_flags = {"output_attentions": True, "output_hidden_states": True}
    config = BertConfig.from_pretrained(model_name, **output_flags)

    # Not sure this is the intended behavior. TODO fix Lysandre & Thom
    config.name_or_path = model_name

    model = BertModel.from_pretrained(model_name, **output_flags)
    self.assertEqual(model.config.output_hidden_states, True)
    self.assertEqual(model.config, config)
|
||
|
||
def test_model_from_pretrained_subfolder(self):
    """A model saved under a subfolder loads only when `subfolder` is passed to `from_pretrained`."""
    subfolder = "bert"
    config = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")
    model = BertModel(config)

    with tempfile.TemporaryDirectory() as tmp_dir:
        save_dir = os.path.join(tmp_dir, subfolder)
        model.save_pretrained(save_dir)

        # Nothing is stored at the top level of `tmp_dir`.
        with self.assertRaises(OSError):
            BertModel.from_pretrained(tmp_dir)

        model_loaded = BertModel.from_pretrained(tmp_dir, subfolder=subfolder)

    models_match = check_models_equal(model, model_loaded)
    self.assertTrue(models_match)
|
||
|
||
def test_model_manually_shared_disjointed_tensors_optimum(self):
    """Save/load round trip when the q/k/v weights of one layer are slices of a single stacked tensor."""
    config = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")
    model = BertModel(config)

    # Let's fuse qkv
    attn = model.encoder.layer[0].attention.self
    projections = (attn.query, attn.key, attn.value)
    # Force some shared storage
    qkv = torch.stack([projection.weight for projection in projections], dim=0)
    for index, projection in enumerate(projections):
        projection.weight = torch.nn.Parameter(qkv[index])

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
        model_loaded = BertModel.from_pretrained(tmp_dir)

    models_match = check_models_equal(model, model_loaded)
    self.assertTrue(models_match)
|
||
|
||
def test_model_from_pretrained_subfolder_sharded(self):
    """Same as the subfolder test, but the checkpoint is saved with `max_shard_size="10KB"`."""
    subfolder = "bert"
    config = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")
    model = BertModel(config)

    with tempfile.TemporaryDirectory() as tmp_dir:
        save_dir = os.path.join(tmp_dir, subfolder)
        model.save_pretrained(save_dir, max_shard_size="10KB")

        # Nothing is stored at the top level of `tmp_dir`.
        with self.assertRaises(OSError):
            BertModel.from_pretrained(tmp_dir)

        model_loaded = BertModel.from_pretrained(tmp_dir, subfolder=subfolder)

    models_match = check_models_equal(model, model_loaded)
    self.assertTrue(models_match)
|
||
|
||
def test_model_from_pretrained_hub_subfolder(self):
    """Loading the Hub repo raises `OSError` without `subfolder` and returns a model with it."""
    model_id = "hf-internal-testing/tiny-random-bert-subfolder"
    subfolder = "bert"

    with self.assertRaises(OSError):
        BertModel.from_pretrained(model_id)

    loaded = BertModel.from_pretrained(model_id, subfolder=subfolder)
    self.assertIsNotNone(loaded)
|
||
|
||
def test_model_from_pretrained_with_different_pretrained_model_name(self):
    """Loading a T5 checkpoint into `BertModel` logs a model-type mismatch warning."""
    model = T5ForConditionalGeneration.from_pretrained(TINY_T5)
    self.assertIsNotNone(model)

    logger = logging.get_logger("transformers.configuration_utils")
    with LoggingLevel(logging.WARNING):
        with CaptureLogger(logger) as cl:
            BertModel.from_pretrained(TINY_T5)

    expected_warning = (
        "You are using a model of type `t5` to instantiate a model of type `bert`. "
        "This may be expected if you are loading a checkpoint that shares a subset"
    )
    # `assertIn` reports both the expected text and the captured output when the check fails.
    self.assertIn(expected_warning, cl.out)
|
||
|
||
@require_accelerate
def test_model_from_pretrained_with_none_quantization_config(self):
    """`from_pretrained` accepts an explicit `quantization_config=None` together with a `device_map`."""
    # Needs a device_map to enter the low_cpu_mem branch. We also load AutoModelForSequenceClassification
    # deliberately to enter the missing keys branch.
    load_kwargs = {"device_map": "auto", "quantization_config": None}
    model = AutoModelForSequenceClassification.from_pretrained(TINY_MISTRAL, **load_kwargs)
    self.assertIsNotNone(model)
|
||
|
||
def test_model_from_config_dtype(self):
    """`AutoModel.from_config` honours a float `dtype` argument and raises `ValueError` for `torch.int64`."""
    # test that the model can be instantiated with dtype of user's choice - as long as it's a
    # float dtype. To make it happen config.dtype needs to be set before instantiating the
    # model from the config object.
    config = T5Config.from_pretrained(TINY_T5)

    default_model = AutoModel.from_config(config)
    # XXX: isn't supported
    # model = T5ForConditionalGeneration.from_config(config)
    self.assertEqual(default_model.dtype, torch.float32)

    half_model = AutoModel.from_config(config, dtype=torch.float16)
    self.assertEqual(half_model.dtype, torch.float16)

    # torch.set_default_dtype() supports only float dtypes, so will fail with non-float type
    with self.assertRaises(ValueError):
        AutoModel.from_config(config, dtype=torch.int64)
|
||
|
||
def test_model_from_config_dtype_str(self):
    """`from_pretrained` accepts `dtype` as a string such as "float32" and stores a `torch.dtype` on the config."""
    # test that from_pretrained works with dtype being strings like "float32" for PyTorch backend
    string_to_dtype = (("float32", torch.float32), ("float16", torch.float16))
    for dtype_name, expected_dtype in string_to_dtype:
        model = AutoModel.from_pretrained(TINY_T5, dtype=dtype_name)
        self.assertEqual(model.dtype, expected_dtype)
        self.assertIsInstance(model.config.dtype, torch.dtype)

    # torch.set_default_dtype() supports only float dtypes, so will fail with non-float type
    with self.assertRaises(ValueError):
        AutoModel.from_pretrained(TINY_T5, dtype="int64")
|
||
|
||
def test_model_from_config_dtype_composite(self):
    """
    Test that `from_pretrained` works when `dtype` is given as a dict with one entry per sub-config of a
    composite config. Tiny-Llava has saved auto dtype as `torch.float32` for all modules.

    Note, this is a deprecated feature and we fall back to the main dtype in all cases below. This test checks
    that the dtype fallback works correctly.
    """

    def check_dtypes(model, backbone_dtype, projector_dtype=None):
        # Language model and vision tower must share `backbone_dtype`; the projector is checked on request.
        self.assertEqual(model.model.language_model.dtype, backbone_dtype)
        self.assertEqual(model.model.vision_tower.dtype, backbone_dtype)
        if projector_dtype is not None:
            self.assertEqual(model.model.multi_modal_projector.linear_1.weight.dtype, projector_dtype)
        self.assertIsInstance(model.config.dtype, torch.dtype)

    # Load without dtype specified
    model = LlavaForConditionalGeneration.from_pretrained(TINY_LLAVA)
    check_dtypes(model, torch.float32)

    # should be able to set dtype as a simple string and the model loads it correctly
    model = LlavaForConditionalGeneration.from_pretrained(TINY_LLAVA, dtype="float32")
    check_dtypes(model, torch.float32)

    model = LlavaForConditionalGeneration.from_pretrained(TINY_LLAVA, dtype=torch.float16)
    check_dtypes(model, torch.float16)

    # should be able to accept dtype as a dict for each sub-config
    model = LlavaForConditionalGeneration.from_pretrained(
        TINY_LLAVA, dtype={"text_config": "float32", "vision_config": "float16", "": "bfloat16"}
    )
    check_dtypes(model, torch.bfloat16, projector_dtype=torch.bfloat16)

    # should be able to accept the values as torch.dtype (not str)
    model = LlavaForConditionalGeneration.from_pretrained(
        TINY_LLAVA, dtype={"text_config": torch.float32, "vision_config": torch.float16, "": torch.bfloat16}
    )
    check_dtypes(model, torch.bfloat16, projector_dtype=torch.bfloat16)

    # should be able to accept the values in configs directly and pass it to `from_pretrained`
    config = copy.deepcopy(model.config)
    config.text_config.dtype = torch.float32
    config.vision_config.dtype = torch.bfloat16
    config.dtype = torch.float16
    model = LlavaForConditionalGeneration.from_pretrained(TINY_LLAVA, config=config, dtype="auto")
    check_dtypes(model, torch.float16, projector_dtype=torch.float16)

    # but if the model has `_keep_in_fp32_modules` then those modules should be in fp32 no matter what.
    # The class attribute is patched only for this load, so the override cannot leak into other tests.
    with patch.object(LlavaForConditionalGeneration, "_keep_in_fp32_modules", ["multi_modal_projector"]):
        model = LlavaForConditionalGeneration.from_pretrained(TINY_LLAVA, config=config, dtype="auto")
    # remember config says float32 for text_config
    check_dtypes(model, torch.float16, projector_dtype=torch.float32)

    # torch.set_default_dtype() supports only float dtypes, so will fail with non-float type
    # NOTE(review): both calls share one `assertRaises` block, so the second call only runs if the first one
    # does not raise - confirm whether it was meant to get its own block.
    with self.assertRaises(ValueError):
        model = LlavaForConditionalGeneration.from_pretrained(TINY_LLAVA, dtype="int64")
        model = LlavaForConditionalGeneration.from_pretrained(
            TINY_LLAVA, dtype={"text_config": "float32", "vision_config": "int64", "": "float16"}
        )

    # Check that `from_config` also works and uses the same dtype for all modules
    config = AutoConfig.from_pretrained(TINY_LLAVA)
    config.text_config.dtype = torch.float16
    config.dtype = torch.float32
    model = LlavaForConditionalGeneration._from_config(config)
    self.assertEqual(model.model.language_model.dtype, torch.float32)
    self.assertEqual(model.model.vision_tower.dtype, torch.float32)
    self.assertEqual(model.dtype, torch.float32)
|
||
|
||
def test_model_from_pretrained_dtype(self):
    """
    Check the dtype of models returned by `from_pretrained` for explicit dtypes and for `dtype="auto"`.

    The model can be instantiated with a dtype coming from either
      1. the explicit `dtype` argument of `from_pretrained`
      2. autodiscovery (`dtype="auto"`), so that a model saved after `model.half()` is instantiated as such.

    An explicit model class and `AutoModel` are both tested, as the latter goes through a different code path.
    """
    model_path = self.get_auto_remove_tmp_dir()
    config_file = f"{model_path}/config.json"

    # baseline - we know TINY_T5 is fp32 model
    model = T5ForConditionalGeneration.from_pretrained(TINY_T5)
    self.assertEqual(model.dtype, torch.float32)

    def remove_dtype(model_path):
        # Rewrite `config.json` in place without its "dtype" entry.
        file = f"{model_path}/config.json"
        with open(file, encoding="utf-8") as f:
            s = json.load(f)
        s.pop("dtype")
        with open(file, "w", encoding="utf-8") as f:
            json.dump(s, f)

    # test the default fp32 save_pretrained => from_pretrained cycle
    model.save_pretrained(model_path)
    model = T5ForConditionalGeneration.from_pretrained(model_path)
    self.assertEqual(model.dtype, torch.float32)
    # 1. test dtype="auto" via `config.dtype`
    model = T5ForConditionalGeneration.from_pretrained(model_path, dtype="auto")
    self.assertEqual(model.dtype, torch.float32)
    # 2. test dtype="auto" via auto-derivation
    # now remove the dtype entry from config.json and try "auto" again which should
    # perform auto-derivation from weights
    remove_dtype(model_path)
    model = T5ForConditionalGeneration.from_pretrained(model_path, dtype="auto")
    self.assertEqual(model.dtype, torch.float32)

    # test forced loading in fp16 (even though the weights are in fp32)
    model = T5ForConditionalGeneration.from_pretrained(model_path, dtype=torch.float16)
    self.assertEqual(model.dtype, torch.float16)

    # test fp16 save_pretrained, loaded with auto-detection
    model = model.half()
    model.save_pretrained(model_path)
    # 1. test dtype="auto" via `config.dtype`
    model = T5ForConditionalGeneration.from_pretrained(model_path, dtype="auto")
    self.assertEqual(model.config.dtype, torch.float16)
    self.assertEqual(model.dtype, torch.float16)
    # tests `config.dtype` saving; read with the same explicit encoding as `remove_dtype`
    with open(config_file, encoding="utf-8") as f:
        config_dict = json.load(f)
    self.assertEqual(config_dict["dtype"], "float16")
    # 2. test dtype="auto" via auto-derivation
    # now remove the dtype entry again, so "auto" has to be derived from the saved fp16 weights
    remove_dtype(model_path)
    model = T5ForConditionalGeneration.from_pretrained(model_path, dtype="auto")
    self.assertEqual(model.dtype, torch.float16)

    # 3. now retest that AutoModel behaves the same wrt dtype="auto" as T5ForConditionalGeneration
    model = AutoModel.from_pretrained(model_path, dtype="auto")
    self.assertEqual(model.dtype, torch.float16)

    # test fp16 save_pretrained, loaded with the explicit fp16
    model = T5ForConditionalGeneration.from_pretrained(model_path, dtype=torch.float16)
    self.assertEqual(model.dtype, torch.float16)

    # test AutoModel separately as it goes through a different path
    # test auto-detection - as currently TINY_T5 doesn't have dtype entry
    model = AutoModel.from_pretrained(TINY_T5, dtype="auto")
    # test that the config object didn't get polluted with dtype="auto"
    # there was a bug that after this call we ended up with config.dtype=="auto"
    self.assertNotEqual(model.config.dtype, "auto")
    # now test the outcome
    self.assertEqual(model.dtype, torch.float32)
    model = AutoModel.from_pretrained(TINY_T5, dtype=torch.float16)
    self.assertEqual(model.dtype, torch.float16)

    # test model whose first param is not of a floating type, but int
    model = AutoModel.from_pretrained(TINY_BERT_FOR_TOKEN_CLASSIFICATION, dtype="auto")
    self.assertEqual(model.dtype, torch.float32)

    # test model that init the model with _from_config
    model = CLIPTextModelWithProjection.from_pretrained(
        "hf-internal-testing/diffusers-stable-diffusion-tiny-all",
        subfolder="text_encoder",
        dtype=torch.bfloat16,
    )
    self.assertEqual(model.dtype, torch.bfloat16)
|
||
|
||
def test_model_from_pretrained_attn_implementation(self):
    """Check that `from_pretrained` applies an explicitly requested `attn_implementation`.

    Two call shapes are covered for every implementation that is usable here:
    1. `attn_implementation` passed on its own
    2. `attn_implementation` passed together with an already loaded `config`
    """
    # "eager" and "sdpa" are always tried; each flash-attention variant only if its availability check passes
    optional_backends = (
        (is_flash_attn_available, "flash_attention_2"),
        (is_flash_attn_3_available, "flash_attention_3"),
        (is_flash_attn_4_available, "flash_attention_4"),
    )
    candidates = ["eager", "sdpa"]
    candidates.extend(name for is_available, name in optional_backends if is_available())

    for attn_impl in candidates:
        # 1. implementation given directly to from_pretrained
        loaded = AutoModelForCausalLM.from_pretrained(TINY_MISTRAL, attn_implementation=attn_impl)
        self.assertEqual(loaded.config._attn_implementation, attn_impl)

        # 2. implementation given next to an explicit config object
        base_config = AutoConfig.from_pretrained(TINY_MISTRAL)
        loaded = AutoModelForCausalLM.from_pretrained(
            TINY_MISTRAL, config=base_config, attn_implementation=attn_impl
        )
        self.assertEqual(loaded.config._attn_implementation, attn_impl)
|
||
|
||
def test_model_from_config_attn_implementation(self):
    """Check that `from_config` resolves `attn_implementation` correctly.

    For every attention implementation usable in this environment, three ways of requesting it are covered:
    1. the config is created with an explicit `attn_implementation` and handed to `from_config`
    2. a config without any `attn_implementation` is passed together with an explicit `from_config` argument
    3. the config carries a nonsense `attn_implementation` that the explicit `from_config` argument must override
    """
    attn_implementation_available = ["eager", "sdpa"]

    if is_flash_attn_available():
        attn_implementation_available.append("flash_attention_2")

    if is_flash_attn_3_available():
        attn_implementation_available.append("flash_attention_3")

    if is_flash_attn_4_available():
        attn_implementation_available.append("flash_attention_4")

    for requested_attn_implementation in attn_implementation_available:
        config = AutoConfig.from_pretrained(TINY_MISTRAL, attn_implementation=requested_attn_implementation)
        # Ensure the config was set correctly
        self.assertEqual(config._attn_implementation, requested_attn_implementation)
        model = AutoModelForCausalLM.from_config(config)
        self.assertEqual(model.config._attn_implementation, requested_attn_implementation)

        config = AutoConfig.from_pretrained(TINY_MISTRAL)
        # Nothing was requested when creating the config, so it must not report any implementation yet
        self.assertIsNone(config._attn_implementation)
        model = AutoModelForCausalLM.from_config(config=config, attn_implementation=requested_attn_implementation)
        self.assertEqual(model.config._attn_implementation, requested_attn_implementation)

        # Set a nonsense attn_implementation in the config, which should be overridden by the explicit argument
        config = AutoConfig.from_pretrained(TINY_MISTRAL, attn_implementation="foo-bar-baz")
        self.assertEqual(config._attn_implementation, "foo-bar-baz")
        model = AutoModelForCausalLM.from_config(config=config, attn_implementation=requested_attn_implementation)
        self.assertEqual(model.config._attn_implementation, requested_attn_implementation)
|
||
|
||
def test_checkpoint_sharding_local(self):
    """Shard a tiny BERT at several `max_shard_size` values and validate the saved checkpoint each time.

    For every size this checks that an index file is written instead of a single weight file, that an
    oversized shard only ever holds one tensor, that the index lists exactly the shard files on disk, and
    that the folder reloads to the same parameters.
    """
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # We use the same folder for various sizes to make sure a new save erases the old checkpoint.
        for max_size in ["50kB", "100kB", "200kB"]:
            model.save_pretrained(tmp_dir, max_shard_size=max_size)
            # The byte budget only depends on `max_size`, so derive it once per save ("50kB" -> 50000)
            max_size_int = int(max_size[:-2]) * 10**3

            # Get each shard file and its size
            shard_files = [os.path.join(tmp_dir, f) for f in os.listdir(tmp_dir) if f.endswith(".safetensors")]
            shard_to_size = {shard_file: os.path.getsize(shard_file) for shard_file in shard_files}

            index_file = os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)
            # Check there is an index but no regular weight file
            self.assertTrue(os.path.isfile(index_file))
            self.assertFalse(os.path.isfile(os.path.join(tmp_dir, SAFE_WEIGHTS_NAME)))

            # Check a file is bigger than max_size only when it has a single weight
            for shard_file, size in shard_to_size.items():
                # Note: the file can end up being slightly bigger than the size asked for (since we count parameters)
                if size >= max_size_int + 50000:
                    state_dict = load_file(shard_file)
                    self.assertEqual(len(state_dict), 1)

            # Check the index and the shard files found match
            with open(index_file, encoding="utf-8") as f:
                index = json.load(f)

            all_shards = set(index["weight_map"].values())
            shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".safetensors")}
            self.assertSetEqual(all_shards, shards_found)

            # Finally, check the model can be reloaded
            new_model = BertModel.from_pretrained(tmp_dir)
            for p1, p2 in zip(model.parameters(), new_model.parameters()):
                torch.testing.assert_close(p1, p2)
|
||
|
||
def test_checkpoint_sharding_from_hub(self):
    """Check that the sharded Hub checkpoint loads to the same parameters as the regular one."""
    sharded_model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert-sharded")
    # the model above is the same as the model below, just a sharded version.
    reference_model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    param_pairs = zip(sharded_model.parameters(), reference_model.parameters())
    for sharded_param, reference_param in param_pairs:
        torch.testing.assert_close(sharded_param, reference_param)
|
||
|
||
def test_checkpoint_variant_local(self):
    """Check that a checkpoint saved with `variant="v2"` only loads when the same variant is requested."""
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir, variant="v2")

        # The variant tag is inserted right before the file extension
        name_parts = SAFE_WEIGHTS_NAME.split(".")[:-1]
        variant_file = os.path.join(tmp_dir, ".".join([*name_parts, "v2", "safetensors"]))
        default_file = os.path.join(tmp_dir, SAFE_WEIGHTS_NAME)

        self.assertTrue(os.path.isfile(variant_file))
        self.assertFalse(os.path.isfile(default_file))

        # Loading without the variant must fail
        with self.assertRaises(EnvironmentError):
            _ = BertModel.from_pretrained(tmp_dir)

        reloaded = BertModel.from_pretrained(tmp_dir, variant="v2")

    for original_param, reloaded_param in zip(model.parameters(), reloaded.parameters()):
        torch.testing.assert_close(original_param, reloaded_param)
|
||
|
||
def test_checkpoint_variant_local_sharded(self):
    """Check that a sharded checkpoint saved with `variant="v2"` only loads when the variant is requested."""
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir, variant="v2", max_shard_size="50kB")

        weights_index_name = ".".join(SAFE_WEIGHTS_INDEX_NAME.split(".")[:-1] + ["v2"] + ["json"])
        weights_index_file = os.path.join(tmp_dir, weights_index_name)
        self.assertTrue(os.path.isfile(weights_index_file))
        self.assertFalse(os.path.isfile(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

        # The shard names end in "of-00005": iterate 1..5 inclusive so the last shard is verified as well
        for i in range(1, 6):
            weights_name = ".".join(SAFE_WEIGHTS_NAME.split(".")[:-1] + [f"v2-0000{i}-of-00005"] + ["safetensors"])
            weights_name_file = os.path.join(tmp_dir, weights_name)
            self.assertTrue(os.path.isfile(weights_name_file))

        # Loading without the variant must fail
        with self.assertRaises(EnvironmentError):
            _ = BertModel.from_pretrained(tmp_dir)

        new_model = BertModel.from_pretrained(tmp_dir, variant="v2")

    for p1, p2 in zip(model.parameters(), new_model.parameters()):
        torch.testing.assert_close(p1, p2)
|
||
|
||
def test_checkpoint_loading_only_safetensors_available(self):
    """Check the loading behaviour when only safetensors checkpoints are available.

    - We can load the model with use_safetensors=True
    - We can load the model without specifying use_safetensors i.e. we search for the available checkpoint,
      preferring safetensors
    - We cannot load the model with use_safetensors=False
    """
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir, max_shard_size="50kB")

        weights_index_name = ".".join(SAFE_WEIGHTS_INDEX_NAME.split(".")[:-1] + ["json"])
        weights_index_file = os.path.join(tmp_dir, weights_index_name)
        self.assertTrue(os.path.isfile(weights_index_file))

        # The shard names end in "of-00005": iterate 1..5 inclusive so the last shard is verified as well
        for i in range(1, 6):
            weights_name = f"model-0000{i}-of-00005" + ".safetensors"
            weights_name_file = os.path.join(tmp_dir, weights_name)
            self.assertTrue(os.path.isfile(weights_name_file))

        # Setting use_safetensors=False should raise an error as the checkpoint was saved in safetensors
        with self.assertRaises(OSError):
            _ = BertModel.from_pretrained(tmp_dir, use_safetensors=False)

        # We can load the model with use_safetensors=True
        new_model = BertModel.from_pretrained(tmp_dir, use_safetensors=True)

        # We can load the model without specifying use_safetensors
        new_model = BertModel.from_pretrained(tmp_dir)

    for p1, p2 in zip(model.parameters(), new_model.parameters()):
        torch.testing.assert_close(p1, p2)
|
||
|
||
def test_checkpoint_loading_only_pytorch_bin_available(self):
    """Check the loading behaviour when only pytorch `.bin` checkpoints are available.

    - We can load the model with use_safetensors=False
    - We can load the model without specifying use_safetensors i.e. we search for the available checkpoint,
      preferring safetensors but falling back to pytorch
    - We cannot load the model with use_safetensors=True
    """
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Since we don't support saving with bins files anymore, but still support loading we use this context
        # to easily create the bins files and try to load them
        with force_serialization_as_bin_files():
            model.save_pretrained(tmp_dir, max_shard_size="50kB")

        weights_index_file = os.path.join(tmp_dir, WEIGHTS_INDEX_NAME)
        self.assertTrue(os.path.isfile(weights_index_file))

        # The shard names end in "of-00005": iterate 1..5 inclusive so the last shard is verified as well
        for i in range(1, 6):
            weights_name = WEIGHTS_NAME.split(".")[0].split("_")[0] + f"_model-0000{i}-of-00005" + ".bin"
            weights_name_file = os.path.join(tmp_dir, weights_name)
            self.assertTrue(os.path.isfile(weights_name_file))

        # Setting use_safetensors=True should raise an error as the checkpoint was saved with safetensors=False
        with self.assertRaises(OSError):
            _ = BertModel.from_pretrained(tmp_dir, use_safetensors=True)

        # We can load the model with use_safetensors=False
        _ = BertModel.from_pretrained(tmp_dir, use_safetensors=False)

        # We can load the model without specifying use_safetensors
        new_model = BertModel.from_pretrained(tmp_dir)

    for p1, p2 in zip(model.parameters(), new_model.parameters()):
        torch.testing.assert_close(p1, p2)
|
||
|
||
def test_checkpoint_variant_hub(self):
    """Check that the Hub variant repo fails to load without a variant and loads with `variant="v2"`."""
    repo_id = "hf-internal-testing/tiny-random-bert-variant"

    # TODO: only necessary for read-only cache systems; replace with a shared helper
    with tempfile.TemporaryDirectory() as tmp_dir, unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
        with self.assertRaises(EnvironmentError):
            _ = BertModel.from_pretrained(repo_id, cache_dir=tmp_dir)

        load_kwargs = {"cache_dir": tmp_dir, "variant": "v2", "use_safetensors": False}
        model = BertModel.from_pretrained(repo_id, **load_kwargs)
    self.assertIsNotNone(model)
|
||
|
||
def test_checkpoint_variant_hub_sharded(self):
    """Check that the sharded Hub variant repo fails to load without a variant and loads with `variant="v2"`."""
    repo_id = "hf-internal-testing/tiny-random-bert-variant-sharded"

    # TODO: only necessary for read-only cache systems; replace with a shared helper
    with tempfile.TemporaryDirectory() as tmp_dir, unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
        with self.assertRaises(EnvironmentError):
            _ = BertModel.from_pretrained(repo_id, cache_dir=tmp_dir)

        load_kwargs = {"cache_dir": tmp_dir, "variant": "v2", "use_safetensors": False}
        model = BertModel.from_pretrained(repo_id, **load_kwargs)
    self.assertIsNotNone(model)
|
||
|
||
def test_checkpoint_variant_hub_safe(self):
    """Check that the "-safe" Hub variant repo fails to load without a variant and loads with `variant="v2"`."""
    repo_id = "hf-internal-testing/tiny-random-bert-variant-safe"

    # TODO: only necessary for read-only cache systems; replace with a shared helper
    with tempfile.TemporaryDirectory() as tmp_dir, unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
        with self.assertRaises(EnvironmentError):
            _ = BertModel.from_pretrained(repo_id, cache_dir=tmp_dir)

        model = BertModel.from_pretrained(repo_id, cache_dir=tmp_dir, variant="v2")
    self.assertIsNotNone(model)
|
||
|
||
def test_checkpoint_variant_hub_sharded_safe(self):
    """Check that the "-sharded-safe" Hub variant repo only loads when `variant="v2"` is requested."""
    repo_id = "hf-internal-testing/tiny-random-bert-variant-sharded-safe"

    # TODO: only necessary for read-only cache systems; replace with a shared helper
    with tempfile.TemporaryDirectory() as tmp_dir, unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
        # Without a variant the load is expected to fail
        with self.assertRaises(EnvironmentError):
            _ = BertModel.from_pretrained(repo_id, cache_dir=tmp_dir)

        model = BertModel.from_pretrained(repo_id, cache_dir=tmp_dir, variant="v2")
    self.assertIsNotNone(model)
|
||
|
||
def test_checkpoint_variant_save_load(self):
    """Check that variant and regular checkpoints can live side by side in the same folder."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        # TODO: only necessary for read-only cache systems; replace with a shared helper
        with unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
            model = BertModel.from_pretrained(
                "hf-internal-testing/tiny-random-bert-variant",
                cache_dir=tmp_dir,
                variant="v2",
                use_safetensors=False,
            )

        # File name of the "v2" variant of the safetensors weights
        variant_name = ".".join(SAFE_WEIGHTS_NAME.split(".")[:-1] + ["v2"] + ["safetensors"])
        variant_path = os.path.join(tmp_dir, variant_name)

        # saving will create a variant checkpoint
        model.save_pretrained(tmp_dir, variant="v2")
        self.assertTrue(os.path.isfile(variant_path))

        # saving shouldn't delete variant checkpoints
        model.save_pretrained(tmp_dir)
        self.assertTrue(os.path.isfile(variant_path))

        # there should be a normal checkpoint
        self.assertTrue(os.path.isfile(os.path.join(tmp_dir, SAFE_WEIGHTS_NAME)))

    self.assertIsNotNone(model)
|
||
|
||
@require_non_hpu
@require_accelerate
@mark.accelerate_tests
@require_torch_multi_accelerator
@slow
def test_model_parallelism_gpt2(self):
    """Load GPT-2 split over two devices through a `device_map` and check the generated text."""
    # Embeddings and head go on device 0, the final layer norm on device 1
    device_map = {"transformer.wte": 0, "transformer.wpe": 0, "lm_head": 0, "transformer.ln_f": 1}
    # Blocks 0-5 go on device 0, blocks 6-11 on device 1
    device_map.update({f"transformer.h.{layer}": 0 if layer < 6 else 1 for layer in range(12)})

    model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2", device_map=device_map)

    tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
    encoded = tokenizer("Hello, my name is", return_tensors="pt")
    generated = model.generate(encoded["input_ids"].to(f"{torch_device}:0"))

    decoded = tokenizer.decode(generated[0].tolist())
    self.assertEqual(decoded, "Hello, my name is John. I'm a writer, and I'm a writer. I'm")
|
||
|
||
@require_accelerate
@mark.accelerate_tests
@require_torch_accelerator
def test_from_pretrained_disk_offload_task_model(self):
    """Save a base model, reload it as a causal LM with cpu/disk offload, and compare the logits
    against a copy loaded entirely on the accelerator."""
    accelerator = f"{torch_device}:0"
    model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    device_map = {
        "transformer.wte": accelerator,
        "transformer.wpe": accelerator,
        "transformer.h.0": "cpu",
        "transformer.h.1": "cpu",
        "transformer.h.2": "cpu",
        "transformer.h.3": "disk",
        "transformer.h.4": "disk",
        "transformer.ln_f": accelerator,
        "lm_head": accelerator,
    }
    with tempfile.TemporaryDirectory() as tmp_dir:
        inputs = torch.tensor([[1, 2, 3]]).to(accelerator)

        # Reference output: everything on the accelerator
        model.save_pretrained(tmp_dir)
        reference_model = AutoModelForCausalLM.from_pretrained(tmp_dir).to(accelerator)
        outputs1 = reference_model.to(accelerator)(inputs)

        offload_folder = os.path.join(tmp_dir, "offload")
        offload_kwargs = {"device_map": device_map, "offload_folder": offload_folder}

        # With disk offload
        offloaded_model = AutoModelForCausalLM.from_pretrained(tmp_dir, **offload_kwargs)
        outputs2 = offloaded_model(inputs)
        torch.testing.assert_close(outputs1.logits.cpu(), outputs2.logits.cpu())

        # With state dict temp offload
        offloaded_model = AutoModelForCausalLM.from_pretrained(tmp_dir, offload_state_dict=True, **offload_kwargs)
        outputs2 = offloaded_model(inputs)
        torch.testing.assert_close(outputs1.logits.cpu(), outputs2.logits.cpu())
|
||
|
||
@require_accelerate
@mark.accelerate_tests
@require_torch_accelerator
def test_from_pretrained_disk_offload_derived_to_base_model(self):
    """Save a causal LM, reload it as the base model with cpu/disk offload, and compare the first
    output against a copy loaded entirely on the accelerator."""
    accelerator = f"{torch_device}:0"
    derived_model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")

    # Keys have no "transformer." prefix since the checkpoint is reloaded as the base model
    device_map = {
        "wte": accelerator,
        "wpe": accelerator,
        "h.0": "cpu",
        "h.1": "cpu",
        "h.2": "cpu",
        "h.3": "disk",
        "h.4": "disk",
        "ln_f": accelerator,
    }
    with tempfile.TemporaryDirectory() as tmp_dir:
        inputs = torch.tensor([[1, 2, 3]]).to(accelerator)
        derived_model.save_pretrained(tmp_dir, use_safetensors=True)

        # Reference output: everything on the accelerator
        base_model = AutoModel.from_pretrained(tmp_dir)
        outputs1 = base_model.to(accelerator)(inputs)

        offload_folder = os.path.join(tmp_dir, "offload")
        offload_kwargs = {"device_map": device_map, "offload_folder": offload_folder}

        # with disk offload
        offloaded_model = AutoModel.from_pretrained(tmp_dir, **offload_kwargs)
        outputs2 = offloaded_model(inputs)
        torch.testing.assert_close(outputs1[0].cpu(), outputs2[0].cpu())

        # With state dict temp offload
        offloaded_model = AutoModel.from_pretrained(tmp_dir, offload_state_dict=True, **offload_kwargs)
        outputs2 = offloaded_model(inputs)
        torch.testing.assert_close(outputs1[0].cpu(), outputs2[0].cpu())
|
||
|
||
@slow
@require_torch
def test_from_pretrained_non_contiguous_checkpoint(self):
    """Check that the visual projection weight is contiguous after loading, with and without
    `device_map="auto"`, and that the model can then be saved."""
    # See: https://github.com/huggingface/transformers/pull/28414
    # Tiny models on the Hub have contiguous weights, contrarily to google/owlvit
    checkpoint = "fxmarty/owlvit-tiny-non-contiguous-weight"

    model = OwlViTForObjectDetection.from_pretrained(checkpoint)
    self.assertTrue(model.owlvit.visual_projection.weight.is_contiguous())

    model = OwlViTForObjectDetection.from_pretrained(checkpoint, device_map="auto")
    self.assertTrue(model.owlvit.visual_projection.weight.is_contiguous())

    # Saving must go through without raising
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
|
||
|
||
def test_cached_files_are_used_when_internet_is_down(self):
    """Check that a model already downloaded still loads when `httpx.Client.request` only returns a 500."""
    # Fake HTTP response emulating a server that is down
    failing_response = mock.Mock()
    failing_response.status_code = 500
    failing_response.headers = {}
    failing_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "failed", request=mock.Mock(), response=mock.Mock()
    )
    failing_response.json.return_value = {}

    model_id = "hf-internal-testing/tiny-random-bert"

    # Download this model to make sure it's in the cache.
    _ = BertModel.from_pretrained(model_id)

    # Under the mock environment we get a 500 error when trying to reach the model.
    with mock.patch("httpx.Client.request", return_value=failing_response) as patched_request:
        _ = BertModel.from_pretrained(model_id)
        # Make sure the fake request was really called
        patched_request.assert_called()
|
||
|
||
@require_accelerate
@mark.accelerate_tests
def test_save_model_with_device_map_cpu(self):
    """Check that a model loaded with `device_map="cpu"` gives the same output after a sharded save/reload."""
    model_id = "hf-internal-testing/tiny-random-gpt2"
    inputs = torch.tensor([[1, 2, 3]])

    with tempfile.TemporaryDirectory() as tmp_dir:
        original = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
        expected = original(inputs)[0]

        # model is 1.6MB, max shard size is allocated to cpu by default
        original.save_pretrained(tmp_dir, max_shard_size="200KB")

        reloaded = AutoModelForCausalLM.from_pretrained(tmp_dir, device_map="cpu")
        actual = reloaded(inputs)[0]

    torch.testing.assert_close(expected, actual)
|
||
|
||
@require_accelerate
@mark.accelerate_tests
@require_torch_accelerator
def test_save_offloaded_model(self):
    """Check that a model offloaded to cpu/disk gives the same output before and after save/reload,
    and matches a fully onloaded copy within tolerance."""
    accelerator = f"{torch_device}:0"
    device_map = {
        "transformer.wte": accelerator,
        "transformer.wpe": accelerator,
        "transformer.h.0": "cpu",
        "transformer.h.1": "cpu",
        "transformer.h.2": "cpu",
        "transformer.h.3": "disk",
        "transformer.h.4": "disk",
        "transformer.ln_f": accelerator,
        "lm_head": accelerator,
    }

    # check_models_equal requires onloaded tensors
    model_id = "hf-internal-testing/tiny-random-gpt2"
    onloaded_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu").to(accelerator)
    inputs = torch.tensor([[1, 2, 3]]).to(accelerator)
    output = onloaded_model(inputs)[0]

    with tempfile.TemporaryDirectory() as tmp_dir:
        offloaded_model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map=device_map, offload_folder=os.path.join(tmp_dir, "offload")
        )
        presaved_output = offloaded_model(inputs)[0]

        # model is 1.6MB, max shard size is allocated to cpu by default
        offloaded_model.save_pretrained(tmp_dir, max_shard_size="200KB")

        saved_model = AutoModelForCausalLM.from_pretrained(tmp_dir, device_map=device_map)
        postsaved_output = saved_model(inputs)[0]

    # Looser tolerance against the onloaded reference, default tolerance across the save/reload
    torch.testing.assert_close(output, presaved_output, rtol=1e-4, atol=1e-4)
    torch.testing.assert_close(presaved_output, postsaved_output)
|
||
|
||
@require_accelerate
@mark.accelerate_tests
@require_torch_accelerator
def test_save_offloaded_model_with_direct_params(self):
    """Check that `save_pretrained` runs on a dispatched model whose `submodule` is mapped to cpu."""
    from accelerate import dispatch_model

    model = ModelWithDirectParamSubmodule(PreTrainedConfig())
    placement = {"submodule": "cpu", "linear": f"{torch_device}:0"}
    dispatch_model(model, placement)

    # Saving must go through without raising
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
|
||
|
||
@require_accelerate
@mark.accelerate_tests
@require_torch_accelerator
@unittest.skip("TODO @cyrilvallez when saving")
def test_save_offloaded_model_dynamic_tied_weights_keys(self):
    """Check that `save_pretrained` runs on a dispatched model whose two heads share submodules that
    declare `_dynamic_tied_weights_keys`."""
    from accelerate import dispatch_model

    model = ModelWithHead(PreTrainedConfig())
    placement = {"base": f"{torch_device}:0", "linear": "cpu", "linear2": "cpu"}
    dispatch_model(model, placement)

    # Two small bias-free linears, each flagging its weight as dynamically tied
    transform_a = torch.nn.Linear(1, 1, bias=False)
    transform_a._dynamic_tied_weights_keys = ["weight"]
    transform_b = torch.nn.Linear(1, 1, bias=False)
    transform_b._dynamic_tied_weights_keys = ["weight"]

    # The very same module objects are registered on both heads
    for head in (model.linear, model.linear2):
        head.register_module("transform_a", transform_a)
        head.register_module("transform_b", transform_b)

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
|
||
|
||
def test_use_safetensors(self):
    """Check how `use_safetensors` selects which checkpoint format gets downloaded and loaded.

    Covers: an explicit `use_safetensors=True` load, the error raised when `use_safetensors=False` is
    requested but no `pytorch_model.bin` is found, which file format ends up in the cache when both are
    available, and the errors raised when no weight file exists at all.
    """
    # Should not raise anymore
    AutoModel.from_pretrained("hf-internal-testing/tiny-random-RobertaModel", use_safetensors=True)

    # test that error if only safetensors is available
    with self.assertRaises(OSError) as env_error:
        BertModel.from_pretrained("hf-internal-testing/tiny-random-bert-safetensors", use_safetensors=False)

    self.assertIn("does not appear to have a file named pytorch_model.bin", str(env_error.exception))

    # test that only bin files are downloaded if both are available and use_safetensors=False
    with tempfile.TemporaryDirectory() as tmp_dir:
        # TODO: only necessary for read-only cache systems; replace with a shared helper
        with unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
            CLIPTextModel.from_pretrained(
                "hf-internal-testing/diffusers-stable-diffusion-tiny-all",
                subfolder="text_encoder",
                use_safetensors=False,
                cache_dir=tmp_dir,
            )

        all_downloaded_files = glob.glob(os.path.join(tmp_dir, "*", "snapshots", "*", "*", "*"))
        self.assertTrue(any(f.endswith("bin") for f in all_downloaded_files))
        self.assertFalse(any(f.endswith("safetensors") for f in all_downloaded_files))

    # test that only safetensors files are downloaded if both are available and use_safetensors=True
    with tempfile.TemporaryDirectory() as tmp_dir:
        # TODO: only necessary for read-only cache systems; replace with a shared helper
        with unittest.mock.patch.dict(os.environ, {"HF_XET_CACHE": tmp_dir}):
            CLIPTextModel.from_pretrained(
                "hf-internal-testing/diffusers-stable-diffusion-tiny-all",
                subfolder="text_encoder",
                use_safetensors=True,
                cache_dir=tmp_dir,
            )

        all_downloaded_files = glob.glob(os.path.join(tmp_dir, "*", "snapshots", "*", "*", "*"))
        self.assertTrue(any(f.endswith("safetensors") for f in all_downloaded_files))
        self.assertFalse(any(f.endswith("bin") for f in all_downloaded_files))

    # test no model file found when use_safetensors=None (default when safetensors package available)
    # Only the exception type is checked for the Hub repo; the message is checked for the local folder below
    with self.assertRaises(OSError):
        BertModel.from_pretrained("hf-internal-testing/config-no-model")

    with self.assertRaises(OSError) as missing_model_file_error:
        with tempfile.TemporaryDirectory() as tmp_dir:
            # A folder holding nothing but an empty config
            with open(os.path.join(tmp_dir, "config.json"), "w") as f:
                f.write("{}")
            BertModel.from_pretrained(tmp_dir)

    self.assertIn(
        "Error no file named model.safetensors, or pytorch_model.bin",
        str(missing_model_file_error.exception),
        msg=missing_model_file_error.exception,
    )
|
||
|
||
def test_safetensors_save_and_load(self):
    """Check that `save_pretrained` writes only a safetensors file and that it reloads to equal parameters."""
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)

        # No pytorch_model.bin file, only a model.safetensors
        safetensors_path = os.path.join(tmp_dir, SAFE_WEIGHTS_NAME)
        bin_path = os.path.join(tmp_dir, WEIGHTS_NAME)
        self.assertTrue(os.path.isfile(safetensors_path))
        self.assertFalse(os.path.isfile(bin_path))

        reloaded = BertModel.from_pretrained(tmp_dir)

        # Check models are equal
        for original_param, reloaded_param in zip(model.parameters(), reloaded.parameters()):
            torch.testing.assert_close(original_param, reloaded_param)
|
||
|
||
def test_safetensors_load_from_hub(self):
    """Check that the "-safetensors" Hub repo loads to the same parameters as the regular tiny BERT repo."""
    from_safetensors = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert-safetensors")
    from_pytorch = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    # Check models are equal
    param_pairs = zip(from_safetensors.parameters(), from_pytorch.parameters())
    for safetensors_param, pytorch_param in param_pairs:
        torch.testing.assert_close(safetensors_param, pytorch_param)
|
||
|
||
def test_safetensors_save_and_load_sharded(self):
    """Check that a sharded save writes only the safetensors index and reloads to equal parameters."""
    model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir, max_shard_size="100kB")

        def saved(file_name):
            # True when `file_name` exists directly inside the save folder
            return os.path.isfile(os.path.join(tmp_dir, file_name))

        # No pytorch_model.bin index file, only a model.safetensors index
        self.assertFalse(saved(WEIGHTS_INDEX_NAME))
        self.assertTrue(saved(SAFE_WEIGHTS_INDEX_NAME))
        # No regular weights file
        self.assertFalse(saved(WEIGHTS_NAME))
        self.assertFalse(saved(SAFE_WEIGHTS_NAME))

        reloaded = BertModel.from_pretrained(tmp_dir)

        # Check models are equal
        for original_param, reloaded_param in zip(model.parameters(), reloaded.parameters()):
            torch.testing.assert_close(original_param, reloaded_param)
|
||
|
||
def test_safetensors_load_from_hub_sharded(self):
    """Check that the "-sharded-safetensors" Hub repo loads to the same parameters as the "-sharded" one."""
    from_safetensors = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert-sharded-safetensors")
    from_pytorch = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert-sharded")

    # Check models are equal
    param_pairs = zip(from_safetensors.parameters(), from_pytorch.parameters())
    for safetensors_param, pytorch_param in param_pairs:
        torch.testing.assert_close(safetensors_param, pytorch_param)
|
||
|
||
@unittest.skip("This now just works by defaults :) no complicated load from task blah blah")
def test_base_model_to_head_model_load(self):
    """Load a saved base model into a model with a head, then expect a `ValueError` once the saved
    state dict mixes unprefixed base keys with head keys."""
    base_model = BaseModel(PreTrainedConfig())
    with tempfile.TemporaryDirectory() as tmp_dir:
        base_model.save_pretrained(tmp_dir)

        # Can load a base model in a model with head
        head_model = ModelWithHead.from_pretrained(tmp_dir)
        for head_param, base_param in zip(head_model.base.parameters(), base_model.parameters()):
            torch.testing.assert_close(head_param, base_param)

        # It doesn't work if the state dict has a mix of keys of the head and base without prefix though.
        mixed_state_dict = base_model.state_dict()
        head_state_dict = head_model.state_dict()
        for head_key in ("linear2.weight", "linear2.bias"):
            mixed_state_dict[head_key] = head_state_dict[head_key]
        weights_path = os.path.join(tmp_dir, SAFE_WEIGHTS_NAME)
        safe_save_file(mixed_state_dict, weights_path, metadata={"format": "pt"})

        expected_message = "The state dictionary of the model you are trying to load is corrupted."
        with self.assertRaisesRegex(ValueError, expected_message):
            _ = ModelWithHead.from_pretrained(tmp_dir)
|
||
|
||
def test_tied_weights_reload(self):
    """Check that tied weights are tied again after reloading, for a base model and for a model with a head.

    The base-model part also reloads from a checkpoint where the tied key was removed by hand, which must
    not be reported as a missing key.
    """
    # Base
    model = BaseModelWithTiedWeights(PreTrainedConfig(tie_word_embeddings=True))
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)

        new_model = BaseModelWithTiedWeights.from_pretrained(tmp_dir)
        self.assertIs(new_model.linear.weight, new_model.linear_2.weight)

        state_dict = model.state_dict()
        # Remove tied weight from state_dict -> model should load with no complain of missing keys
        del state_dict["linear_2.weight"]
        # Overwrite the safetensors file written by `save_pretrained` above: loading prefers safetensors, so
        # a separate `pytorch_model.bin` next to it would be ignored and the edited state dict never read
        safe_save_file(state_dict, os.path.join(tmp_dir, SAFE_WEIGHTS_NAME), metadata={"format": "pt"})
        new_model, load_info = BaseModelWithTiedWeights.from_pretrained(tmp_dir, output_loading_info=True)
        self.assertSetEqual(load_info["missing_keys"], set())
        self.assertIs(new_model.linear.weight, new_model.linear_2.weight)

        # With head
        model = BaseModel(PreTrainedConfig(tie_word_embeddings=True))
        model.save_pretrained(tmp_dir)
        new_model, load_info = ModelWithHeadAndTiedWeights.from_pretrained(tmp_dir, output_loading_info=True)
        self.assertIs(new_model.base.linear.weight, new_model.decoder.weight)
        # Should only complain about the missing bias
        self.assertSetEqual(load_info["missing_keys"], {"decoder.bias"})
|
||
|
||
def test_tied_weights_can_load_symmetrically(self):
    """Test that we can correctly load and tie weights even though the wrong key was saved."""
    original = BaseModelWithTiedWeights(PreTrainedConfig(tie_word_embeddings=True))
    # Just to be sure it's actually tied
    self.assertIs(original.linear.weight, original.linear_2.weight, msg="Weights are not tied!")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Save the config
        config_json = json.dumps(original.config.to_dict())
        with open(os.path.join(tmp_dir, "config.json"), "w") as config_file:
            config_file.write(config_json)

        # Save using the wrong key: `linear.weight` is dropped, `linear_2.weight` stays
        saved_weights = original.state_dict()
        saved_weights.pop("linear.weight")
        safe_save_file(saved_weights, os.path.join(tmp_dir, "model.safetensors"))

        reloaded, load_info = BaseModelWithTiedWeights.from_pretrained(tmp_dir, output_loading_info=True)
        # Assert no missing keys
        self.assertSetEqual(load_info["missing_keys"], set(), msg=f"{load_info['missing_keys']} are missing!")
        # It's still the same weight
        self.assertIs(reloaded.linear.weight, reloaded.linear_2.weight, msg="Weights are not tied!")

        # Make sure both state dict are the same
        compare_state_dicts(original.state_dict(), reloaded.state_dict())
|
||
|
||
def test_tied_weights_can_load_symmetrically_multiple_keys(self):
    """Test that we can correctly load and tie weights even though the wrong key was saved, when we
    have more than 1 target to the same source.

    For every model class, the checkpoint is written twice, each time keeping exactly one of the
    three tied keys (never `linear.weight`). The model is reloaded with the *same* class that
    produced the checkpoint, and must come back with no missing keys, all three weights tied, and
    a state dict identical to the original.
    """
    all_tied_keys = ("linear.weight", "linear_2.weight", "linear_3.weight")

    # First class is consistent in how they provide the source, second is not -> make sure it works in both cases
    for model_class in [BaseModelWithMultipleTiedWeights, BaseModelWithMultipleMixedTiedWeights]:
        with self.subTest(model_class.__name__):
            model = model_class(PreTrainedConfig(tie_word_embeddings=True))
            # Just to be sure it's actually tied
            self.assertIs(model.linear.weight, model.linear_2.weight, msg="Weights are not tied!")
            self.assertIs(model.linear.weight, model.linear_3.weight, msg="Weights are not tied!")

            with tempfile.TemporaryDirectory() as tmp_dir:
                # Save the config
                with open(os.path.join(tmp_dir, "config.json"), "w") as f:
                    f.write(json.dumps(model.config.to_dict()))

                # Keep only 1 of the 3 tied keys, but not the source (which is `linear.weight`).
                # First keep `linear_2.weight`, then `linear_3.weight`, to make sure it does not matter
                # which target ends up in the checkpoint.
                for kept_key in ("linear_2.weight", "linear_3.weight"):
                    state_dict = model.state_dict()
                    for key in all_tied_keys:
                        if key != kept_key:
                            state_dict.pop(key)
                    # The second round overwrites the file written by the first one
                    safe_save_file(state_dict, os.path.join(tmp_dir, "model.safetensors"))

                    # Reload with the class under test (not always the first class of the list)
                    new_model, load_info = model_class.from_pretrained(tmp_dir, output_loading_info=True)

                    # Assert no missing keys
                    self.assertSetEqual(
                        load_info["missing_keys"], set(), msg=f"{load_info['missing_keys']} are missing!"
                    )
                    # It's still the same weight
                    self.assertIs(new_model.linear.weight, new_model.linear_2.weight, msg="Weights are not tied!")
                    self.assertIs(new_model.linear.weight, new_model.linear_3.weight, msg="Weights are not tied!")

                    # Make sure both state dict are the same
                    compare_state_dicts(model.state_dict(), new_model.state_dict())
|
||
|
||
def test_tied_weights_are_not_tied_if_both_present_but_different(self):
    """When both keys of a tied pair are in the checkpoint with different values, loading must keep
    them separate and log a warning instead of tying them."""
    reference = BaseModelWithTiedWeights(PreTrainedConfig(tie_word_embeddings=True))
    # Sanity check on the starting point: the pair is tied before anything is saved
    self.assertIs(reference.linear.weight, reference.linear_2.weight, msg="Weights are not tied!")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write the config by hand
        config_path = os.path.join(tmp_dir, "config.json")
        with open(config_path, "w") as config_file:
            json.dump(reference.config.to_dict(), config_file)

        # Clone every tensor so that no storage is shared and every key is written to disk
        untied_weights = {}
        for name, tensor in reference.state_dict().items():
            untied_weights[name] = tensor.clone()
        # Shift one side of the pair so that the two entries no longer hold the same values
        untied_weights["linear_2.weight"] = untied_weights["linear_2.weight"] + 2
        safe_save_file(untied_weights, os.path.join(tmp_dir, "model.safetensors"))

        modeling_logger = logging.get_logger("transformers.modeling_utils")
        with CaptureLogger(modeling_logger) as captured:
            reloaded, load_info = BaseModelWithTiedWeights.from_pretrained(tmp_dir, output_loading_info=True)

        # The loader must have warned that the weights are left untied
        self.assertIn(
            "both are present in the checkpoints with different values, so we will NOT tie them", captured.out
        )
        # Nothing should be reported as missing
        missing = load_info["missing_keys"]
        self.assertSetEqual(missing, set(), msg=f"{missing} are missing!")
        # The two modules must now hold distinct parameters
        self.assertIsNot(
            reloaded.linear.weight, reloaded.linear_2.weight, msg="Weights are tied but they should not!"
        )
|
||
|
||
def test_tied_weights_are_tied_if_both_present_and_similar(self):
    """When both keys of a tied pair are in the checkpoint with equal values, loading must tie them."""
    reference = BaseModelWithTiedWeights(PreTrainedConfig(tie_word_embeddings=True))
    # Sanity check on the starting point: the pair is tied before anything is saved
    self.assertIs(reference.linear.weight, reference.linear_2.weight, msg="Weights are not tied!")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write the config by hand
        config_path = os.path.join(tmp_dir, "config.json")
        with open(config_path, "w") as config_file:
            json.dump(reference.config.to_dict(), config_file)

        # Clone every tensor so that no storage is shared and every key is written to disk
        cloned_weights = {}
        for name, tensor in reference.state_dict().items():
            cloned_weights[name] = tensor.clone()
        safe_save_file(cloned_weights, os.path.join(tmp_dir, "model.safetensors"))

        reloaded, load_info = BaseModelWithTiedWeights.from_pretrained(tmp_dir, output_loading_info=True)

        # Nothing should be reported as missing
        missing = load_info["missing_keys"]
        self.assertSetEqual(missing, set(), msg=f"{missing} are missing!")
        # Both modules must point to the very same parameter again
        self.assertIs(
            reloaded.linear.weight, reloaded.linear_2.weight, msg="Weights are NOT tied but they should be!"
        )

        # The reloaded model must expose the same state dict as the original one
        compare_state_dicts(reference.state_dict(), reloaded.state_dict())
|
||
|
||
def test_tied_weights_are_missing_if_both_absent(self):
    """When neither key of a tied pair is in the checkpoint, both must be reported as missing, a
    warning about the checkpoint must be logged, and the weights must still end up tied."""
    reference = BaseModelWithTiedWeights(PreTrainedConfig(tie_word_embeddings=True))
    # Sanity check on the starting point: the pair is tied before anything is saved
    self.assertIs(reference.linear.weight, reference.linear_2.weight, msg="Weights are not tied!")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write the config by hand
        config_path = os.path.join(tmp_dir, "config.json")
        with open(config_path, "w") as config_file:
            json.dump(reference.config.to_dict(), config_file)

        # Strip both keys of the tied pair from what gets written to disk
        weights = reference.state_dict()
        for tied_key in ("linear.weight", "linear_2.weight"):
            del weights[tied_key]
        safe_save_file(weights, os.path.join(tmp_dir, "model.safetensors"))

        modeling_logger = logging.get_logger("transformers.modeling_utils")
        with CaptureLogger(modeling_logger) as captured:
            reloaded, load_info = BaseModelWithTiedWeights.from_pretrained(tmp_dir, output_loading_info=True)

        # The loader must have warned that the checkpoint looks corrupted
        self.assertIn(
            "This checkpoint seem corrupted. The tied weights mapping for this model specifies to tie", captured.out
        )
        # Both keys of the pair are reported as missing
        self.assertSetEqual(load_info["missing_keys"], {"linear.weight", "linear_2.weight"})
        # The modules still share one parameter
        self.assertIs(reloaded.linear.weight, reloaded.linear_2.weight, msg="Weights are not tied!")
|
||
|
||
def test_tied_weights_are_always_tied_from_config(self):
    """A model built from a config with `tie_word_embeddings=True` shares `lm_head.weight` with
    `model.embed_tokens.weight`, unless it is built inside `init.no_tie_weights()`."""
    config = LlamaConfig(num_hidden_layers=2, hidden_size=32, intermediate_size=16, tie_word_embeddings=True)
    # The two construction paths exercised: `_from_config` and the plain constructor
    builders = (LlamaForCausalLM._from_config, LlamaForCausalLM)

    # Both construction paths must tie the weights
    for build in builders:
        model = build(copy.deepcopy(config))
        self.assertIs(model.lm_head.weight, model.model.embed_tokens.weight)

    # Same expectation when using a meta device explicitly (as it skips e.g. weight init automatically)
    with torch.device("meta"):
        for build in builders:
            model = build(copy.deepcopy(config))
            self.assertIs(model.lm_head.weight, model.model.embed_tokens.weight)

    # Inside the context manager, the weights must be left untied
    with init.no_tie_weights():
        model = LlamaForCausalLM._from_config(copy.deepcopy(config))
        self.assertIsNot(model.lm_head.weight, model.model.embed_tokens.weight)
|
||
|
||
def test_unexpected_keys_warnings(self):
    """Check how unexpected checkpoint keys are reported in two situations.

    1. A `ModelWithHead` checkpoint loaded into `BaseModel`: the extra keys are listed in the loading
       info, and the captured log must not contain the "were not used" message.
    2. A `ModelWithHead` checkpoint with one extra `added_key` loaded into `ModelWithHead`: the key
       must show up as `UNEXPECTED` in the captured log and in the loading info.
    """
    model = ModelWithHead(PreTrainedConfig(tie_word_embeddings=True))
    logger = logging.get_logger("transformers.modeling_utils")
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)

        # Loading the model with a new class, we don't get a warning for unexpected weights, just an info
        with LoggingLevel(logging.WARNING):
            with CaptureLogger(logger) as cl:
                _, loading_info = BaseModel.from_pretrained(tmp_dir, output_loading_info=True)
        # NOTE(review): the message checked here names `ModelWithHead` while the class doing the loading
        # is `BaseModel` -- confirm this substring is the one the loader would actually emit.
        self.assertNotIn("were not used when initializing ModelWithHead", cl.out)
        self.assertEqual(
            set(loading_info["unexpected_keys"]),
            {"linear2.weight", "linear2.bias"},
        )

        # Loading the model with the same class, we do get a warning for unexpected weights
        state_dict = model.state_dict()
        state_dict["added_key"] = copy.deepcopy(state_dict["linear.weight"])
        safe_save_file(state_dict, os.path.join(tmp_dir, SAFE_WEIGHTS_NAME), metadata={"format": "pt"})
        with LoggingLevel(logging.WARNING):
            with CaptureLogger(logger) as cl:
                _, loading_info = ModelWithHead.from_pretrained(tmp_dir, output_loading_info=True)

        # Will be colored if terminal is interactive. The ESC byte that starts the ANSI sequence is
        # spelled out as `\x1b` so that it cannot be lost or overlooked as an invisible character.
        if sys.stdout.isatty():
            expected_output = "added_key | \x1b[38;5;208mUNEXPECTED"
        else:
            expected_output = "added_key | UNEXPECTED"
        self.assertIn(expected_output, cl.out)
        # Normalised to a set, like the first check, so the comparison does not depend on the container type
        self.assertEqual(set(loading_info["unexpected_keys"]), {"added_key"})
|
||
|
||
def test_warn_if_padding_and_no_attention_mask(self):
    """Exercise `warn_if_padding_and_no_attention_mask` over the cases where it must stay silent,
    must warn, must warn only once, must emit the bos-specific hint, and must be skipped entirely
    under `torch.compile`."""
    logger = logging.get_logger("transformers.modeling_utils")
    mask_warning = "We strongly recommend passing in an `attention_mask`"

    def logged_output(input_ids, attention_mask=None, num_calls=1, **config_kwargs):
        """Build a fresh `ModelWithHead`, run the check `num_calls` times, and return the captured log."""
        # `warning_once` is cached: clear it so that every scenario starts from a clean slate
        logger.warning_once.cache_clear()
        with LoggingLevel(logging.WARNING), CaptureLogger(logger) as cl:
            model = ModelWithHead(PreTrainedConfig(**config_kwargs))
            for _ in range(num_calls):
                model.warn_if_padding_and_no_attention_mask(input_ids, attention_mask=attention_mask)
        # `cl.out` is read after the capture context has exited
        return cl.out

    with self.subTest("Ensure no warnings when pad_token_id is None."):
        out = logged_output(
            torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 0, 0]]),
            pad_token_id=None,
            bos_token_id=None,
            eos_token_id=None,
        )
        self.assertNotIn(mask_warning, out)

    with self.subTest("Ensure no warnings when there is an attention_mask."):
        out = logged_output(
            torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 0, 0]]),
            attention_mask=torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]),
            pad_token_id=0,
            bos_token_id=None,
            eos_token_id=None,
        )
        self.assertNotIn(mask_warning, out)

    with self.subTest("Ensure no warnings when there are no pad_token_ids in the input_ids."):
        out = logged_output(
            torch.tensor([[1, 345, 232, 328, 740, 140, 1695, 69, 6078, 2341, 25]]),
            pad_token_id=0,
            bos_token_id=None,
            eos_token_id=None,
        )
        self.assertNotIn(mask_warning, out)

    with self.subTest("Ensure a warning is shown when the input_ids start with a pad_token_id."):
        out = logged_output(
            torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 432, 5232]]),
            pad_token_id=0,
            bos_token_id=None,
            eos_token_id=None,
        )
        self.assertIn(mask_warning, out)

    with self.subTest("Ensure a warning is shown when the input_ids end with a pad_token_id."):
        out = logged_output(
            torch.tensor([[432, 345, 232, 328, 740, 140, 1695, 69, 6078, 0, 0]]),
            pad_token_id=0,
            bos_token_id=None,
            eos_token_id=None,
        )
        self.assertIn(mask_warning, out)

    with self.subTest("Ensure that the warning is shown at most once."):
        out = logged_output(
            torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 0, 0]]),
            num_calls=2,
            pad_token_id=0,
            bos_token_id=None,
            eos_token_id=None,
        )
        self.assertEqual(out.count(mask_warning), 1)

    with self.subTest("Ensure a different warning is shown when the pad_token_id is equal to the bos_token_id."):
        out = logged_output(
            torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 0, 0]]),
            pad_token_id=0,
            bos_token_id=0,
            eos_token_id=None,
        )
        self.assertIn("You may ignore this warning if your `pad_token_id`", out)

    with self.subTest("Ensure that the warning code is skipped when compiling with torchdynamo."):
        logger.warning_once.cache_clear()
        # Only `testing` is needed; importing dynamo's `config` would just be shadowed by the model config
        from torch._dynamo import testing

        config = PreTrainedConfig(pad_token_id=0, bos_token_id=None, eos_token_id=None)
        model = ModelWithHead(config)
        input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 432, 5232]])

        def f(input_ids):
            model.warn_if_padding_and_no_attention_mask(input_ids, attention_mask=None)

        compile_counter = testing.CompileCounter()
        opt_fn = torch.compile(f, dynamic=True, backend=compile_counter)
        opt_fn(input_ids)
        # No frame reached the backend
        self.assertEqual(compile_counter.frame_count, 0)
|
||
|
||
@require_torch_accelerator
@slow
def test_pretrained_low_mem_new_config(self):
    """Load a checkpoint with a config whose sizes were overridden (and `ignore_mismatched_sizes=True`)
    and check that the resulting model class is the same as for a plain load."""
    # Checking for 1 model(the same one which was described in the issue) .
    size_overrides = {"n_layer": 48, "n_head": 25, "n_embd": 1600}

    for model_id in ["openai-community/gpt2"]:
        resized_config = AutoConfig.from_pretrained(pretrained_model_name_or_path=model_id)
        for attribute, value in size_overrides.items():
            setattr(resized_config, attribute, value)

        resized_model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path=model_id,
            config=resized_config,
            ignore_mismatched_sizes=True,
            dtype=torch.float16,
        )
        reference_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id)

        self.assertEqual(resized_model.__class__.__name__, reference_model.__class__.__name__)
|
||
|
||
def test_generation_config_is_loaded_with_model(self):
    """The generation config of the checkpoint must be picked up, with and without `device_map`."""
    # Note: `hf-internal-testing/tiny-random-MistralForCausalLM` has a `generation_config.json`
    # containing `bos_token_id: 1`
    expected_bos_token_id = 1

    # 1. Load without further parameters, then 2. load with `device_map`
    for extra_kwargs in ({}, {"device_map": "auto"}):
        model = AutoModelForCausalLM.from_pretrained(TINY_MISTRAL, **extra_kwargs)
        self.assertEqual(model.generation_config.bos_token_id, expected_bos_token_id)
|
||
|
||
def test_safetensors_torch_from_torch(self):
    """Saving a model and loading it back must give parameters equal to the original ones."""
    original = BertModel.from_pretrained("hf-internal-testing/tiny-bert-pt-only")

    with tempfile.TemporaryDirectory() as tmp_dir:
        original.save_pretrained(tmp_dir)
        reloaded = BertModel.from_pretrained(tmp_dir)

    # Parameters are compared pairwise, in iteration order
    parameter_pairs = zip(original.parameters(), reloaded.parameters())
    for expected, actual in parameter_pairs:
        self.assertTrue(torch.equal(expected, actual))
|
||
|
||
def test_safetensors_torch_from_torch_sharded(self):
    """Same round-trip as the non-sharded case, but saving with `max_shard_size="100kB"`."""
    original = BertModel.from_pretrained("hf-internal-testing/tiny-bert-pt-only")

    with tempfile.TemporaryDirectory() as tmp_dir:
        original.save_pretrained(tmp_dir, max_shard_size="100kB")
        reloaded = BertModel.from_pretrained(tmp_dir)

    # Parameters are compared pairwise, in iteration order
    parameter_pairs = zip(original.parameters(), reloaded.parameters())
    for expected, actual in parameter_pairs:
        self.assertTrue(torch.equal(expected, actual))
|
||
|
||
def test_saving_model_config_with_generation_params(self):
    """
    `model.save_pretrained` must raise a `ValueError` once a generation parameter has been set on
    the model config.
    """
    model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    # Starting point: the parameter is unset in the generation config and absent from the model config
    self.assertIsNone(model.generation_config.repetition_penalty)
    self.assertFalse(hasattr(model.config, "repetition_penalty"))

    # Put a generation parameter on the model config: saving must now be refused
    model.config.repetition_penalty = 3.0
    with self.assertRaises(ValueError), tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
|
||
|
||
def test_model_from_pretrained_from_mlx(self):
    """Load the `tiny-random-mistral-mlx` checkpoint, save it again, and check that the saved file
    is tagged with the "pt" format and that the reloaded model produces the same logits."""
    from safetensors import safe_open

    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-mistral-mlx")
    self.assertIsNotNone(model)

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
        saved_weights = os.path.join(tmp_dir, "model.safetensors")
        with safe_open(saved_weights, framework="pt") as weights_file:
            # The file metadata must declare the "pt" format
            self.assertEqual(weights_file.metadata().get("format"), "pt")
        reloaded = AutoModelForCausalLM.from_pretrained(tmp_dir)

    # Both models are run on the same random ids and must agree on the logits
    input_ids = torch.randint(100, 1000, (1, 10))
    with torch.no_grad():
        expected = model(input_ids)
        actual = reloaded(input_ids)
    torch.testing.assert_close(actual["logits"], expected["logits"])
|
||
|
||
def test_can_generate(self):
    """Check the result of `PreTrainedModel.can_generate` (and what it logs) for four kinds of classes."""
    logger = logging.get_logger("transformers.modeling_utils")
    logger.warning_once.cache_clear()

    # 1 - By default, a model CAN'T generate
    self.assertFalse(BertModel.can_generate())

    # 2 - The most common case for a model to be able to generate is to inherit from `GenerationMixin` directly
    class DummyBertWithMixin(BertModel, GenerationMixin):
        pass

    with CaptureLogger(logger) as captured:
        result = DummyBertWithMixin.can_generate()
    # Nothing is logged in this case
    self.assertEqual(captured.out, "")
    self.assertTrue(result)

    # 3 - Finally, it can inherit from a model that can generate
    class DummyBertWithParent(DummyBertWithMixin):
        pass

    with CaptureLogger(logger) as captured:
        result = DummyBertWithParent.can_generate()
    # Nothing is logged in this case either
    self.assertEqual(captured.out, "")
    self.assertTrue(result)

    # 4 - Legacy: a custom `prepare_inputs_for_generation` without inheriting from `GenerationMixin`.
    # As asserted below, such a class can NOT generate, and a message about the missing inheritance is logged.
    class DummyBertWithPrepareInputs(BertModel):
        def prepare_inputs_for_generation(self):
            pass

    with CaptureLogger(logger) as captured:
        result = DummyBertWithPrepareInputs.can_generate()
    self.assertIn("it doesn't directly inherit from `GenerationMixin`", captured.out)
    self.assertFalse(result)
|
||
|
||
def test_save_and_load_config_with_custom_generation(self):
    """
    A model whose config carries generation parameters can not be saved: `save_pretrained` must
    raise a `ValueError`.
    """
    model = T5ForConditionalGeneration.from_pretrained(TINY_T5)

    # Starting point: both parameters are unset in the generation config and absent from the model config
    for parameter in ("num_beams", "early_stopping"):
        self.assertIsNone(getattr(model.generation_config, parameter))
    for parameter in ("num_beams", "early_stopping"):
        self.assertFalse(hasattr(model.config, parameter))

    # Sanity check: We can run `generate` with the model without any warnings
    random_ids = torch.randint(0, 100, (1, 5))
    with warnings.catch_warnings(record=True) as recorded:
        model.generate(random_ids, max_new_tokens=3)
    self.assertEqual(len(recorded), 0)

    # Once the config holds generation-related parameters, saving must fail, which nudges the user
    # towards storing them in the generation_config instead
    model.config.num_beams = 5
    model.config.early_stopping = True
    # The generation config itself is untouched by the assignments above
    self.assertIsNone(model.generation_config.num_beams)
    with tempfile.TemporaryDirectory() as tmp_dir:
        with self.assertRaises(ValueError):
            model.save_pretrained(tmp_dir)
|
||
|
||
def test_load_model_with_state_dict_only(self):
    """`from_pretrained` must accept `pretrained_model_name_or_path=None` when both a config and a
    state dict are given, and return a model equal to the one they were taken from."""
    source_model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")

    rebuilt_model = BertModel.from_pretrained(
        pretrained_model_name_or_path=None,
        config=source_model.config,
        state_dict=source_model.state_dict(),
    )
    models_match = check_models_equal(source_model, rebuilt_model)
    self.assertTrue(models_match)
|
||
|
||
@unittest.skip("Skipping flaky test")
def test_cache_when_needed_at_train_time(self):
    """
    Some fine-tuning methods, like prefix tuning in PEFT, need a cache while training. This test
    checks that a cache is returned in train mode when it is requested, that a pre-filled cache can
    be passed in, and that no cache is returned when it is disabled. Related issue: #35648
    """
    model = AutoModelForCausalLM.from_pretrained(TINY_MISTRAL)
    tokenizer = AutoTokenizer.from_pretrained(TINY_MISTRAL)
    model_inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    # The freshly loaded model is not in training mode, so switch it on explicitly
    self.assertFalse(model.training)
    model.train()

    # Requesting the cache while training must return one, without leaving training mode
    model_outputs = model(**model_inputs, use_cache=True)
    self.assertIsInstance(model_outputs.past_key_values, DynamicCache)
    self.assertTrue(model.training)

    # simulate injecting virtual tokens like in prefix tuning: two (key, value) pairs of random tensors
    num_virtual_tokens = 3
    kv_shape = (1, 2, num_virtual_tokens, 8)
    prefix_pairs = [(torch.randn(kv_shape), torch.randn(kv_shape)) for _ in range(2)]
    past_key_values = DynamicCache(prefix_pairs)

    # The attention mask is extended with ones, one per virtual token
    token_mask = model_inputs["attention_mask"]
    virtual_mask = torch.ones(1, num_virtual_tokens).to(token_mask.device)
    model_inputs["attention_mask"] = torch.cat((token_mask, virtual_mask), dim=1)
    model_outputs = model(**model_inputs, past_key_values=past_key_values, use_cache=True)
    self.assertTrue(model.training)

    # We can also disable the cache to skip a few operations, if the training loop doesn't need cache
    # NOTE: after #41900, we need to pass the correct attention mask size
    model_inputs["attention_mask"] = model_inputs["attention_mask"][:, :-num_virtual_tokens]
    model_outputs = model(**model_inputs, use_cache=False)
    self.assertIsNone(model_outputs.past_key_values)
    self.assertTrue(model.training)
|
||
|
||
def test_restore_default_dtype_from_pretrained(self):
    """
    Tests that the default torch dtype is restored
    when an error happens during the loading of a model.

    The dtype that was active before the test is put back in a `finally` clause, so that a failing
    assertion here can not leak a modified global default dtype into other tests.
    """
    old_dtype = torch.get_default_dtype()
    # Keep a handle on the real function before it gets patched below
    original_set_default_dtype = torch.set_default_dtype

    def debug(*args, **kwargs):
        # call the method as usual, then raise a RuntimeError
        original_set_default_dtype(*args, **kwargs)
        raise RuntimeError

    try:
        # set default type to float32
        torch.set_default_dtype(torch.float32)

        # Mock injection point which is right after the call to `torch.set_default_dtype`
        with patch("torch.set_default_dtype", new=debug):
            with self.assertRaises(RuntimeError):
                _ = AutoModelForCausalLM.from_pretrained(TINY_MISTRAL, device_map="auto", dtype=torch.float16)

        # default should still be float32
        self.assertEqual(torch.get_default_dtype(), torch.float32)
    finally:
        # The patch is no longer active here, so this calls the real function
        torch.set_default_dtype(old_dtype)
|
||
|
||
def test_restore_default_dtype_from_config(self):
    """
    Tests that the default torch dtype is restored
    when an error happens while building a model from a config.

    The dtype that was active before the test is put back in a `finally` clause, so that a failing
    assertion here can not leak a modified global default dtype into other tests.
    """
    old_dtype = torch.get_default_dtype()
    # Keep a handle on the real function before it gets patched below
    original_set_default_dtype = torch.set_default_dtype

    def debug(*args, **kwargs):
        # call the method as usual, then raise a RuntimeError
        original_set_default_dtype(*args, **kwargs)
        raise RuntimeError

    try:
        # set default type to float32
        torch.set_default_dtype(torch.float32)

        config = AutoConfig.from_pretrained(TINY_MISTRAL)

        # Mock injection point which is right after the call to `torch.set_default_dtype`
        with patch("torch.set_default_dtype", new=debug):
            with self.assertRaises(RuntimeError):
                config.dtype = torch.float16
                _ = AutoModelForCausalLM.from_config(config)

        # default should still be float32
        self.assertEqual(torch.get_default_dtype(), torch.float32)
    finally:
        # The patch is no longer active here, so this calls the real function
        torch.set_default_dtype(old_dtype)
|
||
|
||
def test_unknown_quantization_config(self):
    """Loading a checkpoint whose config holds an unknown `quant_method` must log exactly one
    warning on the "transformers" logger, starting with "Unknown quantization type, got"."""
    config = BertConfig(
        vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
    )
    model = BertModel(config)
    # The quantization entry is only added once the model has been built
    config.quantization_config = {"quant_method": "unknown"}

    with tempfile.TemporaryDirectory() as tmpdir:
        model.save_pretrained(tmpdir)
        with self.assertLogs("transformers", level="WARNING") as captured_logs:
            BertModel.from_pretrained(tmpdir)

        records = captured_logs.records
        self.assertEqual(len(records), 1)
        first_message = records[0].message
        self.assertTrue(first_message.startswith("Unknown quantization type, got"))
|
||
|
||
@parameterized.expand([("Qwen/Qwen2.5-3B-Instruct", 10), ("meta-llama/Llama-2-7b-chat-hf", 10)])
@slow
@require_torch_accelerator
def test_loading_is_fast_on_gpu(self, model_id: str, max_loading_time: float):
    """
    This test is used to avoid regression on https://github.com/huggingface/transformers/pull/36380.
    10s should be more than enough for both models, and allows for some margin as loading time are quite
    unstable. Before #36380, it used to take more than 40s, so 10s is still reasonable.
    Note that we run this test in a subprocess, to ensure that cuda is not already initialized/warmed-up.

    The subprocess is started with the interpreter running this test (`sys.executable`) and an explicit
    argument list, so that the command does not depend on which `python` is first on the PATH nor on
    the temporary file path being free of whitespace.
    """
    # First download the weights if not already on disk
    _ = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)

    script_to_run = textwrap.dedent(
        """
        import torch
        import time
        import argparse
        from transformers import AutoModelForCausalLM
        from transformers.utils import is_torch_accelerator_available

        parser = argparse.ArgumentParser()
        parser.add_argument("model_id", type=str)
        parser.add_argument("max_loading_time", type=float)
        args = parser.parse_args()

        device_type = torch.accelerator.current_accelerator().type if is_torch_accelerator_available() else "cuda"
        device = torch.device(f"{device_type}:0")

        torch_accelerator_module = getattr(torch, device_type, torch.cuda)
        torch_accelerator_module.synchronize(device)
        t0 = time.time()
        model = AutoModelForCausalLM.from_pretrained(args.model_id, dtype=torch.float16, device_map=device)
        torch_accelerator_module.synchronize(device)
        dt = time.time() - t0

        # Assert loading is faster (it should be more than enough in both cases)
        if dt > args.max_loading_time:
            raise ValueError(f"Loading took {dt:.2f}s! It should not take more than {args.max_loading_time}s")
        # Ensure everything is correctly loaded on accelerator
        bad_device_params = {k for k, v in model.named_parameters() if v.device != device}
        if len(bad_device_params) > 0:
            raise ValueError(f"The following parameters are not on accelerator: {bad_device_params}")
        """
    )

    with tempfile.NamedTemporaryFile(mode="w+", suffix=".py") as tmp:
        tmp.write(script_to_run)
        # Make sure the script is fully on disk before the child process reads it
        tmp.flush()
        tmp.seek(0)
        # Explicit argv list: no re-splitting of the path or the model id on whitespace
        cmd = [sys.executable, tmp.name, model_id, str(max_loading_time)]
        try:
            # We cannot use a timeout of `max_loading_time` as cuda initialization can take up to 15-20s
            _ = subprocess.run(cmd, capture_output=True, env=self.get_env(), text=True, check=True, timeout=60)
        except subprocess.CalledProcessError as e:
            # Surface the child's stderr, and keep the original error as the cause
            raise Exception(f"The following error was captured: {e.stderr}") from e
|
||
|
||
def test_explicit_transformers_weights(self):
    """
    A repo can name its weights file explicitly in the config, through `transformers_weights`.
    When that attribute is defined in the config, the weights are loaded from that file.

    This checks that the right file was loaded, using the parameter count of the resulting model.
    """
    repo_id = "hf-internal-testing/explicit_transformers_weight_in_config"
    expected_num_parameters = 87929

    model = BertModel.from_pretrained(repo_id)
    self.assertEqual(model.num_parameters(), expected_num_parameters)
|
||
|
||
def test_explicit_transformers_weights_index(self):
    """
    A repo can name its weights file explicitly in the config, through `transformers_weights`.
    When that attribute is defined in the config, the weights are loaded from that file.

    This checks that the right file was loaded when that file is an index of multiple weight files,
    using the parameter count of the resulting model.
    """
    repo_id = "hf-internal-testing/explicit_transformers_weight_in_config_sharded"
    expected_num_parameters = 87929

    model = BertModel.from_pretrained(repo_id)
    self.assertEqual(model.num_parameters(), expected_num_parameters)
|
||
|
||
def test_explicit_transformers_weights_save_and_reload(self):
    """
    A repo can name its weights file explicitly in the config, through `transformers_weights`.
    When that attribute is defined in the config, the weights are loaded from that file.

    On save, the `transformers_weights` attribute must not be written to the config: otherwise a
    later load would look for that file, whereas the weights are saved under the default file name.

    This is the check for a non-sharded repo.
    """
    model = BertModel.from_pretrained("hf-internal-testing/explicit_transformers_weight_in_config")
    explicit_weights_name = model.config.transformers_weights

    with tempfile.TemporaryDirectory() as tmpdirname:
        model.save_pretrained(tmpdirname)

        # The saved config must not mention `transformers_weights`
        with open(os.path.join(tmpdirname, "config.json")) as config_file:
            saved_config = json.load(config_file)
        self.assertNotIn("transformers_weights", saved_config)

        # The weights must be in model.safetensors, and not under the explicit file name
        saved_files = os.listdir(tmpdirname)
        self.assertNotIn(explicit_weights_name, saved_files)
        self.assertIn("model.safetensors", saved_files)
|
||
|
||
def test_explicit_transformers_weights_index_save_and_reload(self):
    """
    Check that saving drops the explicit `transformers_weights` entry (sharded repo).

    The model is loaded from a repo whose config defines `transformers_weights`. When it is saved again, the
    attribute must not be written to `config.json`; otherwise a later load would look for that file instead
    of the default one. The save is sharded, so the default index file must be present.
    """
    model = BertModel.from_pretrained("hf-internal-testing/explicit_transformers_weight_in_config_sharded")
    explicit_weights_name = model.config.transformers_weights

    with tempfile.TemporaryDirectory() as tmpdirname:
        model.save_pretrained(tmpdirname, max_shard_size="100kb")

        # The serialized config must not mention `transformers_weights`
        config_path = os.path.join(tmpdirname, "config.json")
        with open(config_path) as config_file:
            saved_config = json.load(config_file)
        self.assertNotIn("transformers_weights", saved_config)

        # The default safetensors index must be written, not the explicit file name
        saved_files = os.listdir(tmpdirname)
        self.assertNotIn(explicit_weights_name, saved_files)
        self.assertIn("model.safetensors.index.json", saved_files)
|
||
|
||
def test_config_class_attribute(self):
    """Check which config class a model ends up with, given `config_class` attributes and `config:` annotations."""

    # custom configs
    class MyConfigA(PreTrainedConfig):
        pass

    class MyConfigB(PreTrainedConfig):
        pass

    class MyConfigC(PreTrainedConfig):
        pass

    # custom models
    class MyModelA(PreTrainedModel):
        config: dict
        config_class = MyConfigA

    class MyModelB(MyModelA):
        config: MyConfigB

    class MyModelC(MyModelA):
        config_class = MyConfigC

    class MyModelD(MyModelA):
        pass

    # Precedence: child config_class > child 'config:' > parent config_class > parent 'config:'
    expected_config_classes = (
        (MyModelA, MyConfigA),
        (MyModelB, MyConfigB),
        (MyModelC, MyConfigC),
        (MyModelD, MyConfigA),
    )
    for model_class, expected_config_class in expected_config_classes:
        self.assertIs(model_class.config_class, expected_config_class)
|
||
|
||
def test_ignore_missing_key_works(self):
    """Test that if a parameter (not buffer) is specified in `_keys_to_ignore_on_load_missing` and is actually
    missing from the checkpoint, it still ends up on cpu after loading (i.e. it is not left on the meta device)."""
    # Use the temporary directory as a context manager so that it is removed deterministically, even when
    # an assertion fails, instead of relying on the implicit finalizer of a dangling `TemporaryDirectory`.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Create dummy model
        model = BaseModelWithMissingKeys(PreTrainedConfig())

        # Save the config
        model.config.save_pretrained(tmpdirname)
        # Get the state dict to save
        state_dict = model.state_dict()
        # Remove the layer that we should ignore if missing
        del state_dict["linear.weight"], state_dict["linear.bias"]
        # Save the state dict as a single shard
        safe_save_file(state_dict, Path(tmpdirname) / "model.safetensors", metadata={"format": "pt"})

        # Try loading back, with the missing key not present in the state_dict
        model = BaseModelWithMissingKeys.from_pretrained(tmpdirname)

        # Make sure the skipped missing key is not still on meta device!
        for k, v in model.state_dict().items():
            self.assertTrue(v.device.type == "cpu", f"{k} is not on cpu!")
|
||
|
||
def test_device_map_works_with_unexpected_keys(self):
    """Test that if a parameter is specified in `_keys_to_ignore_on_load_unexpected` and is actually
    present in the checkpoint, it will correctly be removed from the weights we load, especially those
    we use if the device map has offloading"""
    # Use the temporary directory as a context manager so that it is removed deterministically, even when
    # loading fails, instead of relying on the implicit finalizer of a dangling `TemporaryDirectory`.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Create dummy model
        model = BaseModelWithUnexpectedKeys(PreTrainedConfig())

        # Save the config
        model.config.save_pretrained(tmpdirname)

        # Get the state dict to save
        state_dict = model.state_dict()
        # Add a layer that is in the "_keys_to_ignore_on_load_unexpected" list to ignore
        state_dict["mtp"] = torch.randn(12, 12)
        # Save the state dict as a single shard
        safe_save_file(state_dict, Path(tmpdirname) / "model.safetensors", metadata={"format": "pt"})

        # Load the model with entire shards placed on disk in order to trigger `get_disk_only_shard_files`.
        # Unexpected keys (mtp) should be removed from the state dict, therefore this should not error out.
        BaseModelWithUnexpectedKeys.from_pretrained(tmpdirname, device_map={"linear": "cpu", "linear_2": "disk"})
|
||
|
||
def test_device_map_works_with_unexpected_keys_sharded(self):
    """Test that if a parameter is specified in `_keys_to_ignore_on_load_unexpected` and is actually
    present in the checkpoint, it will correctly be removed from the weights we load, especially those
    we use if the device map has offloading (sharded checkpoint variant)"""
    # Use the temporary directory as a context manager so that it is removed deterministically, even when
    # loading fails, instead of relying on the implicit finalizer of a dangling `TemporaryDirectory`.
    with tempfile.TemporaryDirectory() as tmpdirname:
        checkpoint_dir = Path(tmpdirname)

        # Create dummy model
        model = BaseModelWithUnexpectedKeys(PreTrainedConfig())

        # Save the config
        model.config.save_pretrained(tmpdirname)

        # Get the state dict to save
        state_dict = model.state_dict()

        # Add a layer that is in the "_keys_to_ignore_on_load_unexpected" list to ignore
        state_dict["mtp"] = torch.randn(50, 50)

        # Split the state dict in shards, save the index and the shards
        shards = split_torch_state_dict_into_shards(state_dict, max_shard_size="1kb")
        index = {
            "metadata": {"total_parameters": model.num_parameters(), **shards.metadata},
            "weight_map": shards.tensor_to_filename,
        }
        with open(checkpoint_dir / SAFE_WEIGHTS_INDEX_NAME, "w", encoding="utf-8") as f:
            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
            f.write(content)

        # Save each shard
        for shard_file, tensors in shards.filename_to_tensors.items():
            shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors}
            safe_save_file(shard, checkpoint_dir / shard_file, metadata={"format": "pt"})

        # Load the model with entire shards placed on disk in order to trigger `get_disk_only_shard_files`.
        # Unexpected keys (mtp) should be removed from the state dict, therefore this should not error out.
        BaseModelWithUnexpectedKeys.from_pretrained(tmpdirname, device_map={"linear": "cpu", "linear_2": "disk"})
|
||
|
||
def test_loading_respect_env_variable_for_threading(self):
    """Test that we can correctly control threading during loading

    Thread creation is counted by temporarily replacing `threading.Thread.__init__`. Both the replaced
    constructor and the `HF_DEACTIVATE_ASYNC_LOAD` environment variable are restored when the test ends,
    including when an assertion fails, so that no state leaks into the tests that run afterwards.
    """
    model = BaseModel(PreTrainedConfig())

    # Wrap Thread.__init__ to add a counter of launched threads
    original_init = threading.Thread.__init__
    counter = 0

    def tracking_init(thread, *args, **kwargs):
        nonlocal counter
        counter += 1
        original_init(thread, *args, **kwargs)

    # `patch.object` puts the original constructor back on exit, `patch.dict` restores `os.environ`
    with patch.object(threading.Thread, "__init__", tracking_init), patch.dict(os.environ):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model.save_pretrained(tmpdirname)

            # Use threading
            os.environ["HF_DEACTIVATE_ASYNC_LOAD"] = "0"
            before = counter
            _ = BaseModel.from_pretrained(tmpdirname)
            after = counter
            self.assertTrue(after - before > 0, "Loading should have spawned new threads!")

            # Deactivate threading
            os.environ["HF_DEACTIVATE_ASYNC_LOAD"] = "1"
            before = counter
            _ = BaseModel.from_pretrained(tmpdirname)
            after = counter
            self.assertTrue(after == before, "It looks like loading did spawn new threads, but it should not have!")
|
||
|
||
def test_error_in_weight_conversion_is_raised(self):
    """Check that a failure inside a `ConversionOps` surfaces as a `RuntimeError` when loading."""
    tiny_config = MixtralConfig(num_hidden_layers=2, hidden_size=32, intermediate_size=32, num_attention_heads=8)
    model = MixtralModel(tiny_config)

    # Safeguard: the model must actually rely on the operation that is mocked below
    merge_operations = [
        operation
        for entry in get_model_conversion_mapping(model)
        if isinstance(entry, WeightConverter)
        for operation in entry.operations
        if isinstance(operation, MergeModulelist)
    ]
    self.assertTrue(merge_operations, "The test is useless without conversions on the model")

    expected_message = "We encountered some issues during automatic conversion of the weights."
    with tempfile.TemporaryDirectory() as tmpdirname:
        model.save_pretrained(tmpdirname)
        # Reload while the conversion op is mocked to raise: the proper error must come out of the loading
        failing_convert = patch.object(MergeModulelist, "convert", side_effect=Exception("failed"))
        with failing_convert, self.assertRaisesRegex(RuntimeError, expected_message):
            _ = MixtralModel.from_pretrained(tmpdirname)
|
||
|
||
def test_composite_model_inherit_properties(self):
    """Check the module-list properties exposed by a composite model built from sub-models."""
    composite = MultimodalModel(PreTrainedConfig())
    # The top level model must have inherited the properties of its child language and vision models
    expected_properties = {
        "_no_split_modules": {"VerySimpleLayer"},  # language model
        "_keep_in_fp32_modules": {"linear", "head"},  # language model + composite model
        "_keep_in_fp32_modules_strict": {"simple"},  # vision model
    }
    for property_name, expected_value in expected_properties.items():
        self.assertEqual(getattr(composite, property_name), expected_value)
|
||
|
||
@parameterized.expand([("sdpa",), ("flash_attention_2",)])
def test_decoder_only_model_can_be_used_as_encoder(self, attn_implementation: str):
    """Test that most well-behaved decoder models can be used as encoders through the `is_causal` kwarg/config.
    Note that it's enough to test it on Llama, as the entry points are all through general code
    (masking_utils.py + `capture_outputs` decorator). This makes it easier as the model needs to use both the
    mask API from masking_utils.py and the decorator as mentioned above, and we don't know what models follow that
    standard exactly (so we cannot easily make it a common model test).

    The output obtained with an explicitly bidirectional mask plus `is_causal=False` is the reference; every
    other way of calling the model is compared against it."""
    if attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
        self.skipTest("FA2 not available")

    from transformers import LlamaConfig, LlamaModel
    from transformers.masking_utils import create_bidirectional_mask

    # Tiny model so that the test stays cheap
    config = LlamaConfig(
        num_hidden_layers=2,
        num_attention_heads=2,
        num_key_value_heads=1,
        head_dim=16,
        hidden_size=32,
        intermediate_size=64,
        vocab_size=100,
        attn_implementation=attn_implementation,
    )
    model = LlamaModel(copy.deepcopy(config)).to(device=torch_device, dtype=torch.bfloat16)

    # Create inputs, making sure we use padding to verify that mask creation accounts for it correctly
    input_ids = torch.randint(5, 95, (2, 17), device=torch_device)
    attention_mask = torch.ones_like(input_ids, device=torch_device)
    # Mask out the first 3 positions of the second sequence
    attention_mask[1, 0:3] = 0

    # The original `create_causal_mask` used in modeling_llama forwards more kwargs than `create_bidirectional_mask`
    # accepts, so we need this wrapper instead to absorb them
    def create_bidirectional_mask_with_kwargs(
        config,
        inputs_embeds,
        attention_mask,
        encoder_hidden_states=None,
        or_mask_function=None,
        and_mask_function=None,
        **kwargs,
    ):
        return create_bidirectional_mask(
            config, inputs_embeds, attention_mask, encoder_hidden_states, or_mask_function, and_mask_function
        )

    # Explicitly monkey patch the mask creation function + forward the is_causal kwarg to get the expected result
    # from the model behaving as encoder instead of decoder
    with patch(
        "transformers.models.llama.modeling_llama.create_causal_mask", new=create_bidirectional_mask_with_kwargs
    ):
        reference = model(input_ids, attention_mask=attention_mask, is_causal=False).last_hidden_state
        without_kwarg = model(input_ids, attention_mask=attention_mask).last_hidden_state

    # Here, since we have padding, the mask created should never be None. Since the mask is never None, the sdpa
    # backend will always use `is_causal=False`, so both should be strictly equivalent
    if attn_implementation == "sdpa":
        torch.testing.assert_close(reference, without_kwarg)
    # But FA2 relies solely on the `is_causal` kwarg to decide how to dispatch, as it will use varlen since we
    # have padding, so both won't be equivalent at all
    else:
        # Everything should be different (we only test the maximum of the diff to avoid flakiness)
        self.assertTrue(torch.abs(reference - without_kwarg).max() >= 1e-1)

    # Now if we simply forward the kwarg with the usual mask function, it should still work the exact same
    with_kwarg_only = model(input_ids, attention_mask=attention_mask, is_causal=False).last_hidden_state
    torch.testing.assert_close(reference, with_kwarg_only)

    # Now, if we use the usual forward, the model should behave normally as a decoder, and output should be
    # completely different
    as_decoder = model(input_ids, attention_mask=attention_mask).last_hidden_state
    # Everything should be different (we only test the maximum of the diff to avoid flakiness)
    self.assertTrue(torch.abs(reference - as_decoder).max() >= 1e-1)

    # It should also work with it in the config
    # NOTE: this mutates the config of `model` itself, so it has to stay the last scenario of the test
    model.config.is_causal = False
    with_config_only = model(input_ids, attention_mask=attention_mask).last_hidden_state
    torch.testing.assert_close(reference, with_config_only)
|
||
|
||
|
||
@slow
@require_torch
class ModelOnTheFlyConversionTester(unittest.TestCase):
    """Hub tests around the automatic conversion of bin checkpoints to safetensors.

    Each test pushes a small, randomly initialized BERT model serialized as bin files to a uniquely named
    repo under `cls.user`, loads it back with `from_pretrained` and inspects the discussions opened on the
    repo. The repo is deleted again in `tearDown`.

    The token is read from the `HUGGINGFACE_PRODUCTION_USER_TOKEN` environment variable; `setUpClass` raises
    a `ValueError` when it is not set.
    """

    @classmethod
    def setUpClass(cls):
        cls.user = "huggingface-hub-ci"
        cls.token = os.getenv("HUGGINGFACE_PRODUCTION_USER_TOKEN", None)

        if cls.token is None:
            raise ValueError("Cannot run tests as secret isn't setup.")

        cls.api = HfApi(token=cls.token)

    def setUp(self) -> None:
        self.repo_name = f"{self.user}/test-model-on-the-fly-{uuid.uuid4()}"

    def tearDown(self) -> None:
        # A test that fails before its first push never creates the repo; `missing_ok=True` keeps the cleanup
        # from raising on top of (and hiding) the original failure.
        self.api.delete_repo(self.repo_name, missing_ok=True)

    @staticmethod
    def _make_initial_model():
        """Return the small, randomly initialized BERT model that the tests push to the Hub."""
        config = BertConfig(
            vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
        )
        return BertModel(config)

    def _push_as_bin_files(self, model, **push_kwargs):
        """Push `model` to `self.repo_name`, serialized as bin files; `push_kwargs` go to `push_to_hub`."""
        # Since we don't support saving with bins files anymore, but still support loading we use this context
        # to easily create the bins files and try to load them
        with force_serialization_as_bin_files():
            model.push_to_hub(self.repo_name, token=self.token, **push_kwargs)

    def _check_models_are_equal(self, initial_model, converted_model):
        """Check, in a subtest, that both models hold the same parameter values."""
        with self.subTest("Initial and converted models are equal"):
            for p1, p2 in zip(initial_model.parameters(), converted_model.parameters()):
                self.assertTrue(torch.equal(p1, p2))

    def _check_first_discussion(self, expected_author):
        """Check, in a subtest, the author and the title of the first discussion listed for the repo."""
        with self.subTest("PR was open with the safetensors account"):
            # `self.api` was created with the token, so it does not need to be passed again here
            discussions = self.api.get_repo_discussions(self.repo_name)
            discussion = next(discussions)
            self.assertEqual(discussion.author, expected_author)
            self.assertEqual(discussion.title, "Adding `safetensors` variant of this model")

    def _assert_bot_opened_pr(self):
        """Assert that the conversion bot authored a discussion and that the last one has the expected title."""
        bot_titles = [
            discussion.title
            for discussion in self.api.get_repo_discussions(self.repo_name)
            if discussion.author == "SFconvertbot"
        ]
        self.assertTrue(bot_titles)
        self.assertEqual(bot_titles[-1], "Adding `safetensors` variant of this model")

    def test_safetensors_on_the_fly_conversion(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model)
        converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True)

        self._check_models_are_equal(initial_model, converted_model)
        self._check_first_discussion(expected_author="SFconvertbot")

    def test_safetensors_on_the_fly_conversion_private(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model, private=True)
        converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)

        self._check_models_are_equal(initial_model, converted_model)
        # For a private repo, the discussion is expected to be authored by the user owning the token
        self._check_first_discussion(expected_author=self.user)

    def test_safetensors_on_the_fly_conversion_gated(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model)
        self.api.update_repo_settings(self.repo_name, gated="auto")
        converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)

        self._check_models_are_equal(initial_model, converted_model)
        self._check_first_discussion(expected_author="SFconvertbot")

    def test_safetensors_on_the_fly_sharded_conversion(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model, max_shard_size="200kb")
        converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True)

        self._check_models_are_equal(initial_model, converted_model)
        self._check_first_discussion(expected_author="SFconvertbot")

    def test_safetensors_on_the_fly_sharded_conversion_private(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model, max_shard_size="200kb", private=True)
        converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)

        self._check_models_are_equal(initial_model, converted_model)
        # For a private repo, the discussion is expected to be authored by the user owning the token
        self._check_first_discussion(expected_author=self.user)

    def test_safetensors_on_the_fly_sharded_conversion_gated(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model, max_shard_size="200kb")
        # Gate the repo through the API client, as the non-sharded gated test does. The previous raw
        # `httpx.put` ignored its response, so a failed request left the repo ungated without any error.
        self.api.update_repo_settings(self.repo_name, gated="auto")
        converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)

        self._check_models_are_equal(initial_model, converted_model)
        self._check_first_discussion(expected_author="SFconvertbot")

    @unittest.skip(reason="Edge case, should work once the Space is updated`")
    def test_safetensors_on_the_fly_wrong_user_opened_pr(self):
        initial_model = self._make_initial_model()
        self._push_as_bin_files(initial_model, private=True)
        BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)

        # This should have opened a PR with the user's account
        self._check_first_discussion(expected_author=self.user)

        # We now switch the repo visibility to public
        self.api.update_repo_settings(self.repo_name, private=False)

        # We once again call from_pretrained, which should call the bot to open a PR
        BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)

        with self.subTest("PR was open with the safetensors account"):
            self._assert_bot_opened_pr()

    def test_safetensors_on_the_fly_specific_revision(self):
        initial_model = self._make_initial_model()

        # Push a model on `main`
        self._push_as_bin_files(initial_model)

        # Push a model on a given revision
        self._push_as_bin_files(initial_model, revision="new-branch")

        # Try to convert the model on that revision should raise
        with self.assertRaises(EnvironmentError):
            BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token, revision="new-branch")

    def test_absence_of_safetensors_triggers_conversion(self):
        initial_model = self._make_initial_model()

        # Push a model on `main`
        self._push_as_bin_files(initial_model)

        # Download the model that doesn't have safetensors
        BertModel.from_pretrained(self.repo_name, token=self.token)

        # Give the background conversion thread(s) up to 10 seconds each to finish
        for thread in threading.enumerate():
            if thread.name == "Thread-autoconversion":
                thread.join(timeout=10)

        self._assert_bot_opened_pr()

    @mock.patch("transformers.safetensors_conversion.spawn_conversion")
    def test_absence_of_safetensors_triggers_conversion_failed(self, spawn_conversion_mock):
        spawn_conversion_mock.side_effect = httpx.HTTPError("failed")

        initial_model = self._make_initial_model()

        # Push a model on `main`
        self._push_as_bin_files(initial_model)

        # The auto conversion is mocked to always raise; ensure that it doesn't raise in the main thread
        BertModel.from_pretrained(self.repo_name, token=self.token)
|
||
|
||
|
||
@require_torch
@is_staging_test
class ModelPushToHubTester(unittest.TestCase):
    """Staging tests for pushing models to the Hub, each one working in its own `TemporaryHubRepo`."""

    @classmethod
    def setUpClass(cls):
        cls._token = TOKEN

    @staticmethod
    def _build_tiny_bert():
        """Return the small, randomly initialized BERT model used by the push tests."""
        tiny_config = BertConfig(
            vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
        )
        return BertModel(tiny_config)

    def _assert_same_parameters(self, pushed_model, reloaded_model):
        """Assert that both models hold the same parameter values, compared pairwise in order."""
        for pushed_param, reloaded_param in zip(pushed_model.parameters(), reloaded_model.parameters()):
            self.assertTrue(torch.equal(pushed_param, reloaded_param))

    @unittest.skip(reason="This test is flaky")
    def test_push_to_hub(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            model = self._build_tiny_bert()
            model.push_to_hub(tmp_repo.repo_id, token=self._token)

            reloaded = BertModel.from_pretrained(tmp_repo.repo_id)
            self._assert_same_parameters(model, reloaded)

    @unittest.skip(reason="This test is flaky")
    def test_push_to_hub_via_save_pretrained(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            model = self._build_tiny_bert()
            # Push to hub via save_pretrained
            with tempfile.TemporaryDirectory() as tmp_dir:
                model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)

            reloaded = BertModel.from_pretrained(tmp_repo.repo_id)
            self._assert_same_parameters(model, reloaded)

    def test_push_to_hub_with_description(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            model = self._build_tiny_bert()
            COMMIT_DESCRIPTION = """
The commit description supports markdown synthax see:
```python
>>> form transformers import AutoConfig
>>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
```
"""
            # The description must come back unchanged in the details of the created commit
            commit_details = model.push_to_hub(
                tmp_repo.repo_id, create_pr=True, token=self._token, commit_description=COMMIT_DESCRIPTION
            )
            self.assertEqual(commit_details.commit_description, COMMIT_DESCRIPTION)

    @unittest.skip(reason="This test is flaky")
    def test_push_to_hub_in_organization(self):
        with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
            model = self._build_tiny_bert()
            model.push_to_hub(tmp_repo.repo_id, token=self._token)

            reloaded = BertModel.from_pretrained(tmp_repo.repo_id)
            self._assert_same_parameters(model, reloaded)

    @unittest.skip(reason="This test is flaky")
    def test_push_to_hub_in_organization_via_save_pretrained(self):
        with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
            model = self._build_tiny_bert()
            # Push to hub via save_pretrained
            with tempfile.TemporaryDirectory() as tmp_dir:
                model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo.repo_id)

            reloaded = BertModel.from_pretrained(tmp_repo.repo_id)
            self._assert_same_parameters(model, reloaded)

    def test_push_to_hub_dynamic_model(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            CustomConfig.register_for_auto_class()
            CustomModel.register_for_auto_class()

            config = CustomConfig(hidden_size=32)
            model = CustomModel(config)

            model.push_to_hub(tmp_repo.repo_id, token=self._token)
            # The config must now map the auto classes to the custom code
            expected_auto_map = {
                "AutoConfig": "custom_configuration.CustomConfig",
                "AutoModel": "custom_modeling.CustomModel",
            }
            self.assertDictEqual(config.auto_map, expected_auto_map)

            reloaded = AutoModel.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
            # Can't make an isinstance check because the reloaded model is from the CustomModel class of a
            # dynamic module
            self.assertEqual(reloaded.__class__.__name__, "CustomModel")
            self._assert_same_parameters(model, reloaded)

            # Instantiating from the remote config alone must yield the custom class as well
            remote_config = AutoConfig.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
            from_config = AutoModel.from_config(remote_config, trust_remote_code=True)
            self.assertEqual(from_config.__class__.__name__, "CustomModel")

    def test_push_to_hub_with_tags(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            from huggingface_hub import ModelCard

            new_tags = ["tag-1", "tag-2"]

            CustomConfig.register_for_auto_class()
            CustomModel.register_for_auto_class()

            model = CustomModel(CustomConfig(hidden_size=32))

            # No tags before any are added, then exactly the added ones
            self.assertIsNone(model.model_tags)
            model.add_model_tags(new_tags)
            self.assertEqual(model.model_tags, new_tags)

            model.push_to_hub(tmp_repo.repo_id, token=self._token)

            # The tags must show up in the model card of the pushed repo
            pushed_card = ModelCard.load(tmp_repo.repo_id)
            self.assertEqual(pushed_card.data.tags, new_tags)
|
||
|
||
|
||
@require_torch
|
||
class TestAttentionImplementation(unittest.TestCase):
|
||
@unittest.skip("Just a bit annoying")
def test_error_no_sdpa_available(self):
    """Requesting sdpa for this checkpoint must raise a `ValueError`; the default load must still work."""
    checkpoint = "hf-tiny-model-private/tiny-random-MCTCTModel"
    expected_message = (
        "does not support an attention implementation through torch.nn.functional.scaled_dot_product_attention"
    )

    with self.assertRaises(ValueError) as raised:
        _ = AutoModel.from_pretrained(checkpoint, attn_implementation="sdpa")
    self.assertIn(expected_message, str(raised.exception))

    # Without an explicit attention implementation, loading must go through
    _ = AutoModel.from_pretrained(checkpoint)
|
||
|
||
# TODO (ydshieh): use another model
|
||
@unittest.skip("model deleted")
|
||
def test_error_no_flash_available(self):
|
||
with self.assertRaises(ValueError) as cm:
|
||
_ = AutoModel.from_pretrained(
|
||
"hf-tiny-model-private/tiny-random-MCTCTModel", attn_implementation="flash_attention_2"
|
||
)
|
||
|
||
self.assertTrue("does not support Flash Attention 2.0" in str(cm.exception))
|
||
|
||
# TODO (ydshieh): use another model
|
||
@unittest.skip("model deleted")
|
||
def test_error_no_flash_available_with_config(self):
|
||
with self.assertRaises(ValueError) as cm:
|
||
config = AutoConfig.from_pretrained("hf-tiny-model-private/tiny-random-MCTCTModel")
|
||
|
||
_ = AutoModel.from_pretrained(
|
||
"hf-tiny-model-private/tiny-random-MCTCTModel", config=config, attn_implementation="flash_attention_2"
|
||
)
|
||
|
||
self.assertTrue("does not support Flash Attention 2.0" in str(cm.exception))
|
||
|
||
# TODO (ydshieh): use another model
|
||
@unittest.skip("model deleted")
|
||
def test_error_wrong_attn_implementation(self):
|
||
with self.assertRaises(ValueError) as cm:
|
||
_ = AutoModel.from_pretrained("hf-tiny-model-private/tiny-random-MCTCTModel", attn_implementation="foo")
|
||
|
||
self.assertTrue('The only possible arguments are `attn_implementation="eager"' in str(cm.exception))
|
||
|
||
def test_registered_experts_implementation_is_valid(self):
|
||
from transformers.integrations.moe import ALL_EXPERTS_FUNCTIONS
|
||
|
||
def custom_experts_forward(*args, **kwargs):
|
||
pass
|
||
|
||
experts_implementation = "custom_experts"
|
||
model = BaseModel(PreTrainedConfig())
|
||
|
||
with patch.dict(ALL_EXPERTS_FUNCTIONS._global_mapping, {}, clear=False):
|
||
ALL_EXPERTS_FUNCTIONS.register(experts_implementation, custom_experts_forward)
|
||
|
||
self.assertEqual(model.get_correct_experts_implementation(experts_implementation), experts_implementation)
|
||
|
||
def test_not_available_flash(self):
|
||
if is_flash_attn_2_available():
|
||
self.skipTest(reason="Please uninstall flash-attn package to run test_not_available_flash")
|
||
|
||
if is_torch_npu_available():
|
||
self.skipTest(
|
||
reason="FlashAttention2 is supported on Ascend NPU without using package `flash-attn`, ignore this test case."
|
||
)
|
||
|
||
if is_kernels_available():
|
||
self.skipTest(reason="Please uninstall `kernels` package to run `test_not_available_flash`")
|
||
|
||
with self.assertRaises(ImportError) as cm:
|
||
_ = AutoModel.from_pretrained(
|
||
"hf-internal-testing/tiny-random-GPTBigCodeModel", attn_implementation="flash_attention_2"
|
||
)
|
||
self.assertTrue("the package for FlashAttention2 doesn't seem to be installed." in str(cm.exception))
|
||
|
||
def test_flash_attn_available_no_keyerror_when_missing_from_distribution_map(self):
|
||
# Regression test for https://github.com/huggingface/transformers/issues/45520.
|
||
# When flash_attn is importable but not present in PACKAGE_DISTRIBUTION_MAPPING
|
||
# (e.g. installed via a non-standard wheel), the availability checks must not raise
|
||
# a KeyError; they should simply return False.
|
||
stripped_map = {
|
||
k: v for k, v in PACKAGE_DISTRIBUTION_MAPPING.items() if k not in ("flash_attn", "flash_attn_interface")
|
||
}
|
||
with patch("transformers.utils.import_utils.PACKAGE_DISTRIBUTION_MAPPING", stripped_map):
|
||
with patch("transformers.modeling_flash_attention_utils.PACKAGE_DISTRIBUTION_MAPPING", stripped_map):
|
||
self.assertFalse(is_flash_attn_2_available())
|
||
self.assertFalse(is_flash_attn_3_available())
|
||
self.assertFalse(is_flash_attn_4_available())
|
||
|
||
def test_not_available_flash_with_config(self):
|
||
if is_flash_attn_2_available():
|
||
self.skipTest(reason="Please uninstall flash-attn package to run test_not_available_flash")
|
||
|
||
if is_torch_npu_available():
|
||
self.skipTest(
|
||
reason="FlashAttention2 is supported on Ascend NPU without using package `flash-attn`, ignore this test case."
|
||
)
|
||
|
||
if is_kernels_available():
|
||
self.skipTest(reason="Please uninstall `kernels` package to run `test_not_available_flash_with_config`")
|
||
|
||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-GPTBigCodeModel")
|
||
|
||
with self.assertRaises(ImportError) as cm:
|
||
_ = AutoModel.from_pretrained(
|
||
"hf-internal-testing/tiny-random-GPTBigCodeModel",
|
||
config=config,
|
||
attn_implementation="flash_attention_2",
|
||
)
|
||
|
||
self.assertTrue("the package for FlashAttention2 doesn't seem to be installed." in str(cm.exception))
|
||
|
||
def test_kernels_fallback(self):
|
||
if not is_kernels_available():
|
||
self.skipTest(reason="Please install `kernels` package to run `test_kernels_fallback`")
|
||
|
||
if is_flash_attn_2_available():
|
||
self.skipTest(reason="Please uninstall flash-attn package to run test_kernels_fallback")
|
||
|
||
if is_torch_npu_available():
|
||
self.skipTest(
|
||
reason="FlashAttention2 is supported on Ascend NPU without using package `flash-attn`, ignore this test case."
|
||
)
|
||
|
||
logger = logging.get_logger("transformers.modeling_utils")
|
||
with LoggingLevel(logging.WARNING):
|
||
with CaptureLogger(logger) as cl:
|
||
_ = AutoModel.from_pretrained(
|
||
"hf-internal-testing/tiny-random-GPTBigCodeModel", attn_implementation="flash_attention_2"
|
||
)
|
||
|
||
self.assertTrue(
|
||
f"You do not have `flash_attn` installed, using `{FLASH_ATTN_KERNEL_FALLBACK['flash_attention_2']}` from the `kernels` library instead!"
|
||
in cl.out
|
||
)
|
||
|
||
# TODO (ydshieh): use another model
|
||
@unittest.skip("model deleted")
|
||
def test_not_available_kernels(self):
|
||
if is_kernels_available():
|
||
self.skipTest(reason="Please uninstall `kernels` package to run `test_not_available_kernels`")
|
||
|
||
with self.assertRaises(ImportError) as cm:
|
||
_ = AutoModel.from_pretrained(
|
||
"hf-tiny-model-private/tiny-random-MCTCTModel",
|
||
attn_implementation=FLASH_ATTN_KERNEL_FALLBACK["flash_attention_2"],
|
||
)
|
||
|
||
self.assertTrue("`kernels` is either not installed or uses an incompatible version." in str(cm.exception))
|
||
|
||
def test_attention_and_experts_modules_can_be_used_standalone(self):
|
||
"""Test that both Attention and Expert modules can be used on their own, instantiated from a config without the
|
||
respective `_xxx_implementation` attr set. Also checks that it correctly raises a warning"""
|
||
from transformers.models.mixtral.configuration_mixtral import MixtralConfig
|
||
from transformers.models.mixtral.modeling_mixtral import (
|
||
MixtralAttention,
|
||
MixtralExperts,
|
||
MixtralRotaryEmbedding,
|
||
)
|
||
|
||
hidden_size = 32
|
||
seq_len = 10
|
||
config = MixtralConfig(hidden_size=32, intermediate_size=16, num_hidden_layers=2)
|
||
experts_module = MixtralExperts(config)
|
||
attn_module = MixtralAttention(config, layer_idx=0)
|
||
|
||
hidden_states = torch.randn(1, seq_len, hidden_size)
|
||
|
||
# Try the Attention (check it works + raises the warning)
|
||
dummy_ids = torch.arange(seq_len).unsqueeze(0)
|
||
dummy_embeddings = MixtralRotaryEmbedding(config)(hidden_states, dummy_ids)
|
||
with CaptureLogger(logging.get_logger("transformers.modeling_utils")) as cl:
|
||
_ = attn_module(hidden_states, dummy_embeddings, None)
|
||
self.assertIn(
|
||
"You tried to access the `AttentionInterface` with a `config._attn_implementation` set to `None`.", cl.out
|
||
)
|
||
# With a wrong _attn_implementation, it should raise a proper exception
|
||
attn_module.config._attn_implementation = "foobar"
|
||
with self.assertRaisesRegex(KeyError, "`foobar` is not a valid attention implementation registered"):
|
||
_ = attn_module(hidden_states, dummy_embeddings, None)
|
||
|
||
# Try the Experts (check it works + raises the warning)
|
||
hidden_states = hidden_states.reshape(-1, hidden_size)
|
||
dummy_scores = torch.randn(seq_len, config.num_experts_per_tok)
|
||
dummy_indices = torch.randint(0, config.num_local_experts, (seq_len, config.num_experts_per_tok))
|
||
with CaptureLogger(logging.get_logger("transformers.integrations.moe")) as cl:
|
||
_ = experts_module(hidden_states, dummy_indices, dummy_scores)
|
||
self.assertIn(
|
||
"You tried to access the `ExpertsInterface` with a `config._experts_implementation` set to `None`.", cl.out
|
||
)
|
||
# With a wrong _experts_implementation, it should raise a proper exception
|
||
experts_module.config._experts_implementation = "foobar"
|
||
with self.assertRaisesRegex(KeyError, "`foobar` is not a valid experts implementation registered"):
|
||
_ = experts_module(hidden_states, dummy_indices, dummy_scores)
|
||
|
||
|
||
@require_torch
class TestTensorSharing(TestCasePlus):
    """Checks how `_find_disjoint` and `_find_identical` classify tensors that come from the same buffer."""

    def test_disjoint(self):
        """Two contiguous halves are reported as disjoint; two interleaved strided views stay shared."""
        buffer = torch.zeros(10)
        group = [{"a", "b"}]

        # First half / second half of the same buffer.
        halves = {"a": buffer[:5], "b": buffer[5:]}
        shared_names, disjoint_names = _find_disjoint(group, halves)
        self.assertEqual(shared_names, [])
        self.assertEqual(disjoint_names, ["a", "b"])

        # Even / odd positions of the same buffer.
        interleaved = {"a": buffer[::2], "b": buffer[1::2]}
        shared_names, disjoint_names = _find_disjoint(group, interleaved)
        self.assertEqual(shared_names, [{"a", "b"}])
        self.assertEqual(disjoint_names, [])

    def test_identical(self):
        """The very same tensor under two names is identical; a slice of it is only shared."""
        full = torch.zeros(10)
        group = [{"a", "b"}]

        # Both names point at the same tensor object.
        aliased = {"a": full, "b": full}
        shared_names, identical_names = _find_identical(group, aliased)
        self.assertEqual(shared_names, [])
        self.assertEqual(identical_names, [{"a", "b"}])

        # The second name now points at a prefix view of the first.
        prefixed = {"a": full, "b": full[:5]}
        shared_names, identical_names = _find_identical(group, prefixed)
        self.assertEqual(shared_names, [{"a", "b"}])
        self.assertEqual(identical_names, [])
|
||
|
||
|
||
@require_torch
class TestSaveAndLoadModelWithExtraState(TestCasePlus):
    """
    This test checks that a model can be saved and loaded that uses the torch extra state API.
    https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.get_extra_state.

    Currently, only tensor-valued extra_states are supported.
    """

    def test_save_and_load_model_with_tensor_extra_state(self):
        """Round-trip a model whose submodule exposes a tensor through `get_extra_state`."""

        class MyConfig(PreTrainedConfig):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)

        class MyModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.some_counter = 0
                self.linear = torch.nn.Linear(320, 320)

            def get_extra_state(self):
                # The counter is wrapped in a tensor for serialization.
                return torch.tensor(self.some_counter)

            def set_extra_state(self, state):
                # Unwrap the tensor back into a Python number.
                self.some_counter = state.item()

        class MyModel(PreTrainedModel):
            config_class = MyConfig

            def __init__(self, config: MyConfig):
                super().__init__(config)
                self.my_layer = MyModule()
                self.post_init()

            def forward(self, hidden_states, attention_mask):
                return self.my_layer(hidden_states, attention_mask)

        model = MyModel(MyConfig())
        model.my_layer.some_counter = 42

        with tempfile.TemporaryDirectory() as checkpoint_dir:
            model.save_pretrained(checkpoint_dir)
            del model
            model, loading_info = MyModel.from_pretrained(checkpoint_dir, output_loading_info=True)
            self.assertEqual(model.my_layer.some_counter, 42)
            # Every category of the loading report must be empty.
            for report_key in ("missing_keys", "unexpected_keys", "mismatched_keys", "error_msgs"):
                self.assertEqual(len(loading_info[report_key]), 0)

    @mark.xfail(reason="save and from_pretrained currently only supports tensor extra_state")
    def test_save_and_load_model_with_dict_extra_state(self):
        """Round-trip a model whose submodule exposes a plain dict through `get_extra_state` (expected to fail)."""

        class MyConfig(PreTrainedConfig):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)

        class MyModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.some_counter = 0
                self.linear = torch.nn.Linear(320, 320)

            def get_extra_state(self):
                # The extra state is a plain dict here, not a tensor.
                return {"some_counter": self.some_counter}

            def set_extra_state(self, state):
                self.some_counter = state["some_counter"]

        class MyModel(PreTrainedModel):
            config_class = MyConfig

            def __init__(self, config: MyConfig):
                super().__init__(config)
                self.my_layer = MyModule()

            def forward(self, hidden_states, attention_mask):
                return self.my_layer(hidden_states, attention_mask)

        model = MyModel(MyConfig())
        model.my_layer.some_counter = 42

        with tempfile.TemporaryDirectory() as checkpoint_dir:
            model.save_pretrained(checkpoint_dir)
            del model
            model, loading_info = MyModel.from_pretrained(checkpoint_dir, output_loading_info=True)
            self.assertEqual(model.my_layer.some_counter, 42)
            # Every category of the loading report must be empty.
            for report_key in ("missing_keys", "unexpected_keys", "mismatched_keys", "error_msgs"):
                self.assertEqual(len(loading_info[report_key]), 0)
|
||
|
||
|
||
class TestGetDecoder(unittest.TestCase):
    """Tests for what `get_decoder()` returns on different model layouts."""

    @staticmethod
    def _tiny_mistral_config():
        """Return the small `MistralConfig` shared by the Mistral-based tests of this class."""
        return MistralConfig(
            vocab_size=128,
            hidden_size=32,
            intermediate_size=64,
            num_hidden_layers=2,
            num_attention_heads=4,
        )

    def test_causal_lm_get_decoder_returns_underlying_model(self):
        """A causal LM wrapper returns its inner `model`."""
        model = MistralForCausalLM(self._tiny_mistral_config())
        dec = model.get_decoder()

        self.assertIs(dec, model.model, f"Expected get_decoder() to return model.model, got {type(dec)}")

    def test_seq2seq_get_decoder_still_returns_decoder_module(self):
        """A seq2seq wrapper returns the decoder submodule of its inner model."""
        cfg = BartConfig(
            vocab_size=128,
            d_model=32,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=4,
            decoder_attention_heads=4,
            encoder_ffn_dim=64,
            decoder_ffn_dim=64,
        )
        model = BartForConditionalGeneration(cfg)
        dec = model.get_decoder()

        self.assertIs(dec, model.model.decoder, "Seq2seq get_decoder() should return the decoder submodule")

    def test_base_model_returns_self(self):
        """Test that base transformer models (no decoder/model attributes) return self."""
        base_model = MistralModel(self._tiny_mistral_config())
        dec = base_model.get_decoder()

        self.assertIs(dec, base_model, f"Base model get_decoder() should return self, got {type(dec)}")

    def test_explicit_decoder_attribute_opt(self):
        """Test models with explicit decoder attribute (OPT style)."""
        cfg = OPTConfig(
            vocab_size=128,
            hidden_size=32,
            ffn_dim=64,
            num_hidden_layers=2,
            num_attention_heads=4,
            max_position_embeddings=512,
        )
        model = OPTForCausalLM(cfg)
        dec = model.get_decoder()

        # The message names the attribute path that is actually compared.
        self.assertIs(
            dec, model.model.decoder, f"OPT get_decoder() should return model.model.decoder, got {type(dec)}"
        )

    def test_explicit_decoder_attribute_t5(self):
        """Test encoder-decoder models with explicit decoder attribute."""
        cfg = T5Config(
            vocab_size=128,
            d_model=32,
            d_ff=64,
            num_layers=2,
            num_heads=4,
        )
        model = T5ForConditionalGeneration(cfg)
        dec = model.get_decoder()

        self.assertIs(dec, model.decoder, f"T5 get_decoder() should return decoder attribute, got {type(dec)}")

    def test_same_type_recursion_prevention(self):
        """Test that same-type recursion is prevented (see issue #40815)."""
        model = MistralForCausalLM(self._tiny_mistral_config())

        self.assertIsNot(type(model), type(model.model), "Types should be different to prevent recursion")

        dec = model.get_decoder()
        self.assertIs(dec, model.model, f"Should return model.model without infinite recursion, got {type(dec)}")

        inner_dec = model.model.get_decoder()
        self.assertIs(inner_dec, model.model, f"Inner model should return itself, got {type(inner_dec)}")

    def test_nested_wrapper_recursion(self):
        """Test that a wrapper keeping its backbone under `transformer` (GPT2) returns that backbone."""
        cfg = GPT2Config(
            vocab_size=128,
            n_embd=32,
            n_layer=2,
            n_head=4,
            n_positions=512,
        )
        model = GPT2LMHeadModel(cfg)
        dec = model.get_decoder()

        self.assertIs(dec, model.transformer, f"GPT2 get_decoder() should return model.transformer, got {type(dec)}")

    def test_model_without_get_decoder(self):
        """Test edge case where model has model attribute but no get_decoder method."""

        class MockInnerModel:
            """Mock model without get_decoder method."""

            pass

        class MockWrapperModel:
            """Mock wrapper with model attribute but inner has no get_decoder."""

            def __init__(self):
                self.model = MockInnerModel()

            def get_decoder(self):
                if hasattr(self, "decoder"):
                    return self.decoder
                if hasattr(self, "model"):
                    inner = self.model
                    # Only delegate when the inner object can answer and is not the same type as the wrapper.
                    if hasattr(inner, "get_decoder") and type(inner) is not type(self):
                        return inner.get_decoder()
                    return inner
                return self

        wrapper = MockWrapperModel()
        dec = wrapper.get_decoder()

        self.assertIs(dec, wrapper.model, f"Should return inner model when no get_decoder, got {type(dec)}")

    def test_vision_language_model(self):
        """Test vision-language models like LLaVA that delegate to language_model."""
        text_config = self._tiny_mistral_config()

        vision_config = {
            "hidden_size": 32,
            "intermediate_size": 64,
            "num_hidden_layers": 2,
            "num_attention_heads": 4,
            "num_channels": 3,
            "image_size": 224,
            "patch_size": 16,
        }

        cfg = LlavaConfig(
            text_config=text_config.to_dict(),
            vision_config=vision_config,
            vocab_size=128,
        )

        model = LlavaForConditionalGeneration(cfg)
        dec = model.get_decoder()

        self.assertIs(
            dec, model.model.language_model, f"LLaVA get_decoder() should return language_model, got {type(dec)}"
        )
|
||
|
||
|
||
class TestGetEncoder(unittest.TestCase):
    """Tests for what `get_encoder()` returns on different model layouts and requested modalities."""

    @staticmethod
    def _tiny_bart_config():
        """Return the small `BartConfig` shared by the Bart-based tests of this class."""
        return BartConfig(
            vocab_size=128,
            d_model=32,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=4,
            decoder_attention_heads=4,
            encoder_ffn_dim=64,
            decoder_ffn_dim=64,
        )

    @staticmethod
    def _tiny_mistral_config():
        """Return the small `MistralConfig` shared by the Mistral-based tests of this class."""
        return MistralConfig(
            vocab_size=128,
            hidden_size=32,
            intermediate_size=64,
            num_hidden_layers=2,
            num_attention_heads=4,
        )

    @staticmethod
    def _tiny_musicgen_config():
        """Return the small `MusicgenConfig` (text encoder, audio encoder, decoder) shared by the Musicgen tests."""
        return MusicgenConfig(
            text_encoder={
                "model_type": "t5",
                "vocab_size": 99,
                "d_model": 32,
                "d_ff": 37,
                "num_layers": 2,
                "num_heads": 2,
            },
            audio_encoder={
                "model_type": "encodec",
                "hidden_size": 99,
                "compress": 1,
                "num_filters": 2,
                "codebook_size": 32,
                "codebook_dim": 32,
            },
            decoder={
                "vocab_size": 99,
                "ffn_dim": 32,
                "num_attention_heads": 2,
                "hidden_size": 32,
                "num_hidden_layers": 2,
            },
        )

    def test_seq2seq_lm_get_encoder_returns_encoder(self):
        """A seq2seq wrapper returns the encoder submodule of its inner model."""
        model = BartForConditionalGeneration(self._tiny_bart_config())
        encoder = model.get_encoder()

        self.assertIs(
            encoder,
            model.model.encoder,
            f"Expected get_encoder() to return model.model.encoder, got {type(encoder)}",
        )

    def test_base_model_returns_encoder(self):
        """A base encoder-decoder model returns its own `encoder` attribute."""
        model = BartModel(self._tiny_bart_config())
        encoder = model.get_encoder()

        self.assertIs(encoder, model.encoder, f"Expected get_encoder() to return model.encoder, got {type(encoder)}")

    def test_decoder_only_model_returns_self(self):
        """Test that decoder-only models (no encoder) return self."""
        model = MistralForCausalLM(self._tiny_mistral_config())
        encoder = model.get_encoder()

        self.assertIs(encoder, model, f"Decoder-only model get_encoder() should return self, got {type(encoder)}")

    def test_when_encoder_has_different_name(self):
        """Test models with a non-standard name for the encoder module (Musicgen has `self.text_encoder`)."""
        model = MusicgenForConditionalGeneration(self._tiny_musicgen_config())
        encoder = model.get_encoder()

        # The message names the attribute path that is actually compared.
        self.assertIs(
            encoder,
            model.text_encoder,
            f"MusicgenForConditionalGeneration get_encoder() should return model.text_encoder, got {type(encoder)}",
        )

    def test_audio_encoder(self):
        """Test models with multiple modality encoders (Musicgen has `self.audio_encoder`)."""
        model = MusicgenForConditionalGeneration(self._tiny_musicgen_config())
        encoder = model.get_encoder(modality="audio")

        self.assertIs(
            encoder,
            model.audio_encoder,
            f"MusicgenForConditionalGeneration get_encoder(modality='audio') should return model.audio_encoder, got {type(encoder)}",
        )

    def test_non_existant_modality_throws_error(self):
        """Test that an error is thrown when a requested modality does not exist."""
        model = MistralModel(self._tiny_mistral_config())
        with self.assertRaises(ValueError):
            _ = model.get_encoder(modality="3d")

    def test_encoder_return_self_when_modality_not_found(self):
        """Test that `self` is returned if the model has no encoder for requested modality."""
        model = MistralModel(self._tiny_mistral_config())
        encoder = model.get_encoder(modality="image")

        self.assertIs(
            encoder, model, f"Mistral get_encoder(modality='image') should return self, got {type(encoder)}"
        )

    def test_model_without_get_encoder(self):
        """Test edge case where model has model attribute but no get_encoder method."""

        class MockInnerModel:
            """Mock model without get_encoder method."""

            pass

        class MockWrapperModel:
            """Mock wrapper with model attribute but inner has no get_encoder."""

            def __init__(self):
                self.model = MockInnerModel()

            def get_encoder(self):
                if hasattr(self, "encoder"):
                    return self.encoder
                if hasattr(self, "model"):
                    inner = self.model
                    # Only delegate when the inner object can answer and is not the same type as the wrapper.
                    if hasattr(inner, "get_encoder") and type(inner) is not type(self):
                        return inner.get_encoder()
                    return inner
                return self

        wrapper = MockWrapperModel()
        encoder = wrapper.get_encoder()

        self.assertIs(encoder, wrapper.model, f"Should return inner model when no get_encoder, got {type(encoder)}")

    def test_vision_language_model(self):
        """Test vision-language models like LLaVA can find the modality encoder ("image")."""
        text_config = self._tiny_mistral_config()

        vision_config = {
            "hidden_size": 32,
            "intermediate_size": 64,
            "num_hidden_layers": 2,
            "num_attention_heads": 4,
            "num_channels": 3,
            "image_size": 224,
            "patch_size": 16,
        }

        cfg = LlavaConfig(
            text_config=text_config.to_dict(),
            vision_config=vision_config,
            vocab_size=128,
        )

        model = LlavaForConditionalGeneration(cfg)
        image_encoder = model.get_encoder(modality="image")

        self.assertIs(
            image_encoder,
            model.model.vision_tower,
            f"LLaVA get_encoder(modality='image') should return vision_tower, got {type(image_encoder)}",
        )
|
||
|
||
|
||
@require_torch
|
||
class DisableMmapLoadingTest(unittest.TestCase):
|
||
"""Tests for the `disable_mmap` kwarg in `load_state_dict` and the `_is_on_hf_mount` helper."""
|
||
|
||
def _fake_open_factory(self, proc_mounts_contents):
    """Build a stand-in for `open` that fakes `/proc/mounts` and forwards every other path to the real `open`.

    Args:
        proc_mounts_contents: text served, through an in-memory `io.StringIO`, whenever `/proc/mounts` is opened.

    Returns:
        A callable accepting the same arguments as `open`.
    """
    import builtins
    import io

    # Grab the builtin now: by the time the fake is called, `builtins.open` may already be patched.
    original_open = builtins.open

    def fake_open(path, *args, **kwargs):
        if path != "/proc/mounts":
            return original_open(path, *args, **kwargs)
        # Mode and other arguments are ignored for the faked file.
        return io.StringIO(proc_mounts_contents)

    return fake_open
|
||
|
||
def test_is_on_hf_mount_linux_match(self):
    """On Linux, a path under a `fuse.hf-mount` entry of `/proc/mounts` is reported as being on an hf mount."""
    from transformers.modeling_utils import _is_on_hf_mount

    mount_lines = [
        "proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0\n",
        "hf-mount /data fuse.hf-mount rw,nosuid,nodev,relatime,user_id=0 0 0\n",
    ]
    proc_mounts = "".join(mount_lines)

    with patch("sys.platform", "linux"):
        # Only `/proc/mounts` is faked; every other file still goes through the real `open`.
        with patch("builtins.open", self._fake_open_factory(proc_mounts)):
            self.assertTrue(_is_on_hf_mount("/data/model.safetensors"))
|
||
|
||
def test_is_on_hf_mount_no_match(self):
    """On Linux, a path whose mount point is a regular ext4 filesystem is not reported as an hf mount."""
    from transformers.modeling_utils import _is_on_hf_mount

    mount_lines = [
        "proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0\n",
        "/dev/nvme0n1p1 /data ext4 rw,relatime 0 0\n",
    ]
    proc_mounts = "".join(mount_lines)

    with patch("sys.platform", "linux"):
        # Only `/proc/mounts` is faked; every other file still goes through the real `open`.
        with patch("builtins.open", self._fake_open_factory(proc_mounts)):
            self.assertFalse(_is_on_hf_mount("/data/model.safetensors"))
|
||
|
||
def test_is_on_hf_mount_non_linux(self):
    """With `sys.platform` reporting `darwin`, the helper answers False for the checkpoint path."""
    from transformers.modeling_utils import _is_on_hf_mount

    checkpoint_path = "/data/model.safetensors"
    with patch("sys.platform", "darwin"):
        on_hf_mount = _is_on_hf_mount(checkpoint_path)
        self.assertFalse(on_hf_mount)
|
||
|
||
def test_load_state_dict_disable_mmap_explicit(self):
    """`load_state_dict` must return the same tensors from a safetensors file with `disable_mmap` off and on."""
    import torch
    from safetensors.torch import save_file as safe_save_file

    from transformers.modeling_utils import load_state_dict

    reference = {
        "weight": torch.arange(12, dtype=torch.float32).reshape(3, 4),
        "bias": torch.tensor([1.0, 2.0, 3.0]),
    }
    with tempfile.TemporaryDirectory() as tmpdir:
        ckpt_path = os.path.join(tmpdir, "model.safetensors")
        safe_save_file(reference, ckpt_path)

        via_mmap = load_state_dict(ckpt_path, disable_mmap=False)
        without_mmap = load_state_dict(ckpt_path, disable_mmap=True)

        # Same parameter names first, then same values for each of them.
        self.assertEqual(set(via_mmap.keys()), set(without_mmap.keys()))
        for name, tensor in via_mmap.items():
            torch.testing.assert_close(tensor, without_mmap[name])
|