Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
310 lines
13 KiB
Python
310 lines
13 KiB
Python
# Copyright 2026 The HuggingFace Inc. team.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import argparse
|
|
import ast
|
|
import difflib
|
|
import glob
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
from collections import Counter, OrderedDict
|
|
from typing import Any
|
|
|
|
from sort_auto_mappings import sort_auto_mapping
|
|
|
|
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES as COMPLETE_CONFIG_MAPPING_NAMES
|
|
from transformers.models.auto.image_processing_auto import MISSING_IMAGE_PROCESSOR_MAPPING_NAMES
|
|
from transformers.models.auto.video_processing_auto import MISSING_VIDEO_PROCESSOR_MAPPING_NAMES
|
|
|
|
|
|
# Static metadata describing this checker. Nothing in this file reads it, so it is
# presumably consumed by an external runner that collects the checkers - TODO confirm the consumer.
CHECKER_CONFIG = {
    "name": "auto_mappings",
    "label": "Generate auto mappings",
    "cache_globs": [],
    # No extra CLI arguments in check mode: `main` is then called with `overwrite=False`.
    "check_args": [],
    # Same flag as the one registered on the argument parser at the bottom of this file.
    "fix_args": ["--fix_and_overwrite"],
}
|
|
|
|
AUTO_GENERATED_HADER = """# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
# This file was automatically generated from existing config files and their `model_type`s. Do NOT edit this file
|
|
# manually as any edits will be overwritten by auto-generation of the file. If any change should be done,
|
|
# please add the correct `cls.model_type` in your config class and run `python utils/check_auto.py --fix_and_overwrite`.
|
|
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
# Copyright 2026 The HuggingFace Inc. team.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
|
|
# Config class names that are allowed to appear as the value of more than one model type in
# `CONFIG_MAPPING_NAMES`; `main` skips them in its duplicate-values check. The duplicates come
# from incorrect naming when the models were shipped and are kept for backward compatibility.
IGNORE_DUPLICATE_CONFIG = ["GPT2Config", "EvollaConfig", "MLCDVisionConfig"]
|
|
|
|
|
|
def build_config_mapping_names() -> tuple[dict, dict]:
|
|
model_type_map = OrderedDict()
|
|
special_mappings = OrderedDict()
|
|
# Track which model_types were resolved by a "natural" match (model_type == module_name)
|
|
# so a later non-natural match (e.g. MaskFormerDetrConfig with model_type="detr" inside
|
|
# models/maskformer/) does not silently overwrite the canonical class.
|
|
natural_types: set[str] = set()
|
|
|
|
# `glob.glob` is filesystem-order dependent — sort to make the output deterministic.
|
|
all_files = sorted(glob.glob("src/transformers/models/**/configuration_*.py", recursive=True))
|
|
for config_path in all_files:
|
|
module_name = config_path.split("/")[-2]
|
|
with open(config_path, "r") as f:
|
|
content = f.read()
|
|
|
|
tree = ast.parse(content)
|
|
for node in tree.body:
|
|
if isinstance(node, ast.ClassDef) and any(
|
|
base.id == "PreTrainedConfig" for base in node.bases if isinstance(base, ast.Name)
|
|
):
|
|
config_cls_name = node.name
|
|
model_type = None
|
|
for stmt in node.body:
|
|
if isinstance(stmt, ast.Assign):
|
|
if model_types := [
|
|
stmt.value.value
|
|
for target in stmt.targets
|
|
if isinstance(target, ast.Name) and target.id == "model_type"
|
|
]:
|
|
model_type = model_types[0]
|
|
break
|
|
elif isinstance(stmt, ast.AnnAssign):
|
|
if stmt.target.id == "model_type":
|
|
model_type = stmt.value.value
|
|
break
|
|
|
|
if not model_type:
|
|
continue
|
|
|
|
is_natural = model_type == module_name
|
|
# If we already recorded a natural match for this model_type, don't let a
|
|
# non-natural one overwrite it — the natural class is the canonical owner.
|
|
if model_type in natural_types and not is_natural:
|
|
continue
|
|
|
|
model_type_map[model_type] = config_cls_name
|
|
if is_natural:
|
|
natural_types.add(model_type)
|
|
special_mappings.pop(model_type, None)
|
|
else:
|
|
special_mappings[model_type] = module_name
|
|
|
|
return model_type_map, special_mappings
|
|
|
|
|
|
def build_image_processor_mapping(
|
|
config_mapping: dict[str, str],
|
|
) -> OrderedDict[str, dict[str, str | None]]:
|
|
processor_mapping = OrderedDict()
|
|
for model_type in config_mapping:
|
|
module = model_type.replace("-", "_")
|
|
fast_processor_name = slow_processor_name = None
|
|
if os.path.exists(f"src/transformers/models/{module}/image_processing_pil_{module}.py"):
|
|
with open(f"src/transformers/models/{module}/image_processing_pil_{module}.py", "r") as f:
|
|
content = f.read()
|
|
|
|
tree = ast.parse(content)
|
|
for node in tree.body:
|
|
if isinstance(node, ast.ClassDef) and any(
|
|
base.id == "PilBackend" for base in node.bases if isinstance(base, ast.Name)
|
|
):
|
|
slow_processor_name = node.name
|
|
|
|
if os.path.exists(f"src/transformers/models/{module}/image_processing_{module}.py"):
|
|
with open(f"src/transformers/models/{module}/image_processing_{module}.py", "r") as f:
|
|
content = f.read()
|
|
|
|
tree = ast.parse(content)
|
|
for node in tree.body:
|
|
if isinstance(node, ast.ClassDef) and any(
|
|
base.id == "TorchvisionBackend" for base in node.bases if isinstance(base, ast.Name)
|
|
):
|
|
fast_processor_name = node.name
|
|
|
|
if slow_processor_name is not None or fast_processor_name is not None:
|
|
processor_mapping[model_type] = {
|
|
**({"pil": slow_processor_name} if slow_processor_name else {}),
|
|
**({"torchvision": fast_processor_name} if fast_processor_name else {}),
|
|
}
|
|
|
|
return processor_mapping
|
|
|
|
|
|
def build_video_processor_mapping(
|
|
config_mapping: dict[str, str],
|
|
) -> OrderedDict[str, dict[str, str | None]]:
|
|
processor_mapping = OrderedDict()
|
|
for model_type in config_mapping:
|
|
module = model_type.replace("-", "_")
|
|
video_processor_name = None
|
|
|
|
if os.path.exists(f"src/transformers/models/{module}/video_processing_{module}.py"):
|
|
with open(f"src/transformers/models/{module}/video_processing_{module}.py", "r") as f:
|
|
content = f.read()
|
|
|
|
tree = ast.parse(content)
|
|
for node in tree.body:
|
|
if isinstance(node, ast.ClassDef) and any(
|
|
base.id == "BaseVideoProcessor" for base in node.bases if isinstance(base, ast.Name)
|
|
):
|
|
video_processor_name = node.name
|
|
|
|
if video_processor_name is not None:
|
|
processor_mapping[model_type] = video_processor_name
|
|
|
|
return processor_mapping
|
|
|
|
|
|
def run_ruff_and_sort(file: str):
    """
    Sort the mappings in `file` in place, then run `ruff check --fix` and `ruff format` on it, as in `make style`.

    The output of both `ruff` commands is discarded and their exit codes are not checked.
    """
    sort_auto_mapping(file, overwrite=True)
    # Lint (with autofix) first, then format.
    for ruff_arguments in (["check", file, "--fix"], ["format", file]):
        subprocess.run(["ruff", *ruff_arguments], stdout=subprocess.DEVNULL)
|
|
|
|
|
|
def format_dict_value(v):
    """
    Render `v` as a snippet of Python source text.

    Strings are wrapped in double quotes (without escaping), dicts and lists are rendered recursively with
    double-quoted dict keys, and anything else falls back to `repr`.
    """
    if isinstance(v, str):
        return f'"{v}"'

    if isinstance(v, dict):
        rendered_pairs = []
        for key, item in v.items():
            rendered_pairs.append(f'"{key}": {format_dict_value(item)}')
        return "{" + ", ".join(rendered_pairs) + "}"

    if isinstance(v, list):
        return "[" + ", ".join(map(format_dict_value, v)) + "]"

    return repr(v)
|
|
|
|
|
|
def format_ordered_dict(name: str, data: OrderedDict):
    """
    Build the source text of the assignment `name = OrderedDict([...])`.

    Each item of `data` becomes one `("key", value),` tuple line, with the value rendered by
    `format_dict_value`. The returned text ends with two newlines.
    """
    indent = " " * 4
    entry_lines = [f'{indent * 2}("{key}", {format_dict_value(value)}),' for key, value in data.items()]
    pieces = [
        f"{name} = OrderedDict(",
        f"{indent}[",
        *entry_lines,
        f"{indent}]",
        ")\n\n",
    ]
    return "\n".join(pieces)
|
|
|
|
|
|
def check_duplicates(mapping_for_special_models: dict[str, Any], auto_mapping: dict[str, Any]):
    """
    Make sure no key of the manually maintained `mapping_for_special_models` also appears in the auto-generated
    `auto_mapping`.

    Raises:
        ValueError: If the two mappings share at least one key.
    """
    shared_keys = set(mapping_for_special_models.keys()) & set(auto_mapping.keys())
    if not shared_keys:
        return

    raise ValueError(
        "You have manually duplicated a model-type that is present in `auto_mappings.py`. "
        f"Please, delete the entries for {shared_keys} if they are identical to auto-generated dict, "
        "or use consistent naming across model files so that the names match."
    )
|
|
|
|
|
|
def main(overwrite: bool):
    """
    Regenerate the content of `src/transformers/models/auto/auto_mappings.py` (path relative to the current working
    directory) and compare it with the copy on disk.

    Args:
        overwrite (`bool`):
            If `True`, write the regenerated content to disk when it differs from the existing file. If `False`,
            raise an error showing the diff instead.

    Raises:
        ValueError: If a manually maintained image/video processor mapping shares a key with the generated one, or
            if `CONFIG_MAPPING_NAMES` is not one-to-one (apart from the `IGNORE_DUPLICATE_CONFIG` class names).
        Exception: If the regenerated content differs from the file on disk and `overwrite` is `False`.
    """
    filename = "src/transformers/models/auto/auto_mappings.py"

    # 1. Read existing file content if available
    # The file holds non-ASCII characters (emoji banner), so the encoding is pinned instead of
    # depending on the locale. Context managers make sure every handle is closed.
    old_content = ""
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as f:
            old_content = f.read()

    # 2. Generate new config mapping dicts by parsing all model-config classes
    config_mapping, special_mapping = build_config_mapping_names()
    image_processor_mapping = build_image_processor_mapping(config_mapping=config_mapping)
    video_processor_mapping = build_video_processor_mapping(config_mapping=config_mapping)

    # Make sure users aren't duplicating the same keys manually
    check_duplicates(MISSING_IMAGE_PROCESSOR_MAPPING_NAMES, image_processor_mapping)
    check_duplicates(MISSING_VIDEO_PROCESSOR_MAPPING_NAMES, video_processor_mapping)

    # The config mapping has to be one-to-one for correct `AutoConfig.from_pretrained()` because `LazyMapping`
    # reverts keys/values and creates a dict from it. Duplicate values will be overwritten by whatever comes at last
    # NOTE(review): if `COMPLETE_CONFIG_MAPPING_NAMES` is a regular dict/OrderedDict its keys are unique by
    # construction, so this first check can never fire - confirm whether it is still needed.
    duplicate_keys = [n for n, c in Counter(COMPLETE_CONFIG_MAPPING_NAMES.keys()).items() if c > 1]
    if duplicate_keys:
        raise ValueError(
            f"Keys in `CONFIG_MAPPING_NAMES` contain duplicates = {duplicate_keys}. "
            "The mapping has to be one-to-one to ensure correct `AutoConfig` functionality!"
        )

    duplicate_values = [
        n
        for n, c in Counter(COMPLETE_CONFIG_MAPPING_NAMES.values()).items()
        if c > 1 and n not in IGNORE_DUPLICATE_CONFIG
    ]
    if duplicate_values:
        raise ValueError(
            f"Values in `CONFIG_MAPPING_NAMES` contain duplicates = {duplicate_values}. "
            "The mapping has to be one-to-one to ensure correct `AutoConfig` functionality!"
        )

    new_mappings = {
        "CONFIG_MAPPING_NAMES": config_mapping,
        "SPECIAL_MODEL_TYPE_TO_MODULE_NAME": special_mapping,
        "IMAGE_PROCESSOR_MAPPING_NAMES": image_processor_mapping,
        "VIDEO_PROCESSOR_MAPPING_NAMES": video_processor_mapping,
    }
    new_content = (
        AUTO_GENERATED_HADER
        + "\nfrom collections import OrderedDict\n\n"
        + "".join(format_ordered_dict(name=k, data=v) for k, v in new_mappings.items())
    )

    # 3. If the new auto-generate content is different, overwrite it
    # Dirty hack to sort and apply ruff to the file content, for easier matching
    with tempfile.TemporaryDirectory() as temp_folder:
        temp_filename = os.path.join(temp_folder, "temp.py")
        with open(temp_filename, "w", encoding="utf-8") as temp_file:
            temp_file.write(new_content)

        run_ruff_and_sort(temp_filename)
        with open(temp_filename, "r", encoding="utf-8") as temp_file:
            new_content = temp_file.read()

    if old_content == new_content:
        return

    if overwrite:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(new_content)
        return

    diff = "".join(
        difflib.unified_diff(
            old_content.splitlines(keepends=True),
            new_content.splitlines(keepends=True),
            fromfile=f"{filename} (on disk)",
            tofile=f"{filename} (regenerated)",
            n=3,
        )
    )
    raise Exception(
        "Generated auto-mapping is not consistent with the contents of `models/auto/auto_mappings.py`.\n"
        "Run `make fix-repo` or `python utils/check_auto.py --fix_and_overwrite` to fix them.\n\n"
        f"Diff (on disk → regenerated):\n{diff}"
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: with no flag the script only checks; `--fix_and_overwrite` rewrites the file.
    cli = argparse.ArgumentParser()
    cli.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    cli_options = cli.parse_args()
    main(overwrite=cli_options.fix_and_overwrite)
|