# Copyright 2026 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Check (or regenerate) `src/transformers/models/auto/auto_mappings.py`.

The mappings are rebuilt by statically parsing the model folders with `ast`: config classes give the
`model_type -> config class` mapping, and image/video processor files give the processor mappings. Without
`--fix_and_overwrite` the script raises if the file on disk differs from the regenerated content; with it, the
file is overwritten.
"""

import argparse
import ast
import difflib
import glob
import os
import subprocess
import tempfile
from collections import Counter, OrderedDict
from typing import Any

from sort_auto_mappings import sort_auto_mapping

from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES as COMPLETE_CONFIG_MAPPING_NAMES
from transformers.models.auto.image_processing_auto import MISSING_IMAGE_PROCESSOR_MAPPING_NAMES
from transformers.models.auto.video_processing_auto import MISSING_VIDEO_PROCESSOR_MAPPING_NAMES


CHECKER_CONFIG = {
    "name": "auto_mappings",
    "label": "Generate auto mappings",
    "cache_globs": [],
    "check_args": [],
    "fix_args": ["--fix_and_overwrite"],
}

# Header written at the top of the generated file. It contains non-ASCII characters, so every file that
# receives it must be opened with an explicit UTF-8 encoding.
AUTO_GENERATED_HEADER = """# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# This file was automatically generated from existing config files and their `model_type`s. Do NOT edit this file
# manually as any edits will be overwritten by auto-generation of the file. If any change should be done,
# please add the correct `cls.model_type` in your config class and run `python utils/check_auto.py --fix_and_overwrite`.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2026 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# Backward-compatible alias for the original (misspelled) constant name.
AUTO_GENERATED_HADER = AUTO_GENERATED_HEADER

# Some config class names legitimately appear under several model types (incorrect naming when the model was
# shipped, kept for BC). They are excluded from the duplicate-values check in `main`.
IGNORE_DUPLICATE_CONFIG = ["GPT2Config", "EvollaConfig", "MLCDVisionConfig"]

MODELS_ROOT = "src/transformers/models"


def _has_base_named(class_node: ast.ClassDef, base_name: str) -> bool:
    """Return True if `class_node` lists a bare name `base_name` among its bases."""
    return any(isinstance(base, ast.Name) and base.id == base_name for base in class_node.bases)


def _find_class_with_base(path: str, base_name: str) -> str | None:
    """
    Return the name of the last top-level class in `path` that directly inherits from `base_name`.

    Returns `None` when the file does not exist or contains no such class.
    """
    if not os.path.exists(path):
        return None
    with open(path, "r", encoding="utf-8") as f:
        tree = ast.parse(f.read())

    found = None
    for node in tree.body:
        if isinstance(node, ast.ClassDef) and _has_base_named(node, base_name):
            # Keep iterating: when several classes match, the last one wins.
            found = node.name
    return found


def _extract_model_type(class_node: ast.ClassDef) -> Any:
    """
    Return the literal assigned to `model_type` in the body of `class_node`.

    The first statement that binds `model_type` decides the result. A bare annotation (`model_type: str`) binds
    nothing and is skipped; a value that is not a literal constant cannot be resolved statically and yields
    `None`. `None` is also returned when the class does not define `model_type` at all.
    """
    for stmt in class_node.body:
        if isinstance(stmt, ast.Assign):
            if not any(isinstance(target, ast.Name) and target.id == "model_type" for target in stmt.targets):
                continue
        elif isinstance(stmt, ast.AnnAssign):
            if not (isinstance(stmt.target, ast.Name) and stmt.target.id == "model_type"):
                continue
            if stmt.value is None:
                continue
        else:
            continue
        value = stmt.value
        return value.value if isinstance(value, ast.Constant) else None
    return None


def build_config_mapping_names() -> tuple[dict, dict]:
    """
    Parse every `configuration_*.py` under the models folder and collect the config classes.

    Returns:
        A tuple `(model_type_map, special_mappings)` where `model_type_map` maps each `model_type` to the name of
        its config class and `special_mappings` maps the model types that differ from their folder name to that
        folder name.
    """
    model_type_map = OrderedDict()
    special_mappings = OrderedDict()
    # Track which model_types were resolved by a "natural" match (model_type == module_name)
    # so a later non-natural match (e.g. MaskFormerDetrConfig with model_type="detr" inside
    # models/maskformer/) does not silently overwrite the canonical class.
    natural_types: set[str] = set()

    # `glob.glob` is filesystem-order dependent — sort to make the output deterministic. The key normalizes the
    # path separator so that the order does not depend on the operating system either.
    all_files = sorted(
        glob.glob(f"{MODELS_ROOT}/**/configuration_*.py", recursive=True),
        key=lambda path: path.replace(os.sep, "/"),
    )
    for config_path in all_files:
        # Name of the folder that directly contains the configuration file
        module_name = os.path.basename(os.path.dirname(config_path))
        with open(config_path, "r", encoding="utf-8") as f:
            tree = ast.parse(f.read())

        for node in tree.body:
            if not (isinstance(node, ast.ClassDef) and _has_base_named(node, "PreTrainedConfig")):
                continue

            config_cls_name = node.name
            model_type = _extract_model_type(node)
            if not model_type:
                continue

            is_natural = model_type == module_name
            # If we already recorded a natural match for this model_type, don't let a
            # non-natural one overwrite it — the natural class is the canonical owner.
            if model_type in natural_types and not is_natural:
                continue

            model_type_map[model_type] = config_cls_name
            if is_natural:
                natural_types.add(model_type)
                special_mappings.pop(model_type, None)
            else:
                special_mappings[model_type] = module_name

    return model_type_map, special_mappings


def build_image_processor_mapping(
    config_mapping: dict[str, str],
) -> OrderedDict[str, dict[str, str]]:
    """
    Build the `model_type -> {"pil": ..., "torchvision": ...}` image processor mapping.

    For each model type, the folder named after it (with `-` replaced by `_`) is searched for a class inheriting
    from `PilBackend` in `image_processing_pil_<module>.py` and a class inheriting from `TorchvisionBackend` in
    `image_processing_<module>.py`. A backend key is only present when a class was found, and model types without
    any image processor are left out.
    """
    processor_mapping = OrderedDict()
    for model_type in config_mapping:
        module = model_type.replace("-", "_")
        module_dir = f"{MODELS_ROOT}/{module}"
        slow_processor_name = _find_class_with_base(f"{module_dir}/image_processing_pil_{module}.py", "PilBackend")
        fast_processor_name = _find_class_with_base(f"{module_dir}/image_processing_{module}.py", "TorchvisionBackend")

        if slow_processor_name is not None or fast_processor_name is not None:
            processor_mapping[model_type] = {
                **({"pil": slow_processor_name} if slow_processor_name else {}),
                **({"torchvision": fast_processor_name} if fast_processor_name else {}),
            }

    return processor_mapping


def build_video_processor_mapping(
    config_mapping: dict[str, str],
) -> OrderedDict[str, str]:
    """
    Build the `model_type -> video processor class name` mapping.

    For each model type, `video_processing_<module>.py` in the folder named after it (with `-` replaced by `_`)
    is searched for a class inheriting from `BaseVideoProcessor`. Model types without one are left out.
    """
    processor_mapping = OrderedDict()
    for model_type in config_mapping:
        module = model_type.replace("-", "_")
        video_processor_name = _find_class_with_base(
            f"{MODELS_ROOT}/{module}/video_processing_{module}.py", "BaseVideoProcessor"
        )
        if video_processor_name is not None:
            processor_mapping[model_type] = video_processor_name

    return processor_mapping


def run_ruff_and_sort(file: str):
    """Run `ruff` linter and formatter on `file`, as in `make style` and sort the mappings order"""
    sort_auto_mapping(file, overwrite=True)
    # The exit codes of `ruff` are not checked here
    subprocess.run(["ruff", "check", file, "--fix"], stdout=subprocess.DEVNULL)
    subprocess.run(["ruff", "format", file], stdout=subprocess.DEVNULL)


def format_dict_value(v):
    """Render `v` as Python source: strings are double-quoted, dicts and lists are rendered recursively."""
    if isinstance(v, str):
        return f'"{v}"'
    elif isinstance(v, dict):
        items = ", ".join(f'"{k}": {format_dict_value(val)}' for k, val in v.items())
        return "{" + items + "}"
    elif isinstance(v, list):
        items = ", ".join(format_dict_value(x) for x in v)
        return "[" + items + "]"
    else:
        return repr(v)


def format_ordered_dict(name: str, data: OrderedDict):
    """Render `data` as the source of a `name = OrderedDict([...])` assignment, one `(key, value)` per line."""
    lines = [f"{name} = OrderedDict(", "    ["]
    lines.extend(f'        ("{k}", {format_dict_value(v)}),' for k, v in data.items())
    lines.extend(["    ]", ")\n\n"])
    return "\n".join(lines)


def check_duplicates(mapping_for_special_models: dict[str, Any], auto_mapping: dict[str, Any]):
    """
    Make sure a manually maintained mapping shares no key with the auto-generated one.

    Raises:
        ValueError: if at least one model type is present in both mappings.
    """
    if intersections := (set(mapping_for_special_models.keys()) & set(auto_mapping.keys())):
        raise ValueError(
            "You have manually duplicated a model-type that is present in `auto_mappings.py`. "
            f"Please, delete the entries for {intersections} if they are identical to auto-generated dict, "
            "or use consistent naming across model files so that the names match."
        )


def main(overwrite: bool):
    """
    Regenerate the auto mappings and compare them with the file on disk.

    Args:
        overwrite: Whether to write the regenerated content to disk when it differs. When `False`, a mismatch
            raises instead.

    Raises:
        ValueError: if a manually maintained mapping duplicates an auto-generated key, or if the complete config
            mapping is not one-to-one.
        Exception: if the file on disk is out of date and `overwrite` is `False`.
    """
    filename = "src/transformers/models/auto/auto_mappings.py"

    # 1. Read existing file content if available
    old_content = ""
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as f:
            old_content = f.read()

    # 2. Generate new config mapping dicts by parsing all model-config classes
    config_mapping, special_mapping = build_config_mapping_names()
    image_processor_mapping = build_image_processor_mapping(config_mapping=config_mapping)
    video_processor_mapping = build_video_processor_mapping(config_mapping=config_mapping)

    # Make sure users aren't duplicating the same keys manually
    check_duplicates(MISSING_IMAGE_PROCESSOR_MAPPING_NAMES, image_processor_mapping)
    check_duplicates(MISSING_VIDEO_PROCESSOR_MAPPING_NAMES, video_processor_mapping)

    # The config mapping has to be one-to-one for correct `AutoConfig.from_pretrained()` because `LazyMapping`
    # inverts keys/values and creates a dict from it. Duplicate values would be overwritten by whichever entry
    # comes last.
    # NOTE(review): if `CONFIG_MAPPING_NAMES` is a regular dict, its keys are unique by construction and this
    # first check can never fire — confirm whether it is still needed.
    duplicate_keys = [n for n, c in Counter(COMPLETE_CONFIG_MAPPING_NAMES.keys()).items() if c > 1]
    if duplicate_keys:
        raise ValueError(
            f"Keys in `CONFIG_MAPPING_NAMES` contain duplicates = {duplicate_keys}. "
            "The mapping has to be one-to-one to ensure correct `AutoConfig` functionality!"
        )

    duplicate_values = [
        n
        for n, c in Counter(COMPLETE_CONFIG_MAPPING_NAMES.values()).items()
        if c > 1 and n not in IGNORE_DUPLICATE_CONFIG
    ]
    if duplicate_values:
        raise ValueError(
            f"Values in `CONFIG_MAPPING_NAMES` contain duplicates = {duplicate_values}. "
            "The mapping has to be one-to-one to ensure correct `AutoConfig` functionality!"
        )

    new_mappings = {
        "CONFIG_MAPPING_NAMES": config_mapping,
        "SPECIAL_MODEL_TYPE_TO_MODULE_NAME": special_mapping,
        "IMAGE_PROCESSOR_MAPPING_NAMES": image_processor_mapping,
        "VIDEO_PROCESSOR_MAPPING_NAMES": video_processor_mapping,
    }
    new_content = AUTO_GENERATED_HEADER + "\nfrom collections import OrderedDict\n\n"
    new_content += "".join(format_ordered_dict(name=k, data=v) for k, v in new_mappings.items())

    # 3. If the new auto-generated content is different, overwrite it
    # Dirty hack to sort and apply ruff to the file content, for easier matching
    with tempfile.TemporaryDirectory() as temp_folder:
        temp_filename = os.path.join(temp_folder, "temp.py")
        # Explicit UTF-8: the header contains emojis that many locale default encodings cannot represent
        with open(temp_filename, "w", encoding="utf-8") as temp_file:
            temp_file.write(new_content)
        run_ruff_and_sort(temp_filename)
        with open(temp_filename, "r", encoding="utf-8") as temp_file:
            new_content = temp_file.read()

    if old_content != new_content:
        if not overwrite:
            diff = "".join(
                difflib.unified_diff(
                    old_content.splitlines(keepends=True),
                    new_content.splitlines(keepends=True),
                    fromfile=f"{filename} (on disk)",
                    tofile=f"{filename} (regenerated)",
                    n=3,
                )
            )
            raise Exception(
                "Generated auto-mapping is not consistent with the contents of `models/auto/auto_mappings.py`.\n"
                "Run `make fix-repo` or `python utils/check_auto.py --fix_and_overwrite` to fix them.\n\n"
                f"Diff (on disk → regenerated):\n{diff}"
            )
        else:
            with open(filename, "w", encoding="utf-8") as f:
                f.write(new_content)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    args = parser.parse_args()

    main(overwrite=args.fix_and_overwrite)