first commit

2026-06-05 16:53:03 +08:00
commit 06f1fd69a6
6047 changed files with 1895387 additions and 0 deletions
--- a/.circleci/TROUBLESHOOT.md
+++ b/.circleci/TROUBLESHOOT.md
@@ -0,0 +1,7 @@
+# Troubleshooting
+
+This document explains how to deal with various issues on CircleCI. Entries may include actual solutions or pointers to GitHub issues that cover them.
+
+## Circle CI
+
+* pytest worker runs out of resident RAM and gets killed by `cgroups`: https://github.com/huggingface/transformers/issues/11408
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -0,0 +1,233 @@
+# CircleCI setup configuration: the jobs below generate a second configuration file and hand it
+# over to the `continuation` orb, which continues the pipeline with it.
+version: 2.1
+setup: true
+orbs:
+    continuation: circleci/continuation@0.1.0
+
+# Pipeline parameters. `nightly` selects which workflow runs (see `workflows`); the `GHA_*`
+# parameters are declared but not referenced anywhere else in this file.
+parameters:
+    nightly:
+        type: boolean
+        default: false
+    GHA_Actor:
+        type: string
+        default: ""
+    GHA_Action:
+        type: string
+        default: ""
+    GHA_Event:
+        type: string
+        default: ""
+    GHA_Meta:
+        type: string
+        default: ""
+
+jobs:
+    # Ensure the pipeline runs under the `huggingface` CircleCI organization.
+    # Fails with a pointer to the CircleCI troubleshooting article when it runs under a personal account.
+    check_circleci_user:
+        docker:
+            - image: python:3.10-slim
+        resource_class: small
+        parallelism: 1
+        steps:
+            - run: echo "$CIRCLE_PROJECT_USERNAME"
+            - run: |
+                if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
+                    exit 0
+                else
+                    echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit 1
+                fi
+    # Fetch the tests to run.
+    # Setup job for regular pipelines: runs the test fetcher, generates
+    # `test_preparation/generated_config.yml` and continues the pipeline with it.
+    fetch_tests:
+        working_directory: ~/transformers
+        docker:
+            - image: huggingface/transformers-quality
+        parallelism: 1
+        steps:
+            - checkout
+            - run: uv pip install -U -e .
+            # Append an export of the commit subject (GIT_COMMIT_MESSAGE) to $BASH_ENV.
+            - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
+            - run: mkdir -p test_preparation
+            - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
+            # Best effort: a failure of the filtering pass does not fail the job (`|| true`).
+            - run: python utils/tests_fetcher.py --filter_tests || true
+            # Generate the continuation config; GIT_COMMIT_MESSAGE is exported again for this step.
+            - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
+            # Halt the job when the generated config is missing or empty.
+            - run: |
+                if [ ! -s test_preparation/generated_config.yml ]; then
+                    echo "No tests to run, exiting early!"
+                    circleci-agent step halt
+                fi
+
+            - store_artifacts:
+                path: test_preparation
+
+            - run:
+                name: "Retrieve Artifact Paths"
+                # [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
+                # `CIRCLE_TOKEN` is an environment variable set within a context, see `https://circleci.com/docs/contexts/`
+                command: |
+                    project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
+                    job_number=${CIRCLE_BUILD_NUM}
+                    url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
+                    curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
+            - run:
+                name: "Prepare pipeline parameters"
+                command: |
+                    python utils/process_test_artifacts.py
+
+            # To keep the generated_config.yml given to the continuation orb short, the links to the
+            # artifacts are passed as pipeline parameters; otherwise the list of tests was just too big.
+            # Being explicit is good, but here it was a limitation.
+
+            # The job artifacts are listed with https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
+            # A nested dict could not be passed, which is why a `test_file_...` parameter is created for every single job.
+
+            - store_artifacts:
+                path: test_preparation/transformed_artifacts.json
+            - store_artifacts:
+                path: test_preparation/artifacts.json
+            - continuation/continue:
+                parameters:  test_preparation/transformed_artifacts.json
+                configuration_path: test_preparation/generated_config.yml
+
+    # To run all tests for the nightly build.
+    # Same flow as `fetch_tests`, except that the fetcher is called with `--fetch_all`.
+    fetch_all_tests:
+        working_directory: ~/transformers
+        docker:
+            - image: huggingface/transformers-quality
+        parallelism: 1
+        steps:
+            - checkout
+            - run: uv pip install -U -e .
+            # Append an export of the commit subject (GIT_COMMIT_MESSAGE) to $BASH_ENV.
+            - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
+            - run: mkdir -p test_preparation
+            # Best effort: a fetcher failure does not fail the job (`|| true`).
+            - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt || true
+            - run: python utils/tests_fetcher.py --filter_tests || true
+            # Generate the continuation config; GIT_COMMIT_MESSAGE is exported again for this step.
+            - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
+            # Halt the job when the generated config is missing or empty.
+            - run: |
+                if [ ! -s test_preparation/generated_config.yml ]; then
+                    echo "No tests to run, exiting early!"
+                    circleci-agent step halt
+                fi
+
+            - store_artifacts:
+                path: test_preparation
+
+            - run:
+                name: "Retrieve Artifact Paths"
+                # NOTE(review): unlike `fetch_tests`, this request sends no `Circle-Token` header - confirm this is intended.
+                command: |
+                    project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
+                    job_number=${CIRCLE_BUILD_NUM}
+                    url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
+                    curl -o  test_preparation/artifacts.json ${url}
+            - run:
+                name: "Prepare pipeline parameters"
+                command: |
+                    python utils/process_test_artifacts.py
+
+            # To keep the generated_config.yml given to the continuation orb short, the links to the
+            # artifacts are passed as pipeline parameters; otherwise the list of tests was just too big.
+            # Being explicit is good, but here it was a limitation.
+
+            # The job artifacts are listed with https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
+            # A nested dict could not be passed, which is why a `test_file_...` parameter is created for every single job.
+
+            - store_artifacts:
+                path: test_preparation/transformed_artifacts.json
+            - store_artifacts:
+                path: test_preparation/artifacts.json
+            - continuation/continue:
+                parameters:  test_preparation/transformed_artifacts.json
+                configuration_path: test_preparation/generated_config.yml
+
+    # Installs the `quality` and `serving` extras and runs `make check-code-quality`.
+    check_code_quality:
+        working_directory: ~/transformers
+        docker:
+            - image: huggingface/transformers-quality
+        resource_class: large
+        environment:
+            # NOTE(review): a bare `yes` is a boolean for YAML 1.1 parsers; quote it if the literal
+            # string "yes" is what the consumer expects - confirm.
+            TRANSFORMERS_IS_CI: yes
+            PYTEST_TIMEOUT: 120
+        parallelism: 1
+        steps:
+            - checkout
+            - run: uv pip install -e ".[quality,serving]"
+            - run:
+                name: Show installed libraries and their versions
+                command: pip freeze | tee installed.txt
+            # Keep the list of installed packages as a job artifact.
+            - store_artifacts:
+                  path: ~/transformers/installed.txt
+            - run: make check-code-quality
+
+    # Runs `make check-repository-consistency`, then checks that `from transformers import *`
+    # works with several combinations of optional backends installed.
+    check_repository_consistency:
+        working_directory: ~/transformers
+        docker:
+            - image: huggingface/transformers-consistency
+        resource_class: large
+        environment:
+            # NOTE(review): a bare `yes` is a boolean for YAML 1.1 parsers; quote it if the literal
+            # string "yes" is what the consumer expects - confirm.
+            TRANSFORMERS_IS_CI: yes
+            PYTEST_TIMEOUT: 120
+        parallelism: 1
+        steps:
+            - checkout
+            - run: apt-get update && apt-get install -y make
+            - run: uv pip install -e ".[quality]"
+            - run:
+                name: Show installed libraries and their versions
+                command: pip freeze | tee installed.txt
+            # Keep the list of installed packages as a job artifact.
+            - store_artifacts:
+                  path: ~/transformers/installed.txt
+            - run: make check-repository-consistency
+            - run:
+                name: "Test import with all backends (torch + PIL + torchvision)"
+                command: python -c "from transformers import *" || (echo '🚨 import failed with all backends. Fix unprotected imports!! 🚨'; exit 1)
+            # Each step below uninstalls some backends, runs the import check, then reinstalls the
+            # `quality` extra before the next step.
+            - run:
+                name: "Test import with torch only (no PIL, no torchvision)"
+                command: |
+                    uv pip uninstall Pillow torchvision -q
+                    python -c "from transformers import *" || (echo '🚨 import failed with torch only (no PIL). Fix unprotected imports!! 🚨'; exit 1)
+                    uv pip install -e ".[quality]" -q
+            - run:
+                name: "Test import with PIL only (no torch, no torchvision)"
+                command: |
+                    uv pip uninstall torch torchvision torchaudio -q
+                    python -c "from transformers import *" || (echo '🚨 import failed with PIL only (no torch). Fix unprotected imports!! 🚨'; exit 1)
+                    uv pip install -e ".[quality]" -q
+            - run:
+                name: "Test import with torch + PIL, no torchvision"
+                command: |
+                    uv pip uninstall torchvision -q
+                    python -c "from transformers import *" || (echo '🚨 import failed with torch+PIL but no torchvision. Fix unprotected imports!! 🚨'; exit 1)
+                    uv pip install -e ".[quality]" -q
+
+
+# Three workflows, selected by the repository URL and the `nightly` pipeline parameter.
+workflows:
+    version: 2
+    # Main repository, non-nightly pipelines.
+    setup_and_quality:
+        when:
+            and:
+                - equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
+                - not: <<pipeline.parameters.nightly>>
+        jobs:
+            - check_circleci_user
+            - check_code_quality
+            - check_repository_consistency
+            - fetch_tests
+
+    # Any other repository URL: `fetch_tests` additionally receives `TRANSFORMERS_CONTEXT`.
+    # NOTE(review): this condition does not look at `nightly`, so a nightly pipeline on such a
+    # repository also matches the `nightly` workflow below - confirm this is intended.
+    setup_and_quality_2:
+        when:
+            not:
+                 equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
+        jobs:
+            - check_circleci_user
+            - check_code_quality
+            - check_repository_consistency
+            - fetch_tests:
+                # [reference] https://circleci.com/docs/contexts/
+                context:
+                    - TRANSFORMERS_CONTEXT
+
+    # Nightly pipelines: same checks, but the full test suite is fetched.
+    nightly:
+        when: <<pipeline.parameters.nightly>>
+        jobs:
+            - check_circleci_user
+            - check_code_quality
+            - check_repository_consistency
+            - fetch_all_tests
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -0,0 +1,494 @@
+# Copyright 2022 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import os
+from dataclasses import dataclass
+from typing import Any
+
+import yaml
+
+
+# Environment variables shared by every generated test job; `CircleCIJob.to_dict` copies this
+# dict and applies per-job overrides on top of it.
+COMMON_ENV_VARIABLES = {
+    "OMP_NUM_THREADS": 1,
+    "TRANSFORMERS_IS_CI": True,
+    "PYTEST_TIMEOUT": 120,
+    "RUN_PIPELINE_TESTS": False,
+    # will be adjusted in `CircleCIJob.to_dict`.
+    "RUN_FLAKY": True,
+    "DISABLE_SAFETENSORS_CONVERSION": True,
+    "NETWORK_DEBUG_REPORT": True,
+}
+# Pytest options shared by every generated test job. A `None` value makes `CircleCIJob.to_dict`
+# emit the key as a single-dash flag (e.g. `-vvv`).
+# {"s": None} is deliberately not used: its output is way too long and makes navigation on CircleCI impractical.
+COMMON_PYTEST_OPTIONS = {
+    "max-worker-restart": 0,
+    "vvv": None,
+    "rsfE": None,
+    "random-order-bucket": "module",
+    "random-order-seed": "${CIRCLE_BUILD_NUM:-0}",
+}
+# Docker image used when a job does not specify one (always deep-copied before use).
+DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
+
+# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
+# to rerun the tests that match these patterns.
+FLAKY_TEST_FAILURE_PATTERNS = [
+    "OSError",  # Machine/connection transient error
+    "Timeout",  # Machine/connection transient error
+    "ConnectionError",  # Connection transient error
+    "FileNotFoundError",  # Raised by `datasets` on Hub failures
+    "PIL.UnidentifiedImageError",  # Raised by `PIL.Image.open` on connection issues
+    "HTTPError",  # Also catches HfHubHTTPError
+    "AssertionError: Tensor-likes are not close!",  # `torch.testing.assert_close`, we might have unlucky random values
+    # TODO: error downloading tokenizer's `merged.txt` from hub can cause all the exceptions below. Throw and handle
+    # them under a single message.
+    "TypeError: expected str, bytes or os.PathLike object, not NoneType",
+    "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
+    "Converting from Tiktoken failed",
+    "KeyError: <class ",
+    "TypeError: not a string",
+]
+
+
+class EmptyJob:
+    """Placeholder CircleCI job.
+
+    With the default name it only lists the working directory. When `job_name` is set to
+    `"collection_job"` it also polls the workflow until no other job reports `running`, runs the
+    workflow test-report script and stores the `outputs` folder as an artifact.
+    """
+
+    job_name = "empty"
+
+    def to_dict(self):
+        """Return the CircleCI job definition (docker image, resource class and steps) as a dict."""
+        wait_for_other_jobs = """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""
+        process_reports = (
+            "python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true"
+        )
+
+        job_steps = [{"run": "ls -la"}]
+        if self.job_name == "collection_job":
+            job_steps += [
+                "checkout",
+                {"run": "pip install requests || true"},
+                {"run": wait_for_other_jobs},
+                {"run": process_reports},
+                {"store_artifacts": {"path": "outputs"}},
+                {"run": 'echo "All required jobs have now completed"'},
+            ]
+
+        definition = {}
+        # Deep copy so that callers can never alter the module-level default image.
+        definition["docker"] = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
+        definition["resource_class"] = "small"
+        definition["steps"] = job_steps
+        return definition
+
+
+@dataclass
+class CircleCIJob:
+    """Description of one generated CircleCI test job.
+
+    `to_dict` turns the description into the job mapping written to the generated config and
+    `job_name` gives the key under which the job is registered.
+    """
+
+    name: str
+    # Extra environment variables, applied on top of `COMMON_ENV_VARIABLES`.
+    additional_env: dict[str, Any] | None = None
+    # Docker executor definition; defaults to a copy of `DEFAULT_DOCKER_IMAGE`.
+    docker_image: list[dict[str, str]] | None = None
+    # Shell commands joined with `&&` into a single install step; defaults to `uv pip install .`.
+    install_steps: list[str] | None = None
+    # Value given to pytest's `-m` option, if any.
+    marker: str | None = None
+    # Any truthy value enables test splitting; the actual node count comes from a pipeline parameter.
+    parallelism: int | None = 0
+    pytest_num_workers: int = 8
+    # Extra pytest options, applied on top of `COMMON_PYTEST_OPTIONS`.
+    pytest_options: dict[str, Any] | None = None
+    resource_class: str | None = "xlarge"
+    tests_to_run: list[str] | None = None
+    num_test_files_per_worker: int | None = 10
+    # This should be only used for doctest job!
+    command_timeout: int | None = None
+
+    def __post_init__(self):
+        """Fill in defaults for the mutable attributes and resolve the list of tests to run."""
+        # Deal with defaults for mutable attributes.
+        if self.additional_env is None:
+            self.additional_env = {}
+        if self.docker_image is None:
+            # Let's avoid changing the default list and make a copy.
+            self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
+        else:
+            # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
+            print(os.environ.get("GIT_COMMIT_MESSAGE"))
+            if (
+                "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "")
+                or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci"
+            ):
+                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
+            print(f"Using {self.docker_image} docker image")
+        if self.install_steps is None:
+            self.install_steps = ["uv pip install ."]
+        else:
+            # Copy so that the `append` calls below never mutate the list owned by the caller.
+            self.install_steps = list(self.install_steps)
+        # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
+        self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
+        # Install pytest-random-order plugin for test randomization
+        self.install_steps.append("uv pip install pytest-random-order")
+        if self.pytest_options is None:
+            self.pytest_options = {}
+        if isinstance(self.tests_to_run, str):
+            self.tests_to_run = [self.tests_to_run]
+        else:
+            # Otherwise the tests are read from the list file produced for this job, if it exists.
+            test_file = os.path.join("test_preparation", f"{self.job_name}_test_list.txt")
+            print("Looking for ", test_file)
+            if os.path.exists(test_file):
+                with open(test_file, encoding="utf-8") as f:
+                    expanded_tests = f.read().strip().split("\n")
+                self.tests_to_run = expanded_tests
+                print("Found:", expanded_tests)
+            else:
+                self.tests_to_run = []
+                print("not Found")
+
+    def to_dict(self):
+        """Build the CircleCI job mapping (docker image, environment, resource class, steps).
+
+        Returns:
+            dict: the job definition; `parallelism` is only set when `self.parallelism` is truthy.
+        """
+        env = COMMON_ENV_VARIABLES.copy()
+        # NOTE(review): hard-coded (obfuscated) token committed to the repository; consider providing
+        # it through a CircleCI context instead.
+        # fmt: off
+        # not critical
+        env.update({"HF_TOKEN": "".join(["h", "f", "_", "q", "h", "b", "O", "C", "G", "N", "Y", "x", "D", "K", "C", "P", "J", "n", "q", "m", "O", "q", "g", "q", "s", "f", "q", "S", "v", "f", "s", "j", "q", "w", "j", "C", "T"])})
+        # fmt: on
+
+        # Do not run tests decorated by @is_flaky on pull requests
+        env["RUN_FLAKY"] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
+        env.update(self.additional_env)
+
+        job = {
+            "docker": self.docker_image,
+            "environment": env,
+        }
+        if self.resource_class is not None:
+            job["resource_class"] = self.resource_class
+
+        all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
+        # Options with a value become `--key=value`; options mapped to `None` become `-key`.
+        pytest_flags = [
+            f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}"
+            for key, value in all_options.items()
+        ]
+        pytest_flags.append(
+            f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
+        )
+        timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
+        marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
+        junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
+        joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
+        repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
+        # The surrounding quotes that `yaml.dump` adds to this value are stripped when the config is written.
+        parallel = f" << pipeline.parameters.{self.job_name}_parallelism >> "
+        steps = [
+            "checkout",
+            {"attach_workspace": {"at": "test_preparation"}},
+            {"run": "apt-get update && apt-get install -y curl"},
+            {"run": " && ".join(self.install_steps)},
+            # Examples special case: we need to download NLTK files in advance to avoid concurrency issues
+            {
+                "run": {
+                    "name": "Download NLTK files",
+                    "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """,
+                }
+                if "example" in self.name
+                else "echo Skipping"
+            },
+            {
+                "run": {
+                    "name": "Show installed libraries and their size",
+                    "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true""",
+                }
+            },
+            {
+                "run": {
+                    "name": "Show installed libraries and their versions",
+                    "command": """pip list --format=freeze | tee installed.txt || true""",
+                }
+            },
+            {
+                "run": {
+                    "name": "Show biggest libraries",
+                    # Raw string: `\t` and `\n` must reach dpkg-query and awk as escape sequences. A
+                    # literal newline inside the awk string literal is a syntax error that `|| true` hides.
+                    "command": r"""dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true""",
+                }
+            },
+            {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
+            {
+                "run": {
+                    "name": "Get files to test",
+                    "command": f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"'
+                    if self.name != "pr_documentation_tests"
+                    else 'echo "Skipped"',
+                }
+            },
+            {
+                "run": {
+                    "name": "Split tests across parallel nodes: show current parallel tests",
+                    "command": f"TESTS=$(circleci tests split  --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'"
+                    if self.parallelism
+                    else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt",
+                }
+            },
+            # During the CircleCI docker images build time, we might already (or not) download the data.
+            # If it's done already, the files are inside the directory `/test_data/`.
+            {
+                "run": {
+                    "name": "fetch hub objects before pytest",
+                    "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py",
+                }
+            },
+            {
+                "run": {
+                    "name": "download and unzip hub cache",
+                    "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/',
+                }
+            },
+            {
+                "run": {
+                    "name": "Run tests",
+                    "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)",
+                }
+            },
+            # The reporting steps below use `when: always`, so they also run after a failed test step.
+            {
+                "run": {
+                    "name": "Check for test crashes",
+                    "when": "always",
+                    "command": """if [ ! -f tests_output.txt ]; then
+                            echo "ERROR: tests_output.txt does not exist - tests may not have run properly"
+                            exit 1
+                        elif grep -q "crashed and worker restarting disabled" tests_output.txt; then
+                            echo "ERROR: Worker crash detected in test output"
+                            echo "Found: crashed and worker restarting disabled"
+                            exit 1
+                        else
+                            echo "Tests output file exists and no worker crashes detected"
+                        fi""",
+                },
+            },
+            {
+                "run": {
+                    "name": "Expand to show skipped tests",
+                    "when": "always",
+                    "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip",
+                }
+            },
+            {
+                "run": {
+                    "name": "Failed tests: show reasons",
+                    "when": "always",
+                    "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail",
+                }
+            },
+            {
+                "run": {
+                    "name": "Errors",
+                    "when": "always",
+                    "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors",
+                }
+            },
+            {"store_test_results": {"path": "test-results"}},
+            {"store_artifacts": {"path": "test-results/junit.xml"}},
+            {"store_artifacts": {"path": "reports"}},
+            {"store_artifacts": {"path": "tests.txt"}},
+            {"store_artifacts": {"path": "splitted_tests.txt"}},
+            {"store_artifacts": {"path": "installed.txt"}},
+            {"store_artifacts": {"path": "network_debug_report.json"}},
+        ]
+        if self.parallelism:
+            job["parallelism"] = parallel
+        job["steps"] = steps
+        return job
+
+    @property
+    def job_name(self):
+        """Name of the job in the generated config: `name`, prefixed with `tests_` unless it contains
+        `examples`, `pipeline` or `pr_documentation`."""
+        return (
+            self.name
+            if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name)
+            else f"tests_{self.name}"
+        )
+
+
+# JOBS
+# One `CircleCIJob` instance per generated test job, followed by the lists that group them.
+torch_job = CircleCIJob(
+    "torch",
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    marker="not generate",
+    parallelism=6,
+)
+
+generate_job = CircleCIJob(
+    "generate",
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    # networkx==3.3 (after #36957) cause some issues
+    # TODO: remove this once it works directly
+    install_steps=["uv pip install ."],
+    marker="generate",
+    parallelism=6,
+)
+
+tokenization_job = CircleCIJob(
+    "tokenization",
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    parallelism=8,
+)
+
+processor_job = CircleCIJob(
+    "processors",
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    parallelism=8,
+)
+
+pipelines_torch_job = CircleCIJob(
+    "pipelines_torch",
+    additional_env={"RUN_PIPELINE_TESTS": True},
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    marker="is_pipeline_test",
+    parallelism=4,
+)
+
+custom_tokenizers_job = CircleCIJob(
+    "custom_tokenizers",
+    additional_env={"RUN_CUSTOM_TOKENIZERS": True},
+    docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
+)
+
+examples_torch_job = CircleCIJob(
+    "examples_torch",
+    additional_env={"OMP_NUM_THREADS": 8},
+    docker_image=[{"image": "huggingface/transformers-examples-torch"}],
+    # TODO @ArthurZucker remove this once docker is easier to build
+    install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
+    pytest_num_workers=4,
+)
+
+exotic_models_job = CircleCIJob(
+    "exotic_models",
+    docker_image=[{"image": "huggingface/transformers-exotic-models"}],
+    parallelism=4,
+    pytest_options={"durations": 100},
+)
+
+repo_utils_job = CircleCIJob(
+    "repo_utils",
+    docker_image=[{"image": "huggingface/transformers-consistency"}],
+    pytest_num_workers=4,
+    resource_class="large",
+)
+
+non_model_job = CircleCIJob(
+    "non_model",
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    # networkx==3.3 (after #36957) cause some issues
+    # TODO: remove this once it works directly
+    install_steps=["uv pip install .[serving]"],
+    marker="not generate",
+    parallelism=6,
+)
+
+training_ci_job = CircleCIJob(
+    "training_ci",
+    additional_env={"RUN_TRAINING_TESTS": True},
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    install_steps=["uv pip install ."],
+    marker="is_training_test",
+    parallelism=6,
+)
+
+tensor_parallel_ci_job = CircleCIJob(
+    "tensor_parallel_ci",
+    additional_env={"RUN_TENSOR_PARALLEL_TESTS": True},
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
+    install_steps=["uv pip install .", "uv pip install torchao"],
+    marker="is_tensor_parallel_test",
+    parallelism=6,
+)
+
+# We also include a `dummy.py` file in the files to be doc-tested to prevent an edge-case failure. Otherwise, pytest
+# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
+# the bash output redirection.)
+# The doctest file list is computed by a shell command substitution and written to `pr_documentation_tests_temp.txt`.
+py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
+py_command = f"$(python3 -c '{py_command}')"
+command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
+doc_test_job = CircleCIJob(
+    "pr_documentation_tests",
+    docker_image=[{"image": "huggingface/transformers-consistency"}],
+    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
+    install_steps=[
+        "uv pip install .",
+        # Add an empty file to keep the test step running correctly even if no file is selected to be tested.
+        "touch dummy.py",
+        command,
+        "cat pr_documentation_tests_temp.txt",
+        # The last line of the temporary file becomes this job's test list.
+        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt",
+    ],
+    # NOTE(review): `CircleCIJob.to_dict` never reads `tests_to_run`, and the file named here differs
+    # from the `pr_documentation_tests_test_list.txt` written above - confirm whether this is still needed.
+    tests_to_run="$(cat pr_documentation_tests.txt)",  # noqa
+    pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
+    command_timeout=1200,  # test cannot run longer than 1200 seconds
+    pytest_num_workers=1,
+)
+
+# Job groups; `ALL_TESTS` is the list scanned by `create_circleci_config`.
+REGULAR_TESTS = [torch_job, tokenization_job, processor_job, generate_job, non_model_job]  # fmt: skip
+EXAMPLES_TESTS = [examples_torch_job]
+PIPELINE_TESTS = [pipelines_torch_job]
+REPO_UTIL_TESTS = [repo_utils_job]
+DOC_TESTS = [doc_test_job]
+TRAINING_CI_TESTS = [training_ci_job]
+TENSOR_PARALLEL_CI_TESTS = [tensor_parallel_ci_job]
+ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] + TRAINING_CI_TESTS + TENSOR_PARALLEL_CI_TESTS  # fmt: skip
+
+
+def create_circleci_config(folder=None):
+    """Write `generated_config.yml` into `folder` (the current working directory when `None`).
+
+    A job from `ALL_TESTS` is kept only when `test_preparation/<job_name>_test_list.txt` exists.
+    When at least one job is kept, a `collection_job` is put in front of them; otherwise a single
+    `EmptyJob` is used.
+    """
+    folder = os.getcwd() if folder is None else folder
+    os.environ["test_preparation_dir"] = folder
+
+    jobs = []
+    for candidate in ALL_TESTS:
+        list_path = os.path.join("test_preparation", f"{candidate.job_name}_test_list.txt")
+        if os.path.isfile(list_path):
+            jobs.append(candidate)
+    print("The following jobs will be run ", jobs)
+
+    if jobs:
+        print(
+            "Full list of job name inputs",
+            {job.job_name + "_test_list": {"type": "string", "default": ""} for job in jobs},
+        )
+        # Add a job waiting all the test jobs and aggregate their test summary files at the end
+        aggregator = EmptyJob()
+        aggregator.job_name = "collection_job"
+        jobs = [aggregator, *jobs]
+    else:
+        jobs = [EmptyJob()]
+
+    parameters = {
+        # Only used to accept the parameters from the trigger
+        "nightly": {"type": "boolean", "default": False},
+        # Only used to accept the parameters from GitHub Actions trigger
+        "GHA_Actor": {"type": "string", "default": ""},
+        "GHA_Action": {"type": "string", "default": ""},
+        "GHA_Event": {"type": "string", "default": ""},
+        "GHA_Meta": {"type": "string", "default": ""},
+        "tests_to_run": {"type": "string", "default": ""},
+    }
+    # One `<job>_test_list` and one `<job>_parallelism` pipeline parameter per job.
+    for job in jobs:
+        parameters[job.job_name + "_test_list"] = {"type": "string", "default": ""}
+    for job in jobs:
+        parameters[job.job_name + "_parallelism"] = {"type": "integer", "default": 1}
+
+    config = {
+        "version": "2.1",
+        "parameters": parameters,
+        "jobs": {job.job_name: job.to_dict() for job in jobs},
+    }
+
+    if "CIRCLE_TOKEN" in os.environ:
+        # For private forked repo. (e.g. new model addition)
+        workflow_jobs = [{job.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for job in jobs]
+    else:
+        # For public repo. (e.g. `transformers`)
+        workflow_jobs = [job.job_name for job in jobs]
+    config["workflows"] = {"version": 2, "run_tests": {"jobs": workflow_jobs}}
+
+    output_path = os.path.join(folder, "generated_config.yml")
+    with open(output_path, "w", encoding="utf-8") as f:
+        rendered = yaml.dump(config, sort_keys=False, default_flow_style=False)
+        # Strip the quotes `yaml.dump` puts around the `<< pipeline... >>` placeholders.
+        rendered = rendered.replace("' << pipeline", " << pipeline")
+        rendered = rendered.replace(">> '", " >>")
+        f.write(rendered)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: `--fetcher_folder` is passed to `create_circleci_config` as the
+    # output folder.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--fetcher_folder",
+        type=str,
+        default=None,
+        help="Folder in which `generated_config.yml` is written (defaults to the current working directory).",
+    )
+    args = parser.parse_args()
+
+    create_circleci_config(args.fetcher_folder)
--- a/.circleci/parse_test_outputs.py
+++ b/.circleci/parse_test_outputs.py
@@ -0,0 +1,71 @@
+import argparse
+import re
+
+
+def parse_pytest_output(file_path):
+    """Print the skipped tests found in a pytest output file, grouped by skip reason.
+
+    Lines are expected to look like `SKIPPED [N] tests/file.py:LINE: reason`, where `N` is the
+    number of tests pytest folded into that line.
+
+    Args:
+        file_path: path to the captured pytest output (read as UTF-8).
+    """
+    skipped_per_reason = {}
+    skipped_count = 0
+    with open(file_path, 'r', encoding='utf-8') as file:
+        for line in file:
+            # Non-greedy location: the reason starts at the first ": ", so reasons that themselves
+            # contain ": " are kept whole.
+            match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*?): (.*)$', line)
+            if match:
+                folded_count, _, reason = match.groups()
+                # One summary line can stand for several skipped tests.
+                folded_count = int(folded_count)
+                skipped_count += folded_count
+                skipped_per_reason[reason] = skipped_per_reason.get(reason, 0) + folded_count
+    for reason, count in sorted(skipped_per_reason.items(), key=lambda item: item[1]):
+        print(f"{count:4} skipped because: {reason}")
+    print("Number of skipped tests:", skipped_count)
+
+def parse_pytest_failure_output(file_path):
+    """Print the failed tests found in a pytest output file, grouped by failure reason.
+
+    Lines are expected to look like `FAILED tests/... - ErrorType: reason`. Lines whose message
+    has no `ErrorType: ` prefix, or no message at all, are counted as well.
+
+    Args:
+        file_path: path to the captured pytest output (read as UTF-8).
+
+    Raises:
+        SystemExit: with code 1 when at least one failed test was found.
+    """
+    failed_tests = {}
+    failed_count = 0
+    with open(file_path, 'r', encoding='utf-8') as file:
+        for line in file:
+            # Non-greedy test id: the message starts at the first " - ".
+            match = re.match(r'^FAILED (tests/.*?)(?: - (.*))?$', line)
+            if match:
+                failed_count += 1
+                message = match.group(2) or "no error message"
+                # Split at the first ": " so that the error type is separated from the whole reason.
+                error, separator, reason = message.partition(": ")
+                if not separator:
+                    reason = message
+                failed_tests.setdefault(reason, []).append(error)
+    for k, v in sorted(failed_tests.items(), key=lambda x: len(x[1])):
+        print(f"{len(v):4} failed because `{v[0]}` -> {k}")
+    print("Number of failed tests:", failed_count)
+    if failed_count > 0:
+        # `exit()` is a helper injected by the `site` module; raise `SystemExit` directly.
+        raise SystemExit(1)
+
+def parse_pytest_errors_output(file_path):
+    """Print the errored tests found in a pytest output file, grouped by error reason.
+
+    Lines are expected to look like `ERROR tests/... - ErrorType: reason`. Lines whose message
+    has no `ErrorType: ` prefix, or no message at all, are counted as well.
+
+    Args:
+        file_path: path to the captured pytest output (read as UTF-8); it is printed first.
+
+    Raises:
+        SystemExit: with code 1 when at least one error was found.
+    """
+    print(file_path)
+    error_tests = {}
+    error_count = 0
+    with open(file_path, 'r', encoding='utf-8') as file:
+        for line in file:
+            # Non-greedy test id: the message starts at the first " - ".
+            match = re.match(r'^ERROR (tests/.*?)(?: - (.*))?$', line)
+            if match:
+                error_count += 1
+                message = match.group(2) or "no error message"
+                # Split at the first ": " so that the error type is separated from the whole reason.
+                test_error, separator, reason = message.partition(": ")
+                if not separator:
+                    reason = message
+                error_tests.setdefault(reason, []).append(test_error)
+    for k, v in sorted(error_tests.items(), key=lambda x: len(x[1])):
+        print(f"{len(v):4} errored out because of `{v[0]}` -> {k}")
+    print("Number of errors:", error_count)
+    if error_count > 0:
+        # `exit()` is a helper injected by the `site` module; raise `SystemExit` directly.
+        raise SystemExit(1)
+
+def main():
+    """Parse the command line and run the report selected by `--skip`, `--fail` and/or `--errors`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--file", help="file to parse")
+    parser.add_argument("--skip", action="store_true", help="show skipped reasons")
+    parser.add_argument("--fail", action="store_true", help="show failed tests")
+    parser.add_argument("--errors", action="store_true", help="show errored tests")
+    args = parser.parse_args()
+
+    # Several flags can be combined; the reports run in this order. The failure and error reports
+    # exit the process with a non-zero status when they find something.
+    if args.skip:
+        parse_pytest_output(args.file)
+
+    if args.fail:
+        parse_pytest_failure_output(args.file)
+
+    if args.errors:
+        parse_pytest_errors_output(args.file)
+
+
+if __name__ == "__main__":
+    main()