first commit

2026-06-05 16:53:03 +08:00
commit 06f1fd69a6
6047 changed files with 1895387 additions and 0 deletions
--- a/tests/trainer/distributed/scripts/eval_ddp.py
+++ b/tests/trainer/distributed/scripts/eval_ddp.py
@@ -0,0 +1,113 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Worker script for eval/predict ordering tests.
+
+Verifies that distributed eval/predict returns all samples in the correct order.
+
+Run via torchrun or accelerate launch.
+"""
+
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset
+
+from transformers import EvalPrediction, HfArgumentParser, Trainer, TrainingArguments
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+
+class DummyDataset(Dataset):
+    """Map-style dataset in which the sample stored at index ``i`` is the integer ``i`` itself.
+
+    Args:
+        length: Number of samples reported by ``__len__`` (defaults to 101).
+    """
+
+    def __init__(self, length: int = 101):
+        self.length = length
+
+    def __getitem__(self, i) -> int:
+        # A sample is simply its own index; no bounds check is performed.
+        return i
+
+    def __len__(self):
+        return self.length
+
+
+class DummyDataCollator:
+    """Collator that turns a list of features into a batch with matching inputs and labels."""
+
+    def __call__(self, features):
+        # Build two separate tensors from the same features: one for the inputs, one for the labels.
+        input_ids = torch.tensor(features)
+        labels = torch.tensor(features)
+        return {"input_ids": input_ids, "labels": labels}
+
+
+class DummyModel(nn.Module):
+    """Pass-through model: returns ``input_ids`` unchanged, preceded by a zero loss when labels are given."""
+
+    def __init__(self):
+        super().__init__()
+        # Add some (unused) params otherwise DDP will complain.
+        self.fc = nn.Linear(120, 80)
+
+    def forward(self, input_ids, labels=None):
+        """Return ``input_ids`` alone, or ``(loss, input_ids)`` with a constant ``0.0`` loss if ``labels`` is set."""
+        if labels is None:
+            return input_ids
+        # The loss is a scalar created on the same device as the inputs.
+        zero_loss = torch.tensor(0.0, device=input_ids.device)
+        return zero_loss, input_ids
+
+
+if __name__ == "__main__":
+    # Parse the command line into a single `TrainingArguments` instance.
+    parser = HfArgumentParser((TrainingArguments,))
+    training_args = parser.parse_args_into_dataclasses()[0]
+
+    def _check_success(metrics: dict, key: str) -> None:
+        """Log `metrics` and terminate the process with status 1 unless `metrics[key]` is `True`."""
+        logger.info(metrics)
+        if metrics[key] is not True:
+            logger.error(metrics)
+            # Raise `SystemExit` directly rather than calling the `exit` builtin: `exit` is installed by the
+            # `site` module for interactive sessions and is not guaranteed to exist (e.g. under `python -S`).
+            raise SystemExit(1)
+
+    def _evaluate_and_predict(trainer: Trainer, dataset: Dataset) -> None:
+        """Run `evaluate()` then `predict(dataset)`, checking the success metric after each call."""
+        _check_success(trainer.evaluate(), "eval_success")
+        _check_success(trainer.predict(dataset).metrics, "test_success")
+
+    for dataset_length in [49, 7]:
+        dataset = DummyDataset(dataset_length)
+
+        def compute_metrics(p: EvalPrediction) -> dict:
+            """Report whether predictions and labels both equal `0 .. len(dataset) - 1` in order."""
+            sequential = list(range(len(dataset)))
+            predictions = p.predictions.tolist()
+            labels = p.label_ids.tolist()
+            success = predictions == sequential and labels == sequential
+            # Only the local main process prints the (potentially long) mismatch details.
+            if not success and training_args.local_process_index == 0:
+                logger.warning(
+                    "Predictions and/or labels do not match expected results:\n  - predictions: "
+                    f"{predictions}\n  - labels: {labels}\n  - expected: {sequential}"
+                )
+            return {"success": success}
+
+        trainer = Trainer(
+            model=DummyModel(),
+            args=training_args,
+            data_collator=DummyDataCollator(),
+            eval_dataset=dataset,
+            compute_metrics=compute_metrics,
+        )
+
+        # First pass: `eval_accumulation_steps` left as currently set on the trainer's arguments.
+        _evaluate_and_predict(trainer, dataset)
+
+        # Second pass: same checks with `eval_accumulation_steps` set to 2.
+        trainer.args.eval_accumulation_steps = 2
+        _evaluate_and_predict(trainer, dataset)
+
+        # Clear the setting again before the next dataset length is processed.
+        trainer.args.eval_accumulation_steps = None