first commit
Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled

This commit is contained in:
陈赣
2026-06-05 16:53:03 +08:00
commit 06f1fd69a6
6047 changed files with 1895387 additions and 0 deletions

View File

View File

@@ -0,0 +1,209 @@
# Copyright 2025 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from datasets import load_dataset
from transformers.testing_utils import require_torch, require_vision
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
import torch
if is_vision_available():
pass
class Sam3ImageProcessingTester:
def __init__(
self,
parent,
batch_size=7,
num_channels=3,
min_resolution=30,
max_resolution=400,
mask_size=None,
do_resize=True,
size=None,
do_normalize=True,
image_mean=[0.5, 0.5, 0.5],
image_std=[0.5, 0.5, 0.5],
):
size = size if size is not None else {"height": 20, "width": 20}
mask_size = mask_size if mask_size is not None else {"height": 12, "width": 12}
self.parent = parent
self.batch_size = batch_size
self.num_channels = num_channels
self.min_resolution = min_resolution
self.max_resolution = max_resolution
self.mask_size = mask_size
self.do_resize = do_resize
self.size = size
self.do_normalize = do_normalize
self.image_mean = image_mean
self.image_std = image_std
def prepare_image_processor_dict(self):
return {
"image_mean": self.image_mean,
"image_std": self.image_std,
"do_normalize": self.do_normalize,
"do_resize": self.do_resize,
"size": self.size,
"mask_size": self.mask_size,
}
def expected_output_image_shape(self, images):
return self.num_channels, self.size["height"], self.size["width"]
def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False):
return prepare_image_inputs(
batch_size=self.batch_size,
num_channels=self.num_channels,
min_resolution=self.min_resolution,
max_resolution=self.max_resolution,
equal_resolution=equal_resolution,
numpify=numpify,
torchify=torchify,
)
def prepare_semantic_single_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]
def prepare_semantic_batch_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])
@require_torch
@require_vision
class Sam3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
def setUp(self):
super().setUp()
self.image_processor_tester = Sam3ImageProcessingTester(self)
@property
def image_processor_dict(self):
return self.image_processor_tester.prepare_image_processor_dict()
def test_call_segmentation_maps(self):
for image_processing_class in self.image_processing_classes.values():
image_processor = image_processing_class(**self.image_processor_dict)
image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True)
maps = []
for image in image_inputs:
self.assertIsInstance(image, torch.Tensor)
maps.append(torch.zeros(image.shape[-2:]).long())
# Test not batched input
encoding = image_processor(image_inputs[0], maps[0], return_tensors="pt")
self.assertEqual(
encoding["pixel_values"].shape,
(
1,
self.image_processor_tester.num_channels,
self.image_processor_tester.size["height"],
self.image_processor_tester.size["width"],
),
)
self.assertEqual(
encoding["labels"].shape,
(
1,
self.image_processor_tester.mask_size["height"],
self.image_processor_tester.mask_size["width"],
),
)
self.assertEqual(encoding["labels"].dtype, torch.long)
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)
# Test batched
encoding = image_processor(image_inputs, maps, return_tensors="pt")
self.assertEqual(
encoding["pixel_values"].shape,
(
self.image_processor_tester.batch_size,
self.image_processor_tester.num_channels,
self.image_processor_tester.size["height"],
self.image_processor_tester.size["width"],
),
)
self.assertEqual(
encoding["labels"].shape,
(
self.image_processor_tester.batch_size,
self.image_processor_tester.mask_size["height"],
self.image_processor_tester.mask_size["width"],
),
)
self.assertEqual(encoding["labels"].dtype, torch.long)
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)
# Test not batched input (PIL images) with segmentation maps from dataset
image, segmentation_map = prepare_semantic_single_inputs()
encoding = image_processor(image, segmentation_map, return_tensors="pt")
self.assertEqual(
encoding["pixel_values"].shape,
(
1,
self.image_processor_tester.num_channels,
self.image_processor_tester.size["height"],
self.image_processor_tester.size["width"],
),
)
self.assertEqual(
encoding["labels"].shape,
(
1,
self.image_processor_tester.mask_size["height"],
self.image_processor_tester.mask_size["width"],
),
)
self.assertEqual(encoding["labels"].dtype, torch.long)
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)
# Test batched input (PIL images)
images, segmentation_maps = prepare_semantic_batch_inputs()
encoding = image_processor(images, segmentation_maps, return_tensors="pt")
self.assertEqual(
encoding["pixel_values"].shape,
(
2,
self.image_processor_tester.num_channels,
self.image_processor_tester.size["height"],
self.image_processor_tester.size["width"],
),
)
self.assertEqual(
encoding["labels"].shape,
(
2,
self.image_processor_tester.mask_size["height"],
self.image_processor_tester.mask_size["width"],
),
)
self.assertEqual(encoding["labels"].dtype, torch.long)
self.assertTrue(encoding["labels"].min().item() >= 0)
self.assertTrue(encoding["labels"].max().item() <= 255)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,105 @@
# Copyright 2025 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import AutoTokenizer
from transformers.models.sam3.processing_sam3 import Sam3Processor
from transformers.testing_utils import require_torch, require_vision
from ...test_processing_common import ProcessorTesterMixin
@require_torch
@require_vision
class Sam3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor_class = Sam3Processor
@classmethod
def _setup_tokenizer(cls):
return AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32", max_length=32, model_max_length=32)
# Sam3Processor has a custom non-standard __call__ signature (no chat template, extra
# prompting args like input_boxes). Skip mixin tests that assume a standard VLM interface.
def test_chat_template_save_loading(self):
self.skipTest("Sam3Processor does not use a chat template")
def test_model_input_names(self):
self.skipTest("Sam3Processor outputs extra keys (e.g. original_sizes) beyond model_input_names")
def test_tokenizer_defaults(self):
self.skipTest("Sam3Processor always pads tokenizer output to max_length=32")
def test_processor_text_has_no_visual(self):
self.skipTest("Sam3Processor has a custom interface, not a standard VLM text+image interface")
def test_processor_with_multiple_inputs(self):
self.skipTest("Sam3Processor has a custom interface, not a standard VLM text+image interface")
# --- Sam3-specific tests ---
def test_input_boxes_default_labels_mixed_batch(self):
# Regression test for https://github.com/huggingface/transformers/issues/45059:
# None entries should get pad label (-10), real entries should get positive label (1).
processor = self.get_processor()
images = self.prepare_image_inputs(batch_size=2)
inputs = processor(
images=images,
text=["cat", None],
input_boxes=[None, [[100, 100, 200, 200]]],
return_tensors="pt",
)
self.assertIn("input_boxes", inputs)
self.assertIn("input_boxes_labels", inputs)
# The None entry (index 0) should have label -10 (pad value)
self.assertEqual(inputs["input_boxes_labels"][0, 0].item(), -10)
# The real entry (index 1) should have label 1 (positive)
self.assertEqual(inputs["input_boxes_labels"][1, 0].item(), 1)
def test_input_boxes_default_labels_all_real(self):
processor = self.get_processor()
images = self.prepare_image_inputs(batch_size=2)
inputs = processor(
images=images,
text=["cat", "dog"],
input_boxes=[[[50, 50, 150, 150]], [[200, 200, 300, 300]]],
return_tensors="pt",
)
self.assertIn("input_boxes_labels", inputs)
self.assertTrue((inputs["input_boxes_labels"] == 1).all())
def test_no_input_boxes_omits_labels(self):
processor = self.get_processor()
images = self.prepare_image_inputs(batch_size=1)
inputs = processor(
images=images,
text=["cat"],
return_tensors="pt",
)
self.assertNotIn("input_boxes", inputs)
self.assertNotIn("input_boxes_labels", inputs)
def test_user_provided_labels_preserved(self):
processor = self.get_processor()
images = self.prepare_image_inputs(batch_size=2)
inputs = processor(
images=images,
text=["cat", "dog"],
input_boxes=[[[50, 50, 150, 150]], [[200, 200, 300, 300]]],
input_boxes_labels=[[1], [0]],
return_tensors="pt",
)
self.assertEqual(inputs["input_boxes_labels"][0, 0].item(), 1)
self.assertEqual(inputs["input_boxes_labels"][1, 0].item(), 0)