first commit
Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled

This commit is contained in:
陈赣
2026-06-05 16:53:03 +08:00
commit 06f1fd69a6
6047 changed files with 1895387 additions and 0 deletions

View File

View File

@@ -0,0 +1,472 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import unittest
import numpy as np
from transformers import MODEL_FOR_MULTIMODAL_LM_MAPPING, is_vision_available
from transformers.pipelines import AnyToAnyPipeline, pipeline
from transformers.testing_utils import (
Expectations,
is_pipeline_test,
require_librosa,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
sys.path.append(".")
from utils.fetch_hub_objects_for_ci import url_to_local_path
if is_vision_available():
import PIL
@is_pipeline_test
@require_vision
@require_librosa
@require_torch
class AnyToAnyPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_MULTIMODAL_LM_MAPPING
# We only need `processor` but the Mixin will pass all possible preprocessing classes for a model.
# So we add them all in signature
def get_test_pipeline(
self, model, tokenizer, processor, image_processor=None, feature_extractor=None, dtype="float32"
):
_is_images_supported = hasattr(processor, "image_processor")
_is_videos_supported = hasattr(processor, "video_processor")
_is_audios_supported = hasattr(processor, "feature_extractor")
image_token = getattr(processor.tokenizer, "image_token", "")
video_token = getattr(processor.tokenizer, "video_token", "")
audio_token = getattr(processor.tokenizer, "audio_token", "")
images_examples = [
{
"images": "./tests/fixtures/tests_samples/COCO/000000039769.png",
"text": f"{image_token}This is a ",
},
{
"images": "./tests/fixtures/tests_samples/COCO/000000039769.png",
"text": f"{image_token}Here I see a ",
},
]
videos_examples = [
{
"videos": url_to_local_path(
"https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/Big_Buck_Bunny_720_10s_10MB.mp4"
),
"text": f"{video_token}This video shows a ",
},
{
"video": url_to_local_path(
"https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/sample_demo_1.mp4"
),
"text": f"{video_token}In the video I see a ",
},
]
audio_examples = [
{
"audio": url_to_local_path(
"https://huggingface.co/datasets/raushan-testing-hf/audio-test/resolve/main/glass-breaking-151256.mp3"
),
"text": f"{audio_token}This is sound of a ",
},
{
"audio": url_to_local_path(
"https://huggingface.co/datasets/raushan-testing-hf/audio-test/resolve/main/f2641_0_throatclearing.wav"
),
"text": f"{audio_token}Here I hear a ",
},
]
examples = []
if _is_images_supported:
examples.extend(images_examples)
if _is_videos_supported:
examples.extend(videos_examples)
if _is_audios_supported:
examples.extend(audio_examples)
pipe = AnyToAnyPipeline(model=model, processor=processor, dtype=dtype, max_new_tokens=10)
return pipe, examples
def run_pipeline_test(self, pipe, examples):
# Single
outputs = pipe(examples[0])
self.assertEqual(
outputs,
[
{"input_text": ANY(str), "generated_text": ANY(str)},
],
)
# Batched but limit to last 2 examples
outputs = pipe(examples[:2])
self.assertEqual(
outputs,
[
[
{"input_text": ANY(str), "generated_text": ANY(str)},
],
[
{"input_text": ANY(str), "generated_text": ANY(str)},
],
],
)
# `generation_mode` raises errors when dosn't match with other params
with self.assertRaises(ValueError):
pipe(examples, generation_mode="video")
with self.assertRaises(ValueError):
pipe(examples, generation_mode="audio", return_full_text=True)
with self.assertRaises(ValueError):
pipe(examples, generation_mode="image", return_type=1)
# Chat template
if getattr(pipe.processor, "chat_template", None) is not None:
messages = []
for example in examples[:2]:
example.pop("text")
modality_type, modality_data = list(example.items())[0]
message = {
"role": "user",
"content": [
{"type": "text", "text": "This is a "},
{"type": modality_type, "path": modality_data},
],
}
messages.append([message])
outputs = pipe(messages, return_full_text=True, max_new_tokens=10)
self.assertEqual(
outputs,
[
[
{"input_text": ANY(str), "generated_text": ANY(str)},
],
[
{"input_text": ANY(str), "generated_text": ANY(str)},
],
],
)
@slow
def test_small_model_pt_token_text_only(self):
pipe = pipeline("any-to-any", model="google/gemma-3n-E4B-it")
text = "What is the capital of France? Assistant:"
outputs = pipe(text=text, generate_kwargs={"do_sample": False})
EXPECTED_OUTPUT = Expectations(
{
("cuda", 8): [
{
"input_text": "What is the capital of France? Assistant:",
"generated_text": "What is the capital of France? Assistant: The capital of France is Paris.",
}
],
("rocm", (9, 4)): [
{
"input_text": "What is the capital of France? Assistant:",
"generated_text": "What is the capital of France? Assistant: The capital of France is Paris.\n",
}
],
}
).get_expectation()
self.assertEqual(outputs, EXPECTED_OUTPUT)
messages = [
[
{
"role": "user",
"content": [
{"type": "text", "text": "Write a poem on Hugging Face, the company"},
],
},
],
[
{
"role": "user",
"content": [
{"type": "text", "text": "What is the capital of France?"},
],
},
],
]
outputs = pipe(text=messages, generate_kwargs={"do_sample": False})
EXPECTED_OUTPUT = Expectations(
{
("cuda", 8): [
[
{
"input_text": [
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
}
],
"generated_text": [
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
},
{
"role": "assistant",
"content": "A digital embrace, a friendly face,Hugging Face, a vibrant space.Where models bloom and knowledge flows,And AI's potential brightly glows.From transformers deep, a powerful core,To datasets vast, and so much more.A community thrives, a helping hand,Sharing insights across the land.Pipelines built with elegant ease,For NLP tasks, designed to please.Fine-tuning models, a joyful art,To tailor AI to play its part.Spaces open wide, for demos bright,Showcasing wonders, day and night.From text to image, code to sound,Innovation's fertile ground.A platform built on open grace,Democratizing AI's embrace.For researchers, builders, and all who seek,To unlock the future, bold and sleek.So raise a glass to the Face so kind,Hugging Face, expanding the mind.Connecting minds, with code and care,A future of AI, beyond compare.",
},
],
}
],
[
{
"input_text": [
{
"role": "user",
"content": [{"type": "text", "text": "What is the capital of France?"}],
}
],
"generated_text": [
{
"role": "user",
"content": [{"type": "text", "text": "What is the capital of France?"}],
},
{"role": "assistant", "content": "The capital of France is **Paris**. "},
],
}
],
],
("rocm", (9, 4)): [
[
{
"input_text": [
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
}
],
"generated_text": [
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
},
{
"role": "assistant",
"content": "A digital embrace, a friendly face,\nHugging Face, a vibrant space.\nWhere models bloom and knowledge flows,\nAnd AI's potential brightly glows.\n\nFrom transformers deep, a powerful core,\nTo datasets vast, and so much more.\nA community thrives, a helping hand,\nSharing insights across the land.\n\nPipelines built with elegant ease,\nFor NLP tasks, designed to please.\nFine-tuning models, a joyful art,\nTo tailor AI to play its part.\n\nSpaces open wide, for demos bright,\nShowcasing wonders, day and night.\nFrom text to image, code to sound,\nInnovation's fertile ground.\n\nA platform built on open grace,\nDemocratizing AI's embrace.\nFor researchers, builders, and all who seek,\nTo unlock the future, bold and sleek.\n\nSo raise a glass to the Face so kind,\nHugging Face, expanding the mind.\nConnecting minds, with code and care,\nA future of AI, beyond compare.\n\n\n\n",
},
],
}
],
[
{
"input_text": [
{
"role": "user",
"content": [{"type": "text", "text": "What is the capital of France?"}],
}
],
"generated_text": [
{
"role": "user",
"content": [{"type": "text", "text": "What is the capital of France?"}],
},
{"role": "assistant", "content": "The capital of France is **Paris**. \n"},
],
}
],
],
}
).get_expectation()
self.assertEqual(outputs, EXPECTED_OUTPUT)
@slow
@require_torch
def test_small_model_pt_chat_with_response_parsing(self):
pipe = pipeline("any-to-any", model="google/gemma-3n-E4B-it")
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "What is the capital of France?"},
],
},
]
pipe.tokenizer.response_schema = {
# A real response schema should probably have things like "role" and "content"
# and "reasoning_content" but it's unlikely we'd get a tiny model to reliably
# output anything like that, so let's keep it simple.
"type": "object",
"properties": {
"first_word": {"type": "string", "x-regex": r"^\s*([a-zA-Z]+)"},
"last_word": {"type": "string", "x-regex": r"([a-zA-Z]+)\s*$"},
},
}
outputs = pipe(text=messages, generate_kwargs={"do_sample": False})
parsed_message = outputs[0]["generated_text"][-1]
# The parsed message should be a dict with the schema keys, not {"role": "assistant", "content": ...}
self.assertIn("first_word", parsed_message)
self.assertIn("last_word", parsed_message)
self.assertNotIn("role", parsed_message)
self.assertIsInstance(parsed_message["first_word"], str)
self.assertIsInstance(parsed_message["last_word"], str)
@slow
def test_small_model_pt_token_audio_input(self):
pipe = pipeline("any-to-any", model="google/gemma-3n-E4B-it")
audio_path = url_to_local_path(
"https://huggingface.co/datasets/raushan-testing-hf/audio-test/resolve/main/f2641_0_throatclearing.wav"
)
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "What do you hear in this audio?"},
{"type": "audio", "url": audio_path},
],
},
]
outputs = pipe(text=messages, return_type=1, generate_kwargs={"do_sample": False}) # return new text
self.assertEqual(
outputs,
[
{
"input_text": [
{
"role": "user",
"content": [
{"type": "text", "text": "What do you hear in this audio?"},
{
"type": "audio",
"url": "https://huggingface.co/datasets/raushan-testing-hf/audio-test/resolve/main/f2641_0_throatclearing.wav",
},
],
}
],
"generated_None": "user\nWhat do you hear in this audio?\n\n\n\n\nmodel\nThe audio contains the repeated sound of someone **coughing**. It's a fairly consistent, forceful cough throughout the duration.",
}
],
)
@slow
def test_small_model_pt_token_audio_gen(self):
pipe = pipeline("any-to-any", model="Qwen/Qwen2.5-Omni-3B", dtype="bfloat16")
video_path = url_to_local_path(
"https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/Cooking_cake.mp4"
)
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this video."},
{"type": "video", "video": video_path},
],
},
]
outputs = pipe(
text=messages,
num_frames=16,
max_new_tokens=50,
load_audio_from_video=True,
generate_kwargs={"use_audio_in_video": True, "talker_do_sample": False, "do_sample": False},
)
EXPECTED_CONTENT = Expectations(
{
(
"cuda",
8,
): "system\nYou are a helpful assistant.\nuser\nDescribe this video.\nassistant\nThe video begins with a man standing in a kitchen, wearing a black shirt. He is holding a large glass bowl filled with flour and a spoon. The man starts to mix the flour in the bowl, creating a dough. As he mixes, he continues to talk to the camera, explaining the process. The kitchen has wooden cabinets and a white refrigerator in the background. The man's movements are deliberate and focused as he works with the dough. The video ends with the man still mixing the dough in the bowl. Overall, the video provides a clear and detailed demonstration of how to make dough using flour and a spoon.",
(
"rocm",
(9, 4),
): "The video begins with a man standing in a kitchen, wearing a black shirt. He is positioned in front of a refrigerator and wooden cabinets. The man is speaking and gesturing with his hands, possibly explaining something or giving instructions. The kitchen appears to be well-lit and has a clean, organized appearance.\n\nAs the video progresses, the man continues to speak and gesture, maintaining his position in the kitchen. The camera remains focused on him, capturing his upper body and face. The background remains consistent, showing the refrigerator and wooden cabinets. The lighting in the kitchen stays bright, and the overall atmosphere remains calm and focused.\n\nThroughout the video, the man's movements and expressions suggest that he is engaged in a conversation or presentation. His gestures and facial expressions indicate that he is actively communicating and possibly demonstrating something related to cooking or food preparation. The kitchen setting provides a practical and relatable backdrop for his actions and words.\n\nOverall, the video depicts a man in a kitchen, speaking and gesturing while possibly explaining or demonstrating something related to cooking or food preparation. The kitchen setting, with its well-lit and organized appearance, serves as a suitable environment for his actions and words.",
}
).get_expectation()
self.assertEqual(
outputs,
[
{
"input_text": [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this video."},
{
"type": "video",
"video": "https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/Cooking_cake.mp4",
},
],
}
],
"generated_text": [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this video."},
{
"type": "video",
"video": "https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/Cooking_cake.mp4",
},
],
},
{
"role": "assistant",
"content": EXPECTED_CONTENT,
},
],
}
],
)
outputs = pipe(text=messages, generation_mode="audio", num_frames=16, max_new_tokens=20)
self.assertEqual(len(outputs), len(messages))
self.assertIsInstance(outputs[0], dict)
for out in outputs:
self.assertTrue("input_text" in out)
self.assertTrue("generated_audio" in out)
self.assertIsInstance(out["generated_audio"], np.ndarray)
@slow
def test_small_model_pt_image_gen(self):
pipe = pipeline("any-to-any", model="deepseek-community/Janus-Pro-1B")
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "A dog running under the rain."},
],
},
]
outputs = pipe(text=messages, generation_mode="image")
self.assertEqual(len(outputs), len(messages))
self.assertIsInstance(outputs[0], dict)
for out in outputs:
self.assertTrue("input_text" in out)
self.assertTrue("generated_image" in out)
self.assertIsInstance(out["generated_image"], PIL.Image.Image)

View File

@@ -0,0 +1,248 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
import numpy as np
from huggingface_hub import AudioClassificationOutputElement
from transformers import (
MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
is_torch_available,
)
from transformers.pipelines import AudioClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_torch,
require_torchaudio,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
@is_pipeline_test
class AudioClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls._dataset is None:
cls._dataset = datasets.load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation"
)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
audio_classifier = AudioClassificationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
# test with a raw waveform
audio = np.zeros((34000,))
audio2 = np.zeros((14000,))
return audio_classifier, [audio2, audio]
def run_pipeline_test(self, audio_classifier, examples):
audio2, audio = examples
output = audio_classifier(audio)
# by default a model is initialized with num_labels=2
self.assertEqual(
output,
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
)
output = audio_classifier(audio, top_k=1)
self.assertEqual(
output,
[
{"score": ANY(float), "label": ANY(str)},
],
)
self.run_torchaudio(audio_classifier)
for single_output in output:
compare_pipeline_output_to_hub_spec(single_output, AudioClassificationOutputElement)
@require_torchaudio
def run_torchaudio(self, audio_classifier):
self._load_dataset()
# test with a local file
audio = self._dataset[0]["audio"]["array"]
output = audio_classifier(audio)
self.assertEqual(
output,
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
)
@require_torch
def test_small_model_pt(self):
model = "anton-l/wav2vec2-random-tiny-classifier"
audio_classifier = pipeline("audio-classification", model=model)
audio = np.ones((8000,))
output = audio_classifier(audio, top_k=4)
EXPECTED_OUTPUT = [
{"score": 0.0842, "label": "no"},
{"score": 0.0838, "label": "up"},
{"score": 0.0837, "label": "go"},
{"score": 0.0834, "label": "right"},
]
EXPECTED_OUTPUT_PT_2 = [
{"score": 0.0845, "label": "stop"},
{"score": 0.0844, "label": "on"},
{"score": 0.0841, "label": "right"},
{"score": 0.0834, "label": "left"},
]
self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])
audio_dict = {"array": np.ones((8000,)), "sampling_rate": audio_classifier.feature_extractor.sampling_rate}
output = audio_classifier(audio_dict, top_k=4)
self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])
@require_torch
def test_small_model_pt_fp16(self):
model = "anton-l/wav2vec2-random-tiny-classifier"
audio_classifier = pipeline("audio-classification", model=model, dtype=torch.float16)
audio = np.ones((8000,))
output = audio_classifier(audio, top_k=4)
# Expected outputs are collected running the test on torch 2.6 in few scenarios.
# Running on CUDA T4/A100 and on XPU PVC:
EXPECTED_OUTPUT = [
{"score": 0.0833, "label": "go"},
{"score": 0.0833, "label": "off"},
{"score": 0.0833, "label": "stop"},
{"score": 0.0833, "label": "on"},
]
# Running on CPU:
EXPECTED_OUTPUT_PT_2 = [
{"score": 0.0839, "label": "no"},
{"score": 0.0837, "label": "go"},
{"score": 0.0836, "label": "yes"},
{"score": 0.0835, "label": "right"},
]
self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])
audio_dict = {"array": np.ones((8000,)), "sampling_rate": audio_classifier.feature_extractor.sampling_rate}
output = audio_classifier(audio_dict, top_k=4)
self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])
@require_torch
@slow
def test_large_model_pt(self):
model = "superb/wav2vec2-base-superb-ks"
audio_classifier = pipeline("audio-classification", model=model)
dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
audio = np.array(dataset[3]["speech"], dtype=np.float32)
output = audio_classifier(audio, top_k=4)
self.assertEqual(
nested_simplify(output, decimals=3),
[
{"score": 0.981, "label": "go"},
{"score": 0.007, "label": "up"},
{"score": 0.006, "label": "_unknown_"},
{"score": 0.001, "label": "down"},
],
)
@require_torch
@slow
def test_top_k_none_returns_all_labels(self):
model_name = "superb/wav2vec2-base-superb-ks" # model with more than 5 labels
classification_pipeline = pipeline(
"audio-classification",
model=model_name,
top_k=None,
)
# Create dummy input
sampling_rate = 16000
signal = np.zeros((sampling_rate,), dtype=np.float32)
result = classification_pipeline(signal)
num_labels = classification_pipeline.model.config.num_labels
self.assertEqual(len(result), num_labels, "Should return all labels when top_k is None")
@require_torch
@slow
def test_top_k_none_with_few_labels(self):
model_name = "superb/hubert-base-superb-er" # model with fewer labels
classification_pipeline = pipeline(
"audio-classification",
model=model_name,
top_k=None,
)
# Create dummy input
sampling_rate = 16000
signal = np.zeros((sampling_rate,), dtype=np.float32)
result = classification_pipeline(signal)
num_labels = classification_pipeline.model.config.num_labels
self.assertEqual(len(result), num_labels, "Should handle models with fewer labels correctly")
@require_torch
@slow
def test_top_k_greater_than_labels(self):
model_name = "superb/hubert-base-superb-er"
classification_pipeline = pipeline(
"audio-classification",
model=model_name,
top_k=100, # intentionally large number
)
# Create dummy input
sampling_rate = 16000
signal = np.zeros((sampling_rate,), dtype=np.float32)
result = classification_pipeline(signal)
num_labels = classification_pipeline.model.config.num_labels
self.assertEqual(len(result), num_labels, "Should cap top_k to number of labels")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,982 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import logging
import os
import sys
import tempfile
import unittest
from pathlib import Path
import datasets
from huggingface_hub import delete_repo, snapshot_download
from huggingface_hub.errors import HfHubHTTPError
from transformers import (
AutomaticSpeechRecognitionPipeline,
AutoModelForSequenceClassification,
AutoTokenizer,
DistilBertForSequenceClassification,
MaskGenerationPipeline,
TextClassificationPipeline,
TextGenerationPipeline,
pipeline,
)
from transformers.pipelines import PIPELINE_REGISTRY, get_task
from transformers.pipelines.base import Pipeline, _pad
from transformers.testing_utils import (
TOKEN,
USER,
CaptureLogger,
RequestCounter,
backend_empty_cache,
is_pipeline_test,
is_staging_test,
nested_simplify,
require_peft,
require_torch,
require_torch_accelerator,
require_torch_multi_accelerator,
slow,
torch_device,
)
from transformers.utils import direct_transformers_import, is_torch_available
from transformers.utils import logging as transformers_logging
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
from test_module.custom_pipeline import PairClassificationPipeline # noqa E402
logger = logging.getLogger(__name__)
PATH_TO_TRANSFORMERS = os.path.join(Path(__file__).parent.parent.parent, "src/transformers")
# Dynamically import the Transformers module to grab the attribute classes of the processor form their names.
transformers_module = direct_transformers_import(PATH_TO_TRANSFORMERS)
class ANY:
def __init__(self, *_types):
self._types = _types
def __eq__(self, other):
return isinstance(other, self._types)
def __repr__(self):
return f"ANY({', '.join(_type.__name__ for _type in self._types)})"
@is_pipeline_test
class CommonPipelineTest(unittest.TestCase):
@require_torch
def test_pipeline_iteration(self):
from torch.utils.data import Dataset
class MyDataset(Dataset):
data = [
"This is a test",
"This restaurant is great",
"This restaurant is awful",
]
def __len__(self):
return 3
def __getitem__(self, i):
return self.data[i]
text_classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")
dataset = MyDataset()
for output in text_classifier(dataset):
self.assertEqual(output, {"label": ANY(str), "score": ANY(float)})
@require_torch
def test_check_task_auto_inference(self):
pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
self.assertIsInstance(pipe, TextClassificationPipeline)
@require_torch
def test_pipeline_batch_size_global(self):
pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
self.assertEqual(pipe._batch_size, None)
self.assertEqual(pipe._num_workers, None)
pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert", batch_size=2, num_workers=1)
self.assertEqual(pipe._batch_size, 2)
self.assertEqual(pipe._num_workers, 1)
@require_torch
def test_pipeline_pathlike(self):
pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
with tempfile.TemporaryDirectory() as d:
pipe.save_pretrained(d)
path = Path(d)
newpipe = pipeline(task="text-classification", model=path)
self.assertIsInstance(newpipe, TextClassificationPipeline)
@require_torch
def test_pipeline_override(self):
class MyPipeline(TextClassificationPipeline):
pass
text_classifier = pipeline(model="hf-internal-testing/tiny-random-distilbert", pipeline_class=MyPipeline)
self.assertIsInstance(text_classifier, MyPipeline)
@require_torch
def test_pipeline_tokenizer_tuple_respects_use_fast_override(self):
text_classifier = pipeline(
task="text-classification",
model="hf-internal-testing/tiny-random-bert",
tokenizer=("hf-internal-testing/tiny-random-bert", {"use_fast": False}),
)
self.assertIsInstance(text_classifier, TextClassificationPipeline)
self.assertEqual(type(text_classifier.tokenizer).__name__, "BertTokenizer")
def test_check_task(self):
task = get_task("openai-community/gpt2")
self.assertEqual(task, "text-generation")
with self.assertRaises(RuntimeError):
# Wrong framework
get_task("espnet/siddhana_slurp_entity_asr_train_asr_conformer_raw_en_word_valid.acc.ave_10best")
@require_torch
def test_iterator_data(self):
def data(n: int):
for _ in range(n):
yield "This is a test"
pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
results = []
for out in pipe(data(10)):
self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
results.append(out)
self.assertEqual(len(results), 10)
# When using multiple workers on streamable data it should still work
# This will force using `num_workers=1` with a warning for now.
results = []
for out in pipe(data(10), num_workers=2):
self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
results.append(out)
self.assertEqual(len(results), 10)
@require_torch
def test_unbatch_attentions_hidden_states(self):
model = DistilBertForSequenceClassification.from_pretrained(
"hf-internal-testing/tiny-random-distilbert", output_hidden_states=True, output_attentions=True
)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-distilbert")
text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
# Used to throw an error because `hidden_states` are a tuple of tensors
# instead of the expected tensor.
outputs = text_classifier(["This is great !"] * 20, batch_size=32)
self.assertEqual(len(outputs), 20)
@require_torch
def test_dtype_property(self):
import torch
model_id = "hf-internal-testing/tiny-random-distilbert"
# If dtype is specified in the pipeline constructor, the property should return that type
pipe = pipeline(model=model_id, dtype=torch.float16)
self.assertEqual(pipe.dtype, torch.float16)
# If the underlying model changes dtype, the property should return the new type
pipe.model.to(torch.bfloat16)
self.assertEqual(pipe.dtype, torch.bfloat16)
# If dtype is NOT specified in the pipeline constructor, the property should just return
# the dtype of the underlying model (default)
pipe = pipeline(model=model_id)
self.assertEqual(pipe.dtype, torch.float32)
# If underlying model doesn't have dtype property, simply return None
pipe.model = None
self.assertIsNone(pipe.dtype)
@require_torch
def test_auto_model_pipeline_registration_from_local_dir(self):
with tempfile.TemporaryDirectory() as tmp_dir:
snapshot_download("hf-internal-testing/tiny-random-custom-architecture", local_dir=tmp_dir)
pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)
self.assertIsInstance(pipe, TextGenerationPipeline) # Assert successful load
@require_peft
@require_torch
def test_pipeline_from_local_with_embedded_adapter(self):
"""
Test for issue #43746: Only overwrite the pretrained_model_name_or_path if needed with adapter.
This test ensures that when a pipeline loads from a local directory that contains a base model
with an embedded adapter (i.e., it has a config.json file), the path should NOT be overwritten
with the base_model_name_or_path from the adapter config. The fix is applied in
src/transformers/pipelines/__init__.py in the pipeline function.
"""
peft_test_model = "peft-internal-testing/tiny-OPTForCausalLM-lora"
transformers_test_model = "hf-internal-testing/tiny-random-OPTForCausalLM"
# Create a temporary directory with a complete adapter model structure
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_dir = Path(tmp_dir)
# Save the model and adapter locally
from transformers import AutoConfig, AutoModel
config = AutoConfig.from_pretrained(transformers_test_model)
model = AutoModel.from_pretrained(transformers_test_model)
adapter_model = AutoModel.from_pretrained(peft_test_model)
config.save_pretrained(tmp_dir)
model.save_pretrained(tmp_dir)
adapter_model.save_pretrained(tmp_dir)
# Overwrite the base_model_name_or_path to an invalid value that
# would cause the pipeline load to fail later
import json
from transformers.utils import ADAPTER_CONFIG_NAME
adapter_config_path = tmp_dir / ADAPTER_CONFIG_NAME
with open(adapter_config_path, "r") as handle:
adapter_config = json.load(handle)
adapter_config["base_model_name_or_path"] = "some/model/that/does/not/exist"
with open(adapter_config_path, "w") as handle:
json.dump(adapter_config, handle)
# Load from the saved path and make sure it actually loads despite
# the invalid adapter config path
pipeline("text-generation", tmp_dir)
@is_pipeline_test
@require_torch
class PipelineScikitCompatTest(unittest.TestCase):
def test_pipeline_predict(self):
data = ["This is a test"]
text_classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")
expected_output = [{"label": ANY(str), "score": ANY(float)}]
actual_output = text_classifier.predict(data)
self.assertEqual(expected_output, actual_output)
def test_pipeline_transform(self):
data = ["This is a test"]
text_classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")
expected_output = [{"label": ANY(str), "score": ANY(float)}]
actual_output = text_classifier.transform(data)
self.assertEqual(expected_output, actual_output)
@is_pipeline_test
class PipelinePadTest(unittest.TestCase):
@require_torch
def test_pipeline_padding(self):
import torch
items = [
{
"label": "label1",
"input_ids": torch.LongTensor([[1, 23, 24, 2]]),
"attention_mask": torch.LongTensor([[0, 1, 1, 0]]),
},
{
"label": "label2",
"input_ids": torch.LongTensor([[1, 23, 24, 43, 44, 2]]),
"attention_mask": torch.LongTensor([[0, 1, 1, 1, 1, 0]]),
},
]
self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
self.assertTrue(
torch.allclose(
_pad(items, "input_ids", 10, "right"),
torch.LongTensor([[1, 23, 24, 2, 10, 10], [1, 23, 24, 43, 44, 2]]),
)
)
self.assertTrue(
torch.allclose(
_pad(items, "input_ids", 10, "left"),
torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]),
)
)
self.assertTrue(
torch.allclose(
_pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
)
)
@require_torch
def test_pipeline_image_padding(self):
import torch
items = [
{
"label": "label1",
"pixel_values": torch.zeros((1, 3, 10, 10)),
},
{
"label": "label2",
"pixel_values": torch.zeros((1, 3, 10, 10)),
},
]
self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
self.assertTrue(
torch.allclose(
_pad(items, "pixel_values", 10, "right"),
torch.zeros((2, 3, 10, 10)),
)
)
@require_torch
def test_pipeline_offset_mapping(self):
import torch
items = [
{
"offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long),
},
{
"offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long),
},
]
self.assertTrue(
torch.allclose(
_pad(items, "offset_mappings", 0, "right"),
torch.zeros((2, 11, 2), dtype=torch.long),
),
)
@is_pipeline_test
class PipelineUtilsTest(unittest.TestCase):
@require_torch
def test_pipeline_dataset(self):
from transformers.pipelines.pt_utils import PipelineDataset
dummy_dataset = [0, 1, 2, 3]
def add(number, extra=0):
return number + extra
dataset = PipelineDataset(dummy_dataset, add, {"extra": 2})
self.assertEqual(len(dataset), 4)
outputs = [dataset[i] for i in range(4)]
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
def test_pipeline_iterator(self):
from transformers.pipelines.pt_utils import PipelineIterator
dummy_dataset = [0, 1, 2, 3]
def add(number, extra=0):
return number + extra
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2})
self.assertEqual(len(dataset), 4)
outputs = list(dataset)
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
def test_pipeline_iterator_no_len(self):
from transformers.pipelines.pt_utils import PipelineIterator
def dummy_dataset():
yield from range(4)
def add(number, extra=0):
return number + extra
dataset = PipelineIterator(dummy_dataset(), add, {"extra": 2})
with self.assertRaises(TypeError):
len(dataset)
outputs = list(dataset)
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
def test_pipeline_batch_unbatch_iterator(self):
from transformers.pipelines.pt_utils import PipelineIterator
dummy_dataset = [{"id": [0, 1, 2]}, {"id": [3]}]
def add(number, extra=0):
return {"id": [i + extra for i in number["id"]]}
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
outputs = list(dataset)
self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}])
@require_torch
def test_pipeline_batch_unbatch_iterator_tensors(self):
import torch
from transformers.pipelines.pt_utils import PipelineIterator
dummy_dataset = [{"id": torch.LongTensor([[10, 20], [0, 1], [0, 2]])}, {"id": torch.LongTensor([[3]])}]
def add(number, extra=0):
return {"id": number["id"] + extra}
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
outputs = list(dataset)
self.assertEqual(
nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}]
)
@require_torch
def test_pipeline_chunk_iterator(self):
from transformers.pipelines.pt_utils import PipelineChunkIterator
def preprocess_chunk(n: int):
yield from range(n)
dataset = [2, 3]
dataset = PipelineChunkIterator(dataset, preprocess_chunk, {}, loader_batch_size=3)
outputs = list(dataset)
self.assertEqual(outputs, [0, 1, 0, 1, 2])
@require_torch
def test_pipeline_pack_iterator(self):
from transformers.pipelines.pt_utils import PipelinePackIterator
def pack(item):
return {"id": item["id"] + 1, "is_last": item["is_last"]}
dataset = [
{"id": 0, "is_last": False},
{"id": 1, "is_last": True},
{"id": 0, "is_last": False},
{"id": 1, "is_last": False},
{"id": 2, "is_last": True},
]
dataset = PipelinePackIterator(dataset, pack, {})
outputs = list(dataset)
self.assertEqual(
outputs,
[
[
{"id": 1},
{"id": 2},
],
[
{"id": 1},
{"id": 2},
{"id": 3},
],
],
)
@require_torch
def test_pipeline_pack_unbatch_iterator(self):
from transformers.pipelines.pt_utils import PipelinePackIterator
dummy_dataset = [{"id": [0, 1, 2], "is_last": [False, True, False]}, {"id": [3], "is_last": [True]}]
def add(number, extra=0):
return {"id": [i + extra for i in number["id"]], "is_last": number["is_last"]}
dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
outputs = list(dataset)
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}], [{"id": 4}, {"id": 5}]])
# is_false Across batch
dummy_dataset = [{"id": [0, 1, 2], "is_last": [False, False, False]}, {"id": [3], "is_last": [True]}]
def add(number, extra=0):
return {"id": [i + extra for i in number["id"]], "is_last": number["is_last"]}
dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
outputs = list(dataset)
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]])
def test_pipeline_negative_device(self):
# To avoid regressing, pipeline used to accept device=-1
classifier = pipeline("text-generation", "hf-internal-testing/tiny-random-bert", device=-1)
expected_output = [{"generated_text": ANY(str)}]
actual_output = classifier("Test input.")
self.assertEqual(expected_output, actual_output)
@require_torch_accelerator
def test_pipeline_no_device(self):
# Test when no device is passed to pipeline
import torch
from transformers import AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
# Case 1: Model is manually moved to device
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-bert", dtype=torch.float16).to(
torch_device
)
model_device = model.device
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
self.assertEqual(pipe.model.device, model_device)
# Case 2: Model is loaded by accelerate
model = AutoModelForCausalLM.from_pretrained(
"hf-internal-testing/tiny-random-bert", device_map=torch_device, dtype=torch.float16
)
model_device = model.device
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
self.assertEqual(pipe.model.device, model_device)
# Case 3: device_map is passed to model and device is passed to pipeline
model = AutoModelForCausalLM.from_pretrained(
"hf-internal-testing/tiny-random-bert", device_map=torch_device, dtype=torch.float16
)
with self.assertRaises(ValueError):
pipe = pipeline("text-generation", model=model, device="cpu", tokenizer=tokenizer)
@require_torch_multi_accelerator
def test_pipeline_device_not_equal_model_device(self):
# Test when device ids are different, pipeline should move the model to the passed device id
import torch
from transformers import AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
model_device = f"{torch_device}:1"
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-bert", dtype=torch.float16).to(
model_device
)
target_device = f"{torch_device}:0"
self.assertNotEqual(model_device, target_device)
pipe = pipeline("text-generation", model=model, device=target_device, tokenizer=tokenizer)
self.assertEqual(pipe.model.device, torch.device(target_device))
@slow
@require_torch
def test_load_default_pipelines_pt(self):
import torch
from transformers.pipelines import SUPPORTED_TASKS
set_seed_fn = lambda: torch.manual_seed(0) # noqa: E731
for task in SUPPORTED_TASKS:
if task == "table-question-answering":
# test table in separate test due to more dependencies
continue
self.check_default_pipeline(task, set_seed_fn, self.check_models_equal_pt)
# clean-up as much as possible GPU memory occupied by PyTorch
gc.collect()
backend_empty_cache(torch_device)
@slow
@require_torch
def test_load_default_pipelines_pt_table_qa(self):
import torch
set_seed_fn = lambda: torch.manual_seed(0) # noqa: E731
self.check_default_pipeline("table-question-answering", set_seed_fn, self.check_models_equal_pt)
# clean-up as much as possible GPU memory occupied by PyTorch
gc.collect()
backend_empty_cache(torch_device)
@slow
@require_torch
@require_torch_accelerator
def test_pipeline_accelerator(self):
pipe = pipeline("text-generation", device=torch_device)
_ = pipe("Hello")
@slow
@require_torch
@require_torch_accelerator
def test_pipeline_accelerator_indexed(self):
pipe = pipeline("text-generation", device=torch_device)
_ = pipe("Hello")
@slow
@require_torch
def test_bc_torch_device(self):
import torch
from transformers.pipelines import get_supported_tasks
for task in get_supported_tasks():
# Check that it works for all dtypes
for dtype in ["float16", "bfloat16", "float32", "auto", torch.float16, torch.bfloat16, torch.float32]:
pipe_torch_dtype = pipeline(task, torch_dtype=dtype)
pipe_dtype = pipeline(task, dtype=dtype)
# Make sure all parameters have the same dtype
for (k1, v1), (k2, v2) in zip(
pipe_torch_dtype.model.named_parameters(), pipe_dtype.model.named_parameters()
):
self.assertEqual(k1, k2)
self.assertEqual(v1.dtype, v2.dtype)
pipe_torch_dtype = pipeline(task, model_kwargs={"torch_dtype": dtype})
pipe_dtype = pipeline(task, model_kwargs={"dtype": dtype})
# Make sure all parameters have the same dtype
for (k1, v1), (k2, v2) in zip(
pipe_torch_dtype.model.named_parameters(), pipe_dtype.model.named_parameters()
):
self.assertEqual(k1, k2)
self.assertEqual(v1.dtype, v2.dtype)
def check_default_pipeline(self, task, set_seed_fn, check_models_equal_fn):
from transformers.pipelines import SUPPORTED_TASKS, pipeline
task_dict = SUPPORTED_TASKS[task]
# test to compare pipeline to manually loading the respective model
model = None
relevant_auto_classes = task_dict["pt"]
if len(relevant_auto_classes) == 0:
# task has no default
self.skipTest(f"{task} in pytorch has no default")
# by default use first class
auto_model_cls = relevant_auto_classes[0]
# retrieve correct model ids
model_id, revision = task_dict["default"]["model"]
model_ids = [model_id]
revisions = [revision]
tasks = [task]
# check for equality
for model_id, revision, task in zip(model_ids, revisions, tasks):
# load default model
try:
set_seed_fn()
model = auto_model_cls.from_pretrained(model_id, revision=revision)
except ValueError:
# first auto class is possible not compatible with model, go to next model class
auto_model_cls = relevant_auto_classes[1]
set_seed_fn()
model = auto_model_cls.from_pretrained(model_id, revision=revision)
# load default pipeline
set_seed_fn()
default_pipeline = pipeline(task)
# compare pipeline model with default model
models_are_equal = check_models_equal_fn(default_pipeline.model, model)
self.assertTrue(models_are_equal, f"{task} model doesn't match pipeline.")
def check_models_equal_pt(self, model1, model2):
models_are_equal = True
for model1_p, model2_p in zip(model1.parameters(), model2.parameters()):
if model1_p.data.ne(model2_p.data).sum() > 0:
models_are_equal = False
return models_are_equal
class CustomPipeline(Pipeline):
def _sanitize_parameters(self, **kwargs):
preprocess_kwargs = {}
if "maybe_arg" in kwargs:
preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
return preprocess_kwargs, {}, {}
def preprocess(self, text, maybe_arg=2):
input_ids = self.tokenizer(text, return_tensors="pt")
return input_ids
def _forward(self, model_inputs):
outputs = self.model(**model_inputs)
return outputs
def postprocess(self, model_outputs):
return model_outputs["logits"].softmax(-1).numpy()
@is_pipeline_test
class CustomPipelineTest(unittest.TestCase):
def test_warning_logs(self):
transformers_logging.set_verbosity_debug()
logger_ = transformers_logging.get_logger("transformers.pipelines.base")
alias = "text-classification"
# Get the original task, so we can restore it at the end.
# (otherwise the subsequential tests in `TextClassificationPipelineTests` will fail)
_, original_task, _ = PIPELINE_REGISTRY.check_task(alias)
try:
with CaptureLogger(logger_) as cm:
PIPELINE_REGISTRY.register_pipeline(alias, PairClassificationPipeline)
self.assertIn(f"{alias} is already registered", cm.out)
finally:
# restore
PIPELINE_REGISTRY.supported_tasks[alias] = original_task
def test_register_pipeline(self):
PIPELINE_REGISTRY.register_pipeline(
"custom-text-classification",
pipeline_class=PairClassificationPipeline,
pt_model=AutoModelForSequenceClassification if is_torch_available() else None,
default={"model": ("hf-internal-testing/tiny-random-distilbert", "2ef615d")},
type="text",
)
assert "custom-text-classification" in PIPELINE_REGISTRY.get_supported_tasks()
_, task_def, _ = PIPELINE_REGISTRY.check_task("custom-text-classification")
self.assertEqual(task_def["pt"], (AutoModelForSequenceClassification,) if is_torch_available() else ())
self.assertEqual(task_def["type"], "text")
self.assertEqual(task_def["impl"], PairClassificationPipeline)
self.assertEqual(task_def["default"], {"model": ("hf-internal-testing/tiny-random-distilbert", "2ef615d")})
# Clean registry for next tests.
del PIPELINE_REGISTRY.supported_tasks["custom-text-classification"]
@require_torch
def test_dynamic_pipeline(self):
PIPELINE_REGISTRY.register_pipeline(
"pair-classification",
pipeline_class=PairClassificationPipeline,
pt_model=AutoModelForSequenceClassification if is_torch_available() else None,
)
classifier = pipeline("pair-classification", model="hf-internal-testing/tiny-random-bert")
# Clean registry as we won't need the pipeline to be in it for the rest to work.
del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
with tempfile.TemporaryDirectory() as tmp_dir:
classifier.save_pretrained(tmp_dir)
# checks
self.assertDictEqual(
classifier.model.config.custom_pipelines,
{
"pair-classification": {
"impl": "custom_pipeline.PairClassificationPipeline",
"pt": ("AutoModelForSequenceClassification",) if is_torch_available() else (),
}
},
)
# Fails if the user forget to pass along `trust_remote_code=True`
with self.assertRaises(ValueError):
_ = pipeline(model=tmp_dir)
new_classifier = pipeline(model=tmp_dir, trust_remote_code=True)
# Using trust_remote_code=False forces the traditional pipeline tag
old_classifier = pipeline("text-classification", model=tmp_dir, trust_remote_code=False)
# Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
# dynamic module
self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
self.assertEqual(new_classifier.task, "pair-classification")
results = new_classifier("I hate you", second_text="I love you")
self.assertDictEqual(
nested_simplify(results),
{"label": "LABEL_0", "score": 0.505, "logits": [-0.003, -0.024]},
)
self.assertEqual(old_classifier.__class__.__name__, "TextClassificationPipeline")
self.assertEqual(old_classifier.task, "text-classification")
results = old_classifier("I hate you", text_pair="I love you")
self.assertListEqual(
nested_simplify(results),
[{"label": "LABEL_0", "score": 0.505}],
)
@require_torch
def test_cached_pipeline_has_minimum_calls_to_head(self):
# Make sure we have cached the pipeline.
_ = pipeline("text-classification", model="hf-internal-testing/tiny-random-bert")
with RequestCounter() as counter:
_ = pipeline("text-classification", model="hf-internal-testing/tiny-random-bert")
self.assertEqual(counter["GET"], 0)
self.assertEqual(counter["HEAD"], 1)
self.assertEqual(counter.total_calls, 1)
@require_torch
def test_chunk_pipeline_batching_single_file(self):
# Make sure we have cached the pipeline.
pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
audio = ds[40]["audio"]["array"]
pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
# For some reason scoping doesn't work if not using `self.`
self.COUNT = 0
forward = pipe.model.forward
def new_forward(*args, **kwargs):
self.COUNT += 1
return forward(*args, **kwargs)
pipe.model.forward = new_forward
for out in pipe(audio, return_timestamps="char", chunk_length_s=3, stride_length_s=[1, 1], batch_size=1024):
pass
self.assertEqual(self.COUNT, 1)
@require_torch
def test_custom_code_with_string_tokenizer(self):
# This test checks for an edge case - tokenizer loading used to fail when using a custom code model
# with a separate tokenizer that was passed as a repo name rather than a tokenizer object.
# See https://github.com/huggingface/transformers/issues/31669
text_generator = pipeline(
"text-generation",
model="hf-internal-testing/tiny-random-custom-architecture",
tokenizer="hf-internal-testing/tiny-random-custom-architecture",
trust_remote_code=True,
)
self.assertIsInstance(text_generator, TextGenerationPipeline) # Assert successful loading
@require_torch
def test_custom_code_with_string_feature_extractor(self):
speech_recognizer = pipeline(
"automatic-speech-recognition",
model="hf-internal-testing/fake-custom-wav2vec2",
feature_extractor="hf-internal-testing/fake-custom-wav2vec2",
tokenizer="facebook/wav2vec2-base-960h", # Test workaround - the pipeline requires a tokenizer
trust_remote_code=True,
)
self.assertIsInstance(speech_recognizer, AutomaticSpeechRecognitionPipeline) # Assert successful loading
@require_torch
def test_custom_code_with_string_preprocessor(self):
mask_generator = pipeline(
"mask-generation",
model="hf-internal-testing/fake-custom-sam",
processor="hf-internal-testing/fake-custom-sam",
trust_remote_code=True,
)
self.assertIsInstance(mask_generator, MaskGenerationPipeline) # Assert successful loading
@require_torch
@is_staging_test
class DynamicPipelineTester(unittest.TestCase):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "I", "love", "hate", "you"]
@classmethod
def setUpClass(cls):
cls._token = TOKEN
@classmethod
def tearDownClass(cls):
try:
delete_repo(token=cls._token, repo_id="test-dynamic-pipeline")
except HfHubHTTPError:
pass
@unittest.skip("Broken, TODO @Yih-Dar")
def test_push_to_hub_dynamic_pipeline(self):
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
PIPELINE_REGISTRY.register_pipeline(
"pair-classification",
pipeline_class=PairClassificationPipeline,
pt_model=AutoModelForSequenceClassification,
)
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertForSequenceClassification(config).eval()
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = BertTokenizer(vocab_file)
classifier = pipeline("pair-classification", model=model, tokenizer=tokenizer)
# Clean registry as we won't need the pipeline to be in it for the rest to work.
del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
classifier.save_pretrained(tmp_dir)
# checks if the configuration has been added after calling the save_pretrained method
self.assertDictEqual(
classifier.model.config.custom_pipelines,
{
"pair-classification": {
"impl": "custom_pipeline.PairClassificationPipeline",
"pt": ("AutoModelForSequenceClassification",),
}
},
)
# use push_to_hub method to push the pipeline
classifier.push_to_hub(f"{USER}/test-dynamic-pipeline", token=self._token)
# Fails if the user forget to pass along `trust_remote_code=True`
with self.assertRaises(ValueError):
_ = pipeline(model=f"{USER}/test-dynamic-pipeline")
new_classifier = pipeline(model=f"{USER}/test-dynamic-pipeline", trust_remote_code=True)
# Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
# dynamic module
self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
# check for tag exitence, tag needs to be added when we are calling a custom pipeline from the hub
# useful for cases such as finetuning
self.assertDictEqual(
new_classifier.model.config.custom_pipelines,
{
"pair-classification": {
"impl": f"{USER}/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline",
"pt": ("AutoModelForSequenceClassification",),
}
},
)
# test if the pipeline still works after the model is finetuned
# (we are actually testing if the pipeline still works from the final repo)
# this is where the user/repo--module.class is used for
new_classifier.model.push_to_hub(repo_name=f"{USER}/test-pipeline-for-a-finetuned-model", token=self._token)
del new_classifier # free up memory
new_classifier = pipeline(model=f"{USER}/test-pipeline-for-a-finetuned-model", trust_remote_code=True)
results = classifier("I hate you", second_text="I love you")
new_results = new_classifier("I hate you", second_text="I love you")
self.assertDictEqual(nested_simplify(results), nested_simplify(new_results))
# Using trust_remote_code=False forces the traditional pipeline tag
old_classifier = pipeline(
"text-classification", model=f"{USER}/test-dynamic-pipeline", trust_remote_code=False
)
self.assertEqual(old_classifier.__class__.__name__, "TextClassificationPipeline")
self.assertEqual(old_classifier.task, "text-classification")
new_results = old_classifier("I hate you", text_pair="I love you")
self.assertListEqual(
nested_simplify([{"label": results["label"], "score": results["score"]}]), nested_simplify(new_results)
)

View File

@@ -0,0 +1,161 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from huggingface_hub import DepthEstimationOutput
from huggingface_hub.utils import insecure_hashlib
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
from transformers.pipelines import DepthEstimationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_timm,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
def hashimage(image: Image) -> str:
m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class DepthEstimationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls._dataset is None:
# we use revision="refs/pr/1" until the PR is merged
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
cls._dataset = datasets.load_dataset(
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
depth_estimator = DepthEstimationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
return depth_estimator, [
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
]
def run_pipeline_test(self, depth_estimator, examples):
self._load_dataset()
outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png")
self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, outputs)
outputs = depth_estimator(
[
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
# RGBA
self._dataset[0]["image"],
# LA
self._dataset[1]["image"],
# L
self._dataset[2]["image"],
]
)
self.assertEqual(
[
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
],
outputs,
)
for single_output in outputs:
compare_pipeline_output_to_hub_spec(single_output, DepthEstimationOutput)
@slow
@require_torch
def test_large_model_pt(self):
model_id = "Intel/dpt-large"
depth_estimator = pipeline("depth-estimation", model=model_id)
outputs = depth_estimator("http://images.cocodataset.org/val2017/000000039769.jpg")
outputs["depth"] = hashimage(outputs["depth"])
# This seems flaky.
# self.assertEqual(outputs["depth"], "1a39394e282e9f3b0741a90b9f108977")
self.assertEqual(nested_simplify(outputs["predicted_depth"].max().item()), 29.306)
self.assertEqual(nested_simplify(outputs["predicted_depth"].min().item()), 2.662)
@require_torch
def test_small_model_pt(self):
# This is highly irregular to have no small tests.
self.skipTest(reason="There is not hf-internal-testing tiny model for either GLPN nor DPT")
@require_torch
def test_multiprocess(self):
depth_estimator = pipeline(
model="hf-internal-testing/tiny-random-DepthAnythingForDepthEstimation",
num_workers=2,
)
outputs = depth_estimator(
[
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
]
)
self.assertEqual(
[
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)},
],
outputs,
)

View File

@@ -0,0 +1,424 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
AutoTokenizer,
is_torch_available,
is_vision_available,
)
from transformers.pipelines import DocumentQuestionAnsweringPipeline, pipeline
from transformers.pipelines.document_question_answering import apply_tesseract
from transformers.testing_utils import (
is_pipeline_test,
nested_simplify,
require_detectron2,
require_pytesseract,
require_torch,
require_torch_bf16,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers.image_utils import load_image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
def load_image(_):
return None
# This is a pinned image from a specific revision of a document question answering space, hosted by HuggingFace,
# so we can expect it to be available.
INVOICE_URL = (
"https://huggingface.co/spaces/impira/docquery/resolve/2f6c96314dc84dfda62d40de9da55f2f5165d403/invoice.png"
)
@is_pipeline_test
@require_torch
@require_vision
class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING
@require_pytesseract
@require_vision
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
dqa_pipeline = DocumentQuestionAnsweringPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
max_new_tokens=20,
)
image = INVOICE_URL
word_boxes = list(zip(*apply_tesseract(load_image(image), None, "")))
question = "What is the placebo?"
examples = [
{
"image": load_image(image),
"question": question,
},
{
"image": image,
"question": question,
},
{
"image": image,
"question": question,
"word_boxes": word_boxes,
},
]
return dqa_pipeline, examples
def run_pipeline_test(self, dqa_pipeline, examples):
outputs = dqa_pipeline(examples, top_k=2)
self.assertEqual(
outputs,
[
[
{"score": ANY(float), "answer": ANY(str), "start": ANY(int), "end": ANY(int)},
{"score": ANY(float), "answer": ANY(str), "start": ANY(int), "end": ANY(int)},
]
]
* 3,
)
@require_torch
@require_detectron2
@require_pytesseract
def test_small_model_pt(self):
dqa_pipeline = pipeline(
"document-question-answering", model="hf-internal-testing/tiny-random-layoutlmv2-for-dqa-test"
)
image = INVOICE_URL
question = "How many cats are there?"
expected_output = [
{"score": 0.0001, "answer": "oy 2312/2019", "start": 38, "end": 39},
{"score": 0.0001, "answer": "oy 2312/2019 DUE", "start": 38, "end": 40},
]
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(nested_simplify(outputs, decimals=4), expected_output)
outputs = dqa_pipeline({"image": image, "question": question}, top_k=2)
self.assertEqual(nested_simplify(outputs, decimals=4), expected_output)
# This image does not detect ANY text in it, meaning layoutlmv2 should fail.
# Empty answer probably
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(outputs, [])
# We can optionally pass directly the words and bounding boxes
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
words = []
boxes = []
outputs = dqa_pipeline(image=image, question=question, words=words, boxes=boxes, top_k=2)
self.assertEqual(outputs, [])
@require_torch
@require_torch_bf16
@require_detectron2
@require_pytesseract
def test_small_model_pt_bf16(self):
dqa_pipeline = pipeline(
"document-question-answering",
model="hf-internal-testing/tiny-random-layoutlmv2-for-dqa-test",
dtype=torch.bfloat16,
)
image = INVOICE_URL
question = "How many cats are there?"
expected_output = [
{"score": 0.0001, "answer": "oy 2312/2019", "start": 38, "end": 39},
{"score": 0.0001, "answer": "oy 2312/2019 DUE", "start": 38, "end": 40},
]
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(nested_simplify(outputs, decimals=4), expected_output)
outputs = dqa_pipeline({"image": image, "question": question}, top_k=2)
self.assertEqual(nested_simplify(outputs, decimals=4), expected_output)
# This image does not detect ANY text in it, meaning layoutlmv2 should fail.
# Empty answer probably
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(outputs, [])
# We can optionally pass directly the words and bounding boxes
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
words = []
boxes = []
outputs = dqa_pipeline(image=image, question=question, words=words, boxes=boxes, top_k=2)
self.assertEqual(outputs, [])
# TODO: Enable this once hf-internal-testing/tiny-random-donut is implemented
# @require_torch
# def test_small_model_pt_donut(self):
# dqa_pipeline = pipeline("document-question-answering", model="hf-internal-testing/tiny-random-donut")
# # dqa_pipeline = pipeline("document-question-answering", model="../tiny-random-donut")
# image = "https://templates.invoicehome.com/invoice-template-us-neat-750px.png"
# question = "How many cats are there?"
#
# outputs = dqa_pipeline(image=image, question=question, top_k=2)
# self.assertEqual(
# nested_simplify(outputs, decimals=4), [{"score": 0.8799, "answer": "2"}, {"score": 0.296, "answer": "1"}]
# )
@slow
@require_torch
@require_detectron2
@require_pytesseract
def test_large_model_pt(self):
dqa_pipeline = pipeline(
"document-question-answering",
model="tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa",
revision="9977165",
)
image = INVOICE_URL
question = "What is the invoice number?"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9944, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.0009, "answer": "us-001", "start": 16, "end": 16},
],
)
outputs = dqa_pipeline({"image": image, "question": question}, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9944, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.0009, "answer": "us-001", "start": 16, "end": 16},
],
)
outputs = dqa_pipeline(
[{"image": image, "question": question}, {"image": image, "question": question}], top_k=2
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.9944, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.0009, "answer": "us-001", "start": 16, "end": 16},
],
]
* 2,
)
@slow
@require_torch
@require_detectron2
@require_pytesseract
def test_large_model_pt_chunk(self):
dqa_pipeline = pipeline(
"document-question-answering",
model="tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa",
revision="9977165",
max_seq_len=50,
)
image = INVOICE_URL
question = "What is the invoice number?"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9974, "answer": "1110212019", "start": 23, "end": 23},
{"score": 0.9948, "answer": "us-001", "start": 16, "end": 16},
],
)
outputs = dqa_pipeline({"image": image, "question": question}, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9974, "answer": "1110212019", "start": 23, "end": 23},
{"score": 0.9948, "answer": "us-001", "start": 16, "end": 16},
],
)
outputs = dqa_pipeline(
[{"image": image, "question": question}, {"image": image, "question": question}], top_k=2
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.9974, "answer": "1110212019", "start": 23, "end": 23},
{"score": 0.9948, "answer": "us-001", "start": 16, "end": 16},
]
]
* 2,
)
@slow
@require_torch
@require_pytesseract
@require_vision
def test_large_model_pt_layoutlm(self):
tokenizer = AutoTokenizer.from_pretrained(
"impira/layoutlm-document-qa", revision="3dc6de3", add_prefix_space=True
)
dqa_pipeline = pipeline(
"document-question-answering",
model="impira/layoutlm-document-qa",
tokenizer=tokenizer,
revision="3dc6de3",
)
image = INVOICE_URL
question = "What is the invoice number?"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=3),
[
{"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
],
)
outputs = dqa_pipeline({"image": image, "question": question}, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=3),
[
{"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
],
)
outputs = dqa_pipeline(
[{"image": image, "question": question}, {"image": image, "question": question}], top_k=2
)
self.assertEqual(
nested_simplify(outputs, decimals=3),
[
[
{"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
]
]
* 2,
)
word_boxes = list(zip(*apply_tesseract(load_image(image), None, "")))
# This model should also work if `image` is set to None
outputs = dqa_pipeline({"image": None, "word_boxes": word_boxes, "question": question}, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=3),
[
{"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
],
)
@slow
@require_torch
@require_pytesseract
@require_vision
def test_large_model_pt_layoutlm_chunk(self):
tokenizer = AutoTokenizer.from_pretrained(
"impira/layoutlm-document-qa", revision="3dc6de3", add_prefix_space=True
)
dqa_pipeline = pipeline(
"document-question-answering",
model="impira/layoutlm-document-qa",
tokenizer=tokenizer,
revision="3dc6de3",
max_seq_len=50,
)
image = INVOICE_URL
question = "What is the invoice number?"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9999, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.9998, "answer": "us-001", "start": 16, "end": 16},
],
)
outputs = dqa_pipeline(
[{"image": image, "question": question}, {"image": image, "question": question}], top_k=2
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.9999, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.9998, "answer": "us-001", "start": 16, "end": 16},
]
]
* 2,
)
word_boxes = list(zip(*apply_tesseract(load_image(image), None, "")))
# This model should also work if `image` is set to None
outputs = dqa_pipeline({"image": None, "word_boxes": word_boxes, "question": question}, top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9999, "answer": "us-001", "start": 16, "end": 16},
{"score": 0.9998, "answer": "us-001", "start": 16, "end": 16},
],
)
@slow
@require_torch
def test_large_model_pt_donut(self):
dqa_pipeline = pipeline(
"document-question-answering",
model="naver-clova-ix/donut-base-finetuned-docvqa",
tokenizer=AutoTokenizer.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa"),
image_processor="naver-clova-ix/donut-base-finetuned-docvqa",
)
image = INVOICE_URL
question = "What is the invoice number?"
outputs = dqa_pipeline(image=image, question=question, top_k=2)
self.assertEqual(nested_simplify(outputs, decimals=4), [{"answer": "us-001"}])

View File

@@ -0,0 +1,158 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from transformers import (
FEATURE_EXTRACTOR_MAPPING,
IMAGE_PROCESSOR_MAPPING,
MODEL_MAPPING,
FeatureExtractionPipeline,
LxmertConfig,
is_torch_available,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch
if is_torch_available():
import torch
@is_pipeline_test
class FeatureExtractionPipelineTests(unittest.TestCase):
model_mapping = MODEL_MAPPING
@require_torch
def test_small_model_pt(self):
feature_extractor = pipeline(task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert")
outputs = feature_extractor("This is a test")
self.assertEqual(
nested_simplify(outputs),
[[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446], [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]]) # fmt: skip
@require_torch
def test_tokenization_small_model_pt(self):
feature_extractor = pipeline(task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert")
# test with empty parameters
outputs = feature_extractor("This is a test")
self.assertEqual(
nested_simplify(outputs),
[[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446], [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]]) # fmt: skip
# test with various tokenizer parameters
tokenize_kwargs = {"max_length": 3}
outputs = feature_extractor("This is a test", tokenize_kwargs=tokenize_kwargs)
self.assertEqual(np.squeeze(outputs).shape, (3, 32))
tokenize_kwargs = {"truncation": True, "padding": True, "max_length": 4}
outputs = feature_extractor(
["This is a test", "This", "This is", "This is a", "This is a test test test test"],
tokenize_kwargs=tokenize_kwargs,
)
self.assertEqual(np.squeeze(outputs).shape, (5, 4, 32))
tokenize_kwargs = {"padding": True, "max_length": 4}
outputs = feature_extractor(
["This is a test", "This", "This is", "This is a", "This is a test test test test"],
truncation=True,
tokenize_kwargs=tokenize_kwargs,
)
self.assertEqual(np.squeeze(outputs).shape, (5, 4, 32))
# raise value error if truncation parameter given for two places
tokenize_kwargs = {"truncation": True}
with self.assertRaises(ValueError):
_ = feature_extractor(
["This is a test", "This", "This is", "This is a", "This is a test test test test"],
truncation=True,
tokenize_kwargs=tokenize_kwargs,
)
@require_torch
def test_return_tensors_pt(self):
feature_extractor = pipeline(task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert")
outputs = feature_extractor("This is a test", return_tensors=True)
self.assertTrue(torch.is_tensor(outputs))
def get_shape(self, input_, shape=None):
if shape is None:
shape = []
if isinstance(input_, list):
subshapes = [self.get_shape(in_, shape) for in_ in input_]
if all(s == 0 for s in subshapes):
shape.append(len(input_))
else:
subshape = subshapes[0]
shape = [len(input_), *subshape]
elif isinstance(input_, float):
return 0
else:
raise TypeError("We expect lists of floats, nothing else")
return shape
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
if tokenizer is None:
self.skipTest(reason="No tokenizer")
elif (
type(model.config) in FEATURE_EXTRACTOR_MAPPING
or isinstance(model.config, LxmertConfig)
or type(model.config) in IMAGE_PROCESSOR_MAPPING
):
self.skipTest(
reason="This is a bimodal model, we need to find a more consistent way to switch on those models."
)
elif model.config.is_encoder_decoder:
self.skipTest(
"""encoder_decoder models are trickier for this pipeline.
Do we want encoder + decoder inputs to get some features?
Do we want encoder only features ?
For now ignore those.
"""
)
feature_extractor_pipeline = FeatureExtractionPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
return feature_extractor_pipeline, ["This is a test", "This is another test"]
def run_pipeline_test(self, feature_extractor, examples):
outputs = feature_extractor("This is a test")
shape = self.get_shape(outputs)
self.assertEqual(shape[0], 1)
# If we send too small input
# there's a bug within FunnelModel (output with shape [1, 4, 2, 1] doesn't match the broadcast shape [1, 4, 2, 2])
outputs = feature_extractor(["This is a test", "Another longer test"])
shape = self.get_shape(outputs)
self.assertEqual(shape[0], 2)
outputs = feature_extractor("This is a test" * 100, truncation=True)
shape = self.get_shape(outputs)
self.assertEqual(shape[0], 1)

View File

@@ -0,0 +1,412 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
from transformers import MODEL_FOR_MASKED_LM_MAPPING, FillMaskPipeline, pipeline
from transformers.pipelines import PipelineException
from transformers.testing_utils import (
backend_empty_cache,
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
require_torch_accelerator,
slow,
torch_device,
)
from .test_pipelines_common import ANY
@is_pipeline_test
class FillMaskPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_MASKED_LM_MAPPING
def tearDown(self):
super().tearDown()
# clean-up as much as possible GPU memory occupied by PyTorch
gc.collect()
if is_torch_available():
backend_empty_cache(torch_device)
@require_torch
def test_small_model_pt(self):
unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", top_k=2)
outputs = unmasker("My name is <mask>")
self.assertEqual(
nested_simplify(outputs, decimals=6),
[
{"sequence": "My name is Maul", "score": 2.2e-05, "token": 35676, "token_str": " Maul"},
{"sequence": "My name isELS", "score": 2.2e-05, "token": 16416, "token_str": "ELS"},
],
)
outputs = unmasker("The largest city in France is <mask>")
self.assertEqual(
nested_simplify(outputs, decimals=6),
[
{
"sequence": "The largest city in France is Maul",
"score": 2.2e-05,
"token": 35676,
"token_str": " Maul",
},
{"sequence": "The largest city in France isELS", "score": 2.2e-05, "token": 16416, "token_str": "ELS"},
],
)
outputs = unmasker("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3)
self.assertEqual(
nested_simplify(outputs, decimals=6),
[
{"sequence": "My name is Patrick", "score": 2.1e-05, "token": 3499, "token_str": " Patrick"},
{"sequence": "My name is Te", "score": 2e-05, "token": 2941, "token_str": " Te"},
{"sequence": "My name is Clara", "score": 2e-05, "token": 13606, "token_str": " Clara"},
],
)
outputs = unmasker("My name is <mask> <mask>", top_k=2)
self.assertEqual(
nested_simplify(outputs, decimals=6),
[
[
{
"score": 2.2e-05,
"token": 35676,
"token_str": " Maul",
"sequence": "<s>My name is Maul<mask></s>",
},
{"score": 2.2e-05, "token": 16416, "token_str": "ELS", "sequence": "<s>My name isELS<mask></s>"},
],
[
{
"score": 2.2e-05,
"token": 35676,
"token_str": " Maul",
"sequence": "<s>My name is<mask> Maul</s>",
},
{"score": 2.2e-05, "token": 16416, "token_str": "ELS", "sequence": "<s>My name is<mask>ELS</s>"},
],
],
)
@require_torch_accelerator
def test_fp16_casting(self):
pipe = pipeline(
"fill-mask",
model="hf-internal-testing/tiny-random-distilbert",
device=torch_device,
)
# convert model to fp16
pipe.model.half()
response = pipe("Paris is the [MASK] of France.")
# We actually don't care about the result, we just want to make sure
# it works, meaning the float16 tensor got casted back to float32
# for postprocessing.
self.assertIsInstance(response, list)
@slow
@require_torch
def test_large_model_pt(self):
unmasker = pipeline(task="fill-mask", model="distilbert/distilroberta-base", top_k=2)
self.run_large_test(unmasker)
def run_large_test(self, unmasker):
outputs = unmasker("My name is <mask>")
self.assertEqual(
nested_simplify(outputs),
[
{"sequence": "My name is John", "score": 0.008, "token": 610, "token_str": " John"},
{"sequence": "My name is Chris", "score": 0.007, "token": 1573, "token_str": " Chris"},
],
)
outputs = unmasker("The largest city in France is <mask>")
self.assertEqual(
nested_simplify(outputs),
[
{
"sequence": "The largest city in France is Paris",
"score": 0.251,
"token": 2201,
"token_str": " Paris",
},
{
"sequence": "The largest city in France is Lyon",
"score": 0.214,
"token": 12790,
"token_str": " Lyon",
},
],
)
outputs = unmasker("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3)
self.assertEqual(
nested_simplify(outputs),
[
{"sequence": "My name is Patrick", "score": 0.005, "token": 3499, "token_str": " Patrick"},
{"sequence": "My name is Clara", "score": 0.000, "token": 13606, "token_str": " Clara"},
{"sequence": "My name is Te", "score": 0.000, "token": 2941, "token_str": " Te"},
],
)
dummy_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit," * 100
outputs = unmasker(
"My name is <mask>" + dummy_str,
tokenizer_kwargs={"truncation": True},
)
simplified = nested_simplify(outputs, decimals=4)
self.assertEqual(
[{"sequence": x["sequence"][:100]} for x in simplified],
[
{"sequence": f"My name is,{dummy_str}"[:100]},
{"sequence": f"My name is:,{dummy_str}"[:100]},
],
)
self.assertEqual(
[{k: x[k] for k in x if k != "sequence"} for x in simplified],
[
{"score": 0.2819, "token": 6, "token_str": ","},
{"score": 0.0954, "token": 46686, "token_str": ":,"},
],
)
@require_torch
def test_model_no_pad_pt(self):
unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base")
unmasker.tokenizer.pad_token_id = None
unmasker.tokenizer.pad_token = None
self.run_pipeline_test(unmasker, [])
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
if tokenizer is None or tokenizer.mask_token_id is None:
self.skipTest(reason="The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
fill_masker = FillMaskPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
examples = [
f"This is another {tokenizer.mask_token} test",
]
return fill_masker, examples
def run_pipeline_test(self, fill_masker, examples):
tokenizer = fill_masker.tokenizer
model = fill_masker.model
outputs = fill_masker(
f"This is a {tokenizer.mask_token}",
)
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
outputs = fill_masker([f"This is a {tokenizer.mask_token}"])
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
outputs = fill_masker([f"This is a {tokenizer.mask_token}", f"Another {tokenizer.mask_token} great test."])
self.assertEqual(
outputs,
[
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
],
)
with self.assertRaises(ValueError):
fill_masker([None])
# No mask_token is not supported
with self.assertRaises(PipelineException):
fill_masker("This is")
self.run_test_top_k(model, tokenizer)
self.run_test_targets(model, tokenizer)
self.run_test_top_k_targets(model, tokenizer)
self.fill_mask_with_duplicate_targets_and_top_k(model, tokenizer)
self.fill_mask_with_multiple_masks(model, tokenizer)
def run_test_targets(self, model, tokenizer):
vocab = tokenizer.get_vocab()
targets = sorted(vocab.keys())[:2]
# Pipeline argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, targets=targets)
outputs = fill_masker(f"This is a {tokenizer.mask_token}")
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
target_ids = {vocab[el] for el in targets}
self.assertEqual({el["token"] for el in outputs}, target_ids)
processed_targets = [tokenizer.decode([x]) for x in target_ids]
self.assertEqual({el["token_str"] for el in outputs}, set(processed_targets))
# Call argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
target_ids = {vocab[el] for el in targets}
self.assertEqual({el["token"] for el in outputs}, target_ids)
processed_targets = [tokenizer.decode([x]) for x in target_ids]
self.assertEqual({el["token_str"] for el in outputs}, set(processed_targets))
# Score equivalence
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
tokens = [top_mask["token_str"] for top_mask in outputs]
scores = [top_mask["score"] for top_mask in outputs]
# For some BPE tokenizers, `</w>` is removed during decoding, so `token_str` won't be the same as in `targets`.
if set(tokens) == set(targets):
unmasked_targets = fill_masker(f"This is a {tokenizer.mask_token}", targets=tokens)
target_scores = [top_mask["score"] for top_mask in unmasked_targets]
self.assertEqual(nested_simplify(scores), nested_simplify(target_scores))
# Raises with invalid
with self.assertRaises(ValueError):
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[])
# For some tokenizers, `""` is actually in the vocabulary and the expected error won't raised
if "" not in tokenizer.get_vocab():
with self.assertRaises(ValueError):
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[""])
with self.assertRaises(ValueError):
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets="")
def run_test_top_k(self, model, tokenizer):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, top_k=2)
outputs = fill_masker(f"This is a {tokenizer.mask_token}")
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
outputs2 = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2)
self.assertEqual(
outputs2,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
self.assertEqual(nested_simplify(outputs), nested_simplify(outputs2))
def run_test_top_k_targets(self, model, tokenizer):
vocab = tokenizer.get_vocab()
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
# top_k=2, ntargets=3
targets = sorted(vocab.keys())[:3]
outputs = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2, targets=targets)
# If we use the most probably targets, and filter differently, we should still
# have the same results
targets2 = [el["token_str"] for el in sorted(outputs, key=lambda x: x["score"], reverse=True)]
# For some BPE tokenizers, `</w>` is removed during decoding, so `token_str` won't be the same as in `targets`.
if set(targets2).issubset(targets):
outputs2 = fill_masker(f"This is a {tokenizer.mask_token}", top_k=3, targets=targets2)
# They should yield exactly the same result
self.assertEqual(nested_simplify(outputs), nested_simplify(outputs2))
def fill_mask_with_duplicate_targets_and_top_k(self, model, tokenizer):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
vocab = tokenizer.get_vocab()
# String duplicates + id duplicates
targets = sorted(vocab.keys())[:3]
targets = [targets[0], targets[1], targets[0], targets[2], targets[1]]
outputs = fill_masker(f"My name is {tokenizer.mask_token}", targets=targets, top_k=10)
# The target list contains duplicates, so we can't output more
# than them
self.assertEqual(len(outputs), 3)
def fill_mask_with_multiple_masks(self, model, tokenizer):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
outputs = fill_masker(
f"This is a {tokenizer.mask_token} {tokenizer.mask_token} {tokenizer.mask_token}", top_k=2
)
self.assertEqual(
outputs,
[
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
],
)

View File

@@ -0,0 +1,297 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from huggingface_hub import ImageClassificationOutputElement
from transformers import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
PreTrainedTokenizerBase,
is_torch_available,
is_vision_available,
)
from transformers.pipelines import ImageClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
@is_pipeline_test
@require_torch
@require_vision
class ImageClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls._dataset is None:
# we use revision="refs/pr/1" until the PR is merged
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
cls._dataset = datasets.load_dataset(
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
image_classifier = ImageClassificationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
top_k=2,
)
examples = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
return image_classifier, examples
def run_pipeline_test(self, image_classifier, examples):
self._load_dataset()
outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
self.assertEqual(
outputs,
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
)
# Accepts URL + PIL.Image + lists
outputs = image_classifier(
[
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
# RGBA
self._dataset[0]["image"],
# LA
self._dataset[1]["image"],
# L
self._dataset[2]["image"],
]
)
self.assertEqual(
outputs,
[
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
],
)
for single_output in outputs:
for output_element in single_output:
compare_pipeline_output_to_hub_spec(output_element, ImageClassificationOutputElement)
@require_torch
def test_small_model_pt(self):
small_model = "hf-internal-testing/tiny-random-vit"
image_classifier = pipeline("image-classification", model=small_model)
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
)
outputs = image_classifier(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
],
top_k=2,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
[{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
],
)
def test_custom_tokenizer(self):
tokenizer = PreTrainedTokenizerBase()
# Assert that the pipeline can be initialized with a feature extractor that is not in any mapping
image_classifier = pipeline(
"image-classification", model="hf-internal-testing/tiny-random-vit", tokenizer=tokenizer
)
self.assertIs(image_classifier.tokenizer, tokenizer)
@require_torch
def test_torch_float16_pipeline(self):
image_classifier = pipeline(
"image-classification", model="hf-internal-testing/tiny-random-vit", dtype=torch.float16
)
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=3),
[{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
)
@require_torch
def test_torch_bfloat16_pipeline(self):
image_classifier = pipeline(
"image-classification", model="hf-internal-testing/tiny-random-vit", dtype=torch.bfloat16
)
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=3),
[{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
)
@slow
@require_torch
def test_perceiver(self):
# Perceiver is not tested by `run_pipeline_test` properly.
# That is because the type of feature_extractor and model preprocessor need to be kept
# in sync, which is not the case in the current design
image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-conv")
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.4385, "label": "tabby, tabby cat"},
{"score": 0.321, "label": "tiger cat"},
{"score": 0.0502, "label": "Egyptian cat"},
{"score": 0.0137, "label": "crib, cot"},
{"score": 0.007, "label": "radiator"},
],
)
image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-fourier")
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.5658, "label": "tabby, tabby cat"},
{"score": 0.1309, "label": "tiger cat"},
{"score": 0.0722, "label": "Egyptian cat"},
{"score": 0.0707, "label": "remote control, remote"},
{"score": 0.0082, "label": "computer keyboard, keypad"},
],
)
image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-learned")
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.3022, "label": "tabby, tabby cat"},
{"score": 0.2362, "label": "Egyptian cat"},
{"score": 0.1856, "label": "tiger cat"},
{"score": 0.0324, "label": "remote control, remote"},
{"score": 0.0096, "label": "quilt, comforter, comfort, puff"},
],
)
@slow
@require_torch
def test_multilabel_classification(self):
small_model = "hf-internal-testing/tiny-random-vit"
# Sigmoid is applied for multi-label classification
image_classifier = pipeline("image-classification", model=small_model)
image_classifier.model.config.problem_type = "multi_label_classification"
outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[{"label": "LABEL_1", "score": 0.5356}, {"label": "LABEL_0", "score": 0.4612}],
)
outputs = image_classifier(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[{"label": "LABEL_1", "score": 0.5356}, {"label": "LABEL_0", "score": 0.4612}],
[{"label": "LABEL_1", "score": 0.5356}, {"label": "LABEL_0", "score": 0.4612}],
],
)
@slow
@require_torch
def test_function_to_apply(self):
small_model = "hf-internal-testing/tiny-random-vit"
# Sigmoid is applied for multi-label classification
image_classifier = pipeline("image-classification", model=small_model)
outputs = image_classifier(
"http://images.cocodataset.org/val2017/000000039769.jpg",
function_to_apply="sigmoid",
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[{"label": "LABEL_1", "score": 0.5356}, {"label": "LABEL_0", "score": 0.4612}],
)

View File

@@ -0,0 +1,139 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import pytest
from transformers import (
MODEL_MAPPING,
TOKENIZER_MAPPING,
ImageFeatureExtractionPipeline,
is_torch_available,
is_vision_available,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
# We will verify our results on an image of cute cats
def prepare_img():
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
return image
@is_pipeline_test
class ImageFeatureExtractionPipelineTests(unittest.TestCase):
model_mapping = MODEL_MAPPING
@require_torch
def test_small_model_pt(self):
feature_extractor = pipeline(task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit")
img = prepare_img()
outputs = feature_extractor(img)
self.assertEqual(
nested_simplify(outputs[0][0]),
[-1.417, -0.392, -1.264, -1.196, 1.648, 0.885, 0.56, -0.606, -1.175, 0.823, 1.912, 0.081, -0.053, 1.119, -0.062, -1.757, -0.571, 0.075, 0.959, 0.118, 1.201, -0.672, -0.498, 0.364, 0.937, -1.623, 0.228, 0.19, 1.697, -1.115, 0.583, -0.981]) # fmt: skip
@require_torch
def test_small_model_w_pooler_pt(self):
feature_extractor = pipeline(
task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit-w-pooler"
)
img = prepare_img()
outputs = feature_extractor(img, pool=True)
self.assertEqual(
nested_simplify(outputs[0]),
[-0.056, 0.083, 0.021, 0.038, 0.242, -0.279, -0.033, -0.003, 0.200, -0.192, 0.045, -0.095, -0.077, 0.017, -0.058, -0.063, -0.029, -0.204, 0.014, 0.042, 0.305, -0.205, -0.099, 0.146, -0.287, 0.020, 0.168, -0.052, 0.046, 0.048, -0.156, 0.093]) # fmt: skip
@require_torch
def test_image_processing_small_model_pt(self):
feature_extractor = pipeline(task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit")
# test with image processor parameters
image_processor_kwargs = {"size": {"height": 300, "width": 300}}
img = prepare_img()
with pytest.raises(ValueError):
# Image doesn't match model input size
feature_extractor(img, image_processor_kwargs=image_processor_kwargs)
image_processor_kwargs = {"image_mean": [0, 0, 0], "image_std": [1, 1, 1]}
img = prepare_img()
outputs = feature_extractor(img, image_processor_kwargs=image_processor_kwargs)
self.assertEqual(np.squeeze(outputs).shape, (226, 32))
# Test pooling option
outputs = feature_extractor(img, pool=True)
self.assertEqual(np.squeeze(outputs).shape, (32,))
@require_torch
def test_return_tensors_pt(self):
feature_extractor = pipeline(task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit")
img = prepare_img()
outputs = feature_extractor(img, return_tensors=True)
self.assertTrue(torch.is_tensor(outputs))
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
if image_processor is None:
self.skipTest(reason="No image processor")
elif type(model.config) in TOKENIZER_MAPPING:
self.skipTest(
reason="This is a bimodal model, we need to find a more consistent way to switch on those models."
)
elif model.config.is_encoder_decoder:
self.skipTest(
"""encoder_decoder models are trickier for this pipeline.
Do we want encoder + decoder inputs to get some features?
Do we want encoder only features ?
For now ignore those.
"""
)
feature_extractor_pipeline = ImageFeatureExtractionPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
img = prepare_img()
return feature_extractor_pipeline, [img, img]
def run_pipeline_test(self, feature_extractor, examples):
imgs = examples
outputs = feature_extractor(imgs[0])
self.assertEqual(len(outputs), 1)
outputs = feature_extractor(imgs)
self.assertEqual(len(outputs), 2)

View File

@@ -0,0 +1,763 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import tempfile
import unittest
import datasets
import httpx
import numpy as np
from datasets import load_dataset
from huggingface_hub import ImageSegmentationOutputElement
from huggingface_hub.utils import insecure_hashlib
from transformers import (
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
AutoImageProcessor,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
DetrForSegmentation,
ImageSegmentationPipeline,
MaskFormerForInstanceSegmentation,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_timm,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
def hashimage(image: Image) -> str:
m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()[:10]
def mask_to_test_readable(mask: Image) -> dict:
npimg = np.array(mask)
white_pixels = (npimg == 255).sum()
shape = npimg.shape
return {"hash": hashimage(mask), "white_pixels": white_pixels, "shape": shape}
def mask_to_test_readable_only_shape(mask: Image) -> dict:
npimg = np.array(mask)
shape = npimg.shape
return {"shape": shape}
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class ImageSegmentationPipelineTests(unittest.TestCase):
model_mapping = dict(
(list(MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items()) if MODEL_FOR_IMAGE_SEGMENTATION_MAPPING else [])
+ (MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() if MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING else [])
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
)
_dataset = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls._dataset is None:
# we use revision="refs/pr/1" until the PR is merged
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
cls._dataset = datasets.load_dataset(
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
image_segmenter = ImageSegmentationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
return image_segmenter, [
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
]
def run_pipeline_test(self, image_segmenter, examples):
self._load_dataset()
outputs = image_segmenter(
"./tests/fixtures/tests_samples/COCO/000000039769.png",
threshold=0.0,
mask_threshold=0,
overlap_mask_area_threshold=0,
)
self.assertIsInstance(outputs, list)
n = len(outputs)
if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation, DetrForSegmentation)):
# Instance segmentation (maskformer, and detr) have a slot for null class
# and can output nothing even with a low threshold
self.assertGreaterEqual(n, 0)
else:
self.assertGreaterEqual(n, 1)
# XXX: PIL.Image implements __eq__ which bypasses ANY, so we inverse the comparison
# to make it work
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n, outputs)
# RGBA
outputs = image_segmenter(
self._dataset[0]["image"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0
)
m = len(outputs)
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
# LA
outputs = image_segmenter(
self._dataset[1]["image"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0
)
m = len(outputs)
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
# L
outputs = image_segmenter(
self._dataset[2]["image"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0
)
m = len(outputs)
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
if isinstance(image_segmenter.model, DetrForSegmentation):
# We need to test batch_size with images with the same size.
# Detr doesn't normalize the size of the images, meaning we can have
# 800x800 or 800x1200, meaning we cannot batch simply.
# We simply bail on this
batch_size = 1
else:
batch_size = 2
# 5 times the same image so the output shape is predictable
batch = [
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
]
outputs = image_segmenter(
batch,
threshold=0.0,
mask_threshold=0,
overlap_mask_area_threshold=0,
batch_size=batch_size,
)
self.assertEqual(len(batch), len(outputs))
self.assertEqual(len(outputs[0]), n)
self.assertEqual(
[
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
],
outputs,
f"Expected [{n}, {n}, {n}, {n}, {n}], got {[len(item) for item in outputs]}",
)
for single_output in outputs:
for output_element in single_output:
compare_pipeline_output_to_hub_spec(output_element, ImageSegmentationOutputElement)
@require_torch
def test_small_model_pt_no_panoptic(self):
model_id = "hf-internal-testing/tiny-random-mobilevit"
# The default task is `image-classification` we need to override
pipe = pipeline(task="image-segmentation", model=model_id)
# This model does NOT support neither `instance` nor `panoptic`
# We should error out
with self.assertRaises(ValueError) as e:
pipe("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic")
self.assertEqual(
str(e.exception),
"Subtask panoptic is not supported for model <class"
" 'transformers.models.mobilevit.modeling_mobilevit.MobileViTForSemanticSegmentation'>",
)
with self.assertRaises(ValueError) as e:
pipe("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="instance")
self.assertEqual(
str(e.exception),
"Subtask instance is not supported for model <class"
" 'transformers.models.mobilevit.modeling_mobilevit.MobileViTForSemanticSegmentation'>",
)
@require_torch
def test_small_model_pt(self):
model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
model = AutoModelForImageSegmentation.from_pretrained(model_id)
image_processor = AutoImageProcessor.from_pretrained(model_id)
image_segmenter = ImageSegmentationPipeline(
model=model,
image_processor=image_processor,
subtask="panoptic",
threshold=0.0,
mask_threshold=0.0,
overlap_mask_area_threshold=0.0,
)
outputs = image_segmenter(
"http://images.cocodataset.org/val2017/000000039769.jpg",
)
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
# This is extremely brittle, and those values are made specific for the CI.
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.004,
"label": "LABEL_215",
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
},
],
)
outputs = image_segmenter(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
],
)
for output in outputs:
for o in output:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{
"score": 0.004,
"label": "LABEL_215",
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
},
],
[
{
"score": 0.004,
"label": "LABEL_215",
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
},
],
],
)
output = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="instance")
for o in output:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(output, decimals=4),
[
{
"score": 0.004,
"label": "LABEL_215",
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
},
],
)
# This must be surprising to the reader.
# The `panoptic` returns only LABEL_215, and this returns 3 labels.
#
output = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="semantic")
output_masks = [o["mask"] for o in output]
# page links (to visualize)
expected_masks = [
"https://huggingface.co/datasets/hf-internal-testing/mask-for-image-segmentation-tests/blob/main/mask_0.png",
"https://huggingface.co/datasets/hf-internal-testing/mask-for-image-segmentation-tests/blob/main/mask_1.png",
"https://huggingface.co/datasets/hf-internal-testing/mask-for-image-segmentation-tests/blob/main/mask_2.png",
]
# actual links to get files
expected_masks = [x.replace("/blob/", "/resolve/") for x in expected_masks]
expected_masks = [
Image.open(io.BytesIO(httpx.get(image, follow_redirects=True).content)) for image in expected_masks
]
# Convert masks to numpy array
output_masks = [np.array(x) for x in output_masks]
expected_masks = [np.array(x) for x in expected_masks]
self.assertEqual(output_masks[0].shape, expected_masks[0].shape)
self.assertEqual(output_masks[1].shape, expected_masks[1].shape)
self.assertEqual(output_masks[2].shape, expected_masks[2].shape)
# With un-trained tiny random models, the output `logits` tensor is very likely to contain many values
# close to each other, which cause `argmax` to give quite different results when running the test on 2
# environments. We use a lower threshold `0.9` here to avoid flakiness.
self.assertGreaterEqual(np.mean(output_masks[0] == expected_masks[0]), 0.9)
self.assertGreaterEqual(np.mean(output_masks[1] == expected_masks[1]), 0.9)
self.assertGreaterEqual(np.mean(output_masks[2] == expected_masks[2]), 0.9)
for o in output:
o["mask"] = mask_to_test_readable_only_shape(o["mask"])
self.maxDiff = None
self.assertEqual(
nested_simplify(output, decimals=4),
[
{
"label": "LABEL_88",
"mask": {"shape": (480, 640)},
"score": None,
},
{
"label": "LABEL_101",
"mask": {"shape": (480, 640)},
"score": None,
},
{
"label": "LABEL_215",
"mask": {"shape": (480, 640)},
"score": None,
},
],
)
@require_torch
def test_small_model_pt_semantic(self):
model_id = "hf-internal-testing/tiny-random-beit-pipeline"
image_segmenter = pipeline(model=model_id)
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
for o in outputs:
# shortening by hashing
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": None,
"label": "LABEL_0",
"mask": {"hash": "42d0907228", "shape": (480, 640), "white_pixels": 10714},
},
{
"score": None,
"label": "LABEL_1",
"mask": {"hash": "46b8cc3976", "shape": (480, 640), "white_pixels": 296486},
},
],
)
@require_torch
@slow
def test_integration_torch_image_segmentation(self):
model_id = "facebook/detr-resnet-50-panoptic"
image_segmenter = pipeline(
"image-segmentation",
model=model_id,
threshold=0.0,
overlap_mask_area_threshold=0.0,
)
outputs = image_segmenter(
"http://images.cocodataset.org/val2017/000000039769.jpg",
)
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.9094,
"label": "blanket",
"mask": {"hash": "dcff19a97a", "shape": (480, 640), "white_pixels": 16617},
},
{
"score": 0.9941,
"label": "cat",
"mask": {"hash": "9c0af87bd0", "shape": (480, 640), "white_pixels": 59185},
},
{
"score": 0.9987,
"label": "remote",
"mask": {"hash": "c7870600d6", "shape": (480, 640), "white_pixels": 4182},
},
{
"score": 0.9995,
"label": "remote",
"mask": {"hash": "ef899a25fd", "shape": (480, 640), "white_pixels": 2275},
},
{
"score": 0.9722,
"label": "couch",
"mask": {"hash": "37b8446ac5", "shape": (480, 640), "white_pixels": 172380},
},
{
"score": 0.9994,
"label": "cat",
"mask": {"hash": "6a09d3655e", "shape": (480, 640), "white_pixels": 52561},
},
],
)
outputs = image_segmenter(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
],
)
# Shortening by hashing
for output in outputs:
for o in output:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{
"score": 0.9094,
"label": "blanket",
"mask": {"hash": "dcff19a97a", "shape": (480, 640), "white_pixels": 16617},
},
{
"score": 0.9941,
"label": "cat",
"mask": {"hash": "9c0af87bd0", "shape": (480, 640), "white_pixels": 59185},
},
{
"score": 0.9987,
"label": "remote",
"mask": {"hash": "c7870600d6", "shape": (480, 640), "white_pixels": 4182},
},
{
"score": 0.9995,
"label": "remote",
"mask": {"hash": "ef899a25fd", "shape": (480, 640), "white_pixels": 2275},
},
{
"score": 0.9722,
"label": "couch",
"mask": {"hash": "37b8446ac5", "shape": (480, 640), "white_pixels": 172380},
},
{
"score": 0.9994,
"label": "cat",
"mask": {"hash": "6a09d3655e", "shape": (480, 640), "white_pixels": 52561},
},
],
[
{
"score": 0.9094,
"label": "blanket",
"mask": {"hash": "dcff19a97a", "shape": (480, 640), "white_pixels": 16617},
},
{
"score": 0.9941,
"label": "cat",
"mask": {"hash": "9c0af87bd0", "shape": (480, 640), "white_pixels": 59185},
},
{
"score": 0.9987,
"label": "remote",
"mask": {"hash": "c7870600d6", "shape": (480, 640), "white_pixels": 4182},
},
{
"score": 0.9995,
"label": "remote",
"mask": {"hash": "ef899a25fd", "shape": (480, 640), "white_pixels": 2275},
},
{
"score": 0.9722,
"label": "couch",
"mask": {"hash": "37b8446ac5", "shape": (480, 640), "white_pixels": 172380},
},
{
"score": 0.9994,
"label": "cat",
"mask": {"hash": "6a09d3655e", "shape": (480, 640), "white_pixels": 52561},
},
],
],
)
@require_torch
@slow
def test_threshold(self):
model_id = "facebook/detr-resnet-50-panoptic"
image_segmenter = pipeline("image-segmentation", model=model_id)
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.999)
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.9995,
"label": "remote",
"mask": {"hash": "d02404f578", "shape": (480, 640), "white_pixels": 2789},
},
{
"score": 0.9994,
"label": "cat",
"mask": {"hash": "eaa115b40c", "shape": (480, 640), "white_pixels": 304411},
},
],
)
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.5)
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.9941,
"label": "cat",
"mask": {"hash": "9c0af87bd0", "shape": (480, 640), "white_pixels": 59185},
},
{
"score": 0.9987,
"label": "remote",
"mask": {"hash": "c7870600d6", "shape": (480, 640), "white_pixels": 4182},
},
{
"score": 0.9995,
"label": "remote",
"mask": {"hash": "ef899a25fd", "shape": (480, 640), "white_pixels": 2275},
},
{
"score": 0.9722,
"label": "couch",
"mask": {"hash": "37b8446ac5", "shape": (480, 640), "white_pixels": 172380},
},
{
"score": 0.9994,
"label": "cat",
"mask": {"hash": "6a09d3655e", "shape": (480, 640), "white_pixels": 52561},
},
],
)
@require_torch
@slow
def test_maskformer(self):
threshold = 0.8
model_id = "facebook/maskformer-swin-base-ade"
model = AutoModelForInstanceSegmentation.from_pretrained(model_id)
image_processor = AutoImageProcessor.from_pretrained(model_id)
image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = ds[0]["image"].convert("RGB")
outputs = image_segmenter(image, threshold=threshold)
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.9974,
"label": "wall",
"mask": {"hash": "a547b7c062", "shape": (512, 683), "white_pixels": 14252},
},
{
"score": 0.949,
"label": "house",
"mask": {"hash": "0da9b7b38f", "shape": (512, 683), "white_pixels": 132177},
},
{
"score": 0.9995,
"label": "grass",
"mask": {"hash": "1d07ea0a26", "shape": (512, 683), "white_pixels": 53444},
},
{
"score": 0.9976,
"label": "tree",
"mask": {"hash": "6cdc97c7da", "shape": (512, 683), "white_pixels": 7944},
},
{
"score": 0.8239,
"label": "plant",
"mask": {"hash": "1ab4ce378f", "shape": (512, 683), "white_pixels": 4136},
},
{
"score": 0.9942,
"label": "road, route",
"mask": {"hash": "39c5d17be5", "shape": (512, 683), "white_pixels": 1941},
},
{
"score": 1.0,
"label": "sky",
"mask": {"hash": "a3756324a6", "shape": (512, 683), "white_pixels": 135802},
},
],
)
@require_torch
@slow
def test_oneformer(self):
image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = ds[0]["image"].convert("RGB")
outputs = image_segmenter(image, threshold=0.99)
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.9981,
"label": "grass",
"mask": {"hash": "3a92904d4c", "white_pixels": 118131, "shape": (512, 683)},
},
{
"score": 0.9992,
"label": "sky",
"mask": {"hash": "fa2300cc9a", "white_pixels": 231565, "shape": (512, 683)},
},
],
)
# Different task
outputs = image_segmenter(image, threshold=0.99, subtask="instance")
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": 0.9991,
"label": "sky",
"mask": {"hash": "8b1ffad016", "white_pixels": 230566, "shape": (512, 683)},
},
{
"score": 0.9981,
"label": "grass",
"mask": {"hash": "9bbdf83d3d", "white_pixels": 119130, "shape": (512, 683)},
},
],
)
# Different task
outputs = image_segmenter(image, subtask="semantic")
# Shortening by hashing
for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": None,
"label": "wall",
"mask": {"hash": "897fb20b7f", "white_pixels": 14506, "shape": (512, 683)},
},
{
"score": None,
"label": "building",
"mask": {"hash": "f2a68c63e4", "white_pixels": 125019, "shape": (512, 683)},
},
{
"score": None,
"label": "sky",
"mask": {"hash": "e0ca3a548e", "white_pixels": 135330, "shape": (512, 683)},
},
{
"score": None,
"label": "tree",
"mask": {"hash": "7c9544bcac", "white_pixels": 16263, "shape": (512, 683)},
},
{
"score": None,
"label": "road, route",
"mask": {"hash": "2c7704e491", "white_pixels": 2143, "shape": (512, 683)},
},
{
"score": None,
"label": "grass",
"mask": {"hash": "bf6c2867e0", "white_pixels": 53040, "shape": (512, 683)},
},
{
"score": None,
"label": "plant",
"mask": {"hash": "93c4b7199e", "white_pixels": 3335, "shape": (512, 683)},
},
{
"score": None,
"label": "house",
"mask": {"hash": "93ec419ad5", "white_pixels": 60, "shape": (512, 683)},
},
],
)
def test_save_load(self):
model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
model = AutoModelForImageSegmentation.from_pretrained(model_id)
image_processor = AutoImageProcessor.from_pretrained(model_id)
image_segmenter = pipeline(
task="image-segmentation",
model=model,
image_processor=image_processor,
)
with tempfile.TemporaryDirectory() as tmpdirname:
image_segmenter.save_pretrained(tmpdirname)
pipeline(task="image-segmentation", model=tmpdirname)

View File

@@ -0,0 +1,506 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import unittest
from transformers import MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING, is_vision_available
from transformers.pipelines import ImageTextToTextPipeline, pipeline
from transformers.testing_utils import (
Expectations,
is_pipeline_test,
require_deterministic_for_xpu,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
@is_pipeline_test
@require_vision
class ImageTextToTextPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
def get_test_pipeline(self, model, tokenizer, processor, image_processor, dtype="float32"):
pipe = ImageTextToTextPipeline(model=model, processor=processor, dtype=dtype, max_new_tokens=10)
image_token = getattr(processor.tokenizer, "image_token", "")
examples = [
{
"images": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"text": f"{image_token}This is a ",
},
{
"images": "./tests/fixtures/tests_samples/COCO/000000039769.png",
"text": f"{image_token}Here I see a ",
},
]
return pipe, examples
def run_pipeline_test(self, pipe, examples):
outputs = pipe(examples[0].get("images"), text=examples[0].get("text"))
self.assertEqual(
outputs,
[
{"input_text": ANY(str), "generated_text": ANY(str)},
],
)
@require_torch
def test_small_model_pt_token_text_only(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
text = "What is the capital of France? Assistant:"
outputs = pipe(text=text)
self.assertEqual(
outputs,
[
{
"input_text": "What is the capital of France? Assistant:",
"generated_text": "What is the capital of France? Assistant: The capital of France is Paris.",
}
],
)
messages = [
[
{
"role": "user",
"content": [
{"type": "text", "text": "Write a poem on Hugging Face, the company"},
],
},
],
[
{
"role": "user",
"content": [
{"type": "text", "text": "What is the capital of France?"},
],
},
],
]
outputs = pipe(text=messages)
EXPECTED_CONTENT = Expectations(
{
(
"cuda",
8,
): "Hugging Face, a company of minds\nWith tools and services that make our lives easier\nFrom natural language processing\nTo machine learning and more, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and",
(
"rocm",
(9, 4),
): "Hugging Face, a company of minds\nWith tools and services that make our lives easier\nFrom natural language processing\nTo machine learning and more, they do it all\n\nThey help us to create and share\nContent that is both true and true\nAnd make the world a better place\nWith their tools and services, we can do it all\n\nFrom image and video to text and speech\nThey make it all possible\nWith their tools and services, we can do it all\nAnd make the world a better place\n\nSo let us embrace and use\nHugging Face's tools and services\nTo create and share\nContent that is true and true\nAnd make the world a better place.",
(
"xpu",
3,
): "Hugging Face, a company of minds\nWith tools and services that make our lives easier\nFrom natural language processing\nTo machine learning and more, they do it all\n\nThey help us to create and share\nContent that's both engaging and informative\nWith their tools, we can write\nAnd create stories that are both true and true\n\nThey help us to analyze\nAnd make sense of data that's hard to see\nWith their tools, we can see\nAnd make sense of the world around us\n\nThey help us to create\nAnd share content that's both true and true\nWith their tools, we can see\nAnd make sense of the world around us\n\nSo here's to Hugging Face, a company of minds\nWith tools and services that make our lives easier\nFrom natural language processing\nTo machine learning and more, they do it all\n\nThank you, Hugging Face, for all you do\nWith tools and services that make our lives easier\nSo here's to you, and all the great things you do",
}
).get_expectation()
self.assertEqual(
outputs,
[
[
{
"input_text": [
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
}
],
"generated_text": [
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
},
{
"role": "assistant",
"content": EXPECTED_CONTENT,
},
],
}
],
[
{
"input_text": [
{
"role": "user",
"content": [{"type": "text", "text": "What is the capital of France?"}],
}
],
"generated_text": [
{
"role": "user",
"content": [{"type": "text", "text": "What is the capital of France?"}],
},
{"role": "assistant", "content": "Paris"},
],
}
],
],
)
@require_torch
@require_deterministic_for_xpu
def test_small_model_pt_token(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
text = "<image> What this is? Assistant: This is"
outputs = pipe(image, text=text)
EXPECTED_CONTENT = Expectations(
{
(
"cuda",
8,
): "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are sleeping and appear to be comfortable. The photo captures a moment of tranquility and companionship between the two feline friends.",
(
"rocm",
(9, 4),
): "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are facing the camera, and they appear to be sleeping or resting. The blanket is placed on a couch, and the cats are positioned in such a way that they are facing the camera. The image captures a peaceful moment between the two cats, and it's a great way to showcase their cuteness and relaxed demeanor.",
(
"xpu",
3,
): "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are facing the camera, and they appear to be sleeping or resting. The blanket is placed on a surface that looks like a couch or a chair, and it is covered with a soft fabric. The cats' fur is a mix of black, white, and brown, and they have a variety of patterns on their bodies. The image captures a moment of tranquility and companionship between the cats.",
}
).get_expectation()
self.assertEqual(
outputs,
[
{
"input_text": "<image> What this is? Assistant: This is",
"generated_text": EXPECTED_CONTENT,
}
],
)
outputs = pipe([image, image], text=[text, text])
EXPECTED_CONTENT = Expectations(
{
(
"cuda",
8,
): "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are facing the camera, and they appear to be sleeping or resting. The blanket is placed on a couch, and the cats are positioned in such a way that they are facing the camera. The image captures a peaceful moment between the two cats, and it's a great way to showcase their cuteness and relaxed demeanor.",
(
"rocm",
(9, 4),
): "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are facing the camera, and they appear to be sleeping or resting. The blanket is placed on a couch, and the overall setting is cozy and comfortable.",
(
"xpu",
3,
): "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are facing the camera, and they appear to be sleeping or resting. The blanket is placed on a surface that looks like a couch or a chair, and it is covered with a soft fabric. The cats' fur is a mix of black, white, and brown, and they have a variety of patterns on their bodies. The image captures a moment of tranquility and companionship between the cats.",
}
).get_expectation()
self.assertEqual(
outputs,
[
{
"input_text": "<image> What this is? Assistant: This is",
"generated_text": EXPECTED_CONTENT,
},
{
"input_text": "<image> What this is? Assistant: This is",
"generated_text": EXPECTED_CONTENT,
},
],
)
@require_torch
def test_consistent_batching_behaviour(self):
pipe = pipeline("image-text-to-text", model="microsoft/kosmos-2-patch14-224")
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
prompt = "a photo of"
outputs = pipe([image, image], text=[prompt, prompt], max_new_tokens=10)
outputs_batched = pipe([image, image], text=[prompt, prompt], batch_size=2, max_new_tokens=10)
self.assertEqual(outputs, outputs_batched)
@slow
@require_torch
def test_model_pt_chat_template_with_response_parsing(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "What's the difference between these two images?"},
{
"type": "image",
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
},
{
"type": "image",
"url": "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg",
},
],
}
]
pipe.tokenizer.response_schema = {
# A real response schema should probably have things like "role" and "content"
# and "reasoning_content" but it's unlikely we'd get a tiny model to reliably
# output anything like that, so let's keep it simple.
"type": "object",
"properties": {
"first_word": {"type": "string", "x-regex": r"^\s*([a-zA-Z]+)"},
"last_word": {"type": "string", "x-regex": r"([a-zA-Z]+)\s*$"},
},
}
outputs = pipe(text=messages, do_sample=False, max_new_tokens=10)
parsed_message = outputs[0]["generated_text"][-1]
# The parsed message should be a dict with the schema keys, not {"role": "assistant", "content": ...}
self.assertIn("first_word", parsed_message)
self.assertIn("last_word", parsed_message)
self.assertNotIn("role", parsed_message)
self.assertIsInstance(parsed_message["first_word"], str)
self.assertIsInstance(parsed_message["last_word"], str)
@slow
@require_torch
def test_model_pt_chat_template(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
image_ny = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
image_chicago = "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg"
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Whats the difference between these two images?"},
{"type": "image"},
{"type": "image"},
],
}
]
# Deprecated behavior should raise an error after v5
with self.assertRaises(ValueError):
outputs = pipe([image_ny, image_chicago], text=messages, return_full_text=True, max_new_tokens=10)
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Whats the difference between these two images?"},
{"type": "image", "url": image_ny},
{"type": "image", "url": image_chicago},
],
}
]
outputs = pipe(text=messages, return_full_text=True, max_new_tokens=10)
EXPECTED_CONTENT = Expectations(
{
("rocm", (9, 4)): "The first image shows a statue of the Statue of",
("cuda", 8): "The first image shows a statue of Liberty in the",
("xpu", 3): "The first image shows a statue of Liberty in the",
}
).get_expectation()
self.assertEqual(
outputs,
[
{
"input_text": [
{
"role": "user",
"content": [
{"type": "text", "text": "Whats the difference between these two images?"},
{
"type": "image",
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
},
{
"type": "image",
"url": "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg",
},
],
}
],
"generated_text": [
{
"role": "user",
"content": [
{"type": "text", "text": "Whats the difference between these two images?"},
{
"type": "image",
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
},
{
"type": "image",
"url": "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg",
},
],
},
{
"role": "assistant",
"content": EXPECTED_CONTENT,
},
],
}
],
)
@slow
@require_torch
def test_model_pt_chat_template_continue_final_message(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
messages = [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "Describe this image."},
],
},
{
"role": "assistant",
"content": [
{"type": "text", "text": "There is a dog and"},
],
},
]
outputs = pipe(text=messages, max_new_tokens=10)
self.assertEqual(
outputs,
[
{
"input_text": [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "Describe this image."},
],
},
{"role": "assistant", "content": [{"type": "text", "text": "There is a dog and"}]},
],
"generated_text": [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "Describe this image."},
],
},
{
"role": "assistant",
"content": [
{
"type": "text",
"text": "There is a dog and a person in the image. The dog is sitting",
}
],
},
],
}
],
)
@slow
@require_torch
def test_model_pt_chat_template_new_text(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
messages = [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "Describe this image."},
],
}
]
outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)
self.assertEqual(
outputs,
[
{
"input_text": [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "Describe this image."},
],
}
],
"generated_text": "In the image, a woman is sitting on the",
}
],
)
@slow
@require_torch
def test_model_pt_chat_template_image_url(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
messages = [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
},
},
{"type": "text", "text": "Describe this image in one sentence."},
],
}
]
outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)[0]["generated_text"]
self.assertEqual(outputs, "A statue of liberty in the foreground of a city")
@slow
@require_torch
def test_model_pt_chat_template_image_url_base64(self):
with open("./tests/fixtures/tests_samples/COCO/000000039769.png", "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode("utf-8")
pipe = pipeline("image-text-to-text", model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
messages = [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
},
{"type": "text", "text": "Describe this image in one sentence."},
],
}
]
outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)[0]["generated_text"]
self.assertEqual(outputs, "Two cats are sleeping on a pink blanket, with")

View File

@@ -0,0 +1,193 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from transformers.models.auto.modeling_auto import MODEL_FOR_KEYPOINT_MATCHING_MAPPING
from transformers.pipelines import KeypointMatchingPipeline, pipeline
from transformers.testing_utils import (
is_pipeline_test,
is_vision_available,
require_torch,
require_vision,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
@is_pipeline_test
@require_torch
@require_vision
class KeypointMatchingPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_KEYPOINT_MATCHING_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls._dataset is None:
cls._dataset = datasets.load_dataset("hf-internal-testing/image-matching-dataset", split="train")
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
torch_dtype="float32",
):
image_matcher = KeypointMatchingPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
torch_dtype=torch_dtype,
)
examples = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
return image_matcher, examples
def run_pipeline_test(self, image_matcher, examples):
self._load_dataset()
outputs = image_matcher(
[
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
)
self.assertEqual(
outputs,
[
{
"keypoint_image_0": {"x": ANY(float), "y": ANY(float)},
"keypoint_image_1": {"x": ANY(float), "y": ANY(float)},
"score": ANY(float),
}
]
* 2, # 2 matches per image pair
)
# Accepts URL + PIL.Image + lists
outputs = image_matcher(
[
[
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
],
[self._dataset[0]["image"], self._dataset[1]["image"]],
[self._dataset[1]["image"], self._dataset[2]["image"]],
[self._dataset[2]["image"], self._dataset[0]["image"]],
]
)
self.assertEqual(
outputs,
[
[
{
"keypoint_image_0": {"x": ANY(float), "y": ANY(float)},
"keypoint_image_1": {"x": ANY(float), "y": ANY(float)},
"score": ANY(float),
}
]
* 2 # 2 matches per image pair
]
* 4, # 4 image pairs
)
@require_torch
def test_single_image(self):
self._load_dataset()
small_model = "magic-leap-community/superglue_outdoor"
image_matcher = pipeline("keypoint-matching", model=small_model)
with self.assertRaises(ValueError):
image_matcher(
self._dataset[0]["image"],
threshold=0.0,
)
with self.assertRaises(ValueError):
image_matcher(
[self._dataset[0]["image"]],
threshold=0.0,
)
@require_torch
def test_single_pair(self):
self._load_dataset()
small_model = "magic-leap-community/superglue_outdoor"
image_matcher = pipeline("keypoint-matching", model=small_model)
image_0: Image.Image = self._dataset[0]["image"]
image_1: Image.Image = self._dataset[1]["image"]
outputs = image_matcher((image_0, image_1), threshold=0.0)
output = outputs[0] # first match from image pair
self.assertAlmostEqual(output["keypoint_image_0"]["x"], 698, places=1)
self.assertAlmostEqual(output["keypoint_image_0"]["y"], 469, places=1)
self.assertAlmostEqual(output["keypoint_image_1"]["x"], 434, places=1)
self.assertAlmostEqual(output["keypoint_image_1"]["y"], 440, places=1)
self.assertAlmostEqual(output["score"], 0.9905, places=3)
@require_torch
def test_multiple_pairs(self):
self._load_dataset()
small_model = "magic-leap-community/superglue_outdoor"
image_matcher = pipeline("keypoint-matching", model=small_model)
image_0: Image.Image = self._dataset[0]["image"]
image_1: Image.Image = self._dataset[1]["image"]
image_2: Image.Image = self._dataset[2]["image"]
outputs = image_matcher(
[
(image_0, image_1),
(image_1, image_2),
(image_2, image_0),
],
threshold=1e-4,
)
# Test first pair (image_0, image_1)
output_0 = outputs[0][0] # First match from first pair
self.assertAlmostEqual(output_0["keypoint_image_0"]["x"], 698, places=1)
self.assertAlmostEqual(output_0["keypoint_image_0"]["y"], 469, places=1)
self.assertAlmostEqual(output_0["keypoint_image_1"]["x"], 434, places=1)
self.assertAlmostEqual(output_0["keypoint_image_1"]["y"], 440, places=1)
self.assertAlmostEqual(output_0["score"], 0.9905, places=3)
# Test second pair (image_1, image_2)
output_1 = outputs[1][0] # First match from second pair
self.assertAlmostEqual(output_1["keypoint_image_0"]["x"], 272, places=1)
self.assertAlmostEqual(output_1["keypoint_image_0"]["y"], 310, places=1)
self.assertAlmostEqual(output_1["keypoint_image_1"]["x"], 228, places=1)
self.assertAlmostEqual(output_1["keypoint_image_1"]["y"], 568, places=1)
self.assertAlmostEqual(output_1["score"], 0.9890, places=3)
# Test third pair (image_2, image_0)
output_2 = outputs[2][0] # First match from third pair
self.assertAlmostEqual(output_2["keypoint_image_0"]["x"], 385, places=1)
self.assertAlmostEqual(output_2["keypoint_image_0"]["y"], 677, places=1)
self.assertAlmostEqual(output_2["keypoint_image_1"]["x"], 689, places=1)
self.assertAlmostEqual(output_2["keypoint_image_1"]["y"], 351, places=1)
self.assertAlmostEqual(output_2["score"], 0.9900, places=3)

View File

@@ -0,0 +1,186 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from huggingface_hub.utils import insecure_hashlib
from transformers import (
MODEL_FOR_MASK_GENERATION_MAPPING,
is_torch_available,
is_vision_available,
pipeline,
)
from transformers.pipelines import MaskGenerationPipeline
from transformers.testing_utils import (
Expectations,
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
if is_torch_available():
from transformers import MODEL_FOR_MASK_GENERATION_MAPPING
else:
MODEL_FOR_MASK_GENERATION_MAPPING = None
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
def hashimage(image: Image) -> str:
m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()[:10]
def mask_to_test_readable(mask: Image) -> dict:
npimg = np.array(mask)
shape = npimg.shape
return {"hash": hashimage(mask), "shape": shape}
@is_pipeline_test
@require_vision
@require_torch
class MaskGenerationPipelineTests(unittest.TestCase):
model_mapping = dict(list(MODEL_FOR_MASK_GENERATION_MAPPING.items()) if MODEL_FOR_MASK_GENERATION_MAPPING else [])
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
image_segmenter = MaskGenerationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
return image_segmenter, [
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
]
@unittest.skip(reason="TODO @Arthur: Implement me")
def run_pipeline_test(self, mask_generator, examples):
pass
def test_preprocess_is_last(self):
mask_generator = pipeline("mask-generation", model="hf-internal-testing/tiny-random-SamModel")
mask_generator.image_processor.pad_size = {"height": 24, "width": 24}
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
for points_per_batch in (100, 64):
with self.subTest(points_per_batch=points_per_batch):
batches = list(mask_generator.preprocess(image, points_per_batch=points_per_batch))
self.assertTrue(batches[-1]["is_last"])
self.assertFalse(any(b["is_last"] for b in batches[:-1]))
@slow
@require_torch
def test_small_model_pt(self):
image_segmenter = pipeline("mask-generation", model="facebook/sam-vit-huge")
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", points_per_batch=256)
# Shortening by hashing
new_output = []
for i, o in enumerate(outputs["masks"]):
new_output += [{"mask": mask_to_test_readable(o), "scores": outputs["scores"][i]}]
# fmt: off
last_output = Expectations({
("xpu", None): {'mask': {'hash': 'b5f47c9191', 'shape': (480, 640)}, 'scores': 0.8872},
("cuda", None): {'mask': {'hash': 'b5f47c9191', 'shape': (480, 640)}, 'scores': 0.8871},
("rocm", (9, 5)): {'mask': {'hash': 'b5f47c9191', 'shape': (480, 640)}, 'scores': 0.8872}
}).get_expectation()
self.assertEqual(
nested_simplify(new_output, decimals=4),
[
{'mask': {'hash': '115ad19f5f', 'shape': (480, 640)}, 'scores': 1.0444},
{'mask': {'hash': '6affa964c6', 'shape': (480, 640)}, 'scores': 1.021},
{'mask': {'hash': 'dfe28a0388', 'shape': (480, 640)}, 'scores': 1.0167},
{'mask': {'hash': 'c0a5f4a318', 'shape': (480, 640)}, 'scores': 1.0132},
{'mask': {'hash': 'fe8065c197', 'shape': (480, 640)}, 'scores': 1.0053},
{'mask': {'hash': 'e2d0b7a0b7', 'shape': (480, 640)}, 'scores': 0.9967},
{'mask': {'hash': '453c7844bd', 'shape': (480, 640)}, 'scores': 0.993},
{'mask': {'hash': '3d44f2926d', 'shape': (480, 640)}, 'scores': 0.9909},
{'mask': {'hash': '64033ddc3f', 'shape': (480, 640)}, 'scores': 0.9879},
{'mask': {'hash': '801064ff79', 'shape': (480, 640)}, 'scores': 0.9834},
{'mask': {'hash': '6172f276ef', 'shape': (480, 640)}, 'scores': 0.9716},
{'mask': {'hash': 'b49e60e084', 'shape': (480, 640)}, 'scores': 0.9612},
{'mask': {'hash': 'a811e775fd', 'shape': (480, 640)}, 'scores': 0.9599},
{'mask': {'hash': 'a6a8ebcf4b', 'shape': (480, 640)}, 'scores': 0.9552},
{'mask': {'hash': '9d8257e080', 'shape': (480, 640)}, 'scores': 0.9532},
{'mask': {'hash': '32de6454a8', 'shape': (480, 640)}, 'scores': 0.9516},
{'mask': {'hash': 'af3d4af2c8', 'shape': (480, 640)}, 'scores': 0.9499},
{'mask': {'hash': '3c6db475fb', 'shape': (480, 640)}, 'scores': 0.9483},
{'mask': {'hash': 'c290813fb9', 'shape': (480, 640)}, 'scores': 0.9464},
{'mask': {'hash': 'b6f0b8f606', 'shape': (480, 640)}, 'scores': 0.943},
{'mask': {'hash': '92ce16bfdf', 'shape': (480, 640)}, 'scores': 0.943},
{'mask': {'hash': 'c749b25868', 'shape': (480, 640)}, 'scores': 0.9408},
{'mask': {'hash': 'efb6cab859', 'shape': (480, 640)}, 'scores': 0.9335},
{'mask': {'hash': '1ff2eafb30', 'shape': (480, 640)}, 'scores': 0.9326},
{'mask': {'hash': '788b798e24', 'shape': (480, 640)}, 'scores': 0.9262},
{'mask': {'hash': 'abea804f0e', 'shape': (480, 640)}, 'scores': 0.8999},
{'mask': {'hash': '7b9e8ddb73', 'shape': (480, 640)}, 'scores': 0.8986},
{'mask': {'hash': 'cd24047c8a', 'shape': (480, 640)}, 'scores': 0.8984},
{'mask': {'hash': '6943e6bcbd', 'shape': (480, 640)}, 'scores': 0.8873},
last_output
],
)
# fmt: on
@require_torch
@slow
def test_threshold(self):
model_id = "facebook/sam-vit-huge"
image_segmenter = pipeline("mask-generation", model=model_id)
outputs = image_segmenter(
"http://images.cocodataset.org/val2017/000000039769.jpg", pred_iou_thresh=1, points_per_batch=256
)
# Shortening by hashing
new_output = []
for i, o in enumerate(outputs["masks"]):
new_output += [{"mask": mask_to_test_readable(o), "scores": outputs["scores"][i]}]
self.assertEqual(
nested_simplify(new_output, decimals=4),
[
{"mask": {"hash": "115ad19f5f", "shape": (480, 640)}, "scores": 1.0444},
{"mask": {"hash": "6affa964c6", "shape": (480, 640)}, "scores": 1.0210},
{"mask": {"hash": "dfe28a0388", "shape": (480, 640)}, "scores": 1.0167},
{"mask": {"hash": "c0a5f4a318", "shape": (480, 640)}, "scores": 1.0132},
{"mask": {"hash": "fe8065c197", "shape": (480, 640)}, "scores": 1.0053},
],
)

View File

@@ -0,0 +1,297 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from huggingface_hub import ObjectDetectionOutputElement
from transformers import (
MODEL_FOR_OBJECT_DETECTION_MAPPING,
AutoImageProcessor,
AutoModelForObjectDetection,
ObjectDetectionPipeline,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_pytesseract,
require_timm,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class ObjectDetectionPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls._dataset is None:
# we use revision="refs/pr/1" until the PR is merged
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
cls._dataset = datasets.load_dataset(
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
object_detector = ObjectDetectionPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
def run_pipeline_test(self, object_detector, examples):
self._load_dataset()
outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
self.assertGreater(len(outputs), 0)
for detected_object in outputs:
self.assertEqual(
detected_object,
{
"score": ANY(float),
"label": ANY(str),
"box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
},
)
batch = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
# RGBA
self._dataset[0]["image"],
# LA
self._dataset[1]["image"],
# L
self._dataset[2]["image"],
]
batch_outputs = object_detector(batch, threshold=0.0)
self.assertEqual(len(batch), len(batch_outputs))
for outputs in batch_outputs:
self.assertGreater(len(outputs), 0)
for detected_object in outputs:
self.assertEqual(
detected_object,
{
"score": ANY(float),
"label": ANY(str),
"box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
},
)
compare_pipeline_output_to_hub_spec(detected_object, ObjectDetectionOutputElement)
@require_torch
def test_small_model_pt(self):
model_id = "hf-internal-testing/tiny-detr-mobilenetsv3"
model = AutoModelForObjectDetection.from_pretrained(model_id)
image_processor = AutoImageProcessor.from_pretrained(model_id)
object_detector = ObjectDetectionPipeline(model=model, image_processor=image_processor)
outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.0)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
{"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
],
)
outputs = object_detector(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
],
threshold=0.0,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
{"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
],
[
{"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
{"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
],
],
)
@require_torch
@slow
def test_large_model_pt(self):
model_id = "facebook/detr-resnet-50"
model = AutoModelForObjectDetection.from_pretrained(model_id)
image_processor = AutoImageProcessor.from_pretrained(model_id)
object_detector = ObjectDetectionPipeline(model=model, image_processor=image_processor)
outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
{"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
{"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
)
outputs = object_detector(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
{"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
{"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
[
{"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
{"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
{"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
],
)
@require_torch
@slow
def test_integration_torch_object_detection(self):
model_id = "facebook/detr-resnet-50"
object_detector = pipeline("object-detection", model=model_id)
outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
{"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
{"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
)
outputs = object_detector(
[
"http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
{"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
{"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
[
{"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
{"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
{"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
],
)
@require_torch
@slow
def test_threshold(self):
threshold = 0.9985
model_id = "facebook/detr-resnet-50"
object_detector = pipeline("object-detection", model=model_id)
outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=threshold)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
{"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
],
)
@require_torch
@require_pytesseract
@slow
def test_layoutlm(self):
model_id = "Narsil/layoutlmv3-finetuned-funsd"
threshold = 0.9993
object_detector = pipeline("object-detection", model=model_id, threshold=threshold)
outputs = object_detector(
"https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
{"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
],
)

View File

@@ -0,0 +1,373 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
AutoModelForTableQuestionAnswering,
AutoTokenizer,
TableQuestionAnsweringPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
slow,
)
@is_pipeline_test
class TQAPipelineTests(unittest.TestCase):
# Putting it there for consistency, but TQA do not have fast tokenizer
# which are needed to generate automatic tests
model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
@require_torch
def test_small_model_pt(self, dtype="float32"):
model_id = "lysandre/tiny-tapas-random-wtq"
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, dtype=dtype)
tokenizer = AutoTokenizer.from_pretrained(model_id)
self.assertIsInstance(model.config.aggregation_labels, dict)
self.assertIsInstance(model.config.no_aggregation_label_index, int)
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20)
outputs = table_querier(
table={
"actors": ["brad pitt", "leonardo di caprio", "george clooney"],
"age": ["56", "45", "59"],
"number of movies": ["87", "53", "69"],
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
},
query="how many movies has george clooney played in?",
)
self.assertEqual(
outputs,
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
)
outputs = table_querier(
table={
"actors": ["brad pitt", "leonardo di caprio", "george clooney"],
"age": ["56", "45", "59"],
"number of movies": ["87", "53", "69"],
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
},
query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
)
self.assertEqual(
outputs,
[
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
],
)
outputs = table_querier(
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
},
query=[
"What repository has the largest number of stars?",
"Given that the numbers of stars defines if a repository is active, what repository is the most"
" active?",
"What is the number of repositories?",
"What is the average number of stars?",
"What is the total amount of stars?",
],
)
self.assertEqual(
outputs,
[
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
],
)
with self.assertRaises(ValueError):
table_querier(query="What does it do with empty context ?", table=None)
with self.assertRaises(ValueError):
table_querier(query="What does it do with empty context ?", table="")
with self.assertRaises(ValueError):
table_querier(query="What does it do with empty context ?", table={})
with self.assertRaises(ValueError):
table_querier(
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
}
)
with self.assertRaises(ValueError):
table_querier(
query="",
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
},
)
with self.assertRaises(ValueError):
table_querier(
query=None,
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
},
)
@require_torch
def test_small_model_pt_fp16(self):
self.test_small_model_pt(dtype="float16")
@require_torch
def test_slow_tokenizer_sqa_pt(self, dtype="float32"):
model_id = "lysandre/tiny-tapas-random-sqa"
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, dtype=dtype)
tokenizer = AutoTokenizer.from_pretrained(model_id)
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20)
inputs = {
"table": {
"actors": ["brad pitt", "leonardo di caprio", "george clooney"],
"age": ["56", "45", "59"],
"number of movies": ["87", "53", "69"],
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
},
"query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
}
sequential_outputs = table_querier(**inputs, sequential=True)
batch_outputs = table_querier(**inputs, sequential=False)
self.assertEqual(len(sequential_outputs), 3)
self.assertEqual(len(batch_outputs), 3)
self.assertEqual(sequential_outputs[0], batch_outputs[0])
self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
# self.assertNotEqual(sequential_outputs[2], batch_outputs[2])
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20)
outputs = table_querier(
table={
"actors": ["brad pitt", "leonardo di caprio", "george clooney"],
"age": ["56", "45", "59"],
"number of movies": ["87", "53", "69"],
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
},
query="how many movies has george clooney played in?",
)
self.assertEqual(
outputs,
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
)
outputs = table_querier(
table={
"actors": ["brad pitt", "leonardo di caprio", "george clooney"],
"age": ["56", "45", "59"],
"number of movies": ["87", "53", "69"],
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
},
query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
)
self.assertEqual(
outputs,
[
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
],
)
outputs = table_querier(
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
},
query=[
"What repository has the largest number of stars?",
"Given that the numbers of stars defines if a repository is active, what repository is the most"
" active?",
"What is the number of repositories?",
"What is the average number of stars?",
"What is the total amount of stars?",
],
)
self.assertEqual(
outputs,
[
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
],
)
with self.assertRaises(ValueError):
table_querier(query="What does it do with empty context ?", table=None)
with self.assertRaises(ValueError):
table_querier(query="What does it do with empty context ?", table="")
with self.assertRaises(ValueError):
table_querier(query="What does it do with empty context ?", table={})
with self.assertRaises(ValueError):
table_querier(
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
}
)
with self.assertRaises(ValueError):
table_querier(
query="",
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
},
)
with self.assertRaises(ValueError):
table_querier(
query=None,
table={
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
},
)
@require_torch
def test_slow_tokenizer_sqa_pt_fp16(self):
self.test_slow_tokenizer_sqa_pt(dtype="float16")
@slow
@require_torch
def test_integration_wtq_pt(self, dtype="float32"):
table_querier = pipeline("table-question-answering", dtype=dtype)
data = {
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
}
queries = [
"What repository has the largest number of stars?",
"Given that the numbers of stars defines if a repository is active, what repository is the most active?",
"What is the number of repositories?",
"What is the average number of stars?",
"What is the total amount of stars?",
]
results = table_querier(data, queries)
expected_results = [
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
{
"answer": "COUNT > Transformers, Datasets, Tokenizers",
"coordinates": [(0, 0), (1, 0), (2, 0)],
"cells": ["Transformers", "Datasets", "Tokenizers"],
"aggregator": "COUNT",
},
{
"answer": "AVERAGE > 36542, 4512, 3934",
"coordinates": [(0, 1), (1, 1), (2, 1)],
"cells": ["36542", "4512", "3934"],
"aggregator": "AVERAGE",
},
{
"answer": "SUM > 36542, 4512, 3934",
"coordinates": [(0, 1), (1, 1), (2, 1)],
"cells": ["36542", "4512", "3934"],
"aggregator": "SUM",
},
]
self.assertListEqual(results, expected_results)
@slow
@require_torch
def test_integration_wtq_pt_fp16(self):
self.test_integration_wtq_pt(dtype="float16")
@slow
@require_torch
def test_integration_sqa_pt(self, dtype="float32"):
table_querier = pipeline(
"table-question-answering",
model="google/tapas-base-finetuned-sqa",
tokenizer="google/tapas-base-finetuned-sqa",
dtype=dtype,
)
data = {
"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
"Age": ["56", "45", "59"],
"Number of movies": ["87", "53", "69"],
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
}
queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
results = table_querier(data, queries, sequential=True)
expected_results = [
{"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},
{"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]},
{"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
]
self.assertListEqual(results, expected_results)
@slow
@require_torch
def test_integration_sqa_pt_fp16(self):
self.test_integration_sqa_pt(dtype="float16")
@slow
@require_torch
def test_large_model_pt_tapex(self, dtype="float32"):
model_id = "microsoft/tapex-large-finetuned-wtq"
table_querier = pipeline(
"table-question-answering",
model=model_id,
dtype=dtype,
)
data = {
"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
"Age": ["56", "45", "59"],
"Number of movies": ["87", "53", "69"],
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
}
queries = [
"How many movies has George Clooney played in?",
"How old is Mr Clooney ?",
"What's the date of birth of Leonardo ?",
]
results = table_querier(data, queries, sequential=True)
expected_results = [
{"answer": " 69"},
{"answer": " 59"},
{"answer": " 10 june 1996"},
]
self.assertListEqual(results, expected_results)

View File

@@ -0,0 +1,197 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TextClassificationPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
require_torch_bf16,
require_torch_fp16,
slow,
torch_device,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
class TextClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
if not hasattr(model_mapping, "is_dummy"):
model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
@require_torch
def test_small_model_pt(self):
text_classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")
outputs = text_classifier("This is great !")
self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
outputs = text_classifier("This is great !", top_k=2)
self.assertEqual(
nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}, {"label": "LABEL_1", "score": 0.496}]
)
outputs = text_classifier(["This is great !", "This is bad"], top_k=2)
self.assertEqual(
nested_simplify(outputs),
[
[{"label": "LABEL_0", "score": 0.504}, {"label": "LABEL_1", "score": 0.496}],
[{"label": "LABEL_0", "score": 0.504}, {"label": "LABEL_1", "score": 0.496}],
],
)
outputs = text_classifier("This is great !", top_k=1)
self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
# Do not apply any function to output for regression tasks
# hack: changing problem_type artificially (so keep this test at last)
text_classifier.model.config.problem_type = "regression"
outputs = text_classifier("This is great !")
self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.01}])
@require_torch
def test_accepts_torch_device(self):
text_classifier = pipeline(
task="text-classification",
model="hf-internal-testing/tiny-random-distilbert",
device=torch_device,
)
outputs = text_classifier("This is great !")
self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
@require_torch_fp16
def test_accepts_torch_fp16(self):
text_classifier = pipeline(
task="text-classification",
model="hf-internal-testing/tiny-random-distilbert",
device=torch_device,
dtype=torch.float16,
)
outputs = text_classifier("This is great !")
self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
@require_torch_bf16
def test_accepts_torch_bf16(self):
text_classifier = pipeline(
task="text-classification",
model="hf-internal-testing/tiny-random-distilbert",
device=torch_device,
dtype=torch.bfloat16,
)
outputs = text_classifier("This is great !")
self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
@slow
@require_torch
def test_pt_bert(self):
text_classifier = pipeline("text-classification")
outputs = text_classifier("This is great !")
self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 1.0}])
outputs = text_classifier("This is bad !")
self.assertEqual(nested_simplify(outputs), [{"label": "NEGATIVE", "score": 1.0}])
outputs = text_classifier("Birds are a type of animal")
self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
text_classifier = TextClassificationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
)
return text_classifier, ["HuggingFace is in", "This is another test"]
def run_pipeline_test(self, text_classifier, _):
model = text_classifier.model
# Small inputs because BartTokenizer tiny has maximum position embeddings = 22
valid_inputs = "HuggingFace is in"
outputs = text_classifier(valid_inputs)
self.assertEqual(nested_simplify(outputs), [{"label": ANY(str), "score": ANY(float)}])
self.assertTrue(outputs[0]["label"] in model.config.id2label.values())
valid_inputs = ["HuggingFace is in ", "Paris is in France"]
outputs = text_classifier(valid_inputs)
self.assertEqual(
nested_simplify(outputs),
[{"label": ANY(str), "score": ANY(float)}, {"label": ANY(str), "score": ANY(float)}],
)
self.assertTrue(outputs[0]["label"] in model.config.id2label.values())
self.assertTrue(outputs[1]["label"] in model.config.id2label.values())
# Forcing to get all results with `top_k=None`
# This is NOT the legacy format
outputs = text_classifier(valid_inputs, top_k=None)
N = len(model.config.id2label.values())
self.assertEqual(
nested_simplify(outputs),
[[{"label": ANY(str), "score": ANY(float)}] * N, [{"label": ANY(str), "score": ANY(float)}] * N],
)
valid_inputs = {"text": "HuggingFace is in ", "text_pair": "Paris is in France"}
outputs = text_classifier(valid_inputs)
self.assertEqual(
nested_simplify(outputs),
{"label": ANY(str), "score": ANY(float)},
)
self.assertTrue(outputs["label"] in model.config.id2label.values())
# This might be used a text pair, but tokenizer + pipe interaction
# makes it hard to understand that it's not using the pair properly
# https://github.com/huggingface/transformers/issues/17305
# We disabled this usage instead as it was outputting wrong outputs.
invalid_input = [["HuggingFace is in ", "Paris is in France"]]
with self.assertRaises(ValueError):
text_classifier(invalid_input)
# This used to be valid for doing text pairs
# We're keeping it working because of backward compatibility
outputs = text_classifier([[["HuggingFace is in ", "Paris is in France"]]])
self.assertEqual(
nested_simplify(outputs),
[{"label": ANY(str), "score": ANY(float)}],
)
self.assertTrue(outputs[0]["label"] in model.config.id2label.values())

View File

@@ -0,0 +1,617 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from unittest.mock import patch
from transformers import (
MODEL_FOR_CAUSAL_LM_MAPPING,
TextGenerationPipeline,
logging,
pipeline,
)
from transformers.testing_utils import (
CaptureLogger,
is_pipeline_test,
require_accelerate,
require_torch,
require_torch_accelerator,
torch_device,
)
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
class TextGenerationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
@require_torch
def test_small_model_pt(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-random-LlamaForCausalLM",
max_new_tokens=10,
)
# Using `do_sample=False` to force deterministic output
outputs = text_generator("This is a test", do_sample=False)
self.assertEqual(outputs, [{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}])
outputs = text_generator(["This is a test", "This is a second test"], do_sample=False)
self.assertEqual(
outputs,
[
[{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}],
[{"generated_text": "This is a second testкт MéxicoWSAnimImportдели Düsseld bootstrap learn user"}],
],
)
outputs = text_generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True)
self.assertEqual(
outputs,
[
{"generated_token_ids": ANY(list)},
{"generated_token_ids": ANY(list)},
],
)
@require_torch
def test_small_chat_model_pt(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
chat2 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a second test"},
]
outputs = text_generator(chat1, do_sample=False, max_new_tokens=10)
expected_chat1 = chat1 + [
{
"role": "assistant",
"content": " factors factors factors factors factors factors factors factors factors factors",
}
]
self.assertEqual(
outputs,
[
{"generated_text": expected_chat1},
],
)
outputs = text_generator([chat1, chat2], do_sample=False, max_new_tokens=10)
expected_chat2 = chat2 + [
{
"role": "assistant",
"content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
}
]
self.assertEqual(
outputs,
[
[{"generated_text": expected_chat1}],
[{"generated_text": expected_chat2}],
],
)
@require_torch
def test_small_chat_model_continue_final_message(self):
# Here we check that passing a chat that ends in an assistant message is handled correctly
# by continuing the final message rather than starting a new one
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
{"role": "assistant", "content": "This is"},
]
outputs = text_generator(chat1, do_sample=False, max_new_tokens=10)
# Assert that we continued the last message and there isn't a sneaky <|im_end|>
self.assertEqual(
outputs,
[
{
"generated_text": [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
{
"role": "assistant",
"content": "This is stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
},
]
}
],
)
@require_torch
def test_small_chat_model_continue_final_message_override(self):
# Here we check that passing a chat that ends in an assistant message is handled correctly
# by continuing the final message rather than starting a new one
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
outputs = text_generator(chat1, do_sample=False, max_new_tokens=10, continue_final_message=True)
# Assert that we continued the last message and there isn't a sneaky <|im_end|>
self.assertEqual(
outputs,
[
{
"generated_text": [
{"role": "system", "content": "This is a system message."},
{
"role": "user",
"content": "This is a test stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
},
]
}
],
)
@require_torch
def test_small_chat_model_with_dataset_pt(self):
from torch.utils.data import Dataset
from transformers.pipelines.pt_utils import KeyDataset
class MyDataset(Dataset):
data = [
[
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
],
]
def __len__(self):
return 1
def __getitem__(self, i):
return {"text": self.data[i]}
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
dataset = MyDataset()
key_dataset = KeyDataset(dataset, "text")
for outputs in text_generator(key_dataset, do_sample=False, max_new_tokens=10):
expected_chat = dataset.data[0] + [
{
"role": "assistant",
"content": " factors factors factors factors factors factors factors factors factors factors",
}
]
self.assertEqual(
outputs,
[
{"generated_text": expected_chat},
],
)
@require_torch
def test_small_chat_model_with_iterator_pt(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
chat2 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a second test"},
]
expected_chat1 = chat1 + [
{
"role": "assistant",
"content": " factors factors factors factors factors factors factors factors factors factors",
}
]
expected_chat2 = chat2 + [
{
"role": "assistant",
"content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
}
]
def data():
yield from [chat1, chat2]
outputs = text_generator(data(), do_sample=False, max_new_tokens=10)
outputs = list(outputs)
self.assertEqual(
outputs,
[
[{"generated_text": expected_chat1}],
[{"generated_text": expected_chat2}],
],
)
@require_torch
def test_small_chat_model_with_response_parsing(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
text_generator.tokenizer.response_schema = {
# A real response schema should probably have things like "role" and "content"
# and "reasoning_content" but it's unlikely we'd get a tiny model to reliably
# output anything like that, so let's keep it simple.
"type": "object",
"properties": {
"first_word": {"type": "string", "x-regex": r"^\s*([a-zA-Z]+)"},
"last_word": {"type": "string", "x-regex": r"([a-zA-Z]+)\s*$"},
},
}
outputs = text_generator(chat, do_sample=False, max_new_tokens=10)
parsed_message = outputs[0]["generated_text"][-1]
self.assertEqual(parsed_message, {"first_word": "factors", "last_word": "factors"})
@require_torch
def test_return_full_text_false_with_chat_template(self):
"""Regression test for #45854: return_full_text=False must not include prompt when using chat template."""
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
outputs = text_generator(chat, do_sample=False, max_new_tokens=10, return_full_text=False)
generated = outputs[0]["generated_text"]
# Must return plain string, not a list of message dicts
self.assertIsInstance(generated, str)
# Must not contain the prompt content
self.assertNotIn("This is a test", generated)
self.assertNotIn("This is a system message.", generated)
@require_torch
def test_return_full_text_true_with_chat_template(self):
"""return_full_text=True (default) must still return full chat list with chat template."""
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
outputs = text_generator(chat, do_sample=False, max_new_tokens=10, return_full_text=True)
generated = outputs[0]["generated_text"]
# Must return list of message dicts including original messages
self.assertIsInstance(generated, list)
roles = [m["role"] for m in generated]
self.assertIn("user", roles)
self.assertIn("assistant", roles)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
text_generator = TextGenerationPipeline(
model=model,
tokenizer=tokenizer,
dtype=dtype,
max_new_tokens=5,
)
return text_generator, ["This is a test", "Another test"]
def test_stop_sequence_stopping_criteria(self):
prompt = """Hello I believe in"""
text_generator = pipeline(
"text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5, do_sample=False
)
output = text_generator(prompt)
self.assertEqual(
output,
[{"generated_text": "Hello I believe in fe fe fe fe fe"}],
)
output = text_generator(prompt, stop_sequence=" fe")
self.assertEqual(output, [{"generated_text": "Hello I believe in fe"}])
def run_pipeline_test(self, text_generator, _):
model = text_generator.model
tokenizer = text_generator.tokenizer
outputs = text_generator("This is a test")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
outputs = text_generator("This is a test", return_full_text=False)
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertNotIn("This is a test", outputs[0]["generated_text"])
text_generator = pipeline(
task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False, max_new_tokens=5
)
outputs = text_generator("This is a test")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertNotIn("This is a test", outputs[0]["generated_text"])
outputs = text_generator("This is a test", return_full_text=True)
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
outputs = text_generator(["This is great !", "Something else"], num_return_sequences=2, do_sample=True)
self.assertEqual(
outputs,
[
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
],
)
if text_generator.tokenizer.pad_token is not None:
outputs = text_generator(
["This is great !", "Something else"], num_return_sequences=2, batch_size=2, do_sample=True
)
self.assertEqual(
outputs,
[
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
],
)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_full_text=True, return_text=True)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_full_text=True, return_tensors=True)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_text=True, return_tensors=True)
# Empty prompt is slightly special
# it requires BOS token to exist.
# Special case for Pegasus which will always append EOS so will
# work even without BOS.
if (
text_generator.tokenizer.bos_token_id is not None
or "Pegasus" in tokenizer.__class__.__name__
or "Git" in model.__class__.__name__
):
outputs = text_generator("")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
else:
with self.assertRaises((ValueError, AssertionError)):
outputs = text_generator("", add_special_tokens=False)
# We don't care about infinite range models.
# They already work.
# Skip this test for XGLM, since it uses sinusoidal positional embeddings which are resized on-the-fly.
EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS = [
"RwkvForCausalLM",
"XGLMForCausalLM",
"GPTNeoXForCausalLM",
"GPTNeoXJapaneseForCausalLM",
"FuyuForCausalLM",
"LlamaForCausalLM",
]
if (
tokenizer.model_max_length < 10000
and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS
):
# Handling of large generations
if str(text_generator.device) == "cpu":
with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
text_generator("This is a test" * 500, max_new_tokens=5)
outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=5)
# Hole strategy cannot work
if str(text_generator.device) == "cpu":
with self.assertRaises(ValueError):
text_generator(
"This is a test" * 500,
handle_long_generation="hole",
max_new_tokens=tokenizer.model_max_length + 10,
)
@require_torch
@require_accelerate
@require_torch_accelerator
def test_small_model_pt_bloom_accelerate(self):
import torch
# Classic `model_kwargs`
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
model_kwargs={"device_map": "auto", "dtype": torch.bfloat16},
max_new_tokens=5,
do_sample=False,
)
self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
out = pipe("This is a test")
self.assertEqual(
out,
[{"generated_text": ("This is a test test test test test test")}],
)
# Upgraded those two to real pipeline arguments (they just get sent for the model as they're unlikely to mean anything else.)
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
device_map="auto",
dtype=torch.bfloat16,
max_new_tokens=5,
do_sample=False,
)
self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
out = pipe("This is a test")
self.assertEqual(
out,
[{"generated_text": ("This is a test test test test test test")}],
)
# dtype will be automatically set to torch.bfloat16 if not provided - check: https://github.com/huggingface/transformers/pull/38882
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom", device_map="auto", max_new_tokens=5, do_sample=False
)
self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
out = pipe("This is a test")
self.assertEqual(
out,
[{"generated_text": ("This is a test test test test test test")}],
)
@require_torch
@require_torch_accelerator
def test_small_model_fp16(self):
import torch
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
device=torch_device,
dtype=torch.float16,
max_new_tokens=3,
)
pipe("This is a test")
@require_torch
@require_accelerate
@require_torch_accelerator
def test_pipeline_accelerate_top_p(self):
import torch
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
device_map=torch_device,
dtype=torch.float16,
max_new_tokens=3,
)
pipe("This is a test", do_sample=True, top_p=0.5)
def test_pipeline_length_setting_warning(self):
prompt = """Hello world"""
text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5)
logger = logging.get_logger("transformers.generation.utils")
logger_msg = "Both `max_new_tokens`" # The beginning of the message to be checked in this test
# Both are set by the user -> log warning
with CaptureLogger(logger) as cl:
_ = text_generator(prompt, max_length=10, max_new_tokens=1)
self.assertIn(logger_msg, cl.out)
# The user only sets one -> no warning
with CaptureLogger(logger) as cl:
_ = text_generator(prompt, max_new_tokens=1)
self.assertNotIn(logger_msg, cl.out)
with CaptureLogger(logger) as cl:
_ = text_generator(prompt, max_length=10, max_new_tokens=None)
self.assertNotIn(logger_msg, cl.out)
def test_return_dict_in_generate(self):
text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=2)
out = text_generator(
["This is great !", "Something else"], return_dict_in_generate=True, output_logits=True, output_scores=True
)
self.assertEqual(
out,
[
[
{
"generated_text": ANY(str),
"logits": ANY(list),
"scores": ANY(list),
},
],
[
{
"generated_text": ANY(str),
"logits": ANY(list),
"scores": ANY(list),
},
],
],
)
@require_torch
def test_pipeline_assisted_generation(self):
"""Tests that we can run assisted generation in the pipeline"""
model = "hf-internal-testing/tiny-random-MistralForCausalLM"
pipe = pipeline("text-generation", model=model, assistant_model=model, max_new_tokens=2)
# We can run the pipeline
prompt = "Hello world"
_ = pipe(prompt)
# It is running assisted generation under the hood (e.g. flags incompatible with assisted gen will crash)
with self.assertRaises(ValueError):
_ = pipe(prompt, generate_kwargs={"num_beams": 2})
@require_torch
def test_pipeline_skip_special_tokens(self):
"""Tests that we can use `skip_special_tokens=False` to get the special tokens in the output"""
model_id = "google/gemma-3-270m-it"
chat = [{"role": "user", "content": "What's your name?"}]
generator = pipeline("text-generation", model=model_id)
# normal pipeline use
output = generator(chat, max_new_tokens=20, do_sample=False)
self.assertNotIn("<end_of_turn>", str(output[0]["generated_text"]))
# forcing special tokens to be included in the output
output = generator(chat, max_new_tokens=1000, do_sample=False, skip_special_tokens=False)
self.assertIn("<end_of_turn>", str(output[0]["generated_text"]))
@require_torch
def test_forward_tokenizer_kwargs(self):
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
model = "hf-internal-testing/tiny-gpt2-with-chatml-template"
text_generator = pipeline("text-generation", model, max_new_tokens=5)
tokenizer = text_generator.tokenizer
with patch.object(tokenizer, "apply_chat_template", wraps=tokenizer.apply_chat_template) as mock:
text_generator(chat, tokenizer_encode_kwargs={"enable_thinking": True})
self.assertGreater(mock.call_count, 0)
kw_call_args = mock.call_args[1]
self.assertIn("enable_thinking", kw_call_args)
self.assertEqual(kw_call_args["enable_thinking"], True)

View File

@@ -0,0 +1,382 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import torch
from transformers import (
MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING,
AutoProcessor,
TextToAudioPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
require_torch_accelerator,
slow,
torch_device,
)
from transformers.trainer_utils import set_seed
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
class TextToAudioPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING
# for now only test text_to_waveform and not text_to_spectrogram
@require_torch
def test_small_speecht5_pt(self):
audio_generator = pipeline(task="text-to-audio", model="microsoft/speecht5_tts")
num_channels = 1 # model generates mono audio
forward_params = {
"do_sample": True,
"semantic_max_new_tokens": 5,
"speaker_embeddings": torch.rand(1, 512) * 0.2 - 0.1,
}
outputs = audio_generator("This is a test", forward_params=forward_params)
self.assertEqual({"audio": ANY(np.ndarray), "sampling_rate": 16000}, outputs)
self.assertEqual(len(outputs["audio"].shape), num_channels)
# test two examples side-by-side
outputs = audio_generator(["This is a test", "This is a second test"], forward_params=forward_params)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
# test batching, this time with parameterization in the forward pass
audio_generator = pipeline(task="text-to-audio", model="microsoft/speecht5_tts")
forward_params = {
"do_sample": False,
"max_new_tokens": 5,
"speaker_embeddings": torch.rand(1, 512) * 0.2 - 0.1,
}
outputs = audio_generator(
["This is a test", "This is a second test"], forward_params=forward_params, batch_size=2
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
@require_torch
def test_small_musicgen_pt(self):
music_generator = pipeline(
task="text-to-audio", model="facebook/musicgen-small", do_sample=False, max_new_tokens=5
)
num_channels = 1 # model generates mono audio
outputs = music_generator("This is a test")
self.assertEqual({"audio": ANY(np.ndarray), "sampling_rate": 32000}, outputs)
self.assertEqual(len(outputs["audio"].shape), num_channels)
# test two examples side-by-side
outputs = music_generator(["This is a test", "This is a second test"])
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
# test batching, this time with parameterization in the forward pass
music_generator = pipeline(task="text-to-audio", model="facebook/musicgen-small")
forward_params = {"do_sample": False, "max_new_tokens": 5}
outputs = music_generator(
["This is a test", "This is a second test"], forward_params=forward_params, batch_size=2
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
@slow
@require_torch
def test_medium_seamless_m4t_pt(self):
speech_generator = pipeline(task="text-to-audio", model="facebook/hf-seamless-m4t-medium", max_new_tokens=5)
for forward_params in [{"tgt_lang": "eng"}, {"return_intermediate_token_ids": True, "tgt_lang": "eng"}]:
outputs = speech_generator("This is a test", forward_params=forward_params)
self.assertEqual({"audio": ANY(np.ndarray), "sampling_rate": 16000}, outputs)
# test two examples side-by-side
outputs = speech_generator(["This is a test", "This is a second test"], forward_params=forward_params)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
# test batching
outputs = speech_generator(
["This is a test", "This is a second test"], forward_params=forward_params, batch_size=2
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
@slow
@require_torch
def test_small_bark_pt(self):
speech_generator = pipeline(task="text-to-audio", model="suno/bark-small")
num_channels = 1 # model generates mono audio
forward_params = {
# Using `do_sample=False` to force deterministic output
"do_sample": False,
"semantic_max_new_tokens": 5,
}
outputs = speech_generator("This is a test", forward_params=forward_params)
self.assertEqual(
{"audio": ANY(np.ndarray), "sampling_rate": 24000},
outputs,
)
self.assertEqual(len(outputs["audio"].shape), num_channels)
# test two examples side-by-side
outputs = speech_generator(
["This is a test", "This is a second test"],
forward_params=forward_params,
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
# test other generation strategy
forward_params = {
"do_sample": True,
"semantic_max_new_tokens": 5,
"semantic_num_return_sequences": 2,
}
outputs = speech_generator("This is a test", forward_params=forward_params)
audio = outputs["audio"]
self.assertEqual(ANY(np.ndarray), audio)
# test using a speaker embedding
processor = AutoProcessor.from_pretrained("suno/bark-small")
temp_inp = processor("hey, how are you?", voice_preset="v2/en_speaker_5")
history_prompt = temp_inp["history_prompt"]
forward_params["history_prompt"] = history_prompt
outputs = speech_generator(
["This is a test", "This is a second test"],
forward_params=forward_params,
batch_size=2,
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
@slow
@require_torch_accelerator
def test_conversion_additional_tensor(self):
speech_generator = pipeline(task="text-to-audio", model="suno/bark-small", device=torch_device)
processor = AutoProcessor.from_pretrained("suno/bark-small")
forward_params = {
"do_sample": True,
"semantic_max_new_tokens": 5,
}
# atm, must do to stay coherent with BarkProcessor
preprocess_params = {
"max_length": 256,
"add_special_tokens": False,
"return_attention_mask": True,
"return_token_type_ids": False,
}
outputs = speech_generator(
"This is a test",
forward_params=forward_params,
preprocess_params=preprocess_params,
)
temp_inp = processor("hey, how are you?", voice_preset="v2/en_speaker_5")
history_prompt = temp_inp["history_prompt"]
forward_params["history_prompt"] = history_prompt
# history_prompt is a torch.Tensor passed as a forward_param
# if generation is successful, it means that it was passed to the right device
outputs = speech_generator(
"This is a test", forward_params=forward_params, preprocess_params=preprocess_params
)
self.assertEqual(
{"audio": ANY(np.ndarray), "sampling_rate": 24000},
outputs,
)
@require_torch
def test_vits_model_pt(self):
speech_generator = pipeline(task="text-to-audio", model="facebook/mms-tts-eng")
outputs = speech_generator("This is a test")
self.assertEqual(outputs["sampling_rate"], 16000)
audio = outputs["audio"]
self.assertEqual(ANY(np.ndarray), audio)
# test two examples side-by-side
outputs = speech_generator(["This is a test", "This is a second test"])
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
# test batching
outputs = speech_generator(["This is a test", "This is a second test"], batch_size=2)
self.assertEqual(ANY(np.ndarray), outputs[0]["audio"])
@require_torch
def test_forward_model_kwargs(self):
# use vits - a forward model
speech_generator = pipeline(task="text-to-audio", model="kakao-enterprise/vits-vctk")
# for reproducibility
set_seed(555)
outputs = speech_generator("This is a test", forward_params={"speaker_id": 5})
audio = outputs["audio"]
with self.assertRaises(TypeError):
# assert error if generate parameter
outputs = speech_generator("This is a test", forward_params={"speaker_id": 5, "do_sample": True})
forward_params = {"speaker_id": 5}
generate_kwargs = {"do_sample": True}
with self.assertRaises(ValueError):
# assert error if generate_kwargs with forward-only models
outputs = speech_generator(
"This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs
)
self.assertTrue(np.abs(outputs["audio"] - audio).max() < 1e-5)
@require_torch
def test_generative_model_kwargs(self):
# use musicgen - a generative model
music_generator = pipeline(task="text-to-audio", model="facebook/musicgen-small")
forward_params = {
"do_sample": True,
"max_new_tokens": 20,
}
# for reproducibility
set_seed(555)
outputs = music_generator("This is a test", forward_params=forward_params)
audio = outputs["audio"]
self.assertEqual(ANY(np.ndarray), audio)
# make sure generate kwargs get priority over forward params
forward_params = {
"do_sample": False,
"max_new_tokens": 20,
}
generate_kwargs = {"do_sample": True}
# for reproducibility
set_seed(555)
outputs = music_generator("This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs)
self.assertTrue(np.abs(outputs["audio"] - audio).max() < 1e-5)
@slow
@require_torch
def test_csm_model_pt(self):
speech_generator = pipeline(task="text-to-audio", model="sesame/csm-1b", device=torch_device)
generate_kwargs = {"max_new_tokens": 10}
num_channels = 1 # model generates mono audio
outputs = speech_generator("This is a test", generate_kwargs=generate_kwargs)
self.assertEqual(outputs["sampling_rate"], 24000)
audio = outputs["audio"]
self.assertEqual(ANY(np.ndarray), audio)
# ensure audio and not discrete codes
self.assertEqual(len(audio.shape), num_channels)
# test two examples side-by-side
outputs = speech_generator(["This is a test", "This is a second test"], generate_kwargs=generate_kwargs)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
self.assertEqual(len(audio[0].shape), num_channels)
# test batching
batch_size = 2
outputs = speech_generator(
["This is a test", "This is a second test"], generate_kwargs=generate_kwargs, batch_size=batch_size
)
self.assertEqual(len(outputs), batch_size)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
self.assertEqual(len(outputs[0]["audio"].shape), num_channels)
@slow
@require_torch
def test_dia_model(self):
speech_generator = pipeline(task="text-to-audio", model="nari-labs/Dia-1.6B-0626", device=torch_device)
generate_kwargs = {"max_new_tokens": 20}
num_channels = 1 # model generates mono audio
outputs = speech_generator("Dia is an open weights text to dialogue model.", generate_kwargs=generate_kwargs)
self.assertEqual(outputs["sampling_rate"], 44100)
audio = outputs["audio"]
self.assertEqual(ANY(np.ndarray), audio)
# ensure audio (with one channel) and not discrete codes
self.assertEqual(len(audio.shape), num_channels)
# test two examples side-by-side
outputs = speech_generator(
["Dia is an open weights text to dialogue model.", "This is a second example."],
generate_kwargs=generate_kwargs,
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
self.assertEqual(len(audio[0].shape), num_channels)
# test batching
batch_size = 2
outputs = speech_generator(
["Dia is an open weights text to dialogue model.", "This is a second example."],
generate_kwargs=generate_kwargs,
batch_size=2,
)
self.assertEqual(len(outputs), batch_size)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
self.assertEqual(len(outputs[0]["audio"].shape), num_channels)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
model_test_kwargs = {}
if model.can_generate(): # not all models in this pipeline can generate and, therefore, take `generate` kwargs
model_test_kwargs["max_new_tokens"] = 5
model.config._attn_implementation = "eager"
speech_generator = TextToAudioPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
**model_test_kwargs,
)
return speech_generator, ["This is a test", "Another test"]
def run_pipeline_test(self, speech_generator, _):
outputs = speech_generator("This is a test")
self.assertEqual(ANY(np.ndarray), outputs["audio"])
forward_params = (
{"num_return_sequences": 2, "do_sample": True} if speech_generator.model.can_generate() else {}
)
outputs = speech_generator(["This is great !", "Something else"], forward_params=forward_params)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,124 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from huggingface_hub import VideoClassificationOutputElement, hf_hub_download
from transformers import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, VideoMAEImageProcessor
from transformers.pipelines import VideoClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_av,
require_torch,
require_vision,
)
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
@require_vision
@require_av
class VideoClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
example_video_filepath = None
@classmethod
def _load_dataset(cls):
# Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
if cls.example_video_filepath is None:
cls.example_video_filepath = hf_hub_download(
repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
self._load_dataset()
video_classifier = VideoClassificationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
top_k=2,
)
examples = [
self.example_video_filepath,
# TODO: re-enable this once we have a stable hub solution for CI
# "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
]
return video_classifier, examples
def run_pipeline_test(self, video_classifier, examples):
for example in examples:
outputs = video_classifier(example)
self.assertEqual(
outputs,
[
{"score": ANY(float), "label": ANY(str)},
{"score": ANY(float), "label": ANY(str)},
],
)
for element in outputs:
compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)
@require_torch
def test_small_model_pt(self):
small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification"
small_feature_extractor = VideoMAEImageProcessor(
size={"shortest_edge": 10}, crop_size={"height": 10, "width": 10}
)
video_classifier = pipeline(
"video-classification", model=small_model, feature_extractor=small_feature_extractor, frame_sampling_rate=4
)
video_file_path = hf_hub_download(repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset")
output = video_classifier(video_file_path, top_k=2)
self.assertEqual(
nested_simplify(output, decimals=4),
[{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}],
)
for element in output:
compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)
outputs = video_classifier(
[
video_file_path,
video_file_path,
],
top_k=2,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}],
[{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}],
],
)
for output in outputs:
for element in output:
compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

View File

@@ -0,0 +1,288 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
Pipeline,
ZeroShotClassificationPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
class ZeroShotClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
if not hasattr(model_mapping, "is_dummy"):
model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
classifier = ZeroShotClassificationPipeline(
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
processor=processor,
dtype=dtype,
candidate_labels=["polics", "health"],
)
return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]
def run_pipeline_test(self, classifier, _):
outputs = classifier("Who are you voting for in 2020?", candidate_labels="politics")
self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
# No kwarg
outputs = classifier("Who are you voting for in 2020?", ["politics"])
self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
outputs = classifier("Who are you voting for in 2020?", candidate_labels=["politics"])
self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
outputs = classifier("Who are you voting for in 2020?", candidate_labels="politics, public health")
self.assertEqual(
outputs, {"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
)
self.assertAlmostEqual(sum(nested_simplify(outputs["scores"])), 1.0)
outputs = classifier("Who are you voting for in 2020?", candidate_labels=["politics", "public health"])
self.assertEqual(
outputs, {"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
)
self.assertAlmostEqual(sum(nested_simplify(outputs["scores"])), 1.0)
outputs = classifier(
"Who are you voting for in 2020?", candidate_labels="politics", hypothesis_template="This text is about {}"
)
self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
# https://github.com/huggingface/transformers/issues/13846
outputs = classifier(["I am happy"], ["positive", "negative"])
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
for i in range(1)
],
)
outputs = classifier(["I am happy", "I am sad"], ["positive", "negative"])
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
for i in range(2)
],
)
with self.assertRaises(ValueError):
classifier("", candidate_labels="politics")
with self.assertRaises(TypeError):
classifier(None, candidate_labels="politics")
with self.assertRaises(ValueError):
classifier("Who are you voting for in 2020?", candidate_labels="")
with self.assertRaises(TypeError):
classifier("Who are you voting for in 2020?", candidate_labels=None)
with self.assertRaises(ValueError):
classifier(
"Who are you voting for in 2020?",
candidate_labels="politics",
hypothesis_template="Not formatting template",
)
with self.assertRaises(AttributeError):
classifier(
"Who are you voting for in 2020?",
candidate_labels="politics",
hypothesis_template=None,
)
self.run_entailment_id(classifier)
def run_entailment_id(self, zero_shot_classifier: Pipeline):
config = zero_shot_classifier.model.config
original_label2id = config.label2id
original_entailment = zero_shot_classifier.entailment_id
config.label2id = {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}
self.assertEqual(zero_shot_classifier.entailment_id, -1)
config.label2id = {"entailment": 0, "neutral": 1, "contradiction": 2}
self.assertEqual(zero_shot_classifier.entailment_id, 0)
config.label2id = {"ENTAIL": 0, "NON-ENTAIL": 1}
self.assertEqual(zero_shot_classifier.entailment_id, 0)
config.label2id = {"ENTAIL": 2, "NEUTRAL": 1, "CONTR": 0}
self.assertEqual(zero_shot_classifier.entailment_id, 2)
zero_shot_classifier.model.config.label2id = original_label2id
self.assertEqual(original_entailment, zero_shot_classifier.entailment_id)
@require_torch
def test_truncation(self):
zero_shot_classifier = pipeline(
"zero-shot-classification",
model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
)
# There was a regression in 4.10 for this
# Adding a test so we don't make the mistake again.
# https://github.com/huggingface/transformers/issues/13381#issuecomment-912343499
zero_shot_classifier(
"Who are you voting for in 2020?" * 100, candidate_labels=["politics", "public health", "science"]
)
@require_torch
def test_small_model_pt(self):
zero_shot_classifier = pipeline(
"zero-shot-classification",
model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
)
outputs = zero_shot_classifier(
"Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
)
self.assertEqual(
nested_simplify(outputs),
{
"sequence": "Who are you voting for in 2020?",
"labels": ["science", "public health", "politics"],
"scores": [0.333, 0.333, 0.333],
},
)
@require_torch
def test_small_model_pt_fp16(self):
zero_shot_classifier = pipeline(
"zero-shot-classification",
model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
dtype=torch.float16,
)
outputs = zero_shot_classifier(
"Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
)
self.assertEqual(
nested_simplify(outputs),
{
"sequence": "Who are you voting for in 2020?",
"labels": ["science", "public health", "politics"],
"scores": [0.333, 0.333, 0.333],
},
)
@require_torch
def test_small_model_pt_bf16(self):
zero_shot_classifier = pipeline(
"zero-shot-classification",
model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
dtype=torch.bfloat16,
)
outputs = zero_shot_classifier(
"Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
)
self.assertEqual(
nested_simplify(outputs),
{
"sequence": "Who are you voting for in 2020?",
"labels": ["science", "public health", "politics"],
"scores": [0.333, 0.333, 0.333],
},
)
@slow
@require_torch
def test_large_model_pt(self):
zero_shot_classifier = pipeline("zero-shot-classification", model="FacebookAI/roberta-large-mnli")
outputs = zero_shot_classifier(
"Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
)
self.assertEqual(
nested_simplify(outputs),
{
"sequence": "Who are you voting for in 2020?",
"labels": ["politics", "public health", "science"],
"scores": [0.976, 0.015, 0.009],
},
)
outputs = zero_shot_classifier(
"The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
" in an encoder-decoder configuration. The best performing models also connect the encoder and decoder"
" through an attention mechanism. We propose a new simple network architecture, the Transformer, based"
" solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two"
" machine translation tasks show these models to be superior in quality while being more parallelizable"
" and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014"
" English-to-German translation task, improving over the existing best results, including ensembles by"
" over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new"
" single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small"
" fraction of the training costs of the best models from the literature. We show that the Transformer"
" generalizes well to other tasks by applying it successfully to English constituency parsing both with"
" large and limited training data.",
candidate_labels=["machine learning", "statistics", "translation", "vision"],
multi_label=True,
)
self.assertEqual(
nested_simplify(outputs),
{
"sequence": (
"The dominant sequence transduction models are based on complex recurrent or convolutional neural"
" networks in an encoder-decoder configuration. The best performing models also connect the"
" encoder and decoder through an attention mechanism. We propose a new simple network"
" architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence"
" and convolutions entirely. Experiments on two machine translation tasks show these models to be"
" superior in quality while being more parallelizable and requiring significantly less time to"
" train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task,"
" improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014"
" English-to-French translation task, our model establishes a new single-model state-of-the-art"
" BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training"
" costs of the best models from the literature. We show that the Transformer generalizes well to"
" other tasks by applying it successfully to English constituency parsing both with large and"
" limited training data."
),
"labels": ["translation", "machine learning", "vision", "statistics"],
"scores": [0.817, 0.713, 0.018, 0.018],
},
)

View File

@@ -0,0 +1,92 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from datasets import load_dataset
from transformers.pipelines import pipeline
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch, slow
@is_pipeline_test
@require_torch
class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
# Deactivating auto tests since we don't have a good MODEL_FOR_XX mapping,
# and only CLAP would be there for now.
# model_mapping = {CLAPConfig: CLAPModel}
@require_torch
def test_small_model_pt(self, dtype="float32"):
audio_classifier = pipeline(
task="zero-shot-audio-classification",
model="hf-internal-testing/tiny-clap-htsat-unfused",
dtype=dtype,
)
dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
audio = dataset["train"]["audio"][-1]["array"]
output = audio_classifier(audio, candidate_labels=["Sound of a dog", "Sound of vaccum cleaner"])
self.assertEqual(
nested_simplify(output),
[{"score": 0.501, "label": "Sound of a dog"}, {"score": 0.499, "label": "Sound of vaccum cleaner"}],
)
@require_torch
def test_small_model_pt_fp16(self):
self.test_small_model_pt(dtype="float16")
@slow
@require_torch
def test_large_model_pt(self):
audio_classifier = pipeline(
task="zero-shot-audio-classification",
model="laion/clap-htsat-unfused",
)
# This is an audio of a dog
dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
audio = dataset["train"]["audio"][-1]["array"]
output = audio_classifier(audio, candidate_labels=["Sound of a dog", "Sound of vaccum cleaner"])
self.assertEqual(
nested_simplify(output),
[
{"score": 1.0, "label": "Sound of a dog"},
{"score": 0.0, "label": "Sound of vaccum cleaner"},
],
)
output = audio_classifier([audio] * 5, candidate_labels=["Sound of a dog", "Sound of vaccum cleaner"])
self.assertEqual(
nested_simplify(output),
[
[
{"score": 1.0, "label": "Sound of a dog"},
{"score": 0.0, "label": "Sound of vaccum cleaner"},
],
]
* 5,
)
output = audio_classifier(
[audio] * 5, candidate_labels=["Sound of a dog", "Sound of vaccum cleaner"], batch_size=5
)
self.assertEqual(
nested_simplify(output),
[
[
{"score": 1.0, "label": "Sound of a dog"},
{"score": 0.0, "label": "Sound of vaccum cleaner"},
],
]
* 5,
)

View File

@@ -0,0 +1,248 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from huggingface_hub import ZeroShotImageClassificationOutputElement
from transformers import is_vision_available
from transformers.pipelines import pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
@is_pipeline_test
@require_vision
class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
# Deactivating auto tests since we don't have a good MODEL_FOR_XX mapping,
# and only CLIP would be there for now.
# model_mapping = {CLIPConfig: CLIPModel}
# def get_test_pipeline(self, model, tokenizer, processor):
# if tokenizer is None:
# # Side effect of no Fast Tokenizer class for these model, so skipping
# # But the slow tokenizer test should still run as they're quite small
# self.skipTest(reason="No tokenizer available")
# return
# # return None, None
# image_classifier = ZeroShotImageClassificationPipeline(
# model=model, tokenizer=tokenizer, feature_extractor=processor
# )
# # test with a raw waveform
# image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
# image2 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
# return image_classifier, [image, image2]
# def run_pipeline_test(self, pipe, examples):
# image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
# outputs = pipe(image, candidate_labels=["A", "B"])
# self.assertEqual(outputs, {"text": ANY(str)})
# # Batching
# outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"])
@require_torch
def test_small_model_pt(self, dtype="float32"):
image_classifier = pipeline(
model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", dtype=dtype
)
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
output = image_classifier(image, candidate_labels=["a", "b", "c"])
# The floating scores are so close, we enter floating error approximation and the order is not guaranteed across
# python and torch versions.
self.assertIn(
nested_simplify(output),
[
[{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "b"}, {"score": 0.333, "label": "c"}],
[{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "c"}, {"score": 0.333, "label": "b"}],
[{"score": 0.333, "label": "b"}, {"score": 0.333, "label": "a"}, {"score": 0.333, "label": "c"}],
],
)
output = image_classifier([image] * 5, candidate_labels=["A", "B", "C"], batch_size=2)
self.assertEqual(
nested_simplify(output),
# Pipeline outputs are supposed to be deterministic and
# So we could in theory have real values "A", "B", "C" instead
# of ANY(str).
# However it seems that in this particular case, the floating
# scores are so close, we enter floating error approximation
# and the order is not guaranteed anymore with batching.
[
[
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
],
[
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
],
[
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
],
[
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
],
[
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
{"score": 0.333, "label": ANY(str)},
],
],
)
for single_output in output:
for element in single_output:
compare_pipeline_output_to_hub_spec(element, ZeroShotImageClassificationOutputElement)
@require_torch
def test_small_model_pt_fp16(self):
self.test_small_model_pt(dtype="float16")
@slow
@require_torch
def test_large_model_pt(self):
image_classifier = pipeline(
task="zero-shot-image-classification",
model="openai/clip-vit-base-patch32",
)
# This is an image of 2 cats with remotes and no planes
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
output = image_classifier(image, candidate_labels=["cat", "plane", "remote"])
self.assertEqual(
nested_simplify(output),
[
{"score": 0.511, "label": "remote"},
{"score": 0.485, "label": "cat"},
{"score": 0.004, "label": "plane"},
],
)
output = image_classifier([image] * 5, candidate_labels=["cat", "plane", "remote"], batch_size=2)
self.assertEqual(
nested_simplify(output),
[
[
{"score": 0.511, "label": "remote"},
{"score": 0.485, "label": "cat"},
{"score": 0.004, "label": "plane"},
],
]
* 5,
)
@slow
@require_torch
def test_siglip_model_pt(self):
image_classifier = pipeline(
task="zero-shot-image-classification",
model="google/siglip-base-patch16-224",
)
# This is an image of 2 cats with remotes and no planes
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
output = image_classifier(image, candidate_labels=["2 cats", "a plane", "a remote"])
self.assertEqual(
nested_simplify(output),
[
{"score": 0.198, "label": "2 cats"},
{"score": 0.0, "label": "a remote"},
{"score": 0.0, "label": "a plane"},
],
)
output = image_classifier([image] * 5, candidate_labels=["2 cats", "a plane", "a remote"], batch_size=2)
self.assertEqual(
nested_simplify(output),
[
[
{"score": 0.198, "label": "2 cats"},
{"score": 0.0, "label": "a remote"},
{"score": 0.0, "label": "a plane"},
]
]
* 5,
)
@slow
@require_torch
def test_blip2_model_pt(self):
image_classifier = pipeline(
task="zero-shot-image-classification",
model="Salesforce/blip2-itm-vit-g",
)
# This is an image of 2 cats with remotes and no planes
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
output = image_classifier(
image,
candidate_labels=["2 cats", "a plane", "a remote"],
tokenizer_kwargs={"return_token_type_ids": False},
)
self.assertEqual(
nested_simplify(output),
[
{"score": 0.369, "label": "2 cats"},
{"score": 0.333, "label": "a remote"},
{"score": 0.297, "label": "a plane"},
],
)
output = image_classifier(
[image] * 5,
candidate_labels=["2 cats", "a plane", "a remote"],
batch_size=2,
tokenizer_kwargs={"return_token_type_ids": False},
)
self.assertEqual(
nested_simplify(output),
[
[
{"score": 0.369, "label": "2 cats"},
{"score": 0.333, "label": "a remote"},
{"score": 0.297, "label": "a plane"},
]
]
* 5,
)

View File

@@ -0,0 +1,235 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING,
ZeroShotObjectDetectionPipeline,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
else:
class Image:
@staticmethod
def open(*args, **kwargs):
pass
@is_pipeline_test
@require_vision
@require_torch
class ZeroShotObjectDetectionPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
object_detector = ZeroShotObjectDetectionPipeline(
model=model,
processor=processor,
tokenizer=tokenizer,
image_processor=image_processor,
dtype=dtype,
)
examples = [
{
"image": "./tests/fixtures/tests_samples/COCO/000000039769.png",
"candidate_labels": ["cat", "remote", "couch"],
}
]
return object_detector, examples
def run_pipeline_test(self, object_detector, examples):
outputs = object_detector(examples[0].get("image"), examples[0].get("candidate_labels"), threshold=0.0)
n = len(outputs)
self.assertGreater(n, 0)
self.assertEqual(
outputs,
[
{
"score": ANY(float),
"label": ANY(str),
"box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
}
for i in range(n)
],
)
@require_torch
def test_small_model_pt(self):
object_detector = pipeline(
"zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
)
outputs = object_detector(
"./tests/fixtures/tests_samples/COCO/000000039769.png",
candidate_labels=["cat", "remote", "couch"],
threshold=0.64,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.7235, "label": "cat", "box": {"xmin": 204, "ymin": 167, "xmax": 232, "ymax": 190}},
{"score": 0.7218, "label": "remote", "box": {"xmin": 204, "ymin": 167, "xmax": 232, "ymax": 190}},
{"score": 0.7184, "label": "couch", "box": {"xmin": 204, "ymin": 167, "xmax": 232, "ymax": 190}},
{"score": 0.6748, "label": "remote", "box": {"xmin": 571, "ymin": 83, "xmax": 598, "ymax": 103}},
{"score": 0.6656, "label": "cat", "box": {"xmin": 571, "ymin": 83, "xmax": 598, "ymax": 103}},
{"score": 0.6614, "label": "couch", "box": {"xmin": 571, "ymin": 83, "xmax": 598, "ymax": 103}},
{"score": 0.6456, "label": "remote", "box": {"xmin": 494, "ymin": 105, "xmax": 521, "ymax": 127}},
{"score": 0.642, "label": "remote", "box": {"xmin": 67, "ymin": 274, "xmax": 93, "ymax": 297}},
{"score": 0.6419, "label": "cat", "box": {"xmin": 494, "ymin": 105, "xmax": 521, "ymax": 127}},
],
)
outputs = object_detector(
[
{
"image": "./tests/fixtures/tests_samples/COCO/000000039769.png",
"candidate_labels": ["cat", "remote", "couch"],
}
],
threshold=0.64,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.7235, "label": "cat", "box": {"xmin": 204, "ymin": 167, "xmax": 232, "ymax": 190}},
{"score": 0.7218, "label": "remote", "box": {"xmin": 204, "ymin": 167, "xmax": 232, "ymax": 190}},
{"score": 0.7184, "label": "couch", "box": {"xmin": 204, "ymin": 167, "xmax": 232, "ymax": 190}},
{"score": 0.6748, "label": "remote", "box": {"xmin": 571, "ymin": 83, "xmax": 598, "ymax": 103}},
{"score": 0.6656, "label": "cat", "box": {"xmin": 571, "ymin": 83, "xmax": 598, "ymax": 103}},
{"score": 0.6614, "label": "couch", "box": {"xmin": 571, "ymin": 83, "xmax": 598, "ymax": 103}},
{"score": 0.6456, "label": "remote", "box": {"xmin": 494, "ymin": 105, "xmax": 521, "ymax": 127}},
{"score": 0.642, "label": "remote", "box": {"xmin": 67, "ymin": 274, "xmax": 93, "ymax": 297}},
{"score": 0.6419, "label": "cat", "box": {"xmin": 494, "ymin": 105, "xmax": 521, "ymax": 127}},
]
],
)
@require_torch
@slow
def test_large_model_pt(self):
object_detector = pipeline("zero-shot-object-detection")
outputs = object_detector(
"http://images.cocodataset.org/val2017/000000039769.jpg",
candidate_labels=["cat", "remote", "couch"],
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
{"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
{"score": 0.2537, "label": "cat", "box": {"xmin": 1, "ymin": 55, "xmax": 315, "ymax": 472}},
{"score": 0.1474, "label": "remote", "box": {"xmin": 335, "ymin": 74, "xmax": 371, "ymax": 187}},
{"score": 0.1208, "label": "couch", "box": {"xmin": 4, "ymin": 0, "xmax": 642, "ymax": 476}},
],
)
outputs = object_detector(
[
{
"image": "http://images.cocodataset.org/val2017/000000039769.jpg",
"candidate_labels": ["cat", "remote", "couch"],
},
{
"image": "http://images.cocodataset.org/val2017/000000039769.jpg",
"candidate_labels": ["cat", "remote", "couch"],
},
],
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
[
{"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
{"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
{"score": 0.2537, "label": "cat", "box": {"xmin": 1, "ymin": 55, "xmax": 315, "ymax": 472}},
{"score": 0.1474, "label": "remote", "box": {"xmin": 335, "ymin": 74, "xmax": 371, "ymax": 187}},
{"score": 0.1208, "label": "couch", "box": {"xmin": 4, "ymin": 0, "xmax": 642, "ymax": 476}},
],
[
{"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
{"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
{"score": 0.2537, "label": "cat", "box": {"xmin": 1, "ymin": 55, "xmax": 315, "ymax": 472}},
{"score": 0.1474, "label": "remote", "box": {"xmin": 335, "ymin": 74, "xmax": 371, "ymax": 187}},
{"score": 0.1208, "label": "couch", "box": {"xmin": 4, "ymin": 0, "xmax": 642, "ymax": 476}},
],
],
)
@require_torch
@slow
def test_threshold(self):
threshold = 0.2
object_detector = pipeline("zero-shot-object-detection")
outputs = object_detector(
"http://images.cocodataset.org/val2017/000000039769.jpg",
candidate_labels=["cat", "remote", "couch"],
threshold=threshold,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
{"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
{"score": 0.2537, "label": "cat", "box": {"xmin": 1, "ymin": 55, "xmax": 315, "ymax": 472}},
],
)
@require_torch
@slow
def test_top_k(self):
top_k = 2
object_detector = pipeline("zero-shot-object-detection")
outputs = object_detector(
"http://images.cocodataset.org/val2017/000000039769.jpg",
candidate_labels=["cat", "remote", "couch"],
top_k=top_k,
)
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
{"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
],
)