Files
transformers/tests/pipelines/test_pipelines_text_generation.py
陈赣 06f1fd69a6
Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
first commit
2026-06-05 16:53:03 +08:00

618 lines
24 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from unittest.mock import patch
from transformers import (
MODEL_FOR_CAUSAL_LM_MAPPING,
TextGenerationPipeline,
logging,
pipeline,
)
from transformers.testing_utils import (
CaptureLogger,
is_pipeline_test,
require_accelerate,
require_torch,
require_torch_accelerator,
torch_device,
)
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
class TextGenerationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
@require_torch
def test_small_model_pt(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-random-LlamaForCausalLM",
max_new_tokens=10,
)
# Using `do_sample=False` to force deterministic output
outputs = text_generator("This is a test", do_sample=False)
self.assertEqual(outputs, [{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}])
outputs = text_generator(["This is a test", "This is a second test"], do_sample=False)
self.assertEqual(
outputs,
[
[{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}],
[{"generated_text": "This is a second testкт MéxicoWSAnimImportдели Düsseld bootstrap learn user"}],
],
)
outputs = text_generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True)
self.assertEqual(
outputs,
[
{"generated_token_ids": ANY(list)},
{"generated_token_ids": ANY(list)},
],
)
@require_torch
def test_small_chat_model_pt(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
chat2 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a second test"},
]
outputs = text_generator(chat1, do_sample=False, max_new_tokens=10)
expected_chat1 = chat1 + [
{
"role": "assistant",
"content": " factors factors factors factors factors factors factors factors factors factors",
}
]
self.assertEqual(
outputs,
[
{"generated_text": expected_chat1},
],
)
outputs = text_generator([chat1, chat2], do_sample=False, max_new_tokens=10)
expected_chat2 = chat2 + [
{
"role": "assistant",
"content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
}
]
self.assertEqual(
outputs,
[
[{"generated_text": expected_chat1}],
[{"generated_text": expected_chat2}],
],
)
@require_torch
def test_small_chat_model_continue_final_message(self):
# Here we check that passing a chat that ends in an assistant message is handled correctly
# by continuing the final message rather than starting a new one
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
{"role": "assistant", "content": "This is"},
]
outputs = text_generator(chat1, do_sample=False, max_new_tokens=10)
# Assert that we continued the last message and there isn't a sneaky <|im_end|>
self.assertEqual(
outputs,
[
{
"generated_text": [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
{
"role": "assistant",
"content": "This is stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
},
]
}
],
)
@require_torch
def test_small_chat_model_continue_final_message_override(self):
# Here we check that passing a chat that ends in an assistant message is handled correctly
# by continuing the final message rather than starting a new one
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
outputs = text_generator(chat1, do_sample=False, max_new_tokens=10, continue_final_message=True)
# Assert that we continued the last message and there isn't a sneaky <|im_end|>
self.assertEqual(
outputs,
[
{
"generated_text": [
{"role": "system", "content": "This is a system message."},
{
"role": "user",
"content": "This is a test stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
},
]
}
],
)
@require_torch
def test_small_chat_model_with_dataset_pt(self):
from torch.utils.data import Dataset
from transformers.pipelines.pt_utils import KeyDataset
class MyDataset(Dataset):
data = [
[
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
],
]
def __len__(self):
return 1
def __getitem__(self, i):
return {"text": self.data[i]}
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
dataset = MyDataset()
key_dataset = KeyDataset(dataset, "text")
for outputs in text_generator(key_dataset, do_sample=False, max_new_tokens=10):
expected_chat = dataset.data[0] + [
{
"role": "assistant",
"content": " factors factors factors factors factors factors factors factors factors factors",
}
]
self.assertEqual(
outputs,
[
{"generated_text": expected_chat},
],
)
@require_torch
def test_small_chat_model_with_iterator_pt(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat1 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
chat2 = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a second test"},
]
expected_chat1 = chat1 + [
{
"role": "assistant",
"content": " factors factors factors factors factors factors factors factors factors factors",
}
]
expected_chat2 = chat2 + [
{
"role": "assistant",
"content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
}
]
def data():
yield from [chat1, chat2]
outputs = text_generator(data(), do_sample=False, max_new_tokens=10)
outputs = list(outputs)
self.assertEqual(
outputs,
[
[{"generated_text": expected_chat1}],
[{"generated_text": expected_chat2}],
],
)
@require_torch
def test_small_chat_model_with_response_parsing(self):
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
# Using `do_sample=False` to force deterministic output
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
text_generator.tokenizer.response_schema = {
# A real response schema should probably have things like "role" and "content"
# and "reasoning_content" but it's unlikely we'd get a tiny model to reliably
# output anything like that, so let's keep it simple.
"type": "object",
"properties": {
"first_word": {"type": "string", "x-regex": r"^\s*([a-zA-Z]+)"},
"last_word": {"type": "string", "x-regex": r"([a-zA-Z]+)\s*$"},
},
}
outputs = text_generator(chat, do_sample=False, max_new_tokens=10)
parsed_message = outputs[0]["generated_text"][-1]
self.assertEqual(parsed_message, {"first_word": "factors", "last_word": "factors"})
@require_torch
def test_return_full_text_false_with_chat_template(self):
"""Regression test for #45854: return_full_text=False must not include prompt when using chat template."""
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
outputs = text_generator(chat, do_sample=False, max_new_tokens=10, return_full_text=False)
generated = outputs[0]["generated_text"]
# Must return plain string, not a list of message dicts
self.assertIsInstance(generated, str)
# Must not contain the prompt content
self.assertNotIn("This is a test", generated)
self.assertNotIn("This is a system message.", generated)
@require_torch
def test_return_full_text_true_with_chat_template(self):
"""return_full_text=True (default) must still return full chat list with chat template."""
text_generator = pipeline(
task="text-generation",
model="hf-internal-testing/tiny-gpt2-with-chatml-template",
)
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
outputs = text_generator(chat, do_sample=False, max_new_tokens=10, return_full_text=True)
generated = outputs[0]["generated_text"]
# Must return list of message dicts including original messages
self.assertIsInstance(generated, list)
roles = [m["role"] for m in generated]
self.assertIn("user", roles)
self.assertIn("assistant", roles)
def get_test_pipeline(
self,
model,
tokenizer=None,
image_processor=None,
feature_extractor=None,
processor=None,
dtype="float32",
):
text_generator = TextGenerationPipeline(
model=model,
tokenizer=tokenizer,
dtype=dtype,
max_new_tokens=5,
)
return text_generator, ["This is a test", "Another test"]
def test_stop_sequence_stopping_criteria(self):
prompt = """Hello I believe in"""
text_generator = pipeline(
"text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5, do_sample=False
)
output = text_generator(prompt)
self.assertEqual(
output,
[{"generated_text": "Hello I believe in fe fe fe fe fe"}],
)
output = text_generator(prompt, stop_sequence=" fe")
self.assertEqual(output, [{"generated_text": "Hello I believe in fe"}])
def run_pipeline_test(self, text_generator, _):
model = text_generator.model
tokenizer = text_generator.tokenizer
outputs = text_generator("This is a test")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
outputs = text_generator("This is a test", return_full_text=False)
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertNotIn("This is a test", outputs[0]["generated_text"])
text_generator = pipeline(
task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False, max_new_tokens=5
)
outputs = text_generator("This is a test")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertNotIn("This is a test", outputs[0]["generated_text"])
outputs = text_generator("This is a test", return_full_text=True)
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
outputs = text_generator(["This is great !", "Something else"], num_return_sequences=2, do_sample=True)
self.assertEqual(
outputs,
[
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
],
)
if text_generator.tokenizer.pad_token is not None:
outputs = text_generator(
["This is great !", "Something else"], num_return_sequences=2, batch_size=2, do_sample=True
)
self.assertEqual(
outputs,
[
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
],
)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_full_text=True, return_text=True)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_full_text=True, return_tensors=True)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_text=True, return_tensors=True)
# Empty prompt is slightly special
# it requires BOS token to exist.
# Special case for Pegasus which will always append EOS so will
# work even without BOS.
if (
text_generator.tokenizer.bos_token_id is not None
or "Pegasus" in tokenizer.__class__.__name__
or "Git" in model.__class__.__name__
):
outputs = text_generator("")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
else:
with self.assertRaises((ValueError, AssertionError)):
outputs = text_generator("", add_special_tokens=False)
# We don't care about infinite range models.
# They already work.
# Skip this test for XGLM, since it uses sinusoidal positional embeddings which are resized on-the-fly.
EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS = [
"RwkvForCausalLM",
"XGLMForCausalLM",
"GPTNeoXForCausalLM",
"GPTNeoXJapaneseForCausalLM",
"FuyuForCausalLM",
"LlamaForCausalLM",
]
if (
tokenizer.model_max_length < 10000
and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS
):
# Handling of large generations
if str(text_generator.device) == "cpu":
with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
text_generator("This is a test" * 500, max_new_tokens=5)
outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=5)
# Hole strategy cannot work
if str(text_generator.device) == "cpu":
with self.assertRaises(ValueError):
text_generator(
"This is a test" * 500,
handle_long_generation="hole",
max_new_tokens=tokenizer.model_max_length + 10,
)
@require_torch
@require_accelerate
@require_torch_accelerator
def test_small_model_pt_bloom_accelerate(self):
import torch
# Classic `model_kwargs`
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
model_kwargs={"device_map": "auto", "dtype": torch.bfloat16},
max_new_tokens=5,
do_sample=False,
)
self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
out = pipe("This is a test")
self.assertEqual(
out,
[{"generated_text": ("This is a test test test test test test")}],
)
# Upgraded those two to real pipeline arguments (they just get sent for the model as they're unlikely to mean anything else.)
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
device_map="auto",
dtype=torch.bfloat16,
max_new_tokens=5,
do_sample=False,
)
self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
out = pipe("This is a test")
self.assertEqual(
out,
[{"generated_text": ("This is a test test test test test test")}],
)
# dtype will be automatically set to torch.bfloat16 if not provided - check: https://github.com/huggingface/transformers/pull/38882
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom", device_map="auto", max_new_tokens=5, do_sample=False
)
self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
out = pipe("This is a test")
self.assertEqual(
out,
[{"generated_text": ("This is a test test test test test test")}],
)
@require_torch
@require_torch_accelerator
def test_small_model_fp16(self):
import torch
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
device=torch_device,
dtype=torch.float16,
max_new_tokens=3,
)
pipe("This is a test")
@require_torch
@require_accelerate
@require_torch_accelerator
def test_pipeline_accelerate_top_p(self):
import torch
pipe = pipeline(
model="hf-internal-testing/tiny-random-bloom",
device_map=torch_device,
dtype=torch.float16,
max_new_tokens=3,
)
pipe("This is a test", do_sample=True, top_p=0.5)
def test_pipeline_length_setting_warning(self):
prompt = """Hello world"""
text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5)
logger = logging.get_logger("transformers.generation.utils")
logger_msg = "Both `max_new_tokens`" # The beginning of the message to be checked in this test
# Both are set by the user -> log warning
with CaptureLogger(logger) as cl:
_ = text_generator(prompt, max_length=10, max_new_tokens=1)
self.assertIn(logger_msg, cl.out)
# The user only sets one -> no warning
with CaptureLogger(logger) as cl:
_ = text_generator(prompt, max_new_tokens=1)
self.assertNotIn(logger_msg, cl.out)
with CaptureLogger(logger) as cl:
_ = text_generator(prompt, max_length=10, max_new_tokens=None)
self.assertNotIn(logger_msg, cl.out)
def test_return_dict_in_generate(self):
text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=2)
out = text_generator(
["This is great !", "Something else"], return_dict_in_generate=True, output_logits=True, output_scores=True
)
self.assertEqual(
out,
[
[
{
"generated_text": ANY(str),
"logits": ANY(list),
"scores": ANY(list),
},
],
[
{
"generated_text": ANY(str),
"logits": ANY(list),
"scores": ANY(list),
},
],
],
)
@require_torch
def test_pipeline_assisted_generation(self):
"""Tests that we can run assisted generation in the pipeline"""
model = "hf-internal-testing/tiny-random-MistralForCausalLM"
pipe = pipeline("text-generation", model=model, assistant_model=model, max_new_tokens=2)
# We can run the pipeline
prompt = "Hello world"
_ = pipe(prompt)
# It is running assisted generation under the hood (e.g. flags incompatible with assisted gen will crash)
with self.assertRaises(ValueError):
_ = pipe(prompt, generate_kwargs={"num_beams": 2})
@require_torch
def test_pipeline_skip_special_tokens(self):
"""Tests that we can use `skip_special_tokens=False` to get the special tokens in the output"""
model_id = "google/gemma-3-270m-it"
chat = [{"role": "user", "content": "What's your name?"}]
generator = pipeline("text-generation", model=model_id)
# normal pipeline use
output = generator(chat, max_new_tokens=20, do_sample=False)
self.assertNotIn("<end_of_turn>", str(output[0]["generated_text"]))
# forcing special tokens to be included in the output
output = generator(chat, max_new_tokens=1000, do_sample=False, skip_special_tokens=False)
self.assertIn("<end_of_turn>", str(output[0]["generated_text"]))
@require_torch
def test_forward_tokenizer_kwargs(self):
chat = [
{"role": "system", "content": "This is a system message."},
{"role": "user", "content": "This is a test"},
]
model = "hf-internal-testing/tiny-gpt2-with-chatml-template"
text_generator = pipeline("text-generation", model, max_new_tokens=5)
tokenizer = text_generator.tokenizer
with patch.object(tokenizer, "apply_chat_template", wraps=tokenizer.apply_chat_template) as mock:
text_generator(chat, tokenizer_encode_kwargs={"enable_thinking": True})
self.assertGreater(mock.call_count, 0)
kw_call_args = mock.call_args[1]
self.assertIn("enable_thinking", kw_call_args)
self.assertEqual(kw_call_args["enable_thinking"], True)