# Copyright 2025 Bytedance-Seed Ltd and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch SeedOss model."""

import unittest

import pytest

from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_available
from transformers.testing_utils import (
    cleanup,
    require_flash_attn,
    require_torch,
    require_torch_large_accelerator,
    slow,
    torch_device,
)

from ...causal_lm_tester import CausalLMModelTest, CausalLMModelTester


if is_torch_available():
    import torch

    from transformers import (
        SeedOssModel,
    )


class SeedOssModelTester(CausalLMModelTester):
    """Model tester for SeedOss, built on the shared causal-LM tester."""

    # `SeedOssModel` is only imported when torch is installed, so the attribute is guarded too.
    if is_torch_available():
        base_model_class = SeedOssModel

    def __init__(self, parent):
        """Initialise the base tester, then force every dropout setting to zero."""
        super().__init__(parent=parent)
        # NOTE(3outeille): must be 0.0 for TP backward tests. In train mode, non-zero dropout causes
        # different RNG states between the non-TP and TP model forward passes (they run sequentially),
        # leading to different dropout masks and mismatched losses.
        self.attention_probs_dropout_prob = 0.0
        self.attention_dropout = 0.0
        self.residual_dropout = 0.0


@require_torch
class SeedOssModelTest(CausalLMModelTest, unittest.TestCase):
    """Runs the common causal-LM test suite against SeedOss."""

    model_tester_class = SeedOssModelTester
    # The two settings below are read by the shared test base classes; their exact
    # semantics are defined there, not in this file.
    _is_stateful = True
    model_split_percents = [0.5, 0.6]


@slow
@require_torch_large_accelerator
class SeedOssIntegrationTest(unittest.TestCase):
    """Generation tests against the released 36B base checkpoint, one per attention backend."""

    input_text = ["How to make pasta?", "Hi ByteDance-Seed"]
    model_id = "ByteDance-Seed/Seed-OSS-36B-Base"

    # Expected decoded output for `input_text`; identical for every attention implementation.
    EXPECTED_TEXTS = [
        "How to make pasta?\nHow to make pasta?\nPasta is a popular dish that is enjoyed by people all over",
        "Hi ByteDance-Seed team,\nI am trying to run the code on the seed",
    ]

    def setUp(self):
        """Release accelerator memory before each test."""
        cleanup(torch_device, gc_collect=True)

    def tearDown(self):
        """Release accelerator memory after each test."""
        cleanup(torch_device, gc_collect=True)

    def _check_generation(self, attn_implementation=None):
        """Load the checkpoint, generate 20 new tokens for `input_text` and compare with `EXPECTED_TEXTS`.

        Args:
            attn_implementation: Value forwarded to `from_pretrained` as `attn_implementation`.
                When `None`, the argument is not passed at all so the library default is used.
        """
        load_kwargs = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
        if attn_implementation is not None:
            load_kwargs["attn_implementation"] = attn_implementation

        # Model and tokenizer are both loaded from `self.model_id` so they cannot drift apart.
        model = AutoModelForCausalLM.from_pretrained(self.model_id, **load_kwargs)
        tokenizer = AutoTokenizer.from_pretrained(self.model_id)

        # With `device_map="auto"` the inputs are moved to wherever the embedding weights live.
        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True, return_token_type_ids=False).to(
            model.model.embed_tokens.weight.device
        )

        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        self.assertEqual(output_text, self.EXPECTED_TEXTS)

    def test_model_36b_eager(self):
        """Generation with the `eager` attention implementation."""
        self._check_generation(attn_implementation="eager")

    def test_model_36b_sdpa(self):
        """Generation with the default attention implementation."""
        # default attention is `sdpa` (and this model repo. doesn't specify explicitly) --> we get `sdpa` here
        self._check_generation()

    @require_flash_attn
    @require_torch_large_accelerator
    @pytest.mark.flash_attn_test
    def test_model_36b_flash_attn(self):
        """Generation with the `flash_attention_2` attention implementation."""
        self._check_generation(attn_implementation="flash_attention_2")