# Copyright 2026 Tencent HunYuan Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for HYV3 (MoE language model) configuration and modeling."""

import unittest

from transformers import is_torch_available
from transformers.testing_utils import (
    Expectations,
    cleanup,
    require_torch,
    require_torch_accelerator,
    slow,
    torch_device,
)


if is_torch_available():
    import torch

    from transformers.models.hy_v3.modeling_hy_v3 import HYV3ForCausalLM, HYV3Model

from ...causal_lm_tester import CausalLMModelTest, CausalLMModelTester


class HYV3ModelTester(CausalLMModelTester):
    """Model tester for HYV3, built on the shared causal-LM tester."""

    # HYV3Model is only imported when torch is installed, so the attribute is
    # guarded the same way to keep this module importable without torch.
    if is_torch_available():
        base_model_class = HYV3Model


@require_torch
class HYV3ModelTest(CausalLMModelTest, unittest.TestCase):
    """Common causal-LM test suite applied to HYV3, plus MoE-specific checks."""

    model_tester_class = HYV3ModelTester
    test_all_params_have_gradient = False
    model_split_percents = [0.5, 0.8, 0.9]

    def test_router_logits_and_no_aux_loss(self):
        """HYV3 returns router_logits but does not compute aux_loss (always None)."""
        config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.output_router_logits = True

        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device).eval()
            with torch.no_grad():
                result = model(**input_dict)

            # Not every output type exposes router logits; only validate them when present.
            if hasattr(result, "router_logits") and result.router_logits is not None:
                # One router-logits entry is expected per layer whose type is "sparse".
                num_moe_layers = sum(1 for t in config.mlp_layer_types if t == "sparse")
                self.assertEqual(len(result.router_logits), num_moe_layers)
                for rl in result.router_logits:
                    self.assertEqual(rl.shape[-1], config.num_experts)

            if hasattr(result, "aux_loss"):
                self.assertIsNone(result.aux_loss)


@slow
@require_torch
class HYV3IntegrationTest(unittest.TestCase):
    """Integration tests for HYV3 with a small randomized model."""

    model_id = "hf-internal-testing/HYV3-tiny-random"

    def setUp(self):
        # unittest only invokes the hook spelled ``setUp``; a method named
        # ``setup`` is never called, so the pre-test cleanup would be skipped.
        cleanup(torch_device, gc_collect=True)

    def tearDown(self):
        cleanup(torch_device, gc_collect=True)

    @require_torch_accelerator
    def test_small_model_logits_batched(self):
        """Compare a slice of batched logits against per-device reference values.

        For each of the two batch rows, the last three positions and the first
        three vocabulary entries are checked with ``atol=rtol=1e-3``.
        """
        dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)
        # Token id 0 is treated as padding: those positions are masked out.
        attention_mask = dummy_input.ne(0).to(torch.long)

        model = HYV3ForCausalLM.from_pretrained(self.model_id, dtype=torch.bfloat16).to(torch_device)

        EXPECTED_LOGITS_LEFT_UNPADDED = Expectations(
            {
                ("cuda", (8, 6)): [[0.0608, -0.0933, 0.1348], [-0.0688, -0.1099, 0.1396], [0.0199, -0.0913, 0.1641]],
                ("cuda", 9): [[0.063, -0.0938, 0.1348], [-0.0693, -0.1128, 0.1357], [0.0209, -0.0923, 0.1611]],
                ("xpu", 3): [[0.0623, -0.0923, 0.1348], [-0.0684, -0.1108, 0.1338], [0.0201, -0.0938, 0.1611]],
            }
        )
        expected_left_unpadded = torch.tensor(EXPECTED_LOGITS_LEFT_UNPADDED.get_expectation(), device=torch_device)

        EXPECTED_LOGITS_RIGHT_UNPADDED = Expectations(
            {
                ("cuda", (8, 6)): [[-0.0396, -0.1084, 0.0588], [-0.0100, -0.0903, 0.0747], [0.0645, -0.1172, 0.0508]],
                ("cuda", 9): [[-0.0378, -0.1089, 0.0581], [-0.0088, -0.0908, 0.0752], [0.064, -0.1167, 0.0483]],
                ("xpu", 3): [[-0.0376, -0.1084, 0.0586], [-0.0087, -0.0903, 0.0767], [0.0674, -0.1172, 0.0481]],
            }
        )
        expected_right_unpadded = torch.tensor(EXPECTED_LOGITS_RIGHT_UNPADDED.get_expectation(), device=torch_device)

        with torch.no_grad():
            logits = model(dummy_input, attention_mask=attention_mask).logits
        # The model is loaded in bfloat16; upcast so the comparison runs in float32.
        logits = logits.float()

        torch.testing.assert_close(logits[0, -3:, :3], expected_left_unpadded, atol=1e-3, rtol=1e-3)
        torch.testing.assert_close(logits[1, -3:, :3], expected_right_unpadded, atol=1e-3, rtol=1e-3)

    @require_torch_accelerator
    def test_small_model_generation(self):
        """Greedy-generate 20 new tokens from a 3-token prompt and compare to per-device references."""
        EXPECTED_TOKENS = Expectations(
            {
                ("cuda", 9): [1, 2, 3, 8754, 20977, 8754, 8754, 8754, 8754, 8754, 8754, 8754, 8372, 8754, 8372, 21393, 8754, 8372, 21393, 8754, 8372, 21393, 8754],
                ("cuda", (8, 6)): [1, 2, 3, 8754, 20977, 8754, 8754, 8754, 8754, 8754, 8754, 8754, 8372, 8754, 8372, 21393, 8754, 8372, 21393, 8754, 8372, 21393, 11262],
                ("xpu", 3): [1, 2, 3, 8754, 20977, 8754, 8754, 8754, 8754, 8754, 8754, 8754, 8372, 8754, 8372, 21393, 8754, 8372, 21393, 8754, 8372, 21393, 8754],
            }
        )  # fmt: skip
        expected_tokens = EXPECTED_TOKENS.get_expectation()

        model = HYV3ForCausalLM.from_pretrained(self.model_id, dtype=torch.bfloat16).to(torch_device)
        input_ids = torch.LongTensor([[1, 2, 3]]).to(torch_device)

        # do_sample=False keeps decoding greedy so the output can be compared exactly.
        generated_ids = model.generate(input_ids, max_new_tokens=20, do_sample=False)

        self.assertEqual(generated_ids[0].tolist(), expected_tokens)