first commit

2026-06-05 16:53:03 +08:00
commit 06f1fd69a6
6047 changed files with 1895387 additions and 0 deletions
--- a/tests/models/hy_v3/test_modeling_hy_v3.py
+++ b/tests/models/hy_v3/test_modeling_hy_v3.py
@@ -0,0 +1,128 @@
+# Copyright 2026 Tencent HunYuan Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for HYV3 (MoE language model) configuration and modeling."""
+
+import unittest
+
+from transformers import is_torch_available
+from transformers.testing_utils import (
+    Expectations,
+    cleanup,
+    require_torch,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+
+
+if is_torch_available():
+    import torch
+
+    from transformers.models.hy_v3.modeling_hy_v3 import HYV3ForCausalLM, HYV3Model
+
+from ...causal_lm_tester import CausalLMModelTest, CausalLMModelTester
+
+
+class HYV3ModelTester(CausalLMModelTester):
+    if is_torch_available():
+        base_model_class = HYV3Model
+
+
+@require_torch
+class HYV3ModelTest(CausalLMModelTest, unittest.TestCase):
+    model_tester_class = HYV3ModelTester
+    test_all_params_have_gradient = False
+    model_split_percents = [0.5, 0.8, 0.9]
+
+    def test_router_logits_and_no_aux_loss(self):
+        """HYV3 returns router_logits but does not compute aux_loss (always None)."""
+        config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config.output_router_logits = True
+
+        for model_class in self.all_model_classes:
+            model = model_class(config).to(torch_device).eval()
+            with torch.no_grad():
+                result = model(**input_dict)
+
+            if hasattr(result, "router_logits") and result.router_logits is not None:
+                num_moe_layers = sum(1 for t in config.mlp_layer_types if t == "sparse")
+                self.assertEqual(len(result.router_logits), num_moe_layers)
+                for rl in result.router_logits:
+                    self.assertEqual(rl.shape[-1], config.num_experts)
+
+            if hasattr(result, "aux_loss"):
+                self.assertIsNone(result.aux_loss)
+
+
+@slow
+@require_torch
+class HYV3IntegrationTest(unittest.TestCase):
+    """Integration tests for HYV3 with a small randomized model."""
+
+    model_id = "hf-internal-testing/HYV3-tiny-random"
+
+    def setup(self):
+        cleanup(torch_device, gc_collect=True)
+
+    def tearDown(self):
+        cleanup(torch_device, gc_collect=True)
+
+    @require_torch_accelerator
+    def test_small_model_logits_batched(self):
+        dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)
+        attention_mask = dummy_input.ne(0).to(torch.long)
+
+        model = HYV3ForCausalLM.from_pretrained(self.model_id, dtype=torch.bfloat16).to(torch_device)
+
+        EXPECTED_LOGITS_LEFT_UNPADDED = Expectations(
+            {
+                ("cuda", (8, 6)): [[0.0608, -0.0933, 0.1348], [-0.0688, -0.1099, 0.1396], [0.0199, -0.0913, 0.1641]],
+                ("cuda", 9): [[0.063, -0.0938, 0.1348], [-0.0693, -0.1128, 0.1357], [0.0209, -0.0923, 0.1611]],
+                ("xpu", 3): [[0.0623, -0.0923, 0.1348], [-0.0684, -0.1108, 0.1338], [0.0201, -0.0938, 0.1611]],
+            }
+        )
+        expected_left_unpadded = torch.tensor(EXPECTED_LOGITS_LEFT_UNPADDED.get_expectation(), device=torch_device)
+
+        EXPECTED_LOGITS_RIGHT_UNPADDED = Expectations(
+            {
+                ("cuda", (8, 6)): [[-0.0396, -0.1084, 0.0588], [-0.0100, -0.0903, 0.0747], [0.0645, -0.1172, 0.0508]],
+                ("cuda", 9): [[-0.0378, -0.1089, 0.0581], [-0.0088, -0.0908, 0.0752], [0.064, -0.1167, 0.0483]],
+                ("xpu", 3): [[-0.0376, -0.1084, 0.0586], [-0.0087, -0.0903, 0.0767], [0.0674, -0.1172, 0.0481]],
+            }
+        )
+        expected_right_unpadded = torch.tensor(EXPECTED_LOGITS_RIGHT_UNPADDED.get_expectation(), device=torch_device)
+
+        with torch.no_grad():
+            logits = model(dummy_input, attention_mask=attention_mask).logits
+        logits = logits.float()
+
+        torch.testing.assert_close(logits[0, -3:, :3], expected_left_unpadded, atol=1e-3, rtol=1e-3)
+        torch.testing.assert_close(logits[1, -3:, :3], expected_right_unpadded, atol=1e-3, rtol=1e-3)
+
+    @require_torch_accelerator
+    def test_small_model_generation(self):
+        EXPECTED_TOKENS = Expectations(
+            {
+                ("cuda", 9): [1, 2, 3, 8754, 20977, 8754, 8754, 8754, 8754, 8754, 8754, 8754, 8372, 8754, 8372, 21393, 8754, 8372, 21393, 8754, 8372, 21393, 8754],
+                ("cuda", (8, 6)): [1, 2, 3, 8754, 20977, 8754, 8754, 8754, 8754, 8754, 8754, 8754, 8372, 8754, 8372, 21393, 8754, 8372, 21393, 8754, 8372, 21393, 11262],
+                ("xpu", 3): [1, 2, 3, 8754, 20977, 8754, 8754, 8754, 8754, 8754, 8754, 8754, 8372, 8754, 8372, 21393, 8754, 8372, 21393, 8754, 8372, 21393, 8754],
+            }
+        )  # fmt: skip
+        expected_tokens = EXPECTED_TOKENS.get_expectation()
+
+        model = HYV3ForCausalLM.from_pretrained(self.model_id, dtype=torch.bfloat16).to(torch_device)
+        input_ids = torch.LongTensor([[1, 2, 3]]).to(torch_device)
+
+        generated_ids = model.generate(input_ids, max_new_tokens=20, do_sample=False)
+        self.assertEqual(generated_ids[0].tolist(), expected_tokens)