first commit
Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
This commit is contained in:
32
tests/models/herbert/test_tokenization_herbert.py
Normal file
32
tests/models/herbert/test_tokenization_herbert.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# Copyright 2020 HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import HerbertTokenizer
|
||||
from transformers.testing_utils import require_tokenizers
|
||||
|
||||
from ...test_tokenization_common import TokenizerTesterMixin
|
||||
|
||||
|
||||
@require_tokenizers
|
||||
class HerbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
from_pretrained_id = "allegro/herbert-base-cased"
|
||||
tokenizer_class = HerbertTokenizer
|
||||
|
||||
integration_expected_tokens = ['T', 'his</w>', 'is</w>', 'a</w>', 'test</w>', '<unk>', 'I</w>', 'was</w>', 'bor', 'n</w>', 'in</w>', '9', '2000</w>', ',</w>', 'and</w>', 'this</w>', 'is</w>', 'fal', 's', 'é</w>', '.</w>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '是</w>', 'H', 'i</w>', 'Hel', 'lo</w>', 'H', 'i</w>', 'Hel', 'lo</w>', 'Hel', 'lo</w>', '<s>', 'hi</w>', '<s>', 'ther', 'e</w>', 'The</w>', 'fol', 'low', 'ing</w>', 'str', 'ing</w>', 'sho', 'uld</w>', 'be</w>', 'pro', 'per', 'ly</w>', 'en', 'c', 'ode', 'd</w>', ':</w>', 'Hel', 'lo</w>', '.</w>', 'Bu', 't</w>', 'ir', 'd</w>', 'and</w>', '<unk>', 'ี</w>', 'ir', 'd</w>', 'ด</w>', 'He', 'y</w>', 'ho', 'w</w>', 'are</w>', 'you</w>', 'do', 'ing</w>'] # fmt: skip
|
||||
integration_expected_token_ids = [56, 22855, 6869, 1011, 14825, 3, 1056, 9873, 2822, 1016, 2651, 29, 3450, 1947, 7158, 48846, 6869, 7355, 87, 1093, 1899, 3, 3, 3, 3, 3, 1776, 44, 1009, 12156, 6170, 44, 1009, 12156, 6170, 12156, 6170, 0, 21566, 0, 40445, 1015, 7117, 9929, 13194, 5129, 15948, 5129, 14924, 48273, 11072, 2088, 3040, 8172, 2058, 71, 3909, 1038, 1335, 12156, 6170, 1899, 3025, 1026, 17435, 1038, 7158, 3, 1085, 17435, 1038, 1579, 4596, 1005, 3145, 1019, 25720, 20254, 2065, 5129] # fmt: skip
|
||||
expected_tokens_from_ids = ['T', 'his</w>', 'is</w>', 'a</w>', 'test</w>', '<unk>', 'I</w>', 'was</w>', 'bor', 'n</w>', 'in</w>', '9', '2000</w>', ',</w>', 'and</w>', 'this</w>', 'is</w>', 'fal', 's', 'é</w>', '.</w>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '是</w>', 'H', 'i</w>', 'Hel', 'lo</w>', 'H', 'i</w>', 'Hel', 'lo</w>', 'Hel', 'lo</w>', '<s>', 'hi</w>', '<s>', 'ther', 'e</w>', 'The</w>', 'fol', 'low', 'ing</w>', 'str', 'ing</w>', 'sho', 'uld</w>', 'be</w>', 'pro', 'per', 'ly</w>', 'en', 'c', 'ode', 'd</w>', ':</w>', 'Hel', 'lo</w>', '.</w>', 'Bu', 't</w>', 'ir', 'd</w>', 'and</w>', '<unk>', 'ี</w>', 'ir', 'd</w>', 'ด</w>', 'He', 'y</w>', 'ho', 'w</w>', 'are</w>', 'you</w>', 'do', 'ing</w>'] # fmt: skip
|
||||
integration_expected_decoded_text = "This is a test <unk>I was born in 92000 , and this is falsé . <unk><unk><unk><unk><unk>是 Hi Hello Hi Hello Hello <s>hi <s>there The following string should be properly encoded : Hello . But ird and <unk>ี ird ด Hey how are you doing"
|
||||
Reference in New Issue
Block a user