Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
353 lines
19 KiB
Python
353 lines
19 KiB
Python
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
||
#
|
||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
#
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
#
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
|
||
import json
|
||
import os
|
||
import unittest
|
||
from functools import cached_property
|
||
|
||
from transformers.models.layoutlmv3 import LayoutLMv3Processor, LayoutLMv3Tokenizer, LayoutLMv3TokenizerFast
|
||
from transformers.models.layoutlmv3.tokenization_layoutlmv3 import VOCAB_FILES_NAMES
|
||
from transformers.testing_utils import require_pytesseract, require_tokenizers, require_torch, slow
|
||
from transformers.utils import is_pytesseract_available
|
||
|
||
from ...test_processing_common import ProcessorTesterMixin
|
||
|
||
|
||
if is_pytesseract_available():
|
||
from transformers import LayoutLMv3ImageProcessor
|
||
|
||
|
||
@require_pytesseract
@require_tokenizers
class LayoutLMv3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    """Run the shared `ProcessorTesterMixin` test-suite against `LayoutLMv3Processor`.

    The `_setup_*` class methods below build the image processor and the tokenizer for the
    processor under test (presumably invoked by the mixin's own set-up code; confirm against
    `ProcessorTesterMixin`).
    """

    processor_class = LayoutLMv3Processor

    @classmethod
    def _setup_image_processor(cls):
        """Return an instance of the processor's image-processor class (resize on, size 224, OCR on)."""
        component_cls = cls._get_component_class_from_processor("image_processor")
        init_kwargs = {"do_resize": True, "size": 224, "apply_ocr": True}
        return component_cls(**init_kwargs)

    @classmethod
    def _setup_tokenizer(cls):
        """Write a toy BPE vocab/merges pair into `cls.tmpdirname` and load the slow tokenizer from it."""
        component_cls = cls._get_component_class_from_processor("tokenizer", use_fast=False)

        # Adapted from Sennrich et al. 2015 and https://github.com/rsennrich/subword-nmt
        # fmt: off
        toy_vocab = [
            "l", "o", "w", "e", "r", "s", "t", "i", "d", "n",
            "\u0120", "\u0120l", "\u0120n", "\u0120lo", "\u0120low",
            "er", "\u0120lowest", "\u0120newer", "\u0120wider", "<unk>",
        ]
        # fmt: on
        # Each token's id is simply its position in the list above
        token_to_id = {token: index for index, token in enumerate(toy_vocab)}
        merge_rules = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""]

        vocab_path = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
        merges_path = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])

        with open(vocab_path, "w", encoding="utf-8") as handle:
            handle.write(json.dumps(token_to_id) + "\n")
        with open(merges_path, "w", encoding="utf-8") as handle:
            handle.write("\n".join(merge_rules))

        return component_cls.from_pretrained(cls.tmpdirname, unk_token="<unk>")

    @unittest.skip("LayoutLMv3 Image Processor doesn't return image tensors")
    def test_image_processor_defaults(self):
        """Disabled for this processor; see the skip reason on the decorator."""
|
||
|
||
|
||
# different use cases tests
|
||
@require_torch
|
||
@require_pytesseract
|
||
class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
|
||
@cached_property
|
||
def get_images(self):
|
||
# we verify our implementation on 2 document images from the DocVQA dataset
|
||
from datasets import load_dataset
|
||
|
||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
||
return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")
|
||
|
||
@cached_property
|
||
def get_tokenizers(self):
|
||
slow_tokenizer = LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base", add_visual_labels=False)
|
||
fast_tokenizer = LayoutLMv3TokenizerFast.from_pretrained("microsoft/layoutlmv3-base", add_visual_labels=False)
|
||
return [slow_tokenizer, fast_tokenizer]
|
||
|
||
@slow
|
||
def test_processor_case_1(self):
|
||
# case 1: document image classification (training, inference) + token classification (inference), apply_ocr = True
|
||
|
||
image_processor = LayoutLMv3ImageProcessor()
|
||
tokenizers = self.get_tokenizers
|
||
images = self.get_images
|
||
|
||
for tokenizer in tokenizers:
|
||
processor = LayoutLMv3Processor(image_processor=image_processor, tokenizer=tokenizer)
|
||
|
||
# not batched
|
||
input_image_proc = image_processor(images[0], return_tensors="pt")
|
||
input_processor = processor(images[0], return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify image
|
||
self.assertAlmostEqual(
|
||
input_image_proc["pixel_values"].sum(), input_processor["pixel_values"].sum(), delta=1e-2
|
||
)
|
||
|
||
# verify input_ids
|
||
# this was obtained with Tesseract 4.1.1
|
||
expected_decoding = "<s> 11:14 to 11:39 a.m 11:39 to 11:44 a.m. 11:44 a.m. to 12:25 p.m. 12:25 to 12:58 p.m. 12:58 to 4:00 p.m. 2:00 to 5:00 p.m. Coffee Break Coffee will be served for men and women in the lobby adjacent to exhibit area. Please move into exhibit area. (Exhibits Open) TRRF GENERAL SESSION (PART |) Presiding: Lee A. Waller TRRF Vice President “Introductory Remarks” Lee A. Waller, TRRF Vice Presi- dent Individual Interviews with TRRF Public Board Members and Sci- entific Advisory Council Mem- bers Conducted by TRRF Treasurer Philip G. Kuehn to get answers which the public refrigerated warehousing industry is looking for. Plus questions from the floor. Dr. Emil M. Mrak, University of Cal- ifornia, Chairman, TRRF Board; Sam R. Cecil, University of Georgia College of Agriculture; Dr. Stanley Charm, Tufts University School of Medicine; Dr. Robert H. Cotton, ITT Continental Baking Company; Dr. Owen Fennema, University of Wis- consin; Dr. Robert E. Hardenburg, USDA. Questions and Answers Exhibits Open Capt. Jack Stoney Room TRRF Scientific Advisory Council Meeting Ballroom Foyer</s>" # fmt: skip
|
||
decoding = processor.decode(input_processor.input_ids.squeeze().tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# batched
|
||
input_image_proc = image_processor(images, return_tensors="pt")
|
||
input_processor = processor(images, padding=True, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify images
|
||
self.assertAlmostEqual(
|
||
input_image_proc["pixel_values"].sum(), input_processor["pixel_values"].sum(), delta=1e-2
|
||
)
|
||
|
||
# verify input_ids
|
||
# this was obtained with Tesseract 4.1.1
|
||
expected_decoding = "<s> 7 ITC Limited REPORT AND ACCOUNTS 2013 ITC’s Brands: An Asset for the Nation The consumer needs and aspirations they fulfil, the benefit they generate for millions across ITC’s value chains, the future-ready capabilities that support them, and the value that they create for the country, have made ITC’s brands national assets, adding to India’s competitiveness. It is ITC’s aspiration to be the No 1 FMCG player in the country, driven by its new FMCG businesses. A recent Nielsen report has highlighted that ITC's new FMCG businesses are the fastest growing among the top consumer goods companies operating in India. ITC takes justifiable pride that, along with generating economic value, these celebrated Indian brands also drive the creation of larger societal capital through the virtuous cycle of sustainable and inclusive growth. DI WILLS * ; LOVE DELIGHTFULLY SOFT SKIN? aia Ans Source: https://www.industrydocuments.ucsf.edu/docs/snbx0223</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>" # fmt: skip
|
||
decoding = processor.decode(input_processor.input_ids[1].tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
@slow
|
||
def test_processor_case_2(self):
|
||
# case 2: document image classification (training, inference) + token classification (inference), apply_ocr=False
|
||
|
||
image_processor = LayoutLMv3ImageProcessor(apply_ocr=False)
|
||
tokenizers = self.get_tokenizers
|
||
images = self.get_images
|
||
|
||
for tokenizer in tokenizers:
|
||
processor = LayoutLMv3Processor(image_processor=image_processor, tokenizer=tokenizer)
|
||
|
||
# not batched
|
||
words = ["hello", "world"]
|
||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]
|
||
input_processor = processor(images[0], words, boxes=boxes, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["input_ids", "bbox", "attention_mask", "pixel_values"]
|
||
actual_keys = list(input_processor.keys())
|
||
for key in expected_keys:
|
||
self.assertIn(key, actual_keys)
|
||
|
||
# verify input_ids
|
||
expected_decoding = "<s> hello world</s>"
|
||
decoding = processor.decode(input_processor.input_ids.squeeze().tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# batched
|
||
words = [["hello", "world"], ["my", "name", "is", "niels"]]
|
||
boxes = [[[1, 2, 3, 4], [5, 6, 7, 8]], [[3, 2, 5, 1], [6, 7, 4, 2], [3, 9, 2, 4], [1, 1, 2, 3]]]
|
||
input_processor = processor(images, words, boxes=boxes, padding=True, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
expected_decoding = "<s> hello world</s><pad><pad><pad>"
|
||
decoding = processor.decode(input_processor.input_ids[0].tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# verify bbox
|
||
expected_bbox = [
|
||
[0, 0, 0, 0],
|
||
[3, 2, 5, 1],
|
||
[6, 7, 4, 2],
|
||
[3, 9, 2, 4],
|
||
[1, 1, 2, 3],
|
||
[1, 1, 2, 3],
|
||
[0, 0, 0, 0],
|
||
]
|
||
self.assertListEqual(input_processor.bbox[1].tolist(), expected_bbox)
|
||
|
||
@slow
|
||
def test_processor_case_3(self):
|
||
# case 3: token classification (training), apply_ocr=False
|
||
|
||
image_processor = LayoutLMv3ImageProcessor(apply_ocr=False)
|
||
tokenizers = self.get_tokenizers
|
||
images = self.get_images
|
||
|
||
for tokenizer in tokenizers:
|
||
processor = LayoutLMv3Processor(image_processor=image_processor, tokenizer=tokenizer)
|
||
|
||
# not batched
|
||
words = ["weirdly", "world"]
|
||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]
|
||
word_labels = [1, 2]
|
||
input_processor = processor(images[0], words, boxes=boxes, word_labels=word_labels, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "labels", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
expected_decoding = "<s> weirdly world</s>"
|
||
decoding = processor.decode(input_processor.input_ids.squeeze().tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# verify labels
|
||
expected_labels = [-100, 1, -100, 2, -100]
|
||
self.assertListEqual(input_processor.labels.squeeze().tolist(), expected_labels)
|
||
|
||
# batched
|
||
words = [["hello", "world"], ["my", "name", "is", "niels"]]
|
||
boxes = [[[1, 2, 3, 4], [5, 6, 7, 8]], [[3, 2, 5, 1], [6, 7, 4, 2], [3, 9, 2, 4], [1, 1, 2, 3]]]
|
||
word_labels = [[1, 2], [6, 3, 10, 2]]
|
||
input_processor = processor(
|
||
images, words, boxes=boxes, word_labels=word_labels, padding=True, return_tensors="pt"
|
||
)
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "labels", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
expected_decoding = "<s> my name is niels</s>"
|
||
decoding = processor.decode(input_processor.input_ids[1].tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# verify bbox
|
||
expected_bbox = [
|
||
[0, 0, 0, 0],
|
||
[3, 2, 5, 1],
|
||
[6, 7, 4, 2],
|
||
[3, 9, 2, 4],
|
||
[1, 1, 2, 3],
|
||
[1, 1, 2, 3],
|
||
[0, 0, 0, 0],
|
||
]
|
||
self.assertListEqual(input_processor.bbox[1].tolist(), expected_bbox)
|
||
|
||
# verify labels
|
||
expected_labels = [-100, 6, 3, 10, 2, -100, -100]
|
||
self.assertListEqual(input_processor.labels[1].tolist(), expected_labels)
|
||
|
||
@slow
|
||
def test_processor_case_4(self):
|
||
# case 4: visual question answering (inference), apply_ocr=True
|
||
|
||
image_processor = LayoutLMv3ImageProcessor()
|
||
tokenizers = self.get_tokenizers
|
||
images = self.get_images
|
||
|
||
for tokenizer in tokenizers:
|
||
processor = LayoutLMv3Processor(image_processor=image_processor, tokenizer=tokenizer)
|
||
|
||
# not batched
|
||
question = "What's his name?"
|
||
input_processor = processor(images[0], question, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
# this was obtained with Tesseract 4.1.1
|
||
expected_decoding = "<s> What's his name?</s></s> 11:14 to 11:39 a.m 11:39 to 11:44 a.m. 11:44 a.m. to 12:25 p.m. 12:25 to 12:58 p.m. 12:58 to 4:00 p.m. 2:00 to 5:00 p.m. Coffee Break Coffee will be served for men and women in the lobby adjacent to exhibit area. Please move into exhibit area. (Exhibits Open) TRRF GENERAL SESSION (PART |) Presiding: Lee A. Waller TRRF Vice President “Introductory Remarks” Lee A. Waller, TRRF Vice Presi- dent Individual Interviews with TRRF Public Board Members and Sci- entific Advisory Council Mem- bers Conducted by TRRF Treasurer Philip G. Kuehn to get answers which the public refrigerated warehousing industry is looking for. Plus questions from the floor. Dr. Emil M. Mrak, University of Cal- ifornia, Chairman, TRRF Board; Sam R. Cecil, University of Georgia College of Agriculture; Dr. Stanley Charm, Tufts University School of Medicine; Dr. Robert H. Cotton, ITT Continental Baking Company; Dr. Owen Fennema, University of Wis- consin; Dr. Robert E. Hardenburg, USDA. Questions and Answers Exhibits Open Capt. Jack Stoney Room TRRF Scientific Advisory Council Meeting Ballroom Foyer</s>" # fmt: skip
|
||
decoding = processor.decode(input_processor.input_ids.squeeze().tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# batched
|
||
questions = ["How old is he?", "what's the time"]
|
||
input_processor = processor(
|
||
images, questions, padding="max_length", max_length=20, truncation=True, return_tensors="pt"
|
||
)
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
# this was obtained with Tesseract 4.1.1
|
||
expected_decoding = "<s> what's the time</s></s> 7 ITC Limited REPORT AND ACCOUNTS 2013 ITC</s>"
|
||
decoding = processor.decode(input_processor.input_ids[1].tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# verify bbox
|
||
expected_bbox = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 45, 67, 80], [72, 56, 109, 67], [72, 56, 109, 67], [116, 56, 189, 67], [198, 59, 253, 66], [257, 59, 285, 66], [289, 59, 365, 66], [289, 59, 365, 66], [289, 59, 365, 66], [372, 59, 407, 66], [74, 136, 161, 158], [74, 136, 161, 158], [0, 0, 0, 0]] # fmt: skip
|
||
self.assertListEqual(input_processor.bbox[1].tolist(), expected_bbox)
|
||
|
||
@slow
|
||
def test_processor_case_5(self):
|
||
# case 5: visual question answering (inference), apply_ocr=False
|
||
|
||
image_processor = LayoutLMv3ImageProcessor(apply_ocr=False)
|
||
tokenizers = self.get_tokenizers
|
||
images = self.get_images
|
||
|
||
for tokenizer in tokenizers:
|
||
processor = LayoutLMv3Processor(image_processor=image_processor, tokenizer=tokenizer)
|
||
|
||
# not batched
|
||
question = "What's his name?"
|
||
words = ["hello", "world"]
|
||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]
|
||
input_processor = processor(images[0], question, words, boxes, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
expected_decoding = "<s> What's his name?</s></s> hello world</s>"
|
||
decoding = processor.decode(input_processor.input_ids.squeeze().tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# batched
|
||
questions = ["How old is he?", "what's the time"]
|
||
words = [["hello", "world"], ["my", "name", "is", "niels"]]
|
||
boxes = [[[1, 2, 3, 4], [5, 6, 7, 8]], [[3, 2, 5, 1], [6, 7, 4, 2], [3, 9, 2, 4], [1, 1, 2, 3]]]
|
||
input_processor = processor(images, questions, words, boxes, padding=True, return_tensors="pt")
|
||
|
||
# verify keys
|
||
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
|
||
actual_keys = sorted(input_processor.keys())
|
||
self.assertListEqual(actual_keys, expected_keys)
|
||
|
||
# verify input_ids
|
||
expected_decoding = "<s> How old is he?</s></s> hello world</s><pad><pad>"
|
||
decoding = processor.decode(input_processor.input_ids[0].tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
expected_decoding = "<s> what's the time</s></s> my name is niels</s>"
|
||
decoding = processor.decode(input_processor.input_ids[1].tolist())
|
||
self.assertSequenceEqual(decoding, expected_decoding)
|
||
|
||
# verify bbox
|
||
expected_bbox = [[6, 7, 4, 2], [3, 9, 2, 4], [1, 1, 2, 3], [1, 1, 2, 3], [0, 0, 0, 0]]
|
||
self.assertListEqual(input_processor.bbox[1].tolist()[-5:], expected_bbox)
|