# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PPChart2Table model."""

import unittest

from transformers import AutoModelForImageTextToText, AutoProcessor
from transformers.testing_utils import cleanup, require_torch, require_vision, slow, torch_device

from ...test_processing_common import url_to_local_path


@slow
@require_vision
@require_torch
class PPChart2TableIntegrationTest(unittest.TestCase):
    def setUp(self):
        model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
        self.model = AutoModelForImageTextToText.from_pretrained(model_path).to(torch_device)
        self.processor = AutoProcessor.from_pretrained(model_path)
        self.conversation = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "url": url_to_local_path(
                            "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png"
                        ),
                    },
                ],
            },
        ]

    def tearDown(self):
        cleanup(torch_device, gc_collect=True)

    def test_small_model_integration_test_pp_chart2table(self):
        inputs = self.processor.apply_chat_template(
            self.conversation,
            tokenize=True,
            add_generation_prompt=True,
            truncation=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)

        generated_ids = self.model.generate(**inputs, do_sample=False, max_new_tokens=32)
        generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
        decoded_output = self.processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        expected_output = ["年份 | 单家五星级旅游饭店年平均营收 (百万元) | 单家五星级旅游饭店年平均利润 (百万元)\n"]
        self.assertEqual(decoded_output, expected_output)

    def test_small_model_integration_test_pp_chart2table_batched(self):
        inputs = self.processor.apply_chat_template(
            [self.conversation, self.conversation],
            tokenize=True,
            add_generation_prompt=True,
            truncation=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)

        generated_ids = self.model.generate(**inputs, do_sample=False, max_new_tokens=6)
        generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
        decoded_output = self.processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        expected_output = ["年份 | 单家", "年份 | 单家"]
        self.assertEqual(decoded_output, expected_output)