first commit

This commit is contained in:
陈赣
2026-06-03 12:42:47 +08:00
commit ec23799148
339 changed files with 57120 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
# Deployment

View File

@@ -0,0 +1,61 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
"""
import torch
import torchvision.transforms as T
import numpy as np
import onnxruntime as ort
from PIL import Image, ImageDraw
def draw(images, labels, boxes, scores, thrh = 0.6):
for i, im in enumerate(images):
draw = ImageDraw.Draw(im)
scr = scores[i]
lab = labels[i][scr > thrh]
box = boxes[i][scr > thrh]
for b in box:
draw.rectangle(list(b), outline='red',)
draw.text((b[0], b[1]), text=str(lab[i].item()), fill='blue', )
im.save(f'results_{i}.jpg')
def main(args, ):
"""main
"""
sess = ort.InferenceSession(args.onnx_file)
print(ort.get_device())
im_pil = Image.open(args.im_file).convert('RGB')
w, h = im_pil.size
orig_size = torch.tensor([w, h])[None]
transforms = T.Compose([
T.Resize((640, 640)),
T.ToTensor(),
])
im_data = transforms(im_pil)[None]
output = sess.run(
# output_names=['labels', 'boxes', 'scores'],
output_names=None,
input_feed={'images': im_data.data.numpy(), "orig_target_sizes": orig_size.data.numpy()}
)
labels, boxes, scores = output
draw([im_pil], labels, boxes, scores)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--onnx-file', type=str, )
parser.add_argument('--im-file', type=str, )
# parser.add_argument('-d', '--device', type=str, default='cpu')
args = parser.parse_args()
main(args)

View File

@@ -0,0 +1,5 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
"""
# please reference: https://github.com/guojin-yan/RT-DETR-OpenVINO

View File

@@ -0,0 +1,258 @@
# Copyright 2023 lyuwenyu. All Rights Reserved.
# Copyright (c) 2025 Hitbee-dev. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
# NOTICE: This file has been heavily modified by [Hitbee-dev] from the original source.
# Modifications include restructuring for broader GPU architecture compatibility
# (including NVIDIA Blackwell), improved modularity, and enhanced testability.
# ==============================================================================
import time
import numpy as np
import torch
import tensorrt as trt
from collections import OrderedDict
from PIL import Image, ImageDraw, ImageFont
class TRTInference(object):
"""
A high-level wrapper for TensorRT inference, designed for ease of use and flexibility.
This class handles engine loading, context creation, and dynamic buffer allocation.
"""
def __init__(self, engine_path, device='cuda:0', verbose=False):
"""
Initializes the TRTInference instance.
Args:
engine_path (str): Path to the serialized TensorRT engine file.
device (str): The device to run inference on (e.g., 'cuda:0').
verbose (bool): If True, enables verbose logging from the TensorRT logger.
"""
self.engine_path = engine_path
self.device = torch.device(device)
self.logger = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(self.logger, '')
self.runtime = trt.Runtime(self.logger)
self.engine = self._load_engine(engine_path)
self.context = self.engine.create_execution_context()
self.input_names, self.output_names = self._get_io_names()
self.buffers_allocated = False
self.gpu_buffers = OrderedDict()
self.binding_addrs = OrderedDict()
print(f"[TRTInference] Initialized successfully. Engine: '{engine_path}'.")
def _load_engine(self, path):
"""Loads a TensorRT engine from a file."""
with open(path, 'rb') as f:
engine = self.runtime.deserialize_cuda_engine(f.read())
if engine is None:
raise RuntimeError(f"Failed to load TensorRT engine from '{path}'.")
return engine
def _get_io_names(self):
"""Parses input and output tensor names from the engine."""
input_names, output_names = [], []
for i in range(self.engine.num_io_tensors):
name = self.engine.get_tensor_name(i)
if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
input_names.append(name)
else:
output_names.append(name)
return input_names, output_names
def _allocate_buffers(self, blob: dict):
"""
Allocates GPU buffers for inputs and outputs based on the first inference request.
This "lazy allocation" strategy handles dynamic input shapes gracefully.
"""
print("[TRTInference] First inference call detected. Allocating GPU buffers...")
for name in self.input_names:
tensor = blob[name]
shape = tuple(tensor.shape)
dtype = tensor.dtype
self.context.set_input_shape(name, shape)
self.gpu_buffers[name] = torch.empty(shape, dtype=dtype, device=self.device)
self.binding_addrs[name] = self.gpu_buffers[name].data_ptr()
print(f" - Input '{name}': allocated buffer with shape {shape}.")
for name in self.output_names:
shape = tuple(self.context.get_tensor_shape(name))
dtype = trt.nptype(self.engine.get_tensor_dtype(name))
torch_dtype = torch.from_numpy(np.array(0, dtype=dtype)).dtype
self.gpu_buffers[name] = torch.empty(shape, dtype=torch_dtype, device=self.device)
self.binding_addrs[name] = self.gpu_buffers[name].data_ptr()
print(f" - Output '{name}': allocated buffer with shape {shape}.")
self.buffers_allocated = True
print("[TRTInference] GPU buffers allocated successfully.")
def __call__(self, blob: dict):
"""
Executes inference on the loaded TensorRT engine.
Args:
blob (dict): A dictionary mapping input tensor names to their corresponding
torch.Tensor data on the GPU.
Returns:
dict: A dictionary mapping output tensor names to their corresponding
torch.Tensor results on the GPU.
"""
if not self.buffers_allocated:
self._allocate_buffers(blob)
for name in self.input_names:
self.gpu_buffers[name].copy_(blob[name])
self.context.execute_v2(bindings=list(self.binding_addrs.values()))
return {name: self.gpu_buffers[name] for name in self.output_names}
# --- Visualization Utility Function ---
COCO_CLASSES = [
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def visualize_detections(image_pil, boxes, scores, labels, class_names=COCO_CLASSES, threshold=0.5):
"""
Draws bounding boxes on a PIL image. This function is a general-purpose utility.
Args:
image_pil (PIL.Image.Image): The image to draw on.
boxes (torch.Tensor): A tensor of bounding boxes (shape: [N, 4]).
scores (torch.Tensor): A tensor of confidence scores (shape: [N]).
labels (torch.Tensor): A tensor of class labels (shape: [N]).
class_names (list): A list of strings corresponding to class labels.
threshold (float): The confidence threshold for displaying detections.
Returns:
PIL.Image.Image: The image with detections drawn on it.
"""
img_draw = image_pil.copy()
draw = ImageDraw.Draw(img_draw)
# Ensure tensors are on CPU and converted to NumPy for processing
boxes = boxes.cpu().numpy()
scores = scores.cpu().numpy()
labels = labels.cpu().numpy()
count = 0
for i in range(len(scores)):
score = scores[i]
if score < threshold:
continue
count += 1
box = boxes[i]
label_idx = int(labels[i])
xmin, ymin, xmax, ymax = box
class_name = class_names[label_idx] if label_idx < len(class_names) else f'CLS-{label_idx}'
color = 'red' # Keep it simple or use a color map
draw.rectangle(((xmin, ymin), (xmax, ymax)), outline=color, width=3)
text = f"{class_name}: {score:.2f}"
try:
font = ImageFont.truetype("arial.ttf", 20)
except IOError:
font = ImageFont.load_default()
text_bbox = draw.textbbox((xmin, ymin), text, font=font)
draw.rectangle(text_bbox, fill=color)
draw.text((xmin, ymin), text, fill="white", font=font)
print(f" - Found {count} objects above threshold {threshold}.")
return img_draw
if __name__ == '__main__':
import argparse
import torchvision.transforms as T
import os
parser = argparse.ArgumentParser(description="Test script for the TRTInference wrapper.")
parser.add_argument('--engine', type=str, required=True, help="Path to the TensorRT engine file.")
parser.add_argument('--image', type=str, required=True, help="Path to the input image file.")
parser.add_argument('--output', type=str, default='output.jpg', help="Path to save the output image with detections.")
parser.add_argument('--device', type=str, default='cuda:0', help="Device to run inference on.")
parser.add_argument('--threshold', type=float, default=0.5, help="Confidence threshold for displaying detections.")
args = parser.parse_args()
if not torch.cuda.is_available():
raise SystemExit("CUDA is not available. This script requires a GPU.")
print("--- TRTInference Wrapper Test ---")
print("\n1. Initializing TRTInference...")
trt_model = TRTInference(args.engine, device=args.device)
print("\n2. Preprocessing input image...")
image_pil = Image.open(args.image).convert('RGB')
w, h = image_pil.size
transforms = T.Compose([
T.Resize((640, 640)),
T.ToTensor(),
])
image_tensor = transforms(image_pil).unsqueeze(0).to(args.device)
orig_size_tensor = torch.tensor([[w, h]], dtype=torch.int64, device=args.device)
blob = {
'images': image_tensor,
'orig_target_sizes': orig_size_tensor
}
print(f" - Original image size: {w}x{h}")
print(f" - Input tensor shape: {image_tensor.shape}")
print("\n3. Running inference...")
start_time = time.time()
output_gpu = trt_model(blob)
torch.cuda.synchronize()
end_time = time.time()
print(f"\n4. Inference complete in { (end_time - start_time) * 1000:.2f} ms.")
print("\n5. Post-processing and saving output image...")
output_labels = output_gpu['labels'][0]
output_boxes = output_gpu['boxes'][0]
output_scores = output_gpu['scores'][0]
# Use the new, separate visualization function
result_image = visualize_detections(
image_pil,
output_boxes,
output_scores,
output_labels,
threshold=args.threshold
)
result_image.save(args.output)
print(f" - Output image with detections saved to: {os.path.abspath(args.output)}")
print("\n--- Test finished successfully ---")

View File

@@ -0,0 +1,84 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
"""
import torch
import torch.nn as nn
import torchvision.transforms as T
import numpy as np
from PIL import Image, ImageDraw
from src.core import YAMLConfig
def draw(images, labels, boxes, scores, thrh = 0.6):
for i, im in enumerate(images):
draw = ImageDraw.Draw(im)
scr = scores[i]
lab = labels[i][scr > thrh]
box = boxes[i][scr > thrh]
scrs = scores[i][scr > thrh]
for j,b in enumerate(box):
draw.rectangle(list(b), outline='red',)
draw.text((b[0], b[1]), text=f"{lab[j].item()} {round(scrs[j].item(),2)}", fill='blue', )
im.save(f'results_{i}.jpg')
def main(args, ):
"""main
"""
cfg = YAMLConfig(args.config, resume=args.resume)
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
if 'ema' in checkpoint:
state = checkpoint['ema']['module']
else:
state = checkpoint['model']
else:
raise AttributeError('Only support resume to load model.state_dict by now.')
# NOTE load train mode state -> convert to deploy mode
cfg.model.load_state_dict(state)
class Model(nn.Module):
def __init__(self, ) -> None:
super().__init__()
self.model = cfg.model.deploy()
self.postprocessor = cfg.postprocessor.deploy()
def forward(self, images, orig_target_sizes):
outputs = self.model(images)
outputs = self.postprocessor(outputs, orig_target_sizes)
return outputs
model = Model().to(args.device)
im_pil = Image.open(args.im_file).convert('RGB')
w, h = im_pil.size
orig_size = torch.tensor([w, h])[None].to(args.device)
transforms = T.Compose([
T.Resize((640, 640)),
T.ToTensor(),
])
im_data = transforms(im_pil)[None].to(args.device)
output = model(im_data, orig_size)
labels, boxes, scores = output
draw([im_pil], labels, boxes, scores)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', type=str, )
parser.add_argument('-r', '--resume', type=str, )
parser.add_argument('-f', '--im-file', type=str, )
parser.add_argument('-d', '--device', type=str, default='cpu')
args = parser.parse_args()
main(args)