Files
RT-DETR/benchmark/trtinfer.py
2026-06-03 12:42:47 +08:00

154 lines
5.1 KiB
Python

'''by lyuwenyu
'''
import time
import contextlib
from collections import namedtuple, OrderedDict
import torch
import numpy as np
import tensorrt as trt
from utils import TimeProfiler
class TRTInference(object):
    '''Thin wrapper that runs a serialized TensorRT engine.

    Two backends are supported:

    * ``'torch'`` - every engine tensor gets a preallocated torch tensor on
      ``device``; at call time the input addresses are replaced by the
      caller's tensors, so inputs are consumed in place.
    * ``'cuda'`` - buffers are allocated through a module-level ``cuda``
      object (``Stream``, ``mem_alloc``, ``pagelocked_empty``,
      ``memcpy_*_async``) and host arrays are copied in and out on a stream.

    NOTE(review): the visible imports of this file never bind the name
    ``cuda``; confirm it is provided (the calls look like a pycuda.driver
    style API) before using ``backend='cuda'``.

    The output buffers are allocated once and handed back on every call, so
    a later call overwrites what an earlier call returned; copy the outputs
    if they must outlive the next inference.
    '''

    def __init__(self, engine_path, device='cuda:0', backend='torch', max_batch_size=32, verbose=False):
        '''Deserialize the engine and allocate one buffer per engine tensor.

        Args:
            engine_path: path of the serialized engine file.
            device: torch device the buffers are moved to (torch backend).
            backend: ``'torch'`` or ``'cuda'``; selects how buffers are
                allocated and how inference is launched.
            max_batch_size: value substituted for a leading ``-1`` dimension
                when sizing the buffers.
            verbose: use the VERBOSE TensorRT logger severity instead of INFO.
        '''
        self.engine_path = engine_path
        self.device = device
        self.backend = backend
        self.max_batch_size = max_batch_size

        severity = trt.Logger.VERBOSE if verbose else trt.Logger.INFO
        self.logger = trt.Logger(severity)

        self.engine = self.load_engine(engine_path)
        self.context = self.engine.create_execution_context()

        self.bindings = self.get_bindings(self.engine, self.context, self.max_batch_size, self.device)
        # Raw device addresses in engine tensor order; this is what gets
        # passed to the execution context.
        self.bindings_addr = OrderedDict((n, v.ptr) for n, v in self.bindings.items())

        self.input_names = self.get_input_names()
        self.output_names = self.get_output_names()

        if self.backend == 'cuda':
            self.stream = cuda.Stream()

        self.time_profile = TimeProfiler()

    def init(self, ):
        '''Set ``self.dynamic`` to False (not called by ``__init__``).'''
        self.dynamic = False

    def load_engine(self, path):
        '''Read the file at ``path`` and deserialize it into a TensorRT engine.'''
        trt.init_libnvinfer_plugins(self.logger, '')
        with open(path, 'rb') as f, trt.Runtime(self.logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def _names_with_mode(self, mode):
        '''Return the engine tensor names whose I/O mode equals ``mode``.'''
        return [name for name in self.engine if self.engine.get_tensor_mode(name) == mode]

    def get_input_names(self, ):
        '''Return the names of the engine's input tensors, in engine order.'''
        return self._names_with_mode(trt.TensorIOMode.INPUT)

    def get_output_names(self, ):
        '''Return the names of the engine's output tensors, in engine order.'''
        return self._names_with_mode(trt.TensorIOMode.OUTPUT)

    def get_bindings(self, engine, context, max_batch_size=32, device=None):
        '''Allocate a buffer for every tensor of ``engine``.

        A leading dimension of ``-1`` is replaced by ``max_batch_size``; for
        input tensors that resolved shape is also registered on ``context``.

        Args:
            engine: the deserialized engine to inspect.
            context: execution context that receives the resolved input shapes.
            max_batch_size: size used for a leading ``-1`` dimension.
            device: torch device for the buffers (torch backend only).

        Returns:
            OrderedDict mapping tensor name to
            ``Binding(name, dtype, shape, data, ptr)``, where ``shape`` is a
            plain tuple of ints and ``ptr`` is the device address of the
            buffer.
        '''
        Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        bindings = OrderedDict()

        for name in engine:
            is_input = engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
            dtype = trt.nptype(engine.get_tensor_dtype(name))

            # Work on a plain list so the batch dimension can be patched, then
            # freeze it into a tuple: tuples compare cleanly against the
            # torch.Size of incoming tensors in run_torch.
            dims = list(engine.get_tensor_shape(name))
            if dims and dims[0] == -1:
                dims[0] = max_batch_size
                if is_input:
                    context.set_input_shape(name, tuple(dims))
            shape = tuple(dims)

            if self.backend == 'cuda':
                if is_input:
                    # Placeholder host data; only its byte size is used here.
                    data = np.random.randn(*shape).astype(dtype)
                else:
                    # Host-side staging buffer that results are copied into;
                    # sized by element count, not shaped.
                    data = cuda.pagelocked_empty(trt.volume(shape), dtype)
                ptr = cuda.mem_alloc(data.nbytes)
            else:
                data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                ptr = data.data_ptr()

            bindings[name] = Binding(name, dtype, shape, data, ptr)

        return bindings

    def run_torch(self, blob):
        '''Run inference on torch tensors.

        Args:
            blob: mapping from input name to a tensor exposing ``shape`` and
                ``data_ptr()``; the tensors are read in place.

        Returns:
            dict mapping output name to the preallocated output tensor.
        '''
        for n in self.input_names:
            incoming = tuple(blob[n].shape)
            if self.bindings[n].shape != incoming:
                # The input shape changed since the last call: tell the
                # context and remember it so the next call can skip this.
                self.context.set_input_shape(n, incoming)
                self.bindings[n] = self.bindings[n]._replace(shape=incoming)

        # Point the input slots at the caller's tensors instead of copying.
        self.bindings_addr.update({n: blob[n].data_ptr() for n in self.input_names})
        self.context.execute_v2(list(self.bindings_addr.values()))

        return {n: self.bindings[n].data for n in self.output_names}

    def async_run_cuda(self, blob):
        '''Run inference on host arrays using the ``cuda`` stream.

        Args:
            blob: mapping from input name to a host array (the original
                docstring says numpy input).

        Returns:
            dict mapping output name to the host staging buffer, filled after
            the stream has been synchronized.
        '''
        for n in self.input_names:
            cuda.memcpy_htod_async(self.bindings_addr[n], blob[n], self.stream)

        addresses = [int(ptr) for ptr in self.bindings_addr.values()]
        self.context.execute_async_v2(bindings=addresses, stream_handle=self.stream.handle)

        outputs = {}
        for n in self.output_names:
            binding = self.bindings[n]
            cuda.memcpy_dtoh_async(binding.data, binding.ptr, self.stream)
            outputs[n] = binding.data

        # The copies above are only queued; wait before exposing the buffers.
        self.stream.synchronize()
        return outputs

    def __call__(self, blob):
        '''Run inference with the configured backend.

        Raises:
            ValueError: if ``self.backend`` is neither ``'torch'`` nor
                ``'cuda'`` (instead of silently returning ``None``).
        '''
        if self.backend == 'torch':
            return self.run_torch(blob)
        if self.backend == 'cuda':
            return self.async_run_cuda(blob)
        raise ValueError(
            "unsupported backend {!r}; expected 'torch' or 'cuda'".format(self.backend)
        )

    def synchronize(self, ):
        '''Wait for outstanding work of the active backend to finish.'''
        if self.backend == 'torch' and torch.cuda.is_available():
            torch.cuda.synchronize()
        elif self.backend == 'cuda':
            self.stream.synchronize()

    def warmup(self, blob, n):
        '''Run inference ``n`` times and discard the results.'''
        for _ in range(n):
            self(blob)

    def speed(self, blob, n):
        '''Return ``time_profile.total`` averaged over ``n`` inference runs.

        The profiler is reset first and each run is wrapped in it; the unit of
        the result is whatever ``TimeProfiler.total`` reports.

        Raises:
            ValueError: if ``n`` is not positive, since no average exists.
        '''
        if n <= 0:
            raise ValueError('n must be a positive integer, got {!r}'.format(n))

        self.time_profile.reset()
        for _ in range(n):
            with self.time_profile:
                self(blob)

        return self.time_profile.total / n