first commit

2026-06-03 12:42:47 +08:00
commit ec23799148
339 changed files with 57120 additions and 0 deletions
--- a/rtdetr_pytorch/src/nn/__init__.py
+++ b/rtdetr_pytorch/src/nn/__init__.py
@@ -0,0 +1,7 @@
+
+from .arch import *
+from .criterion import *
+
+# 
+from .backbone import *
+
--- a/rtdetr_pytorch/src/nn/arch/__init__.py
+++ b/rtdetr_pytorch/src/nn/arch/__init__.py
@@ -0,0 +1 @@
+from .classification import *
--- a/rtdetr_pytorch/src/nn/arch/classification.py
+++ b/rtdetr_pytorch/src/nn/arch/classification.py
@@ -0,0 +1,41 @@
+import torch 
+import torch.nn as nn 
+
+from src.core import register
+
+
+__all__ = ['Classification', 'ClassHead']
+
+
+@register
+class Classification(nn.Module):
+    """Image classifier made of a backbone and an optional head.
+
+    When no head is configured, the backbone output is returned unchanged.
+    """
+    # Presumably consumed by the `register` machinery in `src.core` to
+    # resolve these constructor arguments — TODO confirm against that module.
+    __inject__ = ['backbone', 'head']
+
+    def __init__(self, backbone: nn.Module, head: nn.Module=None):
+        super().__init__()
+
+        self.backbone, self.head = backbone, head
+
+    def forward(self, x):
+        """Run `x` through the backbone, then through the head if there is one."""
+        features = self.backbone(x)
+        if self.head is None:
+            return features
+        return self.head(features)
+
+
+@register
+class ClassHead(nn.Module):
+    """Classification head: adaptive average pool to 1x1, then a linear layer."""
+
+    def __init__(self, hidden_dim, num_classes):
+        super().__init__()
+        self.pool = nn.AdaptiveAvgPool2d(1)
+        self.proj = nn.Linear(hidden_dim, num_classes)
+
+    def forward(self, x):
+        """Return the projected output; a list/tuple input contributes only its first item."""
+        if isinstance(x, (list, tuple)):
+            x = x[0]
+        pooled = self.pool(x)
+        flat = pooled.reshape(pooled.shape[0], -1)
+        return self.proj(flat)
--- a/rtdetr_pytorch/src/nn/backbone/__init__.py
+++ b/rtdetr_pytorch/src/nn/backbone/__init__.py
@@ -0,0 +1,6 @@
+
+from .presnet import *
+from .test_resnet import *
+from .regnet import *
+from .common import *
+from .dla import *
--- a/rtdetr_pytorch/src/nn/backbone/common.py
+++ b/rtdetr_pytorch/src/nn/backbone/common.py
@@ -0,0 +1,102 @@
+'''by lyuwenyu
+'''
+
+import torch 
+import torch.nn as nn
+
+
+
+class ConvNormLayer(nn.Module):
+    """Conv2d -> BatchNorm2d -> activation.
+
+    Args:
+        ch_in: input channels.
+        ch_out: output channels.
+        kernel_size: convolution kernel size; used in integer arithmetic to
+            derive the padding when `padding` is None.
+        stride: convolution stride.
+        padding: explicit padding; None selects `(kernel_size - 1) // 2`.
+        bias: whether the convolution has a bias term.
+        act: activation spec handed to `get_activation`; None means identity.
+    """
+
+    def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None):
+        super().__init__()
+        if padding is None:
+            padding = (kernel_size - 1) // 2
+        self.conv = nn.Conv2d(ch_in, ch_out, kernel_size, stride, padding=padding, bias=bias)
+        self.norm = nn.BatchNorm2d(ch_out)
+        self.act = get_activation(act) if act is not None else nn.Identity()
+
+    def forward(self, x):
+        out = self.conv(x)
+        out = self.norm(out)
+        return self.act(out)
+
+
+class FrozenBatchNorm2d(nn.Module):
+    """BatchNorm2d whose statistics and affine parameters are fixed buffers.
+
+    Copied and modified from
+    https://github.com/facebookresearch/detr/blob/master/models/backbone.py
+    (itself a copy-paste from torchvision.misc.ops) with `eps` added before
+    the rsqrt; without it, models other than
+    torchvision.models.resnet[18,34,50,101] produce NaNs.
+    """
+
+    def __init__(self, num_features, eps=1e-5):
+        super().__init__()
+        self.register_buffer("weight", torch.ones(num_features))
+        self.register_buffer("bias", torch.zeros(num_features))
+        self.register_buffer("running_mean", torch.zeros(num_features))
+        self.register_buffer("running_var", torch.ones(num_features))
+        self.eps = eps
+        self.num_features = num_features
+
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+                              missing_keys, unexpected_keys, error_msgs):
+        # This module registers no `num_batches_tracked` buffer, so drop that
+        # key before delegating to the default loader.
+        state_dict.pop(prefix + 'num_batches_tracked', None)
+        super()._load_from_state_dict(
+            state_dict, prefix, local_metadata, strict,
+            missing_keys, unexpected_keys, error_msgs)
+
+    def forward(self, x):
+        # Reshape the buffers up front to keep the arithmetic fuser-friendly.
+        def _per_channel(t):
+            return t.reshape(1, -1, 1, 1)
+
+        scale = _per_channel(self.weight) * (_per_channel(self.running_var) + self.eps).rsqrt()
+        shift = _per_channel(self.bias) - _per_channel(self.running_mean) * scale
+        return x * scale + shift
+
+    def extra_repr(self):
+        return "{num_features}, eps={eps}".format(
+            num_features=self.num_features, eps=self.eps)
+
+
+def get_activation(act: str, inpace: bool=True):
+    """Build an activation module from a name, an existing module, or None.
+
+    Args:
+        act: activation name (case-insensitive; one of 'silu', 'relu',
+            'leaky_relu', 'gelu'), an `nn.Module` instance, or None.
+        inpace: value assigned to the `inplace` attribute of a newly created
+            activation when it has one. The misspelled name is kept so that
+            existing keyword callers keep working.
+
+    Returns:
+        `nn.Identity()` for None, `act` itself (left untouched) when it is
+        already a module, otherwise a freshly constructed activation.
+
+    Raises:
+        RuntimeError: if `act` is not a supported activation name.
+    """
+    # These checks must run before `.lower()`: None and modules are not
+    # strings, so lowering them first would raise AttributeError.
+    if act is None:
+        return nn.Identity()
+    if isinstance(act, nn.Module):
+        return act
+
+    factories = {
+        'silu': nn.SiLU,
+        'relu': nn.ReLU,
+        'leaky_relu': nn.LeakyReLU,
+        'gelu': nn.GELU,
+    }
+    name = act.lower() if isinstance(act, str) else act
+    try:
+        m = factories[name]()
+    except (KeyError, TypeError):
+        # TypeError covers unhashable values that cannot be dictionary keys.
+        raise RuntimeError(f'Unsupported activation: {act!r}') from None
+
+    if hasattr(m, 'inplace'):
+        m.inplace = inpace
+
+    return m
--- a/rtdetr_pytorch/src/nn/backbone/dla.py
+++ b/rtdetr_pytorch/src/nn/backbone/dla.py
@@ -0,0 +1,452 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import logging
+from os.path import join
+
+import torch
+from torch import nn
+import torch.utils.model_zoo as model_zoo
+# from mmdet.models.builder import BACKBONES
+from src.core import register
+
+
+# Momentum passed to the nn.BatchNorm2d layers created in this module.
+BN_MOMENTUM = 0.1
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
+    """Return the download URL of a pretrained DLA checkpoint.
+
+    The URL is assembled with '/' explicitly rather than `os.path.join`,
+    which uses the platform path separator (a backslash on Windows) and
+    would therefore produce an invalid URL there.
+
+    Args:
+        data: dataset sub-directory on the server.
+        name: model name.
+        hash: checkpoint hash suffix (the parameter name shadows the builtin
+            but is kept for keyword-argument compatibility).
+    """
+    filename = '{}-{}.pth'.format(name, hash)
+    return '/'.join(('http://dl.yf.io/dla/models', data, filename))
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """Build a bias-free 3x3 convolution with padding 1."""
+    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
+    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
+
+
+class BasicBlock(nn.Module):
+    """Two 3x3 conv + BN layers with a residual addition before the last ReLU."""
+
+    def __init__(self, inplanes, planes, stride=1, dilation=1):
+        super().__init__()
+        # Settings shared by both convolutions; padding equals the dilation.
+        shared = dict(kernel_size=3, padding=dilation, bias=False, dilation=dilation)
+        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, **shared)
+        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = nn.Conv2d(planes, planes, stride=1, **shared)
+        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+        self.stride = stride
+
+    def forward(self, x, residual=None):
+        """Apply the block; `residual` defaults to the input `x`."""
+        shortcut = x if residual is None else residual
+
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+
+        # In-place addition onto the conv branch output.
+        out += shortcut
+        return self.relu(out)
+
+
+class Bottleneck(nn.Module):
+    """1x1 -> 3x3 -> 1x1 bottleneck block with a residual addition.
+
+    The inner width is `planes // expansion`.
+    """
+    expansion = 2
+
+    def __init__(self, inplanes, planes, stride=1, dilation=1):
+        super().__init__()
+        mid_planes = planes // Bottleneck.expansion
+        self.conv1 = nn.Conv2d(inplanes, mid_planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(mid_planes, momentum=BN_MOMENTUM)
+        # Only the 3x3 convolution carries the stride and the dilation.
+        self.conv2 = nn.Conv2d(
+            mid_planes, mid_planes, kernel_size=3, stride=stride,
+            padding=dilation, bias=False, dilation=dilation,
+        )
+        self.bn2 = nn.BatchNorm2d(mid_planes, momentum=BN_MOMENTUM)
+        self.conv3 = nn.Conv2d(mid_planes, planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
+        self.stride = stride
+
+    def forward(self, x, residual=None):
+        """Apply the block; `residual` defaults to the input `x`."""
+        shortcut = x if residual is None else residual
+
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+
+        # In-place addition onto the conv branch output.
+        out += shortcut
+        return self.relu(out)
+
+
+class BottleneckX(nn.Module):
+    """Bottleneck block whose 3x3 convolution is grouped (`cardinality` groups).
+
+    The inner width is `planes * cardinality // 32`.
+    """
+    expansion = 2
+    cardinality = 32
+
+    def __init__(self, inplanes, planes, stride=1, dilation=1):
+        super().__init__()
+        groups = BottleneckX.cardinality
+        mid_planes = planes * groups // 32
+        self.conv1 = nn.Conv2d(inplanes, mid_planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(mid_planes, momentum=BN_MOMENTUM)
+        # Only the grouped 3x3 convolution carries the stride and the dilation.
+        self.conv2 = nn.Conv2d(
+            mid_planes, mid_planes, kernel_size=3, stride=stride,
+            padding=dilation, bias=False, dilation=dilation, groups=groups,
+        )
+        self.bn2 = nn.BatchNorm2d(mid_planes, momentum=BN_MOMENTUM)
+        self.conv3 = nn.Conv2d(mid_planes, planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
+        self.stride = stride
+
+    def forward(self, x, residual=None):
+        """Apply the block; `residual` defaults to the input `x`."""
+        shortcut = x if residual is None else residual
+
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+
+        # In-place addition onto the conv branch output.
+        out += shortcut
+        return self.relu(out)
+
+
+class Root(nn.Module):
+    """Aggregation node: concatenate the inputs on dim 1, then conv -> BN -> ReLU.
+
+    Args:
+        in_channels: channel count of the concatenated inputs.
+        out_channels: output channels.
+        kernel_size: only used to derive the convolution padding.
+        residual: when truthy, the first input is added before the ReLU.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, residual):
+        super().__init__()
+        # NOTE(review): the kernel is fixed at 1x1 while the padding follows
+        # `kernel_size`; any kernel_size > 2 therefore pads a 1x1 convolution
+        # and enlarges the output. Confirm this is intended.
+        pad = (kernel_size - 1) // 2
+        self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=1, bias=False, padding=pad)
+        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
+        self.residual = residual
+
+    def forward(self, *x):
+        inputs = x
+        out = self.bn(self.conv(torch.cat(inputs, 1)))
+        if self.residual:
+            out += inputs[0]
+        return self.relu(out)
+
+
+class Tree(nn.Module):
+    """Recursive aggregation tree.
+
+    A tree of `levels == 1` holds two `block` instances and a `Root` that
+    aggregates their outputs (plus any collected children). A deeper tree
+    holds two sub-trees of `levels - 1`; only the innermost trees own a Root.
+    """
+
+    def __init__(
+        self,
+        levels,
+        block,
+        in_channels,
+        out_channels,
+        stride=1,
+        level_root=False,
+        root_dim=0,
+        root_kernel_size=1,
+        dilation=1,
+        root_residual=False,
+    ):
+        super(Tree, self).__init__()
+        # root_dim is the channel count the Root will receive after concatenation.
+        # 0 means "derive it": the two branch outputs give 2 * out_channels.
+        if root_dim == 0:
+            root_dim = 2 * out_channels
+        # A level root also feeds its (downsampled) input into the Root.
+        if level_root:
+            root_dim += in_channels
+        if levels == 1:
+            self.tree1 = block(in_channels, out_channels, stride, dilation=dilation)
+            self.tree2 = block(out_channels, out_channels, 1, dilation=dilation)
+        else:
+            self.tree1 = Tree(
+                levels - 1,
+                block,
+                in_channels,
+                out_channels,
+                stride,
+                root_dim=0,
+                root_kernel_size=root_kernel_size,
+                dilation=dilation,
+                root_residual=root_residual,
+            )
+            # The second sub-tree's Root additionally receives tree1's output,
+            # hence the extra out_channels.
+            self.tree2 = Tree(
+                levels - 1,
+                block,
+                out_channels,
+                out_channels,
+                root_dim=root_dim + out_channels,
+                root_kernel_size=root_kernel_size,
+                dilation=dilation,
+                root_residual=root_residual,
+            )
+        if levels == 1:
+            self.root = Root(root_dim, out_channels, root_kernel_size, root_residual)
+        self.level_root = level_root
+        self.root_dim = root_dim
+        self.downsample = None
+        self.project = None
+        self.levels = levels
+        # Max-pool the input so the shortcut matches a strided branch.
+        if stride > 1:
+            self.downsample = nn.MaxPool2d(stride, stride=stride)
+        # 1x1 conv + BN so the shortcut matches the branch's channel count.
+        if levels == 1 and in_channels != out_channels:
+            self.project = nn.Sequential(
+                nn.Conv2d(
+                    in_channels, out_channels, kernel_size=1, stride=1, bias=False
+                ),
+                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM),
+            )
+
+    def forward(self, x, residual=None, children=None):
+        """Run the tree on `x`.
+
+        `children` accumulates the tensors that the innermost Root will
+        concatenate. The incoming `residual` argument is not read: it is
+        recomputed below from `x`.
+        """
+        children = [] if children is None else children
+        bottom = self.downsample(x) if self.downsample else x
+        residual = self.project(bottom) if self.project else bottom
+        if self.level_root:
+            children.append(bottom)
+        x1 = self.tree1(x, residual)
+        if self.levels == 1:
+            x2 = self.tree2(x1)
+            x = self.root(x2, x1, *children)
+        else:
+            # Hand tree1's output down so tree2's Root can aggregate it.
+            children.append(x1)
+            x = self.tree2(x1, children=children)
+        return x
+
+
+class DLA(nn.Module):
+    """Deep Layer Aggregation backbone returning multi-level feature maps.
+
+    Args:
+        levels: six entries; the conv count for level0/level1 and the tree
+            depth for level2..level5.
+        channels: six entries; the output channels of level0..level5.
+        num_classes: stored on the instance; no classifier layer is built here.
+        block: residual block class used inside the trees.
+        out_indices: indices of the levels whose outputs `forward` returns.
+        residual_root: forwarded to every Tree as `root_residual`.
+        linear_root: accepted but not used by this implementation.
+    """
+
+    def __init__(
+        self,
+        levels,
+        channels,
+        num_classes=1000,
+        block=BasicBlock,
+        out_indices=(2, 3, 4, 5),
+        residual_root=False,
+        linear_root=False,
+    ):
+        super(DLA, self).__init__()
+        self.channels = channels
+        self.num_classes = num_classes
+        self.out_indices = out_indices
+        self.base_layer = nn.Sequential(
+            nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3, bias=False),
+            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
+            nn.ReLU(inplace=True),
+        )
+        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
+        self.level1 = self._make_conv_level(
+            channels[0], channels[1], levels[1], stride=2
+        )
+        # level2..level5 are stride-2 trees; every tree except level2 is a
+        # level root, i.e. it also aggregates its own downsampled input.
+        self.level2 = Tree(
+            levels[2],
+            block,
+            channels[1],
+            channels[2],
+            2,
+            level_root=False,
+            root_residual=residual_root,
+        )
+        self.level3 = Tree(
+            levels[3],
+            block,
+            channels[2],
+            channels[3],
+            2,
+            level_root=True,
+            root_residual=residual_root,
+        )
+        self.level4 = Tree(
+            levels[4],
+            block,
+            channels[3],
+            channels[4],
+            2,
+            level_root=True,
+            root_residual=residual_root,
+        )
+        self.level5 = Tree(
+            levels[5],
+            block,
+            channels[4],
+            channels[5],
+            2,
+            level_root=True,
+            root_residual=residual_root,
+        )
+
+    def _make_level(self, block, inplanes, planes, blocks, stride=1):
+        """Stack `blocks` instances of `block` into an nn.Sequential.
+
+        NOTE(review): nothing in this class calls this helper, and it passes
+        a `downsample` keyword that none of the block classes defined in this
+        module accept, so calling it with them raises TypeError. Left as-is
+        pending a decision to fix or remove it.
+        """
+        downsample = None
+        if stride != 1 or inplanes != planes:
+            downsample = nn.Sequential(
+                nn.MaxPool2d(stride, stride=stride),
+                nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False),
+                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
+            )
+
+        layers = []
+        layers.append(block(inplanes, planes, stride, downsample=downsample))
+        for i in range(1, blocks):
+            layers.append(block(inplanes, planes))
+
+        return nn.Sequential(*layers)
+
+    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
+        """Build `convs` repetitions of 3x3 conv -> BN -> ReLU.
+
+        Only the first convolution uses `stride`; the rest use stride 1.
+        """
+        modules = []
+        for i in range(convs):
+            modules.extend(
+                [
+                    nn.Conv2d(
+                        inplanes,
+                        planes,
+                        kernel_size=3,
+                        stride=stride if i == 0 else 1,
+                        padding=dilation,
+                        bias=False,
+                        dilation=dilation,
+                    ),
+                    nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
+                    nn.ReLU(inplace=True),
+                ]
+            )
+            inplanes = planes
+        return nn.Sequential(*modules)
+
+    def forward(self, x):
+        """Return the outputs of the levels listed in `out_indices`, in level order."""
+        y = []
+        x = self.base_layer(x)
+        for i in range(6):
+            x = getattr(self, 'level{}'.format(i))(x)
+            if i in self.out_indices:
+                y.append(x)
+        return y
+
+    def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
+        """Load pretrained weights non-strictly.
+
+        When `name` ends with '.pth' the checkpoint is read from the local
+        path `data + name`; otherwise it is downloaded from the URL built by
+        `get_model_url`.
+
+        Weights are mapped to CPU while loading so that checkpoints saved on
+        a GPU also load on CPU-only hosts; `load_state_dict` then copies them
+        onto whatever device the parameters already live on.
+        """
+        if name.endswith('.pth'):
+            model_weights = torch.load(data + name, map_location='cpu')
+        else:
+            model_url = get_model_url(data, name, hash)
+            model_weights = model_zoo.load_url(model_url, map_location='cpu')
+        result = self.load_state_dict(model_weights, strict=False)
+        # strict=False hides mismatches, so report them instead of dropping them.
+        if result.missing_keys:
+            logger.warning('DLA: keys missing from checkpoint: %s', result.missing_keys)
+        if result.unexpected_keys:
+            logger.info('DLA: unused checkpoint keys: %s', result.unexpected_keys)
+
+
+def dla34(pretrained=True, levels=None, in_channels=None, **kwargs):  # DLA-34
+    """Build a DLA backbone with BasicBlock, optionally with pretrained weights.
+
+    Args:
+        pretrained: when truthy, load the 'imagenet' / 'dla34' / 'ba72cf86'
+            checkpoint.
+        levels: per-level depths; None selects [1, 1, 1, 2, 2, 1].
+        in_channels: per-level channel counts; None selects
+            [16, 32, 64, 128, 256, 512].
+        **kwargs: forwarded to `DLA`.
+
+    The None fallbacks mirror the defaults `DLANet` passes in; without them
+    the documented defaults of this function would crash inside `DLA`, which
+    indexes both sequences.
+    """
+    if levels is None:
+        levels = [1, 1, 1, 2, 2, 1]
+    if in_channels is None:
+        in_channels = [16, 32, 64, 128, 256, 512]
+    model = DLA(levels=levels, channels=in_channels, block=BasicBlock, **kwargs)
+    if pretrained:
+        model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
+    return model
+
+@register
+class DLANet(nn.Module):
+    """Registered DLA backbone wrapper that returns selected feature maps.
+
+    Args:
+        dla: name of the factory function in this module used to build the
+            model (e.g. 'dla34').
+        pretrained: forwarded to the factory.
+        levels: forwarded to the factory.
+        in_channels: forwarded to the factory and stored on the instance.
+        return_index: positions, within the list returned by the wrapped
+            model, of the feature maps `forward` returns.
+        cfg: stored on the instance; not read here.
+    """
+
+    def __init__(
+        self,
+        dla='dla34',
+        pretrained=True,
+        levels=[1, 1, 1, 2, 2, 1],
+        in_channels=[16, 32, 64, 128, 256, 512],
+        return_index = [1, 2, 3],
+        cfg=None,
+    ):
+        super(DLANet, self).__init__()
+        self.cfg = cfg
+        self.in_channels = in_channels
+
+        # SECURITY NOTE(review): eval() executes `dla` as an arbitrary Python
+        # expression. That is only acceptable while the value comes from
+        # trusted configuration; consider an explicit name -> factory mapping.
+        self.model = eval(dla)(
+            pretrained=pretrained, levels=levels, in_channels=in_channels
+        )
+        # Copy so that later edits to the caller's (or the default) list do
+        # not change this instance.
+        self.return_index = list(return_index)
+
+    def forward(self, x):
+        """Return exactly the feature maps named by `return_index`, in that order.
+
+        Selecting by index (not by a min..max slice) keeps the result correct
+        when the indices are non-contiguous or unordered; for a contiguous
+        ascending list such as the default the output is the same as a slice.
+        """
+        feats = self.model(x)
+        return [feats[i] for i in self.return_index]
+
+
+class Identity(nn.Module):
+    """Pass-through module: `forward` returns its input unchanged."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        return x
+
+
+def fill_fc_weights(layers):
+    """Zero the bias of every nn.Conv2d inside `layers`; weights are untouched."""
+    conv_layers = (m for m in layers.modules() if isinstance(m, nn.Conv2d))
+    for conv in conv_layers:
+        if conv.bias is None:
+            continue
+        nn.init.constant_(conv.bias, 0)
+
+
+def fill_up_weights(up):
+    """Fill `up.weight` in place with a separable kernel that tapers linearly.
+
+    The kernel is computed for entry [0, 0] and then copied into [c, 0] for
+    every other index c along the first weight dimension.
+    """
+    weight = up.weight.data
+    rows, cols = weight.size(2), weight.size(3)
+    f = math.ceil(rows / 2)
+    center = (2 * f - 1 - f % 2) / (2.0 * f)
+
+    def _tap(k):
+        # 1-D profile value at offset k; the 2-D kernel is the product of two taps.
+        return 1 - math.fabs(k / f - center)
+
+    for i in range(rows):
+        for j in range(cols):
+            weight[0, 0, i, j] = _tap(i) * _tap(j)
+    for ch in range(1, weight.size(0)):
+        weight[ch, 0, :, :] = weight[0, 0, :, :]
--- a/rtdetr_pytorch/src/nn/backbone/presnet.py
+++ b/rtdetr_pytorch/src/nn/backbone/presnet.py
@@ -0,0 +1,225 @@
+'''by lyuwenyu
+'''
+import torch
+import torch.nn as nn 
+import torch.nn.functional as F 
+
+from collections import OrderedDict
+
+from .common import get_activation, ConvNormLayer, FrozenBatchNorm2d
+
+from src.core import register
+
+
+__all__ = ['PResNet']
+
+
+# Number of blocks in each of the four stages, keyed by network depth.
+ResNet_cfg = {
+    18: [2, 2, 2, 2],
+    34: [3, 4, 6, 3],
+    50: [3, 4, 6, 3],
+    101: [3, 4, 23, 3],
+    # 152: [3, 8, 36, 3],
+}
+
+
+# Pretrained checkpoint URL per network depth. The variable name keeps its
+# spelling because PResNet looks it up under this exact name.
+donwload_url = {
+    18: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet18_vd_pretrained_from_paddle.pth',
+    34: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet34_vd_pretrained_from_paddle.pth',
+    50: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet50_vd_ssld_v2_pretrained_from_paddle.pth',
+    101: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet101_vd_ssld_pretrained_from_paddle.pth',
+}
+
+
+class BasicBlock(nn.Module):
+    """Residual block made of two 3x3 ConvNormLayers.
+
+    Args:
+        ch_in: input channels.
+        ch_out: output channels.
+        stride: stride of the first convolution.
+        shortcut: when truthy the input is added as-is; otherwise it first
+            goes through a projection (`self.short`).
+        act: activation spec; None means identity.
+        variant: with 'd' and stride 2 the projection is average pooling
+            followed by a stride-1 1x1 ConvNormLayer, instead of a strided
+            1x1 ConvNormLayer.
+    """
+    expansion = 1
+
+    def __init__(self, ch_in, ch_out, stride, shortcut, act='relu', variant='b'):
+        super().__init__()
+
+        self.shortcut = shortcut
+
+        if not shortcut:
+            pooled_projection = variant == 'd' and stride == 2
+            if pooled_projection:
+                self.short = nn.Sequential(OrderedDict([
+                    ('pool', nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
+                    ('conv', ConvNormLayer(ch_in, ch_out, 1, 1)),
+                ]))
+            else:
+                self.short = ConvNormLayer(ch_in, ch_out, 1, stride)
+
+        self.branch2a = ConvNormLayer(ch_in, ch_out, 3, stride, act=act)
+        self.branch2b = ConvNormLayer(ch_out, ch_out, 3, 1, act=None)
+        self.act = get_activation(act) if act is not None else nn.Identity()
+
+    def forward(self, x):
+        out = self.branch2b(self.branch2a(x))
+        residual = x if self.shortcut else self.short(x)
+        return self.act(out + residual)
+
+
+class BottleNeck(nn.Module):
+    """1x1 -> 3x3 -> 1x1 residual block; the output has `ch_out * expansion` channels.
+
+    Args:
+        ch_in: input channels.
+        ch_out: width of the inner convolutions.
+        stride: block stride; variant 'a' applies it in the first 1x1
+            convolution, every other variant in the 3x3 convolution.
+        shortcut: when truthy the input is added as-is; otherwise it first
+            goes through a projection (`self.short`).
+        act: activation spec; None means identity.
+        variant: with 'd' and stride 2 the projection is average pooling
+            followed by a stride-1 1x1 ConvNormLayer.
+    """
+    expansion = 4
+
+    def __init__(self, ch_in, ch_out, stride, shortcut, act='relu', variant='b'):
+        super().__init__()
+
+        stride1, stride2 = (stride, 1) if variant == 'a' else (1, stride)
+        width = ch_out
+        out_channels = ch_out * self.expansion
+
+        self.branch2a = ConvNormLayer(ch_in, width, 1, stride1, act=act)
+        self.branch2b = ConvNormLayer(width, width, 3, stride2, act=act)
+        self.branch2c = ConvNormLayer(width, out_channels, 1, 1)
+
+        self.shortcut = shortcut
+        if not shortcut:
+            pooled_projection = variant == 'd' and stride == 2
+            if pooled_projection:
+                self.short = nn.Sequential(OrderedDict([
+                    ('pool', nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
+                    ('conv', ConvNormLayer(ch_in, out_channels, 1, 1)),
+                ]))
+            else:
+                self.short = ConvNormLayer(ch_in, out_channels, 1, stride)
+
+        self.act = get_activation(act) if act is not None else nn.Identity()
+
+    def forward(self, x):
+        out = self.branch2c(self.branch2b(self.branch2a(x)))
+        residual = x if self.shortcut else self.short(x)
+        return self.act(out + residual)
+
+
+class Blocks(nn.Module):
+    """One ResNet stage: `count` instances of `block` applied in sequence."""
+
+    def __init__(self, block, ch_in, ch_out, count, stage_num, act='relu', variant='b'):
+        super().__init__()
+
+        self.blocks = nn.ModuleList()
+        # Only the first block of a stage may use stride 2, and stage 2 never does.
+        first_stride = 1 if stage_num == 2 else 2
+        for idx in range(count):
+            is_first = idx == 0
+            self.blocks.append(
+                block(
+                    ch_in,
+                    ch_out,
+                    stride=first_stride if is_first else 1,
+                    shortcut=not is_first,
+                    variant=variant,
+                    act=act,
+                )
+            )
+            if is_first:
+                # Later blocks consume the expanded output of the first one.
+                ch_in = ch_out * block.expansion
+
+    def forward(self, x):
+        for layer in self.blocks:
+            x = layer(x)
+        return x
+
+
+@register
+class PResNet(nn.Module):
+    """ResNet backbone that returns the feature maps of selected stages.
+
+    Args:
+        depth: network depth; must be a key of `ResNet_cfg`.
+        variant: 'c' or 'd' selects a stem of three 3x3 convolutions, anything
+            else a single 7x7 convolution; it is also forwarded to the blocks.
+        num_stages: number of residual stages to build.
+        return_idx: indices of the stages whose outputs `forward` returns.
+        act: activation spec used throughout.
+        freeze_at: when >= 0, the stem and the first
+            `min(freeze_at, num_stages)` stages get `requires_grad = False`.
+        freeze_norm: when truthy, every nn.BatchNorm2d is replaced by a
+            FrozenBatchNorm2d.
+        pretrained: when truthy, download and load the weights for `depth`.
+
+    Raises:
+        KeyError: if `depth` has no entry in `ResNet_cfg`.
+    """
+
+    def __init__(
+        self, 
+        depth, 
+        variant='d', 
+        num_stages=4, 
+        return_idx=[0, 1, 2, 3], 
+        act='relu',
+        freeze_at=-1, 
+        freeze_norm=True, 
+        pretrained=False):
+        super().__init__()
+
+        block_nums = ResNet_cfg[depth]
+        ch_in = 64
+        if variant in ['c', 'd']:
+            conv_def = [
+                [3, ch_in // 2, 3, 2, "conv1_1"],
+                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
+                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
+            ]
+        else:
+            conv_def = [[3, ch_in, 7, 2, "conv1_1"]]
+
+        self.conv1 = nn.Sequential(OrderedDict([
+            (_name, ConvNormLayer(c_in, c_out, k, s, act=act)) for c_in, c_out, k, s, _name in conv_def
+        ]))
+
+        ch_out_list = [64, 128, 256, 512]
+        block = BottleNeck if depth >= 50 else BasicBlock
+
+        _out_channels = [block.expansion * v for v in ch_out_list]
+        _out_strides = [4, 8, 16, 32]
+
+        self.res_layers = nn.ModuleList()
+        for i in range(num_stages):
+            # Stages are numbered from 2; Blocks keeps stride 1 for stage 2.
+            stage_num = i + 2
+            self.res_layers.append(
+                Blocks(block, ch_in, ch_out_list[i], block_nums[i], stage_num, act=act, variant=variant)
+            )
+            ch_in = _out_channels[i]
+
+        # Copy so the shared default list (or the caller's list) is never
+        # aliased by this instance.
+        self.return_idx = list(return_idx)
+        self.out_channels = [_out_channels[_i] for _i in self.return_idx]
+        self.out_strides = [_out_strides[_i] for _i in self.return_idx]
+
+        if freeze_at >= 0:
+            self._freeze_parameters(self.conv1)
+            for i in range(min(freeze_at, num_stages)):
+                self._freeze_parameters(self.res_layers[i])
+
+        # Norm layers are swapped before loading so that the checkpoint
+        # fills the frozen buffers.
+        if freeze_norm:
+            self._freeze_norm(self)
+
+        if pretrained:
+            # Map to CPU while loading so GPU-saved checkpoints also load on
+            # CPU-only hosts; load_state_dict copies onto the module's device.
+            state = torch.hub.load_state_dict_from_url(donwload_url[depth], map_location='cpu')
+            self.load_state_dict(state)
+            print(f'Load PResNet{depth} state_dict')
+
+    def _freeze_parameters(self, m: nn.Module):
+        """Disable gradients for every parameter of `m`."""
+        for p in m.parameters():
+            p.requires_grad = False
+
+    def _freeze_norm(self, m: nn.Module):
+        """Recursively replace nn.BatchNorm2d modules under `m`.
+
+        Returns the replacement when `m` itself is a BatchNorm2d, otherwise
+        `m` with its children updated in place. The replacement is a fresh
+        FrozenBatchNorm2d: the original layer's statistics are not copied.
+        """
+        if isinstance(m, nn.BatchNorm2d):
+            m = FrozenBatchNorm2d(m.num_features)
+        else:
+            for name, child in m.named_children():
+                _child = self._freeze_norm(child)
+                if _child is not child:
+                    setattr(m, name, _child)
+        return m
+
+    def forward(self, x):
+        """Return the outputs of the stages listed in `return_idx`, in stage order."""
+        conv1 = self.conv1(x)
+        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
+        outs = []
+        for idx, stage in enumerate(self.res_layers):
+            x = stage(x)
+            if idx in self.return_idx:
+                outs.append(x)
+        return outs
+
+
--- a/rtdetr_pytorch/src/nn/backbone/regnet.py
+++ b/rtdetr_pytorch/src/nn/backbone/regnet.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn 
+from transformers import RegNetModel
+
+
+from src.core import register
+
+__all__ = ['RegNet']
+
+@register
+class RegNet(nn.Module):
+    """Backbone wrapping the pretrained "facebook/regnet-y-040" RegNetModel.
+
+    NOTE(review): `configuration` is accepted but never read, and
+    `return_idx` is stored but `forward` always returns hidden states 2..4.
+    Confirm whether either should drive the model.
+    """
+
+    def __init__(self, configuration, return_idx=[0, 1, 2, 3]):
+        super().__init__()
+        self.model = RegNetModel.from_pretrained("facebook/regnet-y-040")
+        self.return_idx = return_idx
+
+    def forward(self, x):
+        """Return `hidden_states[2:5]` of the wrapped model for input `x`."""
+        hidden_states = self.model(x, output_hidden_states = True).hidden_states
+        return hidden_states[2:5]
--- a/rtdetr_pytorch/src/nn/backbone/test_resnet.py
+++ b/rtdetr_pytorch/src/nn/backbone/test_resnet.py
@@ -0,0 +1,81 @@
+import torch
+import torch.nn as nn 
+import torch.nn.functional as F 
+
+from collections import OrderedDict
+
+
+from src.core import register
+
+
+class BasicBlock(nn.Module):
+    """Two 3x3 conv + BN layers with a shortcut connection.
+
+    The shortcut is an empty nn.Sequential (identity) unless the stride or
+    the channel count changes, in which case it is a strided 1x1 conv + BN.
+    """
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        out_planes = self.expansion * planes
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        needs_projection = stride != 1 or in_planes != out_planes
+        if needs_projection:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = F.relu(self.bn1(out))
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out += self.shortcut(x)
+        return F.relu(out)
+
+
+
+class _ResNet(nn.Module):
+    """Small ResNet classifier: 3x3 stem, four stages, 4x4 average pool, linear layer.
+
+    Args:
+        block: block class; called as `block(in_planes, planes, stride)` and
+            expected to expose an `expansion` attribute.
+        num_blocks: number of blocks in each of the four stages.
+        num_classes: output size of the final linear layer.
+    """
+
+    def __init__(self, block, num_blocks, num_classes=10):
+        super().__init__()
+        self.in_planes = 64
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+
+        # (planes, stride of the first block) for layer1..layer4.
+        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
+        for idx, (planes, stride) in enumerate(stage_specs):
+            stage = self._make_layer(block, planes, num_blocks[idx], stride=stride)
+            setattr(self, 'layer{}'.format(idx + 1), stage)
+
+        self.linear = nn.Linear(512 * block.expansion, num_classes)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Build one stage; only its first block uses `stride`."""
+        per_block_strides = [stride] + [1] * (num_blocks - 1)
+        layers = []
+        for block_stride in per_block_strides:
+            layers.append(block(self.in_planes, planes, block_stride))
+            # The next block consumes this block's output channels.
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            out = stage(out)
+        out = F.avg_pool2d(out, 4)
+        out = out.view(out.size(0), -1)
+        return self.linear(out)
+        
+
+@register
+class MResNet(nn.Module):
+    """Registered wrapper around `_ResNet` built from `BasicBlock`."""
+
+    def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
+        super().__init__()
+        self.model = _ResNet(BasicBlock, num_blocks, num_classes)
+
+    def forward(self, x):
+        logits = self.model(x)
+        return logits
+
--- a/rtdetr_pytorch/src/nn/backbone/utils.py
+++ b/rtdetr_pytorch/src/nn/backbone/utils.py
@@ -0,0 +1,58 @@
+"""
+https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py
+
+by lyuwenyu
+"""
+
+from collections import OrderedDict
+from typing import Dict, List
+
+
+import torch.nn as nn 
+
+
+class IntermediateLayerGetter(nn.ModuleDict):
+    """
+    Module wrapper that returns intermediate layers from a model.
+
+    It strongly assumes that the modules were registered on the model in the
+    same order as they are used, so the same nn.Module must **not** be reused
+    twice in the forward pass for this to work.
+
+    Only submodules assigned directly to the model can be queried: given
+    `model`, `model.feature1` can be returned but `model.feature1.layer2`
+    cannot.
+    """
+
+    _version = 3
+
+    def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
+        child_names = [name for name, _ in model.named_children()]
+        if not set(return_layers).issubset(child_names):
+            raise ValueError("return_layers are not present in model. {}"\
+                .format(child_names))
+
+        # Keep children in registration order, stopping right after the last
+        # requested layer has been collected.
+        pending = {str(k) for k in return_layers}
+        layers = OrderedDict()
+        for name, module in model.named_children():
+            layers[name] = module
+            pending.discard(name)
+            if not pending:
+                break
+
+        super().__init__(layers)
+        self.return_layers = return_layers
+
+    def forward(self, x):
+        """Feed `x` through the kept layers; return the requested outputs as a list."""
+        outputs = []
+        for name, module in self.items():
+            x = module(x)
+            if name in self.return_layers:
+                outputs.append(x)
+
+        return outputs
+
--- a/rtdetr_pytorch/src/nn/criterion/__init__.py
+++ b/rtdetr_pytorch/src/nn/criterion/__init__.py
@@ -0,0 +1,6 @@
+
+import torch.nn as nn 
+from src.core import register
+
+# Pass torch's nn.CrossEntropyLoss through the project's `register` helper and
+# expose the result under the same name.
+CrossEntropyLoss = register(nn.CrossEntropyLoss)
+
--- a/rtdetr_pytorch/src/nn/criterion/utils.py
+++ b/rtdetr_pytorch/src/nn/criterion/utils.py
@@ -0,0 +1,20 @@
+import torch 
+import torchvision
+
+
+
+def format_target(targets):
+    '''Flatten per-image targets into a single tensor.
+
+    Args:
+        targets (List[Dict]): each dict provides 'boxes' (converted here from
+            xyxy to cxcywh) and 'labels'.
+    Return:
+        tensor (Tensor): rows of [im_id, label, bbox], where im_id is the
+            position of the image in `targets`.
+    '''
+    def _rows_for(im_idx, tgt):
+        boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh')
+        labels = tgt['labels'].reshape(-1, 1)
+        im_ids = torch.ones_like(labels) * im_idx
+        return torch.cat([im_ids, labels, boxes], dim=1)
+
+    per_image = [_rows_for(i, tgt) for i, tgt in enumerate(targets)]
+    return torch.cat(per_image, dim=0)