first commit

This commit is contained in:
陈赣
2026-06-03 12:42:47 +08:00
commit ec23799148
339 changed files with 57120 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
# Detection task: evaluator, class count and train/val dataloaders that
# read COCO-format annotations from ./dataset/coco/.
task: detection

evaluator:
  type: CocoEvaluator
  iou_types:
    - bbox

# Alternative class setups kept for reference:
# num_classes: 365
# remap_mscoco_category: False
# num_classes: 91
# remap_mscoco_category: False
num_classes: 80
remap_mscoco_category: true

train_dataloader:
  type: DataLoader
  dataset:
    type: CocoDetection
    img_folder: ./dataset/coco/train2017/
    ann_file: ./dataset/coco/annotations/instances_train2017.json
    return_masks: false
    transforms:
      type: Compose
      # No ops are set here; presumably filled in by another config - confirm.
      ops: null
  shuffle: true
  num_workers: 4
  drop_last: true
  collate_fn:
    type: BatchImageCollateFunction

val_dataloader:
  type: DataLoader
  dataset:
    type: CocoDetection
    img_folder: ./dataset/coco/val2017/
    ann_file: ./dataset/coco/annotations/instances_val2017.json
    return_masks: false
    transforms:
      type: Compose
      # No ops are set here; presumably filled in by another config - confirm.
      ops: null
  shuffle: false
  num_workers: 4
  drop_last: false
  collate_fn:
    type: BatchImageCollateFunction

View File

@@ -0,0 +1,40 @@
# Detection task: evaluator, class count and train/val dataloaders that
# read a VOC-style dataset rooted at ./dataset/voc/.
task: detection

evaluator:
  type: CocoEvaluator
  iou_types:
    - bbox

num_classes: 20

train_dataloader:
  type: DataLoader
  dataset:
    type: VOCDetection
    root: ./dataset/voc/
    ann_file: trainval.txt
    label_file: label_list.txt
    transforms:
      type: Compose
      # No ops are set here; presumably filled in by another config - confirm.
      ops: null
  shuffle: true
  num_workers: 4
  drop_last: true
  collate_fn:
    type: BatchImageCollateFunction

val_dataloader:
  type: DataLoader
  dataset:
    type: VOCDetection
    root: ./dataset/voc/
    ann_file: test.txt
    label_file: label_list.txt
    transforms:
      type: Compose
      # No ops are set here; presumably filled in by another config - confirm.
      ops: null
  shuffle: false
  num_workers: 4
  drop_last: false
  collate_fn:
    type: BatchImageCollateFunction

View File

@@ -0,0 +1,31 @@
# Dataloader settings: training augmentation pipeline, multi-scale
# collate sizes and batch sizes for training and validation.
train_dataloader:
  dataset:
    return_masks: false
    transforms:
      ops:
        - type: RandomPhotometricDistort
          p: 0.5
        - type: RandomZoomOut
          fill: 0
        - type: RandomIoUCrop
          p: 0.8
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: RandomHorizontalFlip
        - type: Resize
          size: [640, 640]
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: ConvertPILImage
          dtype: float32
          scale: true
        - type: ConvertBoxes
          fmt: cxcywh
          normalize: true
  collate_fn:
    type: BatchImageCollateFunction
    # 640 is listed three times in the scale set.
    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
  shuffle: true
  num_workers: 4
  total_batch_size: 16

val_dataloader:
  dataset:
    transforms:
      ops:
        - type: Resize
          size: [640, 640]
        - type: ConvertPILImage
          dtype: float32
          scale: true
  shuffle: false
  total_batch_size: 16
  num_workers: 8

View File

@@ -0,0 +1,40 @@
# Training schedule: EMA, epoch count, gradient clipping, AdamW parameter
# groups and learning-rate schedulers.
use_ema: true
ema:
  type: ModelEMA
  decay: 0.9999
  warmups: 2000

epoches: 72
clip_max_norm: 0.1

optimizer:
  type: AdamW
  # Each entry selects a group by regex (presumably matched against
  # parameter names - confirm) and overrides lr / weight_decay for it.
  params:
    # backbone, excluding names containing `norm` or `bn`
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00001
    # backbone names containing `norm` or `bn`: no weight decay
    - params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
      lr: 0.00001
    # encoder/decoder names containing `norm`, `bn` or `bias`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

lr_scheduler:
  type: MultiStepLR
  milestones: [1000]
  gamma: 0.1

lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 2000

View File

@@ -0,0 +1,79 @@
# Model definition: RTDETR built from a PResNet backbone, HybridEncoder
# and RTDETRTransformer decoder, plus post-processor and criterion.
task: detection

model: RTDETR
criterion: RTDETRCriterion
postprocessor: RTDETRPostProcessor

use_focal_loss: true
eval_spatial_size: [640, 640]  # h w

RTDETR:
  backbone: PResNet
  encoder: HybridEncoder
  decoder: RTDETRTransformer

PResNet:
  depth: 50
  variant: d
  freeze_at: 0
  return_idx: [1, 2, 3]
  num_stages: 4
  freeze_norm: true
  pretrained: true

HybridEncoder:
  in_channels: [512, 1024, 2048]
  feat_strides: [8, 16, 32]
  # intra
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  nhead: 8
  dim_feedforward: 1024
  dropout: 0.0
  enc_act: gelu
  # cross
  expansion: 1.0
  depth_mult: 1
  act: silu
  version: v1

RTDETRTransformer:
  feat_channels: [256, 256, 256]
  feat_strides: [8, 16, 32]
  hidden_dim: 256
  num_levels: 3
  num_layers: 6
  num_queries: 300
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0  # 1.0 0.4
  eval_idx: -1

RTDETRPostProcessor:
  num_top_queries: 300

RTDETRCriterion:
  weight_dict:
    loss_vfl: 1
    loss_bbox: 5
    loss_giou: 2
  losses:
    - vfl
    - boxes
  alpha: 0.75
  gamma: 2.0
  matcher:
    type: HungarianMatcher
    weight_dict:
      cost_class: 2
      cost_bbox: 5
      cost_giou: 2
    alpha: 0.25
    gamma: 2.0

View File

@@ -0,0 +1,111 @@
# DETRs Beat YOLOs on Real-time Object Detection
## Introduction
This repository is the official PyTorch implementation of [*RTDETR*](https://arxiv.org/abs/2304.08069v1), and is compatible with [RT-DETR/rtdetr_pytorch](https://github.com/lyuwenyu/RT-DETR/tree/main). For the Paddle implementation, please refer to [RT-DETR/rtdetr_paddle](https://github.com/lyuwenyu/RT-DETR/tree/main). **If you are using RT-DETR for the first time, it is highly recommended to use [rtdetrv2](../rtdetrv2/)**.
<details open>
<summary> Fig </summary>
<div align="center">
<img src="https://github.com/lyuwenyu/RT-DETR/assets/17582080/42636690-1ecf-4647-b075-842ecb9bc562" width=500>
</div>
</details>
<!--
<div align="center">
<img src="https://github.com/lyuwenyu/RT-DETR/assets/17582080/42636690-1ecf-4647-b075-842ecb9bc562" width=500>
</div> -->
## Model Zoo
| Model | Dataset | Input Size | AP<sup>val</sup> | AP<sub>50</sub><sup>val</sup> | #Params(M) | FPS | checkpoint |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
rtdetr_r18vd | COCO | 640 | 46.4 | 63.7 | 20 | 217 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth)
rtdetr_r34vd | COCO | 640 | 48.9 | 66.8 | 31 | 161 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth)
rtdetr_r50vd_m | COCO | 640 | 51.3 | 69.5 | 36 | 145 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth)
rtdetr_r50vd | COCO | 640 | 53.1 | 71.2| 42 | 108 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth)
rtdetr_r101vd | COCO | 640 | 54.3 | 72.8 | 76 | 74 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth)
rtdetr_r18vd | COCO+Objects365 | 640 | 49.0 | 66.5 | 20 | 217 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
rtdetr_r50vd | COCO+Objects365 | 640 | 55.2 | 73.4 | 42 | 108 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
rtdetr_r101vd | COCO+Objects365 | 640 | 56.2 | 74.5 | 76 | 74 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth)
<!-- rtdetr_r18vd | COCO | 640 | 46.5 | 63.6 | 20 | 217 | [url](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_6x_coco.pth) -->
<!-- rtdetr_r18vd | Objects365 | 640 | 22.9 | 31.2| - | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
rtdetr_r50vd | Objects365 | 640 | 35.1 | 46.2 | - | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
rtdetr_r101vd | Objects365 | 640 | 36.8 | 48.3 | - | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth) -->
Notes
<!-- - AP is evaluated on coco 2017 val dataset -->
<!-- RT-DETR was trained on COCO train2017 and evaluated on val2017. -->
- `COCO + Objects365` in the table means finetuned model on `COCO` using pretrained weights trained on `Objects365`.
- `FPS` is evaluated on a single T4 GPU with `batch_size = 1` in `tensorrt_fp16` mode.
- `url`<sup>`*`</sup> is the URL of the pretrained weights, converted from the Paddle model to save energy. *There may be slight differences between this table and the paper.*
## Usage
<details>
<summary> details </summary>
<!-- <summary>1. Training </summary> -->
1. Training
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port=9909 --nproc_per_node=4 tools/train.py -c path/to/config &> log.txt 2>&1 &
```
<!-- <summary>2. Testing </summary> -->
2. Testing
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port=9909 --nproc_per_node=4 tools/train.py -c path/to/config -r path/to/checkpoint --test-only
```
<!-- <summary>3. Tuning </summary> -->
3. Tuning
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port=9909 --nproc_per_node=4 tools/train.py -c path/to/config -t path/to/checkpoint &> log.txt 2>&1 &
```
<!-- <summary>4. Export onnx </summary> -->
4. Export onnx
```shell
python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check
```
<!-- <summary>5. Inference </summary> -->
5. Inference
Inference with torch, onnxruntime, tensorrt and openvino is supported; see details in *references/deploy*.
```shell
python references/deploy/rtdetrv2_onnx.py --onnx-file=model.onnx --im-file=xxxx
python references/deploy/rtdetrv2_tensorrt.py --trt-file=model.trt --im-file=xxxx
python references/deploy/rtdetrv2_torch.py -c path/to/config -r path/to/checkpoint --im-file=xxx --device=cuda:0
```
</details>
## Citation
If you use `RTDETR` in your work, please use the following BibTeX entries:
<details>
<summary> bibtex </summary>
```latex
@misc{lv2023detrs,
title={DETRs Beat YOLOs on Real-time Object Detection},
author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu},
year={2023},
eprint={2304.08069},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@software{Lv_rtdetr_by_cvperception_2023,
author = {Lv, Wenyu},
license = {Apache-2.0},
month = oct,
title = {{rtdetr by cvperception}},
url = {https://github.com/lyuwenyu/cvperception/},
version = {0.0.1dev},
year = {2023}
}
```
</details>

View File

@@ -0,0 +1,41 @@
# RT-DETR with a depth-101 PResNet backbone and a wider (384-dim) encoder.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetr_r50vd.yml'

output_dir: ./output/rtdetr_r101vd_6x_coco

PResNet:
  depth: 101

HybridEncoder:
  # intra
  hidden_dim: 384
  dim_feedforward: 2048

RTDETRTransformer:
  feat_channels: [384, 384, 384]

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.000001
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,48 @@
# RT-DETR with a depth-18 PResNet backbone and a 3-layer decoder.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetr_r50vd.yml'

output_dir: ./output/rtdetr_r18vd_6x_coco

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformer:
  num_layers: 3

optimizer:
  type: AdamW
  params:
    # Backbone entries containing `norm` or `bn`: no weight decay. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
      lr: 0.00001
    # Remaining backbone entries (exact complement of the group above)
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00001
    # encoder/decoder entries containing `norm`, `bn` or `bias`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,48 @@
# RT-DETR with a depth-34 PResNet backbone and a 4-layer decoder.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetr_r50vd.yml'

output_dir: ./output/rtdetr_r34vd_6x_coco

PResNet:
  depth: 34
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformer:
  num_layers: 4

optimizer:
  type: AdamW
  params:
    # Backbone entries containing `norm` or `bn`: no weight decay. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
      lr: 0.00001
    # Remaining backbone entries (exact complement of the group above)
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00001
    # encoder/decoder entries containing `norm`, `bn` or `bias`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,14 @@
# RT-DETR R50vd: uses the included configs as-is and only sets the
# output directory.
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetr_r50vd.yml'

output_dir: ./output/rtdetr_r50vd_6x_coco

View File

@@ -0,0 +1,34 @@
# RT-DETR R50vd-m: halved encoder expansion, evaluation on decoder layer
# index 2. Configs listed in `__include__` are presumably loaded first and
# then overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetr_r50vd.yml'

output_dir: ./output/rtdetr_r50vd_m_6x_coco

HybridEncoder:
  expansion: 0.5

RTDETRTransformer:
  eval_idx: 2  # use the 3rd decoder layer to eval

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00001
    # encoder/decoder entries containing `norm`, `bn` or `bias`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,38 @@
# Dataloader settings: training augmentation pipeline with an epoch-based
# stop policy, multi-scale collate sizes and batch sizes.
train_dataloader:
  dataset:
    transforms:
      ops:
        - type: RandomPhotometricDistort
          p: 0.5
        - type: RandomZoomOut
          fill: 0
        - type: RandomIoUCrop
          p: 0.8
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: RandomHorizontalFlip
        - type: Resize
          size: [640, 640]
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: ConvertPILImage
          dtype: float32
          scale: true
        - type: ConvertBoxes
          fmt: cxcywh
          normalize: true
      policy:
        name: stop_epoch
        epoch: 71  # epoch in [71, ~) stop `ops`
        ops:
          - RandomPhotometricDistort
          - RandomZoomOut
          - RandomIoUCrop
  collate_fn:
    type: BatchImageCollateFunction
    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
    stop_epoch: 71  # epoch in [71, ~) stop `multiscales`
  shuffle: true
  total_batch_size: 16  # total batch size equals to 16 (4 * 4)
  num_workers: 4

val_dataloader:
  dataset:
    transforms:
      ops:
        - type: Resize
          size: [640, 640]
        - type: ConvertPILImage
          dtype: float32
          scale: true
  shuffle: false
  total_batch_size: 32
  num_workers: 4

View File

@@ -0,0 +1,37 @@
# Training schedule: AMP, EMA, epoch count, gradient clipping, AdamW
# parameter groups and learning-rate schedulers.
use_amp: true
use_ema: true
ema:
  type: ModelEMA
  decay: 0.9999
  warmups: 2000

epoches: 72
clip_max_norm: 0.1

optimizer:
  type: AdamW
  # Each entry selects a group by regex (presumably matched against
  # parameter names - confirm) and overrides lr / weight_decay for it.
  params:
    # backbone, excluding names containing `norm`
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    # encoder/decoder names containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

lr_scheduler:
  type: MultiStepLR
  milestones: [1000]
  gamma: 0.1

lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 2000

View File

@@ -0,0 +1,83 @@
# Model definition: RTDETR built from a PResNet backbone, HybridEncoder
# and RTDETRTransformerv2 decoder, plus post-processor and criterion.
task: detection

model: RTDETR
criterion: RTDETRCriterionv2
postprocessor: RTDETRPostProcessor

use_focal_loss: true
eval_spatial_size: [640, 640]  # h w

RTDETR:
  backbone: PResNet
  encoder: HybridEncoder
  decoder: RTDETRTransformerv2

PResNet:
  depth: 50
  variant: d
  freeze_at: 0
  return_idx: [1, 2, 3]
  num_stages: 4
  freeze_norm: true
  pretrained: true

HybridEncoder:
  in_channels: [512, 1024, 2048]
  feat_strides: [8, 16, 32]
  # intra
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  nhead: 8
  dim_feedforward: 1024
  dropout: 0.0
  enc_act: gelu
  # cross
  expansion: 1.0
  depth_mult: 1
  act: silu

RTDETRTransformerv2:
  feat_channels: [256, 256, 256]
  feat_strides: [8, 16, 32]
  hidden_dim: 256
  num_levels: 3
  num_layers: 6
  num_queries: 300
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0  # 1.0 0.4
  eval_idx: -1
  # NEW
  num_points: [4, 4, 4]  # [3,3,3] [2,2,2]
  cross_attn_method: default  # default, discrete
  query_select_method: default  # default, agnostic

RTDETRPostProcessor:
  num_top_queries: 300

RTDETRCriterionv2:
  weight_dict:
    loss_vfl: 1
    loss_bbox: 5
    loss_giou: 2
  losses:
    - vfl
    - boxes
  alpha: 0.75
  gamma: 2.0
  matcher:
    type: HungarianMatcher
    weight_dict:
      cost_class: 2
      cost_bbox: 5
      cost_giou: 2
    alpha: 0.25
    gamma: 2.0

View File

@@ -0,0 +1,50 @@
# RT-DETRv2 with an HGNetv2 'H' backbone, 512-dim two-layer encoder.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_hgnetv2_h_6x_coco

RTDETR:
  backbone: HGNetv2

HGNetv2:
  name: 'H'
  return_idx: [1, 2, 3]
  freeze_at: 0
  freeze_norm: true
  pretrained: true

HybridEncoder:
  # intra
  hidden_dim: 512
  dim_feedforward: 2048
  num_encoder_layers: 2

RTDETRTransformerv2:
  feat_channels: [512, 512, 512]

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.000005
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,38 @@
# RT-DETRv2 with an HGNetv2 'L' backbone.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_hgnetv2_l_6x_coco

RTDETR:
  backbone: HGNetv2

HGNetv2:
  name: 'L'
  return_idx: [1, 2, 3]
  freeze_at: 0
  freeze_norm: true
  pretrained: true

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.000005
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,50 @@
# RT-DETRv2 with an HGNetv2 'X' backbone and a 384-dim encoder.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_hgnetv2_x_6x_coco

RTDETR:
  backbone: HGNetv2

HGNetv2:
  name: 'X'
  return_idx: [1, 2, 3]
  freeze_at: 0
  freeze_norm: true
  pretrained: true

HybridEncoder:
  # intra
  hidden_dim: 384
  dim_feedforward: 2048

RTDETRTransformerv2:
  feat_channels: [384, 384, 384]

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.000001
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,40 @@
# RT-DETRv2 with a depth-101 PResNet backbone and a 384-dim encoder.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r101vd_6x_coco

PResNet:
  depth: 101

HybridEncoder:
  # intra
  hidden_dim: 384
  dim_feedforward: 2048

RTDETRTransformerv2:
  feat_channels: [384, 384, 384]

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.000001
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,46 @@
# RT-DETRv2 with a depth-18 PResNet backbone, 3 decoder layers, 120 epochs.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r18vd_120e_coco

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 3

epoches: 120

optimizer:
  type: AdamW
  params:
    # any entry containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 117
  collate_fn:
    scales: null

View File

@@ -0,0 +1,46 @@
# RT-DETRv2 with a depth-18 PResNet backbone on the VOC dataset config,
# 120 epochs. Configs listed in `__include__` are presumably loaded first
# and then overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/voc_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r18vd_120e_voc

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 3

epoches: 120

optimizer:
  type: AdamW
  params:
    # any entry containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 117
  collate_fn:
    scales: null
  # NOTE(review): assumed to be a train_dataloader key - confirm nesting.
  total_batch_size: 32

View File

@@ -0,0 +1,49 @@
# RT-DETRv2 R18vd with the `discrete` cross-attention method, 36 epochs,
# started from the checkpoint given in `tuning`.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r18vd_120e_coco.pth
output_dir: ./output/rtdetrv2_r18vd_dsp_3x_coco

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 3
  num_points: [4, 4, 4]
  cross_attn_method: discrete

epoches: 36

optimizer:
  type: AdamW
  params:
    # any entry containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 33
  collate_fn:
    scales: null

View File

@@ -0,0 +1,47 @@
# RT-DETRv2 R18vd with num_points [1, 1, 1], 120 epochs.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r18vd_sp1_120e_coco

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 3
  num_points: [1, 1, 1]

epoches: 120

optimizer:
  type: AdamW
  params:
    # any entry containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 117
  collate_fn:
    scales: null

View File

@@ -0,0 +1,47 @@
# RT-DETRv2 R18vd with num_points [2, 2, 2], 120 epochs.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r18vd_sp2_120e_coco

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 3
  num_points: [2, 2, 2]

epoches: 120

optimizer:
  type: AdamW
  params:
    # any entry containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 117
  collate_fn:
    scales: null

View File

@@ -0,0 +1,47 @@
# RT-DETRv2 R18vd with num_points [3, 3, 3], 120 epochs.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r18vd_sp3_120e_coco

PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 3
  num_points: [3, 3, 3]

epoches: 120

optimizer:
  type: AdamW
  params:
    # any entry containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 117
  collate_fn:
    scales: null

View File

@@ -0,0 +1,57 @@
# RT-DETRv2 with a depth-34 PResNet backbone, 4 decoder layers, 120 epochs.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r34vd_120e_coco

PResNet:
  depth: 34
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 4

epoches: 120

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00005
    # Backbone entries containing `norm` or `bn` (exact complement of the
    # group above): no weight decay
    - params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
      lr: 0.00005
      weight_decay: 0.0
    # encoder/decoder entries containing `norm`, `bn` or `bias`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 117
  collate_fn:
    stop_epoch: 117

View File

@@ -0,0 +1,59 @@
# RT-DETRv2 R34vd with the `discrete` cross-attention method, 12 epochs,
# started from the checkpoint given in `tuning`.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r34vd_120e_coco_ema.pth
output_dir: ./output/rtdetrv2_r34vd_dsp_1x_coco

PResNet:
  depth: 34
  freeze_at: -1
  freeze_norm: false
  pretrained: true

HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5

RTDETRTransformerv2:
  num_layers: 4
  cross_attn_method: discrete

epoches: 12

optimizer:
  type: AdamW
  params:
    # Backbone entries that contain neither `norm` nor `bn`. The
    # alternation is wrapped in (?:...) so `bn` is searched for anywhere
    # in the string; ungrouped, `.*norm|bn` only tests `bn` at the start.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00005
    # Backbone entries containing `norm` or `bn` (exact complement of the
    # group above): no weight decay
    - params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
      lr: 0.00005
      weight_decay: 0.0
    # encoder/decoder entries containing `norm`, `bn` or `bias`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 10
  collate_fn:
    stop_epoch: 10

View File

@@ -0,0 +1,27 @@
# RT-DETRv2 R50vd: included configs plus an explicit optimizer section.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r50vd_6x_coco

optimizer:
  type: AdamW
  params:
    # backbone, excluding entries containing `norm`
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

View File

@@ -0,0 +1,27 @@
# RT-DETRv2 R50vd with the `discrete` cross-attention method, 12 epochs,
# started from the checkpoint given in `tuning`.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_6x_coco_ema.pth
output_dir: ./output/rtdetrv2_r50vd_dsp_1x_coco

RTDETRTransformerv2:
  cross_attn_method: discrete

epoches: 12

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 10
  collate_fn:
    stop_epoch: 10

View File

@@ -0,0 +1,43 @@
# RT-DETRv2 R50vd-m: halved encoder expansion, evaluation on decoder layer
# index 2, 84 epochs. Configs listed in `__include__` are presumably loaded
# first and then overridden by the keys below (project-specific loader -
# confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r50vd_m_6x_coco

HybridEncoder:
  expansion: 0.5

RTDETRTransformerv2:
  eval_idx: 2  # use the 3rd decoder layer to eval

epoches: 84

optimizer:
  type: AdamW
  params:
    # backbone, excluding entries containing `norm`
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 81
  collate_fn:
    stop_epoch: 81

View File

@@ -0,0 +1,44 @@
# RT-DETRv2 R50vd-m with the `discrete` cross-attention method, 36 epochs,
# started from the checkpoint given in `tuning`.
# Configs listed in `__include__` are presumably loaded first and then
# overridden by the keys below (project-specific loader - confirm).
__include__:
  - '../dataset/coco_detection.yml'
  - '../runtime.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'

output_dir: ./output/rtdetrv2_r50vd_m_dsp_3x_coco
tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_m_7x_coco_ema.pth

HybridEncoder:
  expansion: 0.5

RTDETRTransformerv2:
  eval_idx: 2  # use the 3rd decoder layer to eval
  cross_attn_method: discrete

epoches: 36

optimizer:
  type: AdamW
  params:
    # backbone, excluding entries containing `norm`
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    # encoder/decoder entries containing `norm` or `bn`: no weight decay
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level values
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001

train_dataloader:
  dataset:
    transforms:
      policy:
        epoch: 33
  collate_fn:
    stop_epoch: 33

View File

@@ -0,0 +1,21 @@
# Runtime defaults: logging/checkpoint frequency, distributed options,
# mixed precision and EMA.
print_freq: 100
output_dir: ./logs
checkpoint_freq: 1

sync_bn: true
find_unused_parameters: false

use_amp: false
scaler:
  type: GradScaler
  enabled: true

use_ema: false
ema:
  type: ModelEMA
  decay: 0.9999
  warmups: 2000