first commit
This commit is contained in:
31
rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml
Normal file
31
rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
train_dataloader:
|
||||
dataset:
|
||||
return_masks: False
|
||||
transforms:
|
||||
ops:
|
||||
- {type: RandomPhotometricDistort, p: 0.5}
|
||||
- {type: RandomZoomOut, fill: 0}
|
||||
- {type: RandomIoUCrop, p: 0.8}
|
||||
- {type: SanitizeBoundingBoxes, min_size: 1}
|
||||
- {type: RandomHorizontalFlip}
|
||||
- {type: Resize, size: [640, 640], }
|
||||
- {type: SanitizeBoundingBoxes, min_size: 1}
|
||||
- {type: ConvertPILImage, dtype: 'float32', scale: True}
|
||||
- {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
|
||||
collate_fn:
|
||||
type: BatchImageCollateFunction
|
||||
scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
|
||||
shuffle: True
|
||||
num_workers: 4
|
||||
total_batch_size: 16
|
||||
|
||||
val_dataloader:
|
||||
dataset:
|
||||
transforms:
|
||||
ops:
|
||||
- {type: Resize, size: [640, 640]}
|
||||
- {type: ConvertPILImage, dtype: 'float32', scale: True}
|
||||
shuffle: False
|
||||
total_batch_size: 16
|
||||
num_workers: 8
|
||||
40
rtdetrv2_pytorch/configs/rtdetr/include/optimizer.yml
Normal file
40
rtdetrv2_pytorch/configs/rtdetr/include/optimizer.yml
Normal file
@@ -0,0 +1,40 @@
|
||||
|
||||
use_ema: True
|
||||
ema:
|
||||
type: ModelEMA
|
||||
decay: 0.9999
|
||||
warmups: 2000
|
||||
|
||||
|
||||
epoches: 72
|
||||
clip_max_norm: 0.1
|
||||
|
||||
|
||||
optimizer:
|
||||
type: AdamW
|
||||
params:
|
||||
-
|
||||
params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
|
||||
weight_decay: 0.
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
|
||||
weight_decay: 0.
|
||||
|
||||
lr: 0.0001
|
||||
betas: [0.9, 0.999]
|
||||
weight_decay: 0.0001
|
||||
|
||||
|
||||
lr_scheduler:
|
||||
type: MultiStepLR
|
||||
milestones: [1000]
|
||||
gamma: 0.1
|
||||
|
||||
|
||||
lr_warmup_scheduler:
|
||||
type: LinearWarmup
|
||||
warmup_duration: 2000
|
||||
79
rtdetrv2_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml
Normal file
79
rtdetrv2_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml
Normal file
@@ -0,0 +1,79 @@
|
||||
task: detection
|
||||
|
||||
model: RTDETR
|
||||
criterion: RTDETRCriterion
|
||||
postprocessor: RTDETRPostProcessor
|
||||
|
||||
|
||||
use_focal_loss: True
|
||||
eval_spatial_size: [640, 640] # h w
|
||||
|
||||
|
||||
RTDETR:
|
||||
backbone: PResNet
|
||||
encoder: HybridEncoder
|
||||
decoder: RTDETRTransformer
|
||||
|
||||
|
||||
PResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
freeze_at: 0
|
||||
return_idx: [1, 2, 3]
|
||||
num_stages: 4
|
||||
freeze_norm: True
|
||||
pretrained: True
|
||||
|
||||
|
||||
HybridEncoder:
|
||||
in_channels: [512, 1024, 2048]
|
||||
feat_strides: [8, 16, 32]
|
||||
|
||||
# intra
|
||||
hidden_dim: 256
|
||||
use_encoder_idx: [2]
|
||||
num_encoder_layers: 1
|
||||
nhead: 8
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.
|
||||
enc_act: 'gelu'
|
||||
|
||||
# cross
|
||||
expansion: 1.0
|
||||
depth_mult: 1
|
||||
act: 'silu'
|
||||
|
||||
version: v1
|
||||
|
||||
RTDETRTransformer:
|
||||
feat_channels: [256, 256, 256]
|
||||
feat_strides: [8, 16, 32]
|
||||
hidden_dim: 256
|
||||
num_levels: 3
|
||||
|
||||
num_layers: 6
|
||||
num_queries: 300
|
||||
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0 # 1.0 0.4
|
||||
|
||||
eval_idx: -1
|
||||
|
||||
|
||||
RTDETRPostProcessor:
|
||||
num_top_queries: 300
|
||||
|
||||
|
||||
RTDETRCriterion:
|
||||
weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
|
||||
losses: ['vfl', 'boxes', ]
|
||||
alpha: 0.75
|
||||
gamma: 2.0
|
||||
|
||||
matcher:
|
||||
type: HungarianMatcher
|
||||
weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
|
||||
alpha: 0.25
|
||||
gamma: 2.0
|
||||
|
||||
111
rtdetrv2_pytorch/configs/rtdetr/readme.md
Normal file
111
rtdetrv2_pytorch/configs/rtdetr/readme.md
Normal file
@@ -0,0 +1,111 @@
|
||||
# DETRs Beat YOLOs on Real-time Object Detection
|
||||
|
||||
## Introduction
|
||||
This repository is the official PyTorch implementation of [*RTDETR*](https://arxiv.org/abs/2304.08069v1), and is compatible with [RT-DETR/rtdetr_pytorch](https://github.com/lyuwenyu/RT-DETR/tree/main). For the Paddle implementation, please refer to [RT-DETR/rtdetr_paddle](https://github.com/lyuwenyu/RT-DETR/tree/main). **If you are using rtdetr for the first time, it is highly recommended to use [rtdetrv2](../rtdetrv2/)**.
|
||||
|
||||
<details open>
|
||||
<summary> Fig </summary>
|
||||
<div align="center">
|
||||
<img src="https://github.com/lyuwenyu/RT-DETR/assets/17582080/42636690-1ecf-4647-b075-842ecb9bc562" width=500>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<!--
|
||||
<div align="center">
|
||||
<img src="https://github.com/lyuwenyu/RT-DETR/assets/17582080/42636690-1ecf-4647-b075-842ecb9bc562" width=500>
|
||||
</div> -->
|
||||
|
||||
|
||||
## Model Zoo
|
||||
| Model | Dataset | Input Size | AP<sup>val</sup> | AP<sub>50</sub><sup>val</sup> | #Params(M) | FPS | checkpoint |
|
||||
| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
|
||||
rtdetr_r18vd | COCO | 640 | 46.4 | 63.7 | 20 | 217 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth)
|
||||
rtdetr_r34vd | COCO | 640 | 48.9 | 66.8 | 31 | 161 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth)
|
||||
rtdetr_r50vd_m | COCO | 640 | 51.3 | 69.5 | 36 | 145 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth)
|
||||
rtdetr_r50vd | COCO | 640 | 53.1 | 71.2| 42 | 108 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth)
|
||||
rtdetr_r101vd | COCO | 640 | 54.3 | 72.8 | 76 | 74 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth)
|
||||
rtdetr_r18vd | COCO+Objects365 | 640 | 49.0 | 66.5 | 20 | 217 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
|
||||
rtdetr_r50vd | COCO+Objects365 | 640 | 55.2 | 73.4 | 42 | 108 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
|
||||
rtdetr_r101vd | COCO+Objects365 | 640 | 56.2 | 74.5 | 76 | 74 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth)
|
||||
|
||||
<!-- rtdetr_r18vd | COCO | 640 | 46.5 | 63.6 | 20 | 217 | [url](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_6x_coco.pth) -->
|
||||
|
||||
<!-- rtdetr_r18vd | Objects365 | 640 | 22.9 | 31.2| - | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
|
||||
rtdetr_r50vd | Objects365 | 640 | 35.1 | 46.2 | - | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
|
||||
rtdetr_r101vd | Objects365 | 640 | 36.8 | 48.3 | - | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth) -->
|
||||
|
||||
Notes
|
||||
<!-- - AP is evaluated on coco 2017 val dataset -->
|
||||
<!-- RT-DETR was trained on COCO train2017 and evaluated on val2017. -->
|
||||
- `COCO + Objects365` in the table means finetuned model on `COCO` using pretrained weights trained on `Objects365`.
|
||||
- `FPS` is evaluated on a single T4 GPU with $batch\\_size = 1$ and $tensorrt\\_fp16$ mode
|
||||
- `url`<sup>`*`</sup> is the url of the pretrained weights, converted from the paddle model to save energy. *There may be slight differences between this table and the paper.*
|
||||
|
||||
|
||||
## Usage
|
||||
<details>
|
||||
<summary> details </summary>
|
||||
|
||||
<!-- <summary>1. Training </summary> -->
|
||||
1. Training
|
||||
```shell
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port=9909 --nproc_per_node=4 tools/train.py -c path/to/config > log.txt 2>&1 &
|
||||
```
|
||||
|
||||
<!-- <summary>2. Testing </summary> -->
|
||||
2. Testing
|
||||
```shell
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port=9909 --nproc_per_node=4 tools/train.py -c path/to/config -r path/to/checkpoint --test-only
|
||||
```
|
||||
|
||||
<!-- <summary>3. Tuning </summary> -->
|
||||
3. Tuning
|
||||
```shell
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port=9909 --nproc_per_node=4 tools/train.py -c path/to/config -t path/to/checkpoint > log.txt 2>&1 &
|
||||
```
|
||||
|
||||
<!-- <summary>4. Export onnx </summary> -->
|
||||
4. Export onnx
|
||||
```shell
|
||||
python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check
|
||||
```
|
||||
|
||||
<!-- <summary>5. Inference </summary> -->
|
||||
5. Inference
|
||||
|
||||
Supports torch, onnxruntime, tensorrt and openvino; see details in *references/deploy*.
|
||||
```shell
|
||||
python references/deploy/rtdetrv2_onnx.py --onnx-file=model.onnx --im-file=xxxx
|
||||
python references/deploy/rtdetrv2_tensorrt.py --trt-file=model.trt --im-file=xxxx
|
||||
python references/deploy/rtdetrv2_torch.py -c path/to/config -r path/to/checkpoint --im-file=xxx --device=cuda:0
|
||||
```
|
||||
</details>
|
||||
|
||||
|
||||
## Citation
|
||||
If you use `RTDETR` in your work, please use the following BibTeX entries:
|
||||
|
||||
<details>
|
||||
<summary> bibtex </summary>
|
||||
|
||||
```latex
|
||||
@misc{lv2023detrs,
|
||||
title={DETRs Beat YOLOs on Real-time Object Detection},
|
||||
author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu},
|
||||
year={2023},
|
||||
eprint={2304.08069},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
|
||||
@software{Lv_rtdetr_by_cvperception_2023,
|
||||
author = {Lv, Wenyu},
|
||||
license = {Apache-2.0},
|
||||
month = oct,
|
||||
title = {{rtdetr by cvperception}},
|
||||
url = {https://github.com/lyuwenyu/cvperception/},
|
||||
version = {0.0.1dev},
|
||||
year = {2023}
|
||||
}
|
||||
```
|
||||
</details>
|
||||
41
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml
Normal file
41
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
|
||||
__include__: [
|
||||
'../dataset/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'./include/dataloader.yml',
|
||||
'./include/optimizer.yml',
|
||||
'./include/rtdetr_r50vd.yml',
|
||||
]
|
||||
|
||||
|
||||
output_dir: ./output/rtdetr_r101vd_6x_coco
|
||||
|
||||
|
||||
PResNet:
|
||||
depth: 101
|
||||
|
||||
|
||||
HybridEncoder:
|
||||
# intra
|
||||
hidden_dim: 384
|
||||
dim_feedforward: 2048
|
||||
|
||||
|
||||
RTDETRTransformer:
|
||||
feat_channels: [384, 384, 384]
|
||||
|
||||
|
||||
optimizer:
|
||||
type: AdamW
|
||||
params:
|
||||
-
|
||||
params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
|
||||
lr: 0.000001
|
||||
-
|
||||
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
|
||||
weight_decay: 0.
|
||||
|
||||
lr: 0.0001
|
||||
betas: [0.9, 0.999]
|
||||
weight_decay: 0.0001
|
||||
|
||||
48
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml
Normal file
48
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
__include__: [
|
||||
'../dataset/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'./include/dataloader.yml',
|
||||
'./include/optimizer.yml',
|
||||
'./include/rtdetr_r50vd.yml',
|
||||
]
|
||||
|
||||
|
||||
output_dir: ./output/rtdetr_r18vd_6x_coco
|
||||
|
||||
|
||||
PResNet:
|
||||
depth: 18
|
||||
freeze_at: -1
|
||||
freeze_norm: False
|
||||
pretrained: True
|
||||
|
||||
|
||||
HybridEncoder:
|
||||
in_channels: [128, 256, 512]
|
||||
hidden_dim: 256
|
||||
expansion: 0.5
|
||||
|
||||
|
||||
RTDETRTransformer:
|
||||
num_layers: 3
|
||||
|
||||
|
||||
|
||||
optimizer:
|
||||
type: AdamW
|
||||
params:
|
||||
-
|
||||
params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
|
||||
weight_decay: 0.
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
|
||||
weight_decay: 0.
|
||||
|
||||
lr: 0.0001
|
||||
betas: [0.9, 0.999]
|
||||
weight_decay: 0.0001
|
||||
48
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml
Normal file
48
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
__include__: [
|
||||
'../dataset/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'./include/dataloader.yml',
|
||||
'./include/optimizer.yml',
|
||||
'./include/rtdetr_r50vd.yml',
|
||||
]
|
||||
|
||||
|
||||
output_dir: ./output/rtdetr_r34vd_6x_coco
|
||||
|
||||
|
||||
PResNet:
|
||||
depth: 34
|
||||
freeze_at: -1
|
||||
freeze_norm: False
|
||||
pretrained: True
|
||||
|
||||
|
||||
HybridEncoder:
|
||||
in_channels: [128, 256, 512]
|
||||
hidden_dim: 256
|
||||
expansion: 0.5
|
||||
|
||||
|
||||
RTDETRTransformer:
|
||||
num_layers: 4
|
||||
|
||||
|
||||
|
||||
optimizer:
|
||||
type: AdamW
|
||||
params:
|
||||
-
|
||||
params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
|
||||
weight_decay: 0.
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
|
||||
weight_decay: 0.
|
||||
|
||||
lr: 0.0001
|
||||
betas: [0.9, 0.999]
|
||||
weight_decay: 0.0001
|
||||
14
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml
Normal file
14
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
__include__: [
|
||||
'../dataset/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'./include/dataloader.yml',
|
||||
'./include/optimizer.yml',
|
||||
'./include/rtdetr_r50vd.yml',
|
||||
]
|
||||
|
||||
|
||||
output_dir: ./output/rtdetr_r50vd_6x_coco
|
||||
|
||||
|
||||
|
||||
34
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml
Normal file
34
rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml
Normal file
@@ -0,0 +1,34 @@
|
||||
__include__: [
|
||||
'../dataset/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'./include/dataloader.yml',
|
||||
'./include/optimizer.yml',
|
||||
'./include/rtdetr_r50vd.yml',
|
||||
]
|
||||
|
||||
output_dir: ./output/rtdetr_r50vd_m_6x_coco
|
||||
|
||||
|
||||
HybridEncoder:
|
||||
expansion: 0.5
|
||||
|
||||
|
||||
RTDETRTransformer:
|
||||
eval_idx: 2 # use 3rd decoder layer to eval
|
||||
|
||||
|
||||
|
||||
optimizer:
|
||||
type: AdamW
|
||||
params:
|
||||
-
|
||||
params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
|
||||
lr: 0.00001
|
||||
-
|
||||
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
|
||||
weight_decay: 0.
|
||||
|
||||
lr: 0.0001
|
||||
betas: [0.9, 0.999]
|
||||
weight_decay: 0.0001
|
||||
|
||||
Reference in New Issue
Block a user