first commit

This commit is contained in:
陈赣
2026-06-03 12:42:47 +08:00
commit ec23799148
339 changed files with 57120 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
metric: COCO
num_classes: 80
TrainDataset:
name: COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
name: COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
allow_empty: true
TestDataset:
name: ImageFolder
anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'

View File

@@ -0,0 +1,21 @@
metric: VOC
map_type: 11point
num_classes: 20
TrainDataset:
name: VOCDataSet
dataset_dir: dataset/voc
anno_path: trainval.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
EvalDataset:
name: VOCDataSet
dataset_dir: dataset/voc
anno_path: test.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
TestDataset:
name: ImageFolder
anno_path: dataset/voc/label_list.txt

View File

@@ -0,0 +1,19 @@
epoch: 72
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 1.0
milestones: [100]
use_warmup: true
- !LinearWarmup
start_factor: 0.001
steps: 2000
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001

View File

@@ -0,0 +1,71 @@
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
norm_type: sync_bn
use_ema: True
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: True
hidden_dim: 256
use_focal_loss: True
eval_size: [640, 640] # h, w
DETR:
backbone: ResNet
neck: HybridEncoder
transformer: RTDETRTransformer
detr_head: DINOHead
post_process: DETRPostProcess
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [1, 2, 3]
lr_mult_list: [0.1, 0.1, 0.1, 0.1]
num_stages: 4
freeze_stem_only: True
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 1.0
RTDETRTransformer:
num_queries: 300
position_embed_type: sine
feat_strides: [8, 16, 32]
num_levels: 3
nhead: 8
num_decoder_layers: 6
dim_feedforward: 1024
dropout: 0.0
activation: relu
num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0
learnt_init_query: False
DINOHead:
loss:
name: DINOLoss
loss_coeff: {class: 1, bbox: 5, giou: 2}
aux_loss: True
use_vfl: True
matcher:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
DETRPostProcess:
num_top_queries: 300

View File

@@ -0,0 +1,43 @@
worker_num: 4
TrainReader:
sample_transforms:
- Decode: {}
- RandomDistort: {prob: 0.8}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {prob: 0.8}
- RandomFlip: {}
batch_transforms:
- BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- NormalizeBox: {}
- BboxXYXY2XYWH: {}
- Permute: {}
batch_size: 4
shuffle: true
drop_last: true
collate_batch: false
use_shared_memory: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} # target_size: (h, w)
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- Permute: {}
batch_size: 4
shuffle: false
drop_last: false
TestReader:
inputs_def:
image_shape: [3, 640, 640]
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- Permute: {}
batch_size: 1
shuffle: false
drop_last: false

View File

@@ -0,0 +1,24 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
find_unused_parameters: True
log_iter: 200
DETR:
backbone: PPHGNetV2
PPHGNetV2:
arch: 'L'
return_idx: [1, 2, 3]
freeze_stem_only: True
freeze_at: 0
freeze_norm: True
lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]

View File

@@ -0,0 +1,40 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams
find_unused_parameters: True
log_iter: 200
DETR:
backbone: PPHGNetV2
PPHGNetV2:
arch: 'X'
return_idx: [1, 2, 3]
freeze_stem_only: True
freeze_at: 0
freeze_norm: True
lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]
HybridEncoder:
hidden_dim: 384
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 384
nhead: 8
dim_feedforward: 2048
dropout: 0.
activation: 'gelu'
expansion: 1.0

View File

@@ -0,0 +1,37 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r101vd_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [1, 2, 3]
lr_mult_list: [0.01, 0.01, 0.01, 0.01]
num_stages: 4
freeze_stem_only: True
HybridEncoder:
hidden_dim: 384
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 384
nhead: 8
dim_feedforward: 2048
dropout: 0.
activation: 'gelu'
expansion: 1.0

View File

@@ -0,0 +1,38 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r18_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
ResNet:
depth: 18
variant: d
return_idx: [1, 2, 3]
freeze_at: -1
freeze_norm: false
norm_decay: 0.
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 0.5
depth_mult: 1.0
RTDETRTransformer:
eval_idx: -1
num_decoder_layers: 3

View File

@@ -0,0 +1,38 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r34vd_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams
ResNet:
depth: 34
variant: d
return_idx: [1, 2, 3]
freeze_at: -1
freeze_norm: false
norm_decay: 0.
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 0.5
depth_mult: 1.0
RTDETRTransformer:
eval_idx: -1
num_decoder_layers: 4

View File

@@ -0,0 +1,11 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r50vd_6x_coco/model_final
find_unused_parameters: True
log_iter: 200

View File

@@ -0,0 +1,28 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r50vd_m_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 0.5
depth_mult: 1.0
RTDETRTransformer:
eval_idx: 2 # use 3th decoder layer to eval

View File

@@ -0,0 +1,16 @@
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
log_iter: 20
save_dir: output
snapshot_epoch: 1
print_flops: false
print_params: false
# Exporting the model
export:
post_process: True # Whether post-processing is included in the network when export model.
nms: True # Whether NMS is included in the network when export model.
benchmark: False # It is used to testing model performance, if set `True`, post-process and NMS will not be exported.
fuse_conv_bn: False