# Training schedule and optimizer settings.
# NOTE(review): the `!PiecewiseDecay` / `!LinearWarmup` tags are custom YAML
# tags; they must be registered by the consuming framework's loader and will
# be rejected by a plain safe loader.

# Total number of training epochs.
epoch: 72

LearningRate:
  base_lr: 0.0001
  schedulers:
    # NOTE(review): the only milestone (100) lies beyond `epoch` (72) and
    # gamma is 1.0, so this entry presumably never changes the learning
    # rate -- confirm against the scheduler implementation.
    - !PiecewiseDecay
      gamma: 1.0
      milestones: [100]
      use_warmup: true
    # Warmup over the first 2000 steps, starting from a factor of 0.001
    # (presumably relative to base_lr -- confirm).
    - !LinearWarmup
      start_factor: 0.001
      steps: 2000

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  # No separate regularizer entry; weight decay is set on the optimizer below.
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001