first commit
This commit is contained in:
85
rtdetr_paddle/ppdet/modeling/transformers/ext_op/README.md
Normal file
85
rtdetr_paddle/ppdet/modeling/transformers/ext_op/README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# Multi-scale deformable attention自定义OP编译
|
||||
该自定义OP是参考[自定义外部算子](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html) 。
|
||||
|
||||
## 1. 环境依赖
|
||||
- Paddle >= 2.3.2
|
||||
- gcc 8.2
|
||||
|
||||
## 2. 安装
|
||||
请在当前路径下进行编译安装
|
||||
```
|
||||
cd rtdetr_paddle/ppdet/modeling/transformers/ext_op/
|
||||
python setup_ms_deformable_attn_op.py install
|
||||
```
|
||||
|
||||
编译完成后即可使用,以下为`ms_deformable_attn`的使用示例
|
||||
```
|
||||
# 引入自定义op
|
||||
from deformable_detr_ops import ms_deformable_attn
|
||||
|
||||
# 构造fake input tensor
|
||||
bs, n_heads, c = 2, 8, 8
|
||||
query_length, n_levels, n_points = 2, 2, 2
|
||||
spatial_shapes = paddle.to_tensor([(6, 4), (3, 2)], dtype=paddle.int64)
|
||||
level_start_index = paddle.concat((paddle.to_tensor(
|
||||
[0], dtype=paddle.int64), spatial_shapes.prod(1).cumsum(0)[:-1]))
|
||||
value_length = sum([(H * W).item() for H, W in spatial_shapes])
|
||||
|
||||
def get_test_tensors(channels):
|
||||
value = paddle.rand(
|
||||
[bs, value_length, n_heads, channels], dtype=paddle.float32) * 0.01
|
||||
sampling_locations = paddle.rand(
|
||||
[bs, query_length, n_heads, n_levels, n_points, 2],
|
||||
dtype=paddle.float32)
|
||||
attention_weights = paddle.rand(
|
||||
[bs, query_length, n_heads, n_levels, n_points],
|
||||
dtype=paddle.float32) + 1e-5
|
||||
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
|
||||
-2, keepdim=True)
|
||||
return [value, sampling_locations, attention_weights]
|
||||
|
||||
value, sampling_locations, attention_weights = get_test_tensors(c)
|
||||
|
||||
output = ms_deformable_attn(value,
|
||||
spatial_shapes,
|
||||
level_start_index,
|
||||
sampling_locations,
|
||||
attention_weights)
|
||||
```
|
||||
|
||||
## 3. 单元测试
|
||||
可以通过执行单元测试来确认自定义算子功能的正确性,执行单元测试的示例如下所示:
|
||||
```
|
||||
python test_ms_deformable_attn_op.py
|
||||
```
|
||||
运行成功后,打印如下:
|
||||
```
|
||||
*True check_forward_equal_with_paddle_float: max_abs_err 6.98e-10 max_rel_err 2.03e-07
|
||||
*tensor1 True check_gradient_numerical(D=30)
|
||||
*tensor2 True check_gradient_numerical(D=30)
|
||||
*tensor3 True check_gradient_numerical(D=30)
|
||||
*tensor1 True check_gradient_numerical(D=32)
|
||||
*tensor2 True check_gradient_numerical(D=32)
|
||||
*tensor3 True check_gradient_numerical(D=32)
|
||||
*tensor1 True check_gradient_numerical(D=64)
|
||||
*tensor2 True check_gradient_numerical(D=64)
|
||||
*tensor3 True check_gradient_numerical(D=64)
|
||||
*tensor1 True check_gradient_numerical(D=71)
|
||||
*tensor2 True check_gradient_numerical(D=71)
|
||||
*tensor3 True check_gradient_numerical(D=71)
|
||||
*tensor1 True check_gradient_numerical(D=128)
|
||||
*tensor2 True check_gradient_numerical(D=128)
|
||||
*tensor3 True check_gradient_numerical(D=128)
|
||||
*tensor1 True check_gradient_numerical(D=1024)
|
||||
*tensor2 True check_gradient_numerical(D=1024)
|
||||
*tensor3 True check_gradient_numerical(D=1024)
|
||||
*tensor1 True check_gradient_numerical(D=1025)
|
||||
*tensor2 True check_gradient_numerical(D=1025)
|
||||
*tensor3 True check_gradient_numerical(D=1025)
|
||||
*tensor1 True check_gradient_numerical(D=2048)
|
||||
*tensor2 True check_gradient_numerical(D=2048)
|
||||
*tensor3 True check_gradient_numerical(D=2048)
|
||||
*tensor1 True check_gradient_numerical(D=3096)
|
||||
*tensor2 True check_gradient_numerical(D=3096)
|
||||
*tensor3 True check_gradient_numerical(D=3096)
|
||||
```
|
||||
@@ -0,0 +1,65 @@
|
||||
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/extension.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
// declare GPU implementation
|
||||
std::vector<paddle::Tensor>
|
||||
MSDeformableAttnCUDAForward(const paddle::Tensor &value,
|
||||
const paddle::Tensor &value_spatial_shapes,
|
||||
const paddle::Tensor &value_level_start_index,
|
||||
const paddle::Tensor &sampling_locations,
|
||||
const paddle::Tensor &attention_weights);
|
||||
|
||||
std::vector<paddle::Tensor> MSDeformableAttnCUDABackward(
|
||||
const paddle::Tensor &value, const paddle::Tensor &value_spatial_shapes,
|
||||
const paddle::Tensor &value_level_start_index,
|
||||
const paddle::Tensor &sampling_locations,
|
||||
const paddle::Tensor &attention_weights, const paddle::Tensor &grad_out);
|
||||
|
||||
//// CPU not implemented
|
||||
|
||||
std::vector<std::vector<int64_t>>
|
||||
MSDeformableAttnInferShape(std::vector<int64_t> value_shape,
|
||||
std::vector<int64_t> value_spatial_shapes_shape,
|
||||
std::vector<int64_t> value_level_start_index_shape,
|
||||
std::vector<int64_t> sampling_locations_shape,
|
||||
std::vector<int64_t> attention_weights_shape) {
|
||||
return {{value_shape[0], sampling_locations_shape[1],
|
||||
value_shape[2] * value_shape[3]}};
|
||||
}
|
||||
|
||||
std::vector<paddle::DataType>
|
||||
MSDeformableAttnInferDtype(paddle::DataType value_dtype,
|
||||
paddle::DataType value_spatial_shapes_dtype,
|
||||
paddle::DataType value_level_start_index_dtype,
|
||||
paddle::DataType sampling_locations_dtype,
|
||||
paddle::DataType attention_weights_dtype) {
|
||||
return {value_dtype};
|
||||
}
|
||||
|
||||
PD_BUILD_OP(ms_deformable_attn)
|
||||
.Inputs({"Value", "SpatialShapes", "LevelIndex", "SamplingLocations",
|
||||
"AttentionWeights"})
|
||||
.Outputs({"Out"})
|
||||
.SetKernelFn(PD_KERNEL(MSDeformableAttnCUDAForward))
|
||||
.SetInferShapeFn(PD_INFER_SHAPE(MSDeformableAttnInferShape))
|
||||
.SetInferDtypeFn(PD_INFER_DTYPE(MSDeformableAttnInferDtype));
|
||||
|
||||
PD_BUILD_GRAD_OP(ms_deformable_attn)
|
||||
.Inputs({"Value", "SpatialShapes", "LevelIndex", "SamplingLocations",
|
||||
"AttentionWeights", paddle::Grad("Out")})
|
||||
.Outputs({paddle::Grad("Value"), paddle::Grad("SpatialShapes"),
|
||||
paddle::Grad("LevelIndex"), paddle::Grad("SamplingLocations"),
|
||||
paddle::Grad("AttentionWeights")})
|
||||
.SetKernelFn(PD_KERNEL(MSDeformableAttnCUDABackward));
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,7 @@
|
||||
from paddle.utils.cpp_extension import CUDAExtension, setup
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup(
|
||||
name='deformable_detr_ops',
|
||||
ext_modules=CUDAExtension(
|
||||
sources=['ms_deformable_attn_op.cc', 'ms_deformable_attn_op.cu']))
|
||||
@@ -0,0 +1,140 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
from __future__ import division
|
||||
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import numpy as np
|
||||
import paddle
|
||||
# add python path of PaddleDetection to sys.path
|
||||
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 5)))
|
||||
if parent_path not in sys.path:
|
||||
sys.path.append(parent_path)
|
||||
|
||||
from ppdet.modeling.transformers.utils import deformable_attention_core_func
|
||||
ms_deform_attn_core_paddle = deformable_attention_core_func
|
||||
|
||||
try:
|
||||
gpu_index = int(sys.argv[1])
|
||||
except:
|
||||
gpu_index = 0
|
||||
print(f'Use gpu {gpu_index} to test...')
|
||||
paddle.set_device(f'gpu:{gpu_index}')
|
||||
|
||||
try:
|
||||
from deformable_detr_ops import ms_deformable_attn
|
||||
except Exception as e:
|
||||
print('import deformable_detr_ops error', e)
|
||||
sys.exit(-1)
|
||||
|
||||
paddle.seed(1)
|
||||
random.seed(1)
|
||||
np.random.seed(1)
|
||||
|
||||
bs, n_heads, c = 2, 8, 8
|
||||
query_length, n_levels, n_points = 2, 2, 2
|
||||
spatial_shapes = paddle.to_tensor([(6, 4), (3, 2)], dtype=paddle.int64)
|
||||
level_start_index = paddle.concat((paddle.to_tensor(
|
||||
[0], dtype=paddle.int64), spatial_shapes.prod(1).cumsum(0)[:-1]))
|
||||
value_length = sum([(H * W).item() for H, W in spatial_shapes])
|
||||
|
||||
|
||||
def get_test_tensors(channels):
|
||||
value = paddle.rand(
|
||||
[bs, value_length, n_heads, channels], dtype=paddle.float32) * 0.01
|
||||
sampling_locations = paddle.rand(
|
||||
[bs, query_length, n_heads, n_levels, n_points, 2],
|
||||
dtype=paddle.float32)
|
||||
attention_weights = paddle.rand(
|
||||
[bs, query_length, n_heads, n_levels, n_points],
|
||||
dtype=paddle.float32) + 1e-5
|
||||
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
|
||||
-2, keepdim=True)
|
||||
|
||||
return [value, sampling_locations, attention_weights]
|
||||
|
||||
|
||||
@paddle.no_grad()
|
||||
def check_forward_equal_with_paddle_float():
|
||||
value, sampling_locations, attention_weights = get_test_tensors(c)
|
||||
|
||||
output_paddle = ms_deform_attn_core_paddle(
|
||||
value, spatial_shapes, level_start_index, sampling_locations,
|
||||
attention_weights).detach().cpu()
|
||||
output_cuda = ms_deformable_attn(value, spatial_shapes, level_start_index,
|
||||
sampling_locations,
|
||||
attention_weights).detach().cpu()
|
||||
fwdok = paddle.allclose(
|
||||
output_cuda, output_paddle, rtol=1e-2, atol=1e-3).item()
|
||||
max_abs_err = (output_cuda - output_paddle).abs().max().item()
|
||||
max_rel_err = (
|
||||
(output_cuda - output_paddle).abs() / output_paddle.abs()).max().item()
|
||||
|
||||
print(
|
||||
f'*{fwdok} check_forward_equal_with_paddle_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}'
|
||||
)
|
||||
|
||||
|
||||
def check_gradient_numerical(channels=4):
|
||||
value_paddle, sampling_locations_paddle, attention_weights_paddle = get_test_tensors(
|
||||
channels)
|
||||
value_paddle.stop_gradient = False
|
||||
sampling_locations_paddle.stop_gradient = False
|
||||
attention_weights_paddle.stop_gradient = False
|
||||
|
||||
value_cuda = value_paddle.detach().clone()
|
||||
sampling_locations_cuda = sampling_locations_paddle.detach().clone()
|
||||
attention_weights_cuda = attention_weights_paddle.detach().clone()
|
||||
value_cuda.stop_gradient = False
|
||||
sampling_locations_cuda.stop_gradient = False
|
||||
attention_weights_cuda.stop_gradient = False
|
||||
|
||||
output_paddle = ms_deform_attn_core_paddle(
|
||||
value_paddle, spatial_shapes, level_start_index,
|
||||
sampling_locations_paddle, attention_weights_paddle)
|
||||
output_paddle.sum().backward()
|
||||
|
||||
output_cuda = ms_deformable_attn(value_cuda, spatial_shapes,
|
||||
level_start_index, sampling_locations_cuda,
|
||||
attention_weights_cuda)
|
||||
output_cuda.sum().backward()
|
||||
|
||||
res = paddle.allclose(
|
||||
value_paddle.grad, value_cuda.grad, rtol=1e-2, atol=1e-3).item()
|
||||
print(f'*tensor1 {res} check_gradient_numerical(D={channels})')
|
||||
|
||||
res = paddle.allclose(
|
||||
sampling_locations_paddle.grad,
|
||||
sampling_locations_cuda.grad,
|
||||
rtol=1e-2,
|
||||
atol=1e-3).item()
|
||||
print(f'*tensor2 {res} check_gradient_numerical(D={channels})')
|
||||
|
||||
res = paddle.allclose(
|
||||
attention_weights_paddle.grad,
|
||||
attention_weights_cuda.grad,
|
||||
rtol=1e-2,
|
||||
atol=1e-3).item()
|
||||
print(f'*tensor3 {res} check_gradient_numerical(D={channels})')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
check_forward_equal_with_paddle_float()
|
||||
|
||||
for channels in [30, 32, 64, 71, 128, 1024, 1025, 2048, 3096]:
|
||||
check_gradient_numerical(channels)
|
||||
Reference in New Issue
Block a user