diff --git a/torchvision/csrc/cpu/PSROIAlign_cpu.cpp b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp similarity index 95% rename from torchvision/csrc/cpu/PSROIAlign_cpu.cpp rename to torchvision/csrc/cpu/ps_roi_align_kernel.cpp index 899dbb208b6..a56fbe58e9a 100644 --- a/torchvision/csrc/cpu/PSROIAlign_cpu.cpp +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include "ps_roi_align_kernel.h" + +namespace { template T bilinear_interpolate( @@ -57,7 +57,7 @@ T bilinear_interpolate( } template -void PSROIAlignForwardCPU( +void ps_roi_align_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -202,7 +202,7 @@ inline void add(T* address, const T& val) { } template -void PSROIAlignBackwardCPU( +void ps_roi_align_backward_kernel_impl( int nthreads, const T* grad_output, const int* channel_mapping, @@ -298,7 +298,9 @@ void PSROIAlignBackwardCPU( } } -std::tuple PSROIAlign_forward_cpu( +} // namespace + +std::tuple ps_roi_align_forward_cpu( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -313,7 +315,7 @@ std::tuple PSROIAlign_forward_cpu( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "PSROIAlign_forward_cpu"; + at::CheckedFrom c = "ps_roi_align_forward_cpu"; at::checkAllSameType(c, {input_t, rois_t}); int num_rois = rois.size(0); @@ -338,8 +340,8 @@ std::tuple PSROIAlign_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "PSROIAlign_forward", [&] { - PSROIAlignForwardCPU( + input.scalar_type(), "ps_roi_align_forward", [&] { + ps_roi_align_forward_kernel_impl( output_size, input_.data_ptr(), spatial_scale, @@ -357,7 +359,7 @@ std::tuple PSROIAlign_forward_cpu( return std::make_tuple(output, channel_mapping); } -at::Tensor PSROIAlign_backward_cpu( +at::Tensor ps_roi_align_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, @@ -379,7 +381,7 @@ at::Tensor PSROIAlign_backward_cpu( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; - at::CheckedFrom c = "PSROIAlign_backward_cpu"; + at::CheckedFrom c = "ps_roi_align_backward_cpu"; at::checkAllSameType(c, {grad_t, rois_t}); auto num_rois = rois.size(0); @@ -395,8 +397,8 @@ at::Tensor PSROIAlign_backward_cpu( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "PSROIAlign_backward", [&] { - PSROIAlignBackwardCPU( + grad.scalar_type(), "ps_roi_align_backward", [&] { + ps_roi_align_backward_kernel_impl( grad.numel(), grad_.data_ptr(), channel_mapping.data_ptr(), diff --git a/torchvision/csrc/cpu/ps_roi_align_kernel.h b/torchvision/csrc/cpu/ps_roi_align_kernel.h new file mode 100644 index 00000000000..86a3f9a8876 --- /dev/null +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple ps_roi_align_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +VISION_API at::Tensor ps_roi_align_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index 39d89bf6515..22119b5e292 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -4,27 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API std::tuple PSROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio); - -VISION_API at::Tensor PSROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& channel_mapping, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); - VISION_API std::tuple PSROIPool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/cuda/PSROIAlign_cuda.cu b/torchvision/csrc/cuda/ps_roi_align_kernel.cu similarity index 95% rename from torchvision/csrc/cuda/PSROIAlign_cuda.cu rename to torchvision/csrc/cuda/ps_roi_align_kernel.cu index e6912d8c7ee..4ac0c28de4c 100644 --- a/torchvision/csrc/cuda/PSROIAlign_cuda.cu +++ b/torchvision/csrc/cuda/ps_roi_align_kernel.cu @@ -1,11 +1,11 @@ -#include -#include #include #include #include -#include #include "cuda_helpers.h" +#include "ps_roi_align_kernel.h" + +namespace { template __device__ T bilinear_interpolate( @@ -62,7 +62,7 @@ __device__ T bilinear_interpolate( } template -__global__ void PSROIAlignForwardCUDA( +__global__ void ps_roi_align_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -195,7 +195,7 @@ __device__ void bilinear_interpolate_gradient( } template -__global__ void PSROIAlignBackwardCUDA( +__global__ void ps_roi_align_backward_kernel_impl( int nthreads, const T* grad_output, const int* channel_mapping, @@ -292,7 +292,9 @@ __global__ void PSROIAlignBackwardCUDA( } } -std::tuple PSROIAlign_forward_cuda( +} // namespace + +std::tuple ps_roi_align_forward_cuda( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -307,7 +309,7 @@ std::tuple PSROIAlign_forward_cuda( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "PSROIAlign_forward_cuda"; + at::CheckedFrom c = "ps_roi_align_forward_cuda"; at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t}); @@ -344,8 +346,8 @@ std::tuple PSROIAlign_forward_cuda( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "PSROIAlign_forward", [&] { - PSROIAlignForwardCUDA<<>>( + input.scalar_type(), "ps_roi_align_forward", [&] { + ps_roi_align_forward_kernel_impl<<>>( output_size, input_.data_ptr(), spatial_scale, @@ -365,7 +367,7 @@ std::tuple PSROIAlign_forward_cuda( return std::make_tuple(output, channel_mapping); } -at::Tensor PSROIAlign_backward_cuda( +at::Tensor ps_roi_align_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, @@ -387,7 +389,7 @@ at::Tensor PSROIAlign_backward_cuda( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; - at::CheckedFrom c = "PSROIAlign_backward_cuda"; + at::CheckedFrom c = "ps_roi_align_backward_cuda"; at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t}); at::checkAllSameType(c, {grad_t, rois_t}); @@ -415,8 +417,8 @@ at::Tensor PSROIAlign_backward_cuda( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "PSROIAlign_backward", [&] { - PSROIAlignBackwardCUDA<<>>( + grad.scalar_type(), "ps_roi_align_backward", [&] { + ps_roi_align_backward_kernel_impl<<>>( grad.numel(), grad_.data_ptr(), channel_mapping.data_ptr(), diff --git a/torchvision/csrc/cuda/ps_roi_align_kernel.h b/torchvision/csrc/cuda/ps_roi_align_kernel.h new file mode 100644 index 00000000000..45a300d6711 --- /dev/null +++ b/torchvision/csrc/cuda/ps_roi_align_kernel.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple ps_roi_align_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +VISION_API at::Tensor ps_roi_align_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index b17f00d6acf..c80386a8db1 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -4,27 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API std::tuple PSROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio); - -VISION_API at::Tensor PSROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& channel_mapping, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); - VISION_API std::tuple PSROIPool_forward_cuda( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/PSROIAlign.h b/torchvision/csrc/ps_roi_align.cpp similarity index 93% rename from torchvision/csrc/PSROIAlign.h rename to torchvision/csrc/ps_roi_align.cpp index 1e5dd17aabc..0e1a30d6e63 100644 --- a/torchvision/csrc/PSROIAlign.h +++ b/torchvision/csrc/ps_roi_align.cpp @@ -1,20 +1,10 @@ -#pragma once +#include "ps_roi_align.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include #endif -#include - -// TODO: put this stuff in torchvision namespace - std::tuple ps_roi_align( const at::Tensor& input, const at::Tensor& rois, @@ -30,7 +20,7 @@ std::tuple ps_roi_align( } #if defined(WITH_CUDA) || defined(WITH_HIP) -std::tuple PSROIAlign_autocast( +std::tuple ps_roi_align_autocast( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -82,6 +72,8 @@ at::Tensor _ps_roi_align_backward( width); } +namespace { + class PSROIAlignFunction : public torch::autograd::Function { public: @@ -186,7 +178,9 @@ class PSROIAlignBackwardFunction } }; -std::tuple PSROIAlign_autograd( +} // namespace + +std::tuple ps_roi_align_autograd( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -199,7 +193,7 @@ std::tuple PSROIAlign_autograd( return std::make_tuple(result[0], result[1]); } -at::Tensor PSROIAlign_backward_autograd( +at::Tensor ps_roi_align_backward_autograd( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, diff --git a/torchvision/csrc/ps_roi_align.h b/torchvision/csrc/ps_roi_align.h new file mode 100644 index 00000000000..0f7ecea2f12 --- /dev/null +++ b/torchvision/csrc/ps_roi_align.h @@ -0,0 +1,66 @@ +#pragma once + +#include "cpu/ps_roi_align_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/ps_roi_align_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/ps_roi_align_kernel.h" +#endif + +// C++ Forward +std::tuple ps_roi_align( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +std::tuple ps_roi_align_autocast( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); +#endif + +// C++ Backward +at::Tensor _ps_roi_align_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); + +// Autograd Forward and Backward +std::tuple ps_roi_align_autograd( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +at::Tensor ps_roi_align_backward_autograd( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index 2d4e2af0f53..c5c204aac2b 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -8,13 +8,13 @@ #include #endif -#include "PSROIAlign.h" #include "PSROIPool.h" #include "ROIAlign.h" #include "ROIPool.h" #include "deform_conv2d.h" #include "empty_tensor_op.h" #include "nms.h" +#include "ps_roi_align.h" // If we are in a Windows environment, we need to define // initialization functions for the _custom_ops extension @@ -65,8 +65,8 @@ TORCH_LIBRARY_IMPL(torchvision, CPU, m) { m.impl("deform_conv2d", deform_conv2d_forward_cpu); m.impl("_deform_conv2d_backward", deform_conv2d_backward_cpu); m.impl("nms", nms_cpu); - m.impl("ps_roi_align", PSROIAlign_forward_cpu); - m.impl("_ps_roi_align_backward", PSROIAlign_backward_cpu); + m.impl("ps_roi_align", ps_roi_align_forward_cpu); + m.impl("_ps_roi_align_backward", ps_roi_align_backward_cpu); m.impl("ps_roi_pool", PSROIPool_forward_cpu); m.impl("_ps_roi_pool_backward", PSROIPool_backward_cpu); m.impl("roi_align", ROIAlign_forward_cpu); @@ -81,8 +81,8 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { m.impl("deform_conv2d", deform_conv2d_forward_cuda); m.impl("_deform_conv2d_backward", deform_conv2d_backward_cuda); m.impl("nms", nms_cuda); - m.impl("ps_roi_align", PSROIAlign_forward_cuda); - m.impl("_ps_roi_align_backward", PSROIAlign_backward_cuda); + m.impl("ps_roi_align", ps_roi_align_forward_cuda); + m.impl("_ps_roi_align_backward", ps_roi_align_backward_cuda); m.impl("ps_roi_pool", PSROIPool_forward_cuda); m.impl("_ps_roi_pool_backward", PSROIPool_backward_cuda); m.impl("roi_align", ROIAlign_forward_cuda); @@ -97,7 +97,7 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { m.impl("deform_conv2d", deform_conv2d_autocast); m.impl("nms", nms_autocast); - m.impl("ps_roi_align", PSROIAlign_autocast); + m.impl("ps_roi_align", ps_roi_align_autocast); m.impl("ps_roi_pool", PSROIPool_autocast); m.impl("roi_align", ROIAlign_autocast); m.impl("roi_pool", ROIPool_autocast); @@ -107,8 +107,8 @@ TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { m.impl("deform_conv2d", deform_conv2d_autograd); m.impl("_deform_conv2d_backward", deform_conv2d_backward_autograd); - m.impl("ps_roi_align", PSROIAlign_autograd); - m.impl("_ps_roi_align_backward", PSROIAlign_backward_autograd); + m.impl("ps_roi_align", ps_roi_align_autograd); + m.impl("_ps_roi_align_backward", ps_roi_align_backward_autograd); m.impl("ps_roi_pool", PSROIPool_autograd); m.impl("_ps_roi_pool_backward", PSROIPool_backward_autograd); m.impl("roi_align", ROIAlign_autograd);