diff --git a/setup.py b/setup.py
index 82c93be87cd..0a363794da5 100644
--- a/setup.py
+++ b/setup.py
@@ -152,8 +152,8 @@ def get_extensions():
         )
         source_cuda = glob.glob(os.path.join(extensions_dir, 'hip', '*.hip'))
         # Copy over additional files
-        shutil.copy("torchvision/csrc/cuda/cuda_helpers.h", "torchvision/csrc/hip/cuda_helpers.h")
-        shutil.copy("torchvision/csrc/cuda/vision_cuda.h", "torchvision/csrc/hip/vision_cuda.h")
+        for file in glob.glob(r"torchvision/csrc/cuda/*.h"):
+            shutil.copy(file, "torchvision/csrc/hip")
     else:
         source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))

diff --git a/test/tracing/frcnn/test_frcnn_tracing.cpp b/test/tracing/frcnn/test_frcnn_tracing.cpp
index 95b3a1b5726..90476b24f4b 100644
--- a/test/tracing/frcnn/test_frcnn_tracing.cpp
+++ b/test/tracing/frcnn/test_frcnn_tracing.cpp
@@ -2,7 +2,6 @@
 #include
 #include
 #include
-#include
 #include

 #ifdef _WIN32
diff --git a/torchvision/csrc/autocast.h b/torchvision/csrc/autocast.h
deleted file mode 100644
index 584ef13f389..00000000000
--- a/torchvision/csrc/autocast.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-
-// TODO: Delete this file once none of the methods use it
-
-#if defined(WITH_CUDA) || defined(WITH_HIP)
-#include
-#endif
diff --git a/torchvision/csrc/cpu/deform_conv2d_kernel.cpp b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp
index f593e880b3b..5cac99db04a 100644
--- a/torchvision/csrc/cpu/deform_conv2d_kernel.cpp
+++ b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp
@@ -350,7 +350,7 @@ void compute_grad_input(
       channels * weight_h * weight_w * out_h * out_w * parallel_imgs;

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      columns.scalar_type(), "deformable_col2im", ([&] {
+      columns.scalar_type(), "compute_grad_input", ([&] {
         deformable_col2im_kernel(
             num_kernels,
             columns.data_ptr<scalar_t>(),
@@ -551,7 +551,7 @@ void compute_grad_offset_and_mask(
       out_h * out_w * 2 * weight_h * weight_w * n_offset_grps * parallel_imgs;

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      columns.scalar_type(), "deformable_col2im_coord", ([&] {
+      columns.scalar_type(), "compute_grad_offset_and_mask", ([&] {
         deformable_col2im_coord_kernel(
             num_kernels,
             columns.data_ptr<scalar_t>(),
diff --git a/torchvision/csrc/cpu/nms_kernel.cpp b/torchvision/csrc/cpu/nms_kernel.cpp
index 036a91f56dc..52953a9e822 100644
--- a/torchvision/csrc/cpu/nms_kernel.cpp
+++ b/torchvision/csrc/cpu/nms_kernel.cpp
@@ -3,7 +3,7 @@
 namespace {

 template <typename scalar_t>
-at::Tensor nms_kernel(
+at::Tensor nms_kernel_impl(
     const at::Tensor& dets,
     const at::Tensor& scores,
     double iou_threshold) {
@@ -98,8 +98,8 @@ at::Tensor nms_cpu(

   auto result = at::empty({0}, dets.options());

-  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
-    result = nms_kernel<scalar_t>(dets, scores, iou_threshold);
+  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_cpu", [&] {
+    result = nms_kernel_impl<scalar_t>(dets, scores, iou_threshold);
   });
   return result;
 }
diff --git a/torchvision/csrc/cpu/ps_roi_align_kernel.cpp b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp
index a56fbe58e9a..3d6c95f02ea 100644
--- a/torchvision/csrc/cpu/ps_roi_align_kernel.cpp
+++ b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp
@@ -340,7 +340,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
   auto input_ = input.contiguous(), rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "ps_roi_align_forward", [&] {
+      input.scalar_type(), "ps_roi_align_forward_cpu", [&] {
         ps_roi_align_forward_kernel_impl(
             output_size,
             input_.data_ptr<scalar_t>(),
@@ -397,7 +397,7 @@ at::Tensor ps_roi_align_backward_cpu(
   auto grad_ = grad.contiguous(), rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      grad.scalar_type(), "ps_roi_align_backward", [&] {
+      grad.scalar_type(), "ps_roi_align_backward_cpu", [&] {
         ps_roi_align_backward_kernel_impl(
             grad.numel(),
             grad_.data_ptr<scalar_t>(),
diff --git a/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp
index 171de9edc6a..cdee9b9f55c 100644
--- a/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp
+++ b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp
@@ -183,7 +183,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
   auto input_ = input.contiguous(), rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "ps_roi_pool_forward", [&] {
+      input.scalar_type(), "ps_roi_pool_forward_cpu", [&] {
         ps_roi_pool_forward_kernel_impl(
             input_.data_ptr<scalar_t>(),
             spatial_scale,
@@ -238,7 +238,7 @@ at::Tensor ps_roi_pool_backward_cpu(
   auto grad_ = grad.contiguous(), rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      grad.scalar_type(), "ps_roi_pool_backward", [&] {
+      grad.scalar_type(), "ps_roi_pool_backward_cpu", [&] {
         ps_roi_pool_backward_kernel_impl(
             grad_.data_ptr<scalar_t>(),
             channel_mapping.data_ptr<int>(),
diff --git a/torchvision/csrc/cpu/roi_align_kernel.cpp b/torchvision/csrc/cpu/roi_align_kernel.cpp
index 01d2bca25a3..133722fdc5e 100644
--- a/torchvision/csrc/cpu/roi_align_kernel.cpp
+++ b/torchvision/csrc/cpu/roi_align_kernel.cpp
@@ -419,7 +419,7 @@ at::Tensor roi_align_forward_cpu(
   auto input_ = input.contiguous(), rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "roi_align_forward", [&] {
+      input.scalar_type(), "roi_align_forward_cpu", [&] {
         roi_align_forward_kernel_impl(
             output_size,
             input_.data_ptr<scalar_t>(),
@@ -473,7 +473,7 @@ at::Tensor roi_align_backward_cpu(
   auto rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      grad.scalar_type(), "roi_align_forward", [&] {
+      grad.scalar_type(), "roi_align_backward_cpu", [&] {
         roi_align_backward_kernel_impl(
             grad.numel(),
             grad.data_ptr<scalar_t>(),
diff --git a/torchvision/csrc/cpu/roi_pool_kernel.cpp b/torchvision/csrc/cpu/roi_pool_kernel.cpp
index 389e9c90248..d622f2b430b 100644
--- a/torchvision/csrc/cpu/roi_pool_kernel.cpp
+++ b/torchvision/csrc/cpu/roi_pool_kernel.cpp
@@ -154,7 +154,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
   auto input_ = input.contiguous(), rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "roi_pool_forward", [&] {
+      input.scalar_type(), "roi_pool_forward_cpu", [&] {
         roi_pool_forward_kernel_impl(
             input_.data_ptr<scalar_t>(),
             spatial_scale,
@@ -212,7 +212,7 @@ at::Tensor roi_pool_backward_cpu(
   auto rois_ = rois.contiguous();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      grad.scalar_type(), "roi_pool_backward", [&] {
+      grad.scalar_type(), "roi_pool_backward_cpu", [&] {
         roi_pool_backward_kernel_impl(
             grad.data_ptr<scalar_t>(),
             argmax.data_ptr<int>(),
diff --git a/torchvision/csrc/cpu/video/register.cpp b/torchvision/csrc/cpu/video/register.cpp
index a88615987bf..9d538444f3f 100644
--- a/torchvision/csrc/cpu/video/register.cpp
+++ b/torchvision/csrc/cpu/video/register.cpp
@@ -1,5 +1,4 @@
-#ifndef REGISTER_H
-#define REGISTER_H
+#pragma once

 #include "Video.h"

@@ -15,4 +14,3 @@ static auto registerVideo =
         .def("next", &Video::Next);

 } // namespace
-#endif
diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h
deleted file mode 100644
index a772fa13f01..00000000000
--- a/torchvision/csrc/cpu/vision_cpu.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-#include
-#include "../macros.h"
-
-// TODO: Delete this file once all the methods are gone
diff --git a/torchvision/csrc/cuda/deform_conv2d_kernel.cu b/torchvision/csrc/cuda/deform_conv2d_kernel.cu
index 6edaa9c73af..cef8124caf3 100644
--- a/torchvision/csrc/cuda/deform_conv2d_kernel.cu
+++ b/torchvision/csrc/cuda/deform_conv2d_kernel.cu
@@ -66,7 +66,6 @@
 // modified from
 // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp

-#include
 #include
 #include
 #include
@@ -88,7 +87,9 @@ inline unsigned int GET_THREADS() {
   return 512;
 }

-inline unsigned int GET_BLOCKS(const unsigned int THREADS, const unsigned int N) {
+inline unsigned int GET_BLOCKS(
+    const unsigned int THREADS,
+    const unsigned int N) {
   unsigned int kMaxGridNum =
       at::cuda::getCurrentDeviceProperties()->maxGridSize[0];
   return std::min(kMaxGridNum, (N + THREADS - 1) / THREADS);
@@ -235,10 +236,8 @@ void deformable_im2col(
   const unsigned int blocks = GET_BLOCKS(threads, num_kernels);

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "deformable_im2col_gpu", ([&] {
-        deformable_im2col_kernel<<<
-            blocks,
-            threads>>>(
+      input.scalar_type(), "deformable_im2col", ([&] {
+        deformable_im2col_kernel<<<blocks, threads>>>(
             num_kernels,
             input.data_ptr<scalar_t>(),
             data_offset.data_ptr<scalar_t>(),
@@ -381,10 +380,8 @@ void compute_grad_input(
   const unsigned int blocks = GET_BLOCKS(threads, num_kernels);

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      columns.scalar_type(), "deformable_col2im_gpu", ([&] {
-        deformable_col2im_kernel<<<
-            blocks,
-            threads>>>(
+      columns.scalar_type(), "compute_grad_input", ([&] {
+        deformable_col2im_kernel<<<blocks, threads>>>(
             num_kernels,
             columns.data_ptr<scalar_t>(),
             offset.data_ptr<scalar_t>(),
@@ -589,10 +586,8 @@ void compute_grad_offset_and_mask(
   const unsigned int blocks = GET_BLOCKS(threads, num_kernels);

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      columns.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
-        deformable_col2im_coord_kernel<<<
-            blocks,
-            threads>>>(
+      columns.scalar_type(), "compute_grad_offset_and_mask", ([&] {
+        deformable_col2im_coord_kernel<<<blocks, threads>>>(
             num_kernels,
             columns.data_ptr<scalar_t>(),
             input.data_ptr<scalar_t>(),
diff --git a/torchvision/csrc/cuda/nms_kernel.cu b/torchvision/csrc/cuda/nms_kernel.cu
index 8785bd84897..ae244efebe7 100644
--- a/torchvision/csrc/cuda/nms_kernel.cu
+++ b/torchvision/csrc/cuda/nms_kernel.cu
@@ -1,4 +1,3 @@
-#include
 #include
 #include

@@ -24,7 +23,7 @@ __device__ inline bool devIoU(
 }

 template <typename T>
-__global__ void nms_kernel(
+__global__ void nms_kernel_impl(
     int n_boxes,
     double iou_threshold,
     const T* dev_boxes,
@@ -74,7 +73,8 @@

 } // namespace

-at::Tensor nms_cuda(const at::Tensor& dets,
+at::Tensor nms_cuda(
+    const at::Tensor& dets,
     const at::Tensor& scores,
     double iou_threshold) {
   TORCH_CHECK(dets.is_cuda(), "dets must be a CUDA tensor");
@@ -124,8 +124,8 @@ at::Tensor nms_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      dets_sorted.scalar_type(), "nms_kernel_cuda", [&] {
-        nms_kernel<<<blocks, threads, 0, stream>>>(
+      dets_sorted.scalar_type(), "nms_cuda", [&] {
+        nms_kernel_impl<<<blocks, threads, 0, stream>>>(
             dets_num,
             iou_threshold,
             dets_sorted.data_ptr<scalar_t>(),
@@ -133,7 +133,8 @@ at::Tensor nms_cuda(
   });

   at::Tensor mask_cpu = mask.to(at::kCPU);
-  unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr();
+  unsigned long long* mask_host =
+      (unsigned long long*)mask_cpu.data_ptr();

   std::vector<unsigned long long> remv(col_blocks);
   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
diff --git a/torchvision/csrc/cuda/ps_roi_align_kernel.cu b/torchvision/csrc/cuda/ps_roi_align_kernel.cu
index 4ac0c28de4c..7c808580258 100644
--- a/torchvision/csrc/cuda/ps_roi_align_kernel.cu
+++ b/torchvision/csrc/cuda/ps_roi_align_kernel.cu
@@ -339,14 +339,13 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(output_size), static_cast(512)),
+      ceil_div(static_cast(output_size), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

-  auto input_ = input.contiguous(),
-      rois_ = rois.contiguous();
+  auto input_ = input.contiguous(), rois_ = rois.contiguous();
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "ps_roi_align_forward", [&] {
+      input.scalar_type(), "ps_roi_align_forward_cuda", [&] {
         ps_roi_align_forward_kernel_impl<<<grid, block, 0, stream>>>(
             output_size,
             input_.data_ptr<scalar_t>(),
@@ -383,8 +382,7 @@ at::Tensor ps_roi_align_backward_cuda(
   TORCH_CHECK(grad.is_cuda(), "grad must be a CUDA tensor");
   TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor");
   TORCH_CHECK(
-      channel_mapping.is_cuda(),
-      "channel_mapping must be a CUDA tensor");
+      channel_mapping.is_cuda(), "channel_mapping must be a CUDA tensor");

   at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
       channel_mapping_t{channel_mapping, "channel_mapping", 3};
@@ -402,7 +400,7 @@ at::Tensor ps_roi_align_backward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(grad.numel()), static_cast(512)),
+      ceil_div(static_cast(grad.numel()), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

@@ -414,10 +412,9 @@ at::Tensor ps_roi_align_backward_cuda(

   int channels_out = channels / (pooled_height * pooled_width);

-  auto grad_ = grad.contiguous(),
-      rois_ = rois.contiguous();
+  auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      grad.scalar_type(), "ps_roi_align_backward", [&] {
+      grad.scalar_type(), "ps_roi_align_backward_cuda", [&] {
         ps_roi_align_backward_kernel_impl<<<grid, block, 0, stream>>>(
             grad.numel(),
             grad_.data_ptr<scalar_t>(),
diff --git a/torchvision/csrc/cuda/ps_roi_pool_kernel.cu b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu
index aa1c834e059..ed0ed26484d 100644
--- a/torchvision/csrc/cuda/ps_roi_pool_kernel.cu
+++ b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu
@@ -179,14 +179,13 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(output_size), static_cast(512)),
+      ceil_div(static_cast(output_size), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

-  auto input_ = input.contiguous(),
-      rois_ = rois.contiguous();
+  auto input_ = input.contiguous(), rois_ = rois.contiguous();
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      input.scalar_type(), "ps_roi_pool_forward", [&] {
+      input.scalar_type(), "ps_roi_pool_forward_cuda", [&] {
         ps_roi_pool_forward_kernel_impl<<<grid, block, 0, stream>>>(
             output_size,
             input_.data_ptr<scalar_t>(),
@@ -220,8 +219,7 @@ at::Tensor ps_roi_pool_backward_cuda(
   TORCH_CHECK(grad.is_cuda(), "grad must be a CUDA tensor");
   TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor");
   TORCH_CHECK(
-      channel_mapping.is_cuda(),
-      "channel_mapping must be a CUDA tensor");
+      channel_mapping.is_cuda(), "channel_mapping must be a CUDA tensor");

   at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
       channel_mapping_t{channel_mapping, "channel_mapping", 3};
@@ -239,7 +237,7 @@ at::Tensor ps_roi_pool_backward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(grad.numel()), static_cast(512)),
+      ceil_div(static_cast(grad.numel()), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

@@ -251,10 +249,9 @@ at::Tensor ps_roi_pool_backward_cuda(

   int channels_out = channels / (pooled_height * pooled_width);

-  auto grad_ = grad.contiguous(),
-      rois_ = rois.contiguous();
+  auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      grad.scalar_type(), "ps_roi_pool_backward", [&] {
+      grad.scalar_type(), "ps_roi_pool_backward_cuda", [&] {
         ps_roi_pool_backward_kernel_impl<<<grid, block, 0, stream>>>(
             grad.numel(),
             grad_.data_ptr<scalar_t>(),
diff --git a/torchvision/csrc/cuda/roi_align_kernel.cu b/torchvision/csrc/cuda/roi_align_kernel.cu
index 7f763170a9e..195d8b067f4 100644
--- a/torchvision/csrc/cuda/roi_align_kernel.cu
+++ b/torchvision/csrc/cuda/roi_align_kernel.cu
@@ -323,8 +323,7 @@ at::Tensor roi_align_forward_cuda(
     bool aligned) {
   TORCH_CHECK(input.is_cuda(), "input must be a CUDA tensor");
   TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor");
-  TORCH_CHECK(
-      rois.size(1) == 5, "rois must have shape as Tensor[K, 5]");
+  TORCH_CHECK(rois.size(1) == 5, "rois must have shape as Tensor[K, 5]");

   at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};

@@ -346,7 +345,7 @@ at::Tensor roi_align_forward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(output_size), static_cast(512)),
+      ceil_div(static_cast(output_size), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

@@ -355,23 +354,23 @@ at::Tensor roi_align_forward_cuda(
     return output;
   }

-  auto input_ = input.contiguous(),
-      rois_ = rois.contiguous();
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_align_forward", [&] {
-    roi_align_forward_kernel_impl<<<grid, block, 0, stream>>>(
-        output_size,
-        input_.data_ptr<scalar_t>(),
-        spatial_scale,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        sampling_ratio,
-        aligned,
-        rois_.data_ptr<scalar_t>(),
-        output.data_ptr<scalar_t>());
-  });
+  auto input_ = input.contiguous(), rois_ = rois.contiguous();
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      input.scalar_type(), "roi_align_forward_cuda", [&] {
+        roi_align_forward_kernel_impl<<<grid, block, 0, stream>>>(
+            output_size,
+            input_.data_ptr<scalar_t>(),
+            spatial_scale,
+            channels,
+            height,
+            width,
+            pooled_height,
+            pooled_width,
+            sampling_ratio,
+            aligned,
+            rois_.data_ptr<scalar_t>(),
+            output.data_ptr<scalar_t>());
+      });
   AT_CUDA_CHECK(cudaGetLastError());
   return output;
 }
@@ -405,7 +404,7 @@ at::Tensor roi_align_backward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(grad.numel()), static_cast(512)),
+      ceil_div(static_cast(grad.numel()), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

@@ -421,25 +420,26 @@ at::Tensor roi_align_backward_cuda(
   int w_stride = grad.stride(3);

   auto rois_ = rois.contiguous();
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_align_backward", [&] {
-    roi_align_backward_kernel_impl<<<grid, block, 0, stream>>>(
-        grad.numel(),
-        grad.data_ptr<scalar_t>(),
-        spatial_scale,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        sampling_ratio,
-        aligned,
-        grad_input.data_ptr<scalar_t>(),
-        rois_.data_ptr<scalar_t>(),
-        n_stride,
-        c_stride,
-        h_stride,
-        w_stride);
-  });
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      grad.scalar_type(), "roi_align_backward_cuda", [&] {
+        roi_align_backward_kernel_impl<<<grid, block, 0, stream>>>(
+            grad.numel(),
+            grad.data_ptr<scalar_t>(),
+            spatial_scale,
+            channels,
+            height,
+            width,
+            pooled_height,
+            pooled_width,
+            sampling_ratio,
+            aligned,
+            grad_input.data_ptr<scalar_t>(),
+            rois_.data_ptr<scalar_t>(),
+            n_stride,
+            c_stride,
+            h_stride,
+            w_stride);
+      });
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
 }
diff --git a/torchvision/csrc/cuda/roi_pool_kernel.cu b/torchvision/csrc/cuda/roi_pool_kernel.cu
index c10dd0cf403..782ecaf9eb3 100644
--- a/torchvision/csrc/cuda/roi_pool_kernel.cu
+++ b/torchvision/csrc/cuda/roi_pool_kernel.cu
@@ -153,7 +153,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(output_size), static_cast(512)),
+      ceil_div(static_cast(output_size), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

@@ -162,22 +162,22 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cuda(
     return std::make_tuple(output, argmax);
   }

-  auto input_ = input.contiguous(),
-      rois_ = rois.contiguous();
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_pool_forward", [&] {
-    roi_pool_forward_kernel_impl<<<grid, block, 0, stream>>>(
-        output_size,
-        input_.data_ptr<scalar_t>(),
-        spatial_scale,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        rois_.data_ptr<scalar_t>(),
-        output.data_ptr<scalar_t>(),
-        argmax.data_ptr<int>());
-  });
+  auto input_ = input.contiguous(), rois_ = rois.contiguous();
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      input.scalar_type(), "roi_pool_forward_cuda", [&] {
+        roi_pool_forward_kernel_impl<<<grid, block, 0, stream>>>(
+            output_size,
+            input_.data_ptr<scalar_t>(),
+            spatial_scale,
+            channels,
+            height,
+            width,
+            pooled_height,
+            pooled_width,
+            rois_.data_ptr<scalar_t>(),
+            output.data_ptr<scalar_t>(),
+            argmax.data_ptr<int>());
+      });
   AT_CUDA_CHECK(cudaGetLastError());
   return std::make_tuple(output, argmax);
 }
@@ -215,7 +215,7 @@ at::Tensor roi_pool_backward_cuda(
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   dim3 grid(std::min(
-      ceil_div(static_cast(grad.numel()), static_cast(512)),
+      ceil_div(static_cast(grad.numel()), static_cast(512)),
       static_cast(4096)));
   dim3 block(512);

@@ -230,27 +230,27 @@ at::Tensor roi_pool_backward_cuda(
   int h_stride = grad.stride(2);
   int w_stride = grad.stride(3);

-  auto argmax_ = argmax.contiguous(),
-      rois_ = rois.contiguous();
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_pool_backward", [&] {
-    roi_pool_backward_kernel_impl<<<grid, block, 0, stream>>>(
-        grad.numel(),
-        grad.data_ptr<scalar_t>(),
-        argmax_.data_ptr<int>(),
-        num_rois,
-        spatial_scale,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        grad_input.data_ptr<scalar_t>(),
-        rois_.data_ptr<scalar_t>(),
-        n_stride,
-        c_stride,
-        h_stride,
-        w_stride);
-  });
+  auto argmax_ = argmax.contiguous(), rois_ = rois.contiguous();
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      grad.scalar_type(), "roi_pool_backward_cuda", [&] {
+        roi_pool_backward_kernel_impl<<<grid, block, 0, stream>>>(
+            grad.numel(),
+            grad.data_ptr<scalar_t>(),
+            argmax_.data_ptr<int>(),
+            num_rois,
+            spatial_scale,
+            channels,
+            height,
+            width,
+            pooled_height,
+            pooled_width,
+            grad_input.data_ptr<scalar_t>(),
+            rois_.data_ptr<scalar_t>(),
+            n_stride,
+            c_stride,
+            h_stride,
+            w_stride);
+      });
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
 }
diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h
deleted file mode 100644
index a772fa13f01..00000000000
--- a/torchvision/csrc/cuda/vision_cuda.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-#include
-#include "../macros.h"
-
-// TODO: Delete this file once all the methods are gone
diff --git a/torchvision/csrc/macros.h b/torchvision/csrc/macros.h
index cb01005a022..559140a933a 100644
--- a/torchvision/csrc/macros.h
+++ b/torchvision/csrc/macros.h
@@ -1,5 +1,4 @@
-#ifndef TORCHVISION_MACROS_H
-#define TORCHVISION_MACROS_H
+#pragma once

 #ifdef _WIN32
 #if defined(torchvision_EXPORTS)
@@ -20,5 +19,3 @@
 #define VISION_INLINE_VARIABLE __attribute__((weak))
 #endif
 #endif
-
-#endif // TORCHVISION_MACROS_H
diff --git a/torchvision/csrc/models/alexnet.h b/torchvision/csrc/models/alexnet.h
index 673598d3a53..33ffe379a97 100644
--- a/torchvision/csrc/models/alexnet.h
+++ b/torchvision/csrc/models/alexnet.h
@@ -1,5 +1,4 @@
-#ifndef ALEXNET_H
-#define ALEXNET_H
+#pragma once

 #include
 #include "general.h"
@@ -20,5 +19,3 @@ TORCH_MODULE(AlexNet);

 } // namespace models
 } // namespace vision
-
-#endif // ALEXNET_H
diff --git a/torchvision/csrc/models/densenet.h b/torchvision/csrc/models/densenet.h
index 731d0c7879f..22db45b719d 100644
--- a/torchvision/csrc/models/densenet.h
+++ b/torchvision/csrc/models/densenet.h
@@ -1,5 +1,4 @@
-#ifndef DENSENET_H
-#define DENSENET_H
+#pragma once

 #include
 #include "general.h"
@@ -82,5 +81,3 @@ TORCH_MODULE(DenseNet161);

 } // namespace models
 } // namespace vision
-
-#endif // DENSENET_H
diff --git a/torchvision/csrc/models/general.h b/torchvision/csrc/models/general.h
index 1378a1c85d2..4463786b4bb 100644
--- a/torchvision/csrc/models/general.h
+++ b/torchvision/csrc/models/general.h
@@ -1,5 +1,4 @@
-#ifndef VISION_GENERAL_H
-#define VISION_GENERAL_H
+#pragma once

 #ifdef _WIN32
 #if defined(torchvision_EXPORTS)
@@ -10,5 +9,3 @@
 #else
 #define VISION_API
 #endif
-
-#endif // VISION_GENERAL_H
\ No newline at end of file
diff --git a/torchvision/csrc/models/googlenet.h b/torchvision/csrc/models/googlenet.h
index 34b0cf5077f..d5192c7623f 100644
--- a/torchvision/csrc/models/googlenet.h
+++ b/torchvision/csrc/models/googlenet.h
@@ -1,5 +1,4 @@
-#ifndef GOOGLENET_H
-#define GOOGLENET_H
+#pragma once

 #include
 #include "general.h"
@@ -86,5 +85,3 @@ TORCH_MODULE(GoogLeNet);

 } // namespace models
 } // namespace vision
-
-#endif // GOOGLENET_H
diff --git a/torchvision/csrc/models/inception.h b/torchvision/csrc/models/inception.h
index 58f1a7c1054..3f964e3103c 100644
--- a/torchvision/csrc/models/inception.h
+++ b/torchvision/csrc/models/inception.h
@@ -1,5 +1,4 @@
-#ifndef INCEPTION_H
-#define INCEPTION_H
+#pragma once

 #include
 #include "general.h"
@@ -124,5 +123,3 @@ TORCH_MODULE(InceptionV3);

 } // namespace models
 } // namespace vision
-
-#endif // INCEPTION_H
diff --git a/torchvision/csrc/models/mnasnet.h b/torchvision/csrc/models/mnasnet.h
index 6f45101f332..ae136cd5b30 100644
--- a/torchvision/csrc/models/mnasnet.h
+++ b/torchvision/csrc/models/mnasnet.h
@@ -1,5 +1,4 @@
-#ifndef MNASNET_H
-#define MNASNET_H
+#pragma once

 #include
 #include "general.h"
@@ -43,5 +42,3 @@ TORCH_MODULE(MNASNet1_3);

 } // namespace models
 } // namespace vision
-
-#endif // MNASNET_H
diff --git a/torchvision/csrc/models/mobilenet.h b/torchvision/csrc/models/mobilenet.h
index 6af0a597e1f..7e3f8596692 100644
--- a/torchvision/csrc/models/mobilenet.h
+++ b/torchvision/csrc/models/mobilenet.h
@@ -1,5 +1,4 @@
-#ifndef MOBILENET_H
-#define MOBILENET_H
+#pragma once

 #include
 #include "general.h"
@@ -22,5 +21,3 @@ struct VISION_API MobileNetV2Impl : torch::nn::Module {
 TORCH_MODULE(MobileNetV2);
 } // namespace models
 } // namespace vision
-
-#endif // MOBILENET_H
diff --git a/torchvision/csrc/models/models.h b/torchvision/csrc/models/models.h
index 1d47f2e3dd6..8376ed12020 100644
--- a/torchvision/csrc/models/models.h
+++ b/torchvision/csrc/models/models.h
@@ -1,5 +1,4 @@
-#ifndef MODELS_H
-#define MODELS_H
+#pragma once

 #include "alexnet.h"
 #include "densenet.h"
@@ -11,5 +10,3 @@
 #include "shufflenetv2.h"
 #include "squeezenet.h"
 #include "vgg.h"
-
-#endif // MODELS_H
diff --git a/torchvision/csrc/models/modelsimpl.h b/torchvision/csrc/models/modelsimpl.h
index 1dc8d06b15e..8f7663b32ad 100644
--- a/torchvision/csrc/models/modelsimpl.h
+++ b/torchvision/csrc/models/modelsimpl.h
@@ -1,5 +1,4 @@
-#ifndef MODELSIMPL_H
-#define MODELSIMPL_H
+#pragma once

 #include

@@ -42,5 +41,3 @@ inline bool double_compare(double a, double b) {
 } // namespace modelsimpl
 } // namespace models
 } // namespace vision
-
-#endif // MODELSIMPL_H
diff --git a/torchvision/csrc/models/resnet.h b/torchvision/csrc/models/resnet.h
index e17dfe49a1a..ee4a8cd5284 100644
--- a/torchvision/csrc/models/resnet.h
+++ b/torchvision/csrc/models/resnet.h
@@ -1,5 +1,4 @@
-#ifndef RESNET_H
-#define RESNET_H
+#pragma once

 #include
 #include "general.h"
@@ -256,5 +255,3 @@ TORCH_MODULE(WideResNet101_2);

 } // namespace models
 } // namespace vision
-
-#endif // RESNET_H
diff --git a/torchvision/csrc/models/shufflenetv2.h b/torchvision/csrc/models/shufflenetv2.h
index 96d53d554ab..8aa0ea69db4 100644
--- a/torchvision/csrc/models/shufflenetv2.h
+++ b/torchvision/csrc/models/shufflenetv2.h
@@ -1,5 +1,4 @@
-#ifndef SHUFFLENETV2_H
-#define SHUFFLENETV2_H
+#pragma once

 #include
 #include "general.h"
@@ -44,5 +43,3 @@ TORCH_MODULE(ShuffleNetV2_x2_0);

 } // namespace models
 } // namespace vision
-
-#endif // SHUFFLENETV2_H
diff --git a/torchvision/csrc/models/squeezenet.h b/torchvision/csrc/models/squeezenet.h
index 6f55387de56..5cd3cb482e7 100644
--- a/torchvision/csrc/models/squeezenet.h
+++ b/torchvision/csrc/models/squeezenet.h
@@ -1,5 +1,4 @@
-#ifndef SQUEEZENET_H
-#define SQUEEZENET_H
+#pragma once

 #include
 #include "general.h"
@@ -36,5 +35,3 @@ TORCH_MODULE(SqueezeNet1_1);

 } // namespace models
 } // namespace vision
-
-#endif // SQUEEZENET_H
diff --git a/torchvision/csrc/models/vgg.h b/torchvision/csrc/models/vgg.h
index b5c600a68ab..ea64dbfa151 100644
--- a/torchvision/csrc/models/vgg.h
+++ b/torchvision/csrc/models/vgg.h
@@ -1,5 +1,4 @@
-#ifndef VGG_H
-#define VGG_H
+#pragma once

 #include
 #include "general.h"
@@ -89,5 +88,3 @@ TORCH_MODULE(VGG19BN);

 } // namespace models
 } // namespace vision
-
-#endif // VGG_H
diff --git a/torchvision/csrc/vision.h b/torchvision/csrc/vision.h
index 50bebab1fb1..c99b25c030d 100644
--- a/torchvision/csrc/vision.h
+++ b/torchvision/csrc/vision.h
@@ -1,5 +1,4 @@
-#ifndef VISION_H
-#define VISION_H
+#pragma once

 #include
 #include
@@ -15,5 +14,3 @@ namespace detail {
 VISION_INLINE_VARIABLE int64_t _cuda_version = cuda_version();
 } // namespace detail
 } // namespace vision
-
-#endif // VISION_H
diff --git a/travis-scripts/run-clang-format/run-clang-format.py b/travis-scripts/run-clang-format/run-clang-format.py
index 54e193db45b..fd2913bd70e 100755
--- a/travis-scripts/run-clang-format/run-clang-format.py
+++ b/travis-scripts/run-clang-format/run-clang-format.py
@@ -28,7 +28,7 @@

 DEVNULL = open(os.devnull, "wb")

-DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx'
+DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu'


 class ExitStatus: