[quant][core][gpu][improvement] Integrated quantized cudnn max pool2d with existing quantized_max_pool2d (#76129)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/76129

Previously, quantized_max_pool2d_cudnn was made available to the
frontend through torch.ops.quantized.max_pool2d.
We improve the integration by also making it available through
torch.max_pool2d, which is made possible by registering
quantized_max_pool2d_cudnn in native_functions.yaml under
quantized_max_pool2d, which is called in max_pool2d.

Ideally and ultimately, we will get rid of the quantized_max_pool2d
registration in native_functions.yaml, and directly register
quantized_max_pool2d and quantized_max_pool2d_cudnn under max_pool2d,
but current support for quantized dispatch keys blocks us from doing so.

Test Plan:
```
python test/run_tests.py
```

Differential Revision:
D35789078

Reviewed By: jerryzh168

Pulled By: dzdang

fbshipit-source-id: 5d8220255bfab663b4779b5d3c66dea9f79d8ee7
(cherry picked from commit c27164da29043f7dc9a4c27d24a93cd37162c23e)
This commit is contained in:
dzdang 2022-04-26 18:47:29 -07:00 committed by PyTorch MergeBot
parent 6e959dec69
commit 6e292f1a21
5 changed files with 25 additions and 10 deletions

View file

@ -282,6 +282,7 @@ filegroup(
"aten/src/ATen/native/cudnn/*.cpp",
"aten/src/ATen/native/miopen/*.cpp",
"aten/src/ATen/native/nested/cuda/*.cpp",
"aten/src/ATen/native/quantized/cudnn/*.cpp",
"aten/src/ATen/native/sparse/cuda/*.cpp",
"aten/src/ATen/native/transformers/cuda/*.cpp",
"aten/src/THC/*.cpp",

View file

@ -145,6 +145,7 @@ file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
file(GLOB native_utils_cpp "native/utils/*.cpp")
# XNNPACK
@ -248,7 +249,7 @@ if(USE_ROCM)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} ${hip_hip} ${native_hip_hip} ${native_nested_hip_hip} ${native_sparse_hip_hip} ${native_quantized_hip_hip} ${native_transformers_hip_hip})
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${native_quantized_cudnn_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
set(all_hip_cpp ${native_miopen_cpp} ${native_cudnn_hip_cpp} ${miopen_cpp} ${all_hip_cpp})
endif()

View file

@ -3022,6 +3022,7 @@
- func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
dispatch:
QuantizedCPU: quantized_max_pool2d
QuantizedCUDA: quantized_max_pool2d_cudnn
- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor

View file

@ -1,17 +1,17 @@
#include <c10/util/Exception.h>
#ifdef USE_CUDA
#include <ATen/cuda/CUDAConfig.h> // for the definition of AT_CUDNN_ENABLED
#if AT_CUDNN_ENABLED()
#include <ATen/native/cudnn/Macros.h>
#if HAS_CUDNN_V8()
#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Handle.h>
#include <ATen/cudnn/Types.h>
#endif // AT_CUDNN_ENABLED
#endif // USE_CUDA
#include <ATen/ATen.h>
#include <ATen/native/Pool.h>
#include <ATen/native/TensorIterator.h>
#include <c10/core/ScalarType.h>
@ -63,6 +63,9 @@ Tensor quantized_max_pool2d_cudnn(
IntArrayRef padding,
IntArrayRef dilation,
bool ceil_mode) {
#ifdef USE_CUDA
#if AT_CUDNN_ENABLED()
#if HAS_CUDNN_V8()
check_maxpool2d_params(
kernel_size,
stride,
@ -179,6 +182,18 @@ Tensor quantized_max_pool2d_cudnn(
// recall we casted our input and output to 4D if qx was 3D, so we recast it back to 3D prior to returning
return (ndim == 3 ? qy.view(std::vector<int64_t>(output_shape.begin() + 1, output_shape.end())) : qy);
#else // HAS_CUDNN_V8()
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN v8 support");
return Tensor{}; // never reached, placates the compiler
#endif // HAS_CUDNN_V8()
#else // AT_CUDNN_ENABLED()
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN support");
return Tensor{}; // never reached, placates the compiler
#endif // AT_CUDNN_ENABLED()
#else // USE_CUDA
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with USE_CUDA support");
return Tensor{}; // never reached, placates the compiler
#endif
}
// Keep the registry in the anonymous namespace.
@ -206,7 +221,3 @@ TORCH_LIBRARY_IMPL(quantized, QuantizedCUDA, m) {
} // namespace
} // namespace native
} // namespace at
#endif // HAS_CUDNN_V8
#endif // AT_CUDNN_ENABLED
#endif // USE_CUDA

View file

@ -85,6 +85,7 @@ includes = [
"aten/src/ATen/cuda/*",
"aten/src/ATen/native/cuda/*",
"aten/src/ATen/native/cudnn/*",
"aten/src/ATen/native/quantized/cudnn/*",
"aten/src/ATen/native/nested/cuda/*",
"aten/src/ATen/native/sparse/cuda/*",
"aten/src/ATen/native/quantized/cuda/*",