mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
[quant][core][gpu][improvement] Integrated quantized cudnn max pool2d with existing quantized_max_pool2d (#76129)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/76129 Previously, quantized_max_pool2d_cudnn was made available to the frontend through torch.ops.quantized.max_pool2d. We improve the integration by also making it available through torch.max_pool2d, which is made possible by registering quantized_max_pool2d_cudnn in native_functions.yaml under quantized_max_pool2d, which is called in max_pool2d. Ideally and ultimately, we will get rid of the quantized_max_pool2d registration in native_functions.yaml, and directly register quantized_max_pool2d and quantized_max_pool2d_cudnn under max_pool2d, but current support for quantized dispatch keys blocks us from doing so. Test Plan: ``` python test/run_tests.py ``` Differential Revision: D35789078 Reviewed By: jerryzh168 Pulled By: dzdang fbshipit-source-id: 5d8220255bfab663b4779b5d3c66dea9f79d8ee7 (cherry picked from commit c27164da29043f7dc9a4c27d24a93cd37162c23e)
This commit is contained in:
parent
6e959dec69
commit
6e292f1a21
5 changed files with 25 additions and 10 deletions
|
|
@ -282,6 +282,7 @@ filegroup(
|
|||
"aten/src/ATen/native/cudnn/*.cpp",
|
||||
"aten/src/ATen/native/miopen/*.cpp",
|
||||
"aten/src/ATen/native/nested/cuda/*.cpp",
|
||||
"aten/src/ATen/native/quantized/cudnn/*.cpp",
|
||||
"aten/src/ATen/native/sparse/cuda/*.cpp",
|
||||
"aten/src/ATen/native/transformers/cuda/*.cpp",
|
||||
"aten/src/THC/*.cpp",
|
||||
|
|
|
|||
|
|
@ -145,6 +145,7 @@ file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
|
|||
file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
|
||||
file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
|
||||
file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
|
||||
file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
|
||||
file(GLOB native_utils_cpp "native/utils/*.cpp")
|
||||
|
||||
# XNNPACK
|
||||
|
|
@ -248,7 +249,7 @@ if(USE_ROCM)
|
|||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
|
||||
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} ${hip_hip} ${native_hip_hip} ${native_nested_hip_hip} ${native_sparse_hip_hip} ${native_quantized_hip_hip} ${native_transformers_hip_hip})
|
||||
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
|
||||
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
|
||||
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${native_quantized_cudnn_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
|
||||
set(all_hip_cpp ${native_miopen_cpp} ${native_cudnn_hip_cpp} ${miopen_cpp} ${all_hip_cpp})
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -3022,6 +3022,7 @@
|
|||
- func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
|
||||
dispatch:
|
||||
QuantizedCPU: quantized_max_pool2d
|
||||
QuantizedCUDA: quantized_max_pool2d_cudnn
|
||||
|
||||
- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +1,17 @@
|
|||
#include <c10/util/Exception.h>
|
||||
#ifdef USE_CUDA
|
||||
#include <ATen/cuda/CUDAConfig.h> // for the definition of AT_CUDNN_ENABLED
|
||||
|
||||
#if AT_CUDNN_ENABLED()
|
||||
|
||||
#include <ATen/native/cudnn/Macros.h>
|
||||
|
||||
#if HAS_CUDNN_V8()
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/cuda/Exceptions.h>
|
||||
#include <ATen/cudnn/Descriptors.h>
|
||||
#include <ATen/cudnn/Handle.h>
|
||||
#include <ATen/cudnn/Types.h>
|
||||
#endif // AT_CUDNN_ENABLED
|
||||
#endif // USE_CUDA
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/native/Pool.h>
|
||||
#include <ATen/native/TensorIterator.h>
|
||||
#include <c10/core/ScalarType.h>
|
||||
|
|
@ -63,6 +63,9 @@ Tensor quantized_max_pool2d_cudnn(
|
|||
IntArrayRef padding,
|
||||
IntArrayRef dilation,
|
||||
bool ceil_mode) {
|
||||
#ifdef USE_CUDA
|
||||
#if AT_CUDNN_ENABLED()
|
||||
#if HAS_CUDNN_V8()
|
||||
check_maxpool2d_params(
|
||||
kernel_size,
|
||||
stride,
|
||||
|
|
@ -179,6 +182,18 @@ Tensor quantized_max_pool2d_cudnn(
|
|||
|
||||
// recall we casted our input and output to 4D if qx was 3D, so we recast it back to 3D prior to returning
|
||||
return (ndim == 3 ? qy.view(std::vector<int64_t>(output_shape.begin() + 1, output_shape.end())) : qy);
|
||||
#else // HAS_CUDNN_V8()
|
||||
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN v8 support");
|
||||
return Tensor{}; // never reached, placates the compiler
|
||||
#endif // HAS_CUDNN_V8()
|
||||
#else // AT_CUDNN_ENABLED()
|
||||
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN support");
|
||||
return Tensor{}; // never reached, placates the compiler
|
||||
#endif // AT_CUDNN_ENABLED()
|
||||
#else // USE_CUDA
|
||||
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with USE_CUDA support");
|
||||
return Tensor{}; // never reached, placates the compiler
|
||||
#endif
|
||||
}
|
||||
|
||||
// Keep the registry in the anonymous namespace.
|
||||
|
|
@ -206,7 +221,3 @@ TORCH_LIBRARY_IMPL(quantized, QuantizedCUDA, m) {
|
|||
} // namespace
|
||||
} // namespace native
|
||||
} // namespace at
|
||||
|
||||
#endif // HAS_CUDNN_V8
|
||||
#endif // AT_CUDNN_ENABLED
|
||||
#endif // USE_CUDA
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ includes = [
|
|||
"aten/src/ATen/cuda/*",
|
||||
"aten/src/ATen/native/cuda/*",
|
||||
"aten/src/ATen/native/cudnn/*",
|
||||
"aten/src/ATen/native/quantized/cudnn/*",
|
||||
"aten/src/ATen/native/nested/cuda/*",
|
||||
"aten/src/ATen/native/sparse/cuda/*",
|
||||
"aten/src/ATen/native/quantized/cuda/*",
|
||||
|
|
|
|||
Loading…
Reference in a new issue