[quant][core][gpu][improvement] Integrated quantized cudnn max pool2d with existing quantized_max_pool2d (#76129)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/76129

Previously, quantized_max_pool2d_cudnn was made available to the
frontend through torch.ops.quantized.max_pool2d.
We improve the integration by also making it available through
torch.max_pool2d, which is made possible by registering
quantized_max_pool2d_cudnn in native_functions.yaml under
quantized_max_pool2d, which is called in max_pool2d.

Ideally and ultimately, we will get rid of the quantized_max_pool2d
registration in native_functions.yaml, and directly register
quantized_max_pool2d and quantized_max_pool2d_cudnn under max_pool2d,
but current support for quantized dispatch keys blocks us from doing so.

Test Plan:
```
python test/run_tests.py
```

Differential Revision:
D35789078

Reviewed By: jerryzh168

Pulled By: dzdang

fbshipit-source-id: 5d8220255bfab663b4779b5d3c66dea9f79d8ee7
(cherry picked from commit c27164da29043f7dc9a4c27d24a93cd37162c23e)
This commit is contained in:
dzdang 2022-04-26 18:47:29 -07:00 committed by PyTorch MergeBot
parent 6e959dec69
commit 6e292f1a21
5 changed files with 25 additions and 10 deletions

View file

@ -282,6 +282,7 @@ filegroup(
"aten/src/ATen/native/cudnn/*.cpp",
"aten/src/ATen/native/miopen/*.cpp",
"aten/src/ATen/native/nested/cuda/*.cpp",
"aten/src/ATen/native/quantized/cudnn/*.cpp",
"aten/src/ATen/native/sparse/cuda/*.cpp",
"aten/src/ATen/native/transformers/cuda/*.cpp",
"aten/src/THC/*.cpp",

View file

@ -145,6 +145,7 @@ file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
file(GLOB native_utils_cpp "native/utils/*.cpp")
# XNNPACK
@ -248,7 +249,7 @@ if(USE_ROCM)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} ${hip_hip} ${native_hip_hip} ${native_nested_hip_hip} ${native_sparse_hip_hip} ${native_quantized_hip_hip} ${native_transformers_hip_hip})
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${native_quantized_cudnn_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
set(all_hip_cpp ${native_miopen_cpp} ${native_cudnn_hip_cpp} ${miopen_cpp} ${all_hip_cpp})
endif()

View file

@ -3022,6 +3022,7 @@
- func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
dispatch:
QuantizedCPU: quantized_max_pool2d
QuantizedCUDA: quantized_max_pool2d_cudnn
- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor

View file

@ -1,17 +1,17 @@
#include <c10/util/Exception.h>
#ifdef USE_CUDA
#include <ATen/cuda/CUDAConfig.h> // for the definition of AT_CUDNN_ENABLED
#if AT_CUDNN_ENABLED()
#include <ATen/native/cudnn/Macros.h>
#if HAS_CUDNN_V8()
#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Handle.h>
#include <ATen/cudnn/Types.h>
#endif // AT_CUDNN_ENABLED
#endif // USE_CUDA
#include <ATen/ATen.h>
#include <ATen/native/Pool.h>
#include <ATen/native/TensorIterator.h>
#include <c10/core/ScalarType.h>
@ -63,6 +63,9 @@ Tensor quantized_max_pool2d_cudnn(
IntArrayRef padding,
IntArrayRef dilation,
bool ceil_mode) {
#ifdef USE_CUDA
#if AT_CUDNN_ENABLED()
#if HAS_CUDNN_V8()
check_maxpool2d_params(
kernel_size,
stride,
@ -179,6 +182,18 @@ Tensor quantized_max_pool2d_cudnn(
// recall we casted our input and output to 4D if qx was 3D, so we recast it back to 3D prior to returning
return (ndim == 3 ? qy.view(std::vector<int64_t>(output_shape.begin() + 1, output_shape.end())) : qy);
#else // HAS_CUDNN_V8()
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN v8 support");
return Tensor{}; // never reached, placates the compiler
#endif // HAS_CUDNN_V8()
#else // AT_CUDNN_ENABLED()
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN support");
return Tensor{}; // never reached, placates the compiler
#endif // AT_CUDNN_ENABLED()
#else // USE_CUDA
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with USE_CUDA support");
return Tensor{}; // never reached, placates the compiler
#endif
}
// Keep the registry in the anonymous namespace.
@ -206,7 +221,3 @@ TORCH_LIBRARY_IMPL(quantized, QuantizedCUDA, m) {
} // namespace
} // namespace native
} // namespace at
#endif // HAS_CUDNN_V8
#endif // AT_CUDNN_ENABLED
#endif // USE_CUDA

View file

@ -85,6 +85,7 @@ includes = [
"aten/src/ATen/cuda/*",
"aten/src/ATen/native/cuda/*",
"aten/src/ATen/native/cudnn/*",
"aten/src/ATen/native/quantized/cudnn/*",
"aten/src/ATen/native/nested/cuda/*",
"aten/src/ATen/native/sparse/cuda/*",
"aten/src/ATen/native/quantized/cuda/*",