Kill THCUNN (#63429)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63429

Test Plan: Imported from OSS

Reviewed By: mruberry

Differential Revision: D30441308

Pulled By: ngimel

fbshipit-source-id: 3ae342a2f8d5c7f8827b637c4055c5d1b0a1be26
2026-05-14 20:57:59 +00:00 · 2021-08-23 12:05:51 -07:00 · 2021-08-23 12:05:51 -07:00 · 560cd88195
commit 560cd88195
parent db1b27fa8d
22 changed files with 2 additions and 376 deletions
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -409,21 +409,6 @@ filegroup(
    ],
 )

-filegroup(
-    name = "thcunn_srcs_cu",
-    srcs = [
-        "aten/src/THCUNN/BCECriterion.cu.cc",
-        "aten/src/THCUNN/ELU.cu.cc",
-        "aten/src/THCUNN/HardTanh.cu.cc",
-        "aten/src/THCUNN/LeakyReLU.cu.cc",
-        "aten/src/THCUNN/MultiMarginCriterion.cu.cc",
-        "aten/src/THCUNN/SoftMarginCriterion.cu.cc",
-        "aten/src/THCUNN/SoftPlus.cu.cc",
-        "aten/src/THCUNN/SoftShrink.cu.cc",
-        "aten/src/THCUNN/Tanh.cu.cc",
-    ],
-)
-
 filegroup(
    name = "aten_srcs_cu",
    srcs = [
@ -573,8 +558,6 @@ cc_library(
        "aten/src/THC/**/*.cpp",
        "aten/src/THC/*.cuh",
        "aten/src/THC/generic/*.cu.cc",
-        "aten/src/THCUNN/*.cuh",
-        "aten/src/THCUNN/generic/*.cu.cc",
    ],
    exclude = [
        "aten/src/ATen/Config.h",
@ -716,7 +699,6 @@ cu_library(
    srcs = [
        ":aten_srcs_cu",
        ":thc_srcs_cu",
-        ":thcunn_srcs_cu",
    ],
    copts = ATEN_COPTS + torch_cuda_half_options,
    visibility = ["//visibility:public"],
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -242,8 +242,7 @@ into the repo directory.
 * [aten](aten) - C++ tensor library for PyTorch (no autograd support)
  * [src](aten/src) - [README](aten/src/README.md)
    * [TH](aten/src/TH)
-      [THC](aten/src/THC)
-      [THCUNN](aten/src/THCUNN) - Legacy library code from the original
+      [THC](aten/src/THC) - Legacy library code from the original
      Torch. Try not to add things here; we're slowly porting these to
      [native](aten/src/ATen/native).
      * generic - Contains actual implementations of operators,
--- a/README.md
+++ b/README.md
@ -126,7 +126,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
 PyTorch has minimal framework overhead. We integrate acceleration libraries
 such as [Intel MKL](https://software.intel.com/mkl) and NVIDIA ([cuDNN](https://developer.nvidia.com/cudnn), [NCCL](https://developer.nvidia.com/nccl)) to maximize speed.
 At the core, its CPU and GPU Tensor and neural network backends
-(TH, THC, THNN, THCUNN) are mature and have been tested for years.
+are mature and have been tested for years.

 Hence, PyTorch is quite fast – whether you run small or large neural networks.

--- a/aten/CMakeLists.txt
+++ b/aten/CMakeLists.txt
@ -80,21 +80,14 @@ if(USE_ROCM)
  # ATen proper)
  set(AT_CUDA_ENABLED 1)
  add_subdirectory(src/THH)
-  add_subdirectory(src/THHUNN)
  message("ROCm is enabled.")
 elseif(USE_CUDA)
  set(AT_CUDA_ENABLED 1)
  add_subdirectory(src/THC)
-  add_subdirectory(src/THCUNN)
 else()
  message("disabling CUDA because USE_CUDA is set false")
  set(AT_CUDA_ENABLED 0)
 endif()
-if(NOT USE_CUDA)
-  # we still parse THCUNN even if cuda is disabled to make sure to
-  # install it
-  install(FILES src/THCUNN/generic/THCUNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THCUNN/generic")
-endif()

 if(NOT USE_NNPACK)
  set(AT_NNPACK_ENABLED 0)
--- a/aten/src/ATen/TensorUtils.cpp
+++ b/aten/src/ATen/TensorUtils.cpp
@ -282,7 +282,6 @@ bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides) {
  return contig_if_nonempty;
 }

-// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
 void check_dim_size(
    const Tensor& tensor,
    int64_t dim,
--- a/aten/src/ATen/TensorUtils.h
+++ b/aten/src/ATen/TensorUtils.h
@ -144,7 +144,6 @@ TORCH_API void* maybe_data_ptr(const TensorArg& tensor);
 // on whether a subgeometry is contiguous.
 TORCH_API bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides);

-// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
 TORCH_API void check_dim_size(
    const Tensor& tensor,
    int64_t dim,
--- a/aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp
+++ b/aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp
@ -7,7 +7,6 @@
 #include <ATen/ExpandUtils.h>
 #include <THC/THC.h>
 #include <THC/THCTensor.hpp>
-#include <THCUNN/THCUNN.h>
 #undef THNN_
 #undef THCIndexTensor_
 #include <ATen/DeviceGuard.h>
--- a/aten/src/ATen/cuda/detail/KernelUtils.h
+++ b/aten/src/ATen/cuda/detail/KernelUtils.h
@ -2,9 +2,6 @@

 #include <ATen/ATen.h>

-// Contents of this file are copied from THCUNN/common.h for the ease of porting
-// THCUNN functions into ATen.
-
 namespace at { namespace cuda { namespace detail {

 // CUDA: grid stride looping
--- a/aten/src/README.md
+++ b/aten/src/README.md
@ -7,7 +7,6 @@ multiple variants of the library, summarized here:
 * TH = TorcH
 * THC = TorcH Cuda
 * THCS = TorcH Cuda Sparse (now defunct)
-* THCUNN = TorcH CUda Neural Network (see cunn)
 * THNN = TorcH Neural Network (now defunct)
 * THS = TorcH Sparse (now defunct)

--- a/aten/src/THCUNN/CMakeLists.txt
+++ b/aten/src/THCUNN/CMakeLists.txt
@ -1,10 +0,0 @@
-set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
-PARENT_SCOPE)
-
-set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
-  "${CMAKE_CURRENT_SOURCE_DIR}"
-PARENT_SCOPE)
-
-install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-  DESTINATION ${ATEN_INSTALL_INCLUDE_SUBDIR}
-  FILES_MATCHING PATTERN "*.h" PATTERN "*.cuh")
--- a/aten/src/THCUNN/README.md
+++ b/aten/src/THCUNN/README.md
@ -1,26 +0,0 @@
-# THCUNN
-
-THCUNN is a library that gathers nn's C implementations of neural network modules. It's entirely free of Lua dependency and therefore can be used in any application that has a C FFI. Please note that it only contains quite low level functions; most users will want to use ATen, which provides a C++ wrapper around these functions.
-
-
-Looking to add an implementation?  Consider writing an ATen native function
-instead!  See [../ATen/native](../ATen/native).
-
-## Links
-
-* [API reference](doc/api_reference.md)
-* [Style guidelines](doc/style_guidelines.md)
-
-## API
-
-THCUNN is a purely functional library. It provides 2-3 functions for each module, that perform the most important operations:
-
-* **updateOutput** - applies the module to an input
-* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
-* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
-
-For information on argument types please check the [API reference](doc/api_reference.md).
-
-## Developer docs
-
-* [Style guidelines](doc/style_guidelines.md)
--- a/aten/src/THCUNN/SharedMem.cuh
+++ b/aten/src/THCUNN/SharedMem.cuh
@ -1,43 +0,0 @@
-// Based on the simpleTempltes CUDA example
-
-#ifndef THCUNN_SHAREDMEM_H
-#define THCUNN_SHAREDMEM_H
-
-template <typename T>
-struct SharedMem {
-  __device__ T *getPointer()
-  {
-    extern __device__ void error(void);
-    error();
-    return NULL;
-  }
-};
-
-template <>
-struct SharedMem<half>
-{
-  __device__ half *getPointer() {
-    extern __shared__ half s_half[];
-    return s_half;
-  }
-};
-
-template <>
-struct SharedMem<float>
-{
-  __device__ float *getPointer() {
-    extern __shared__ float s_float[];
-    return s_float;
-  }
-};
-
-template <>
-struct SharedMem<double>
-{
-  __device__ double *getPointer() {
-    extern __shared__ double s_double[];
-    return s_double;
-  }
-};
-
-#endif
--- a/aten/src/THCUNN/THCHalfAutoNumerics.cuh
+++ b/aten/src/THCUNN/THCHalfAutoNumerics.cuh
@ -1,38 +0,0 @@
-#ifndef THC_HALF_AUTO_NUMERICS_INC
-#define THC_HALF_AUTO_NUMERICS_INC
-
-#include <TH/THHalf.h>
-#include <THC/THCNumerics.cuh>
-
-// WARNING: THCNumerics is being deprecated. Read the comments and function usage
-//          in THCNumerics to learn about the deprecation
-//
-// Half numerics functions defined as free functions, so cunn code can be
-// written generically, i.e. without excessive calling of THCNumerics<THHalf> functions.
-
-// these functions should move to THCNumerics
-
-inline __host__ __device__ THHalf fmaxType(THHalf x, THHalf y) {
-  return THCNumerics<THHalf>::ge(x, y) ? x : y;
-}
-
-inline __host__ __device__ float fmaxType(float x, THHalf y) {
-  return fmaxf(x, ScalarConvert<THHalf, float>::to(y));
-}
-
-inline __host__ __device__ float fmaxType(float x, float y) {
-  return fmaxf(x, y);
-}
-
-inline __host__ __device__ double fmaxType(double x, double y) {
-  return fmax(x, y);
-}
-
-
-// arithmetic functions
-
-inline __host__ __device__ THHalf pow(THHalf a, THHalf b) {
-  return THCNumerics<THHalf>::pow(a, b);
-}
-
-#endif
--- a/aten/src/THCUNN/THCUNN.h
+++ b/aten/src/THCUNN/THCUNN.h
@ -1,13 +0,0 @@
-#include <THC/THC.h>
-
-#define THCIndexTensor THCudaLongTensor
-#define THCIndexTensor_(NAME) THCudaLongTensor_ ## NAME
-typedef int64_t THCIndex_t;
-
-#define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME)
-
-#include <THCUNN/generic/THCUNN.h>
-#include <THC/THCGenerateFloatTypes.h>
-
-#include <THCUNN/generic/THCUNN.h>
-#include <THC/THCGenerateBFloat16Type.h>
--- a/aten/src/THCUNN/common.h
+++ b/aten/src/THCUNN/common.h
@ -1,83 +0,0 @@
-#ifndef THCUNN_COMMON_H
-#define THCUNN_COMMON_H
-
-#define THCUNN_assertSameGPU(...) THAssertMsg(THCTensor_(checkGPU)(__VA_ARGS__), \
-  "Some of weight/gradient/input tensors are located on different GPUs. Please move them to a single one.")
-
-// Use 1024 threads per block, which requires cuda sm_2x or above
-const int CUDA_NUM_THREADS = 1024;
-
-// CUDA: number of blocks for threads.
-inline int GET_BLOCKS(const int64_t N)
-{
-  // Round up division for positive number
-  auto block_num = N / CUDA_NUM_THREADS + (N % CUDA_NUM_THREADS == 0 ? 0 : 1);
-
-  constexpr int64_t max_int = std::numeric_limits<int>::max();
-  THAssertMsg(block_num <= max_int, "Can't schedule too many blocks on CUDA device");
-
-  return static_cast<int>(block_num);
-}
-
-#define THCUNN_resizeAs_indices(STATE, I1, I2)              \
-  if (!I1->sizes().equals(I2->sizes()))                     \
-  { \
-    THCudaLongTensor_resizeAs(STATE, I1, I2);               \
-  }
-
-#define THCUNN_check_shape(STATE, I1, I2)                 \
-  if (I1 != NULL && I2 != NULL && !THCTensor_(isSameSizeAs)(STATE, I1, I2))        \
-  { \
-       THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, I1);  \
-       THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2);  \
-       THError(#I1 " and " #I2 " shapes do not match: "   \
-               #I1 " %s, " #I2 " %s", s1.str, s2.str);    \
-  }
-
-
-#define THCUNN_check_shape_indices(STATE, I1, I2)              \
-  if (!I1->sizes().equals(I2->sizes()))                        \
-  { \
-       THCDescBuff s1 = THCIndexTensor_(sizeDesc)(STATE, I1);  \
-       THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2);       \
-       THError(#I1 " and " #I2 " shapes do not match: "        \
-               #I1 " %s, " #I2 " %s", s1.str, s2.str);         \
-  }
-
-#define THCUNN_check_nElement(STATE, I1, I2)                \
-  if (I1 != NULL && I2 != NULL ) {                          \
-    ptrdiff_t n1 = THCTensor_(nElement)(STATE, I1);              \
-    ptrdiff_t n2 = THCTensor_(nElement)(STATE, I2);              \
-    if (n1 != n2)                                           \
-    {        \
-      THCDescBuff s1 = THCTensor_(sizeDesc)(state, I1);     \
-      THCDescBuff s2 = THCTensor_(sizeDesc)(state, I2);     \
-      THError(#I1 " and " #I2 " have different number of elements: "        \
-              #I1 "%s has %ld elements, while "             \
-              #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \
-    }        \
-  }
-
-#define THCUNN_check_dim_size(STATE, T, DIM, DIM_SIZE, SIZE) \
-  if (THCTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM ||             \
-      THCTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) {        \
-      THCDescBuff s1 = THCTensor_(sizeDesc)(state, T);       \
-      THError("Need " #T " of dimension %d and " #T ".size[%d] == %d"        \
-              " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
-  }
-
-#define THCUNN_check_dim_size_indices(STATE, T, DIM, DIM_SIZE, SIZE)  \
-  if (THCIndexTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM ||                 \
-      THCIndexTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) {            \
-      THCDescBuff s1 = THCIndexTensor_(sizeDesc)(state, T);           \
-      THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
-              " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
-  }
-
-#define THCUNN_argCheck(STATE, COND, ARG, T, FORMAT) \
-  if (!(COND)) { \
-    THCDescBuff s1 = THCTensor_(sizeDesc)(state, T); \
-    THArgCheck(COND, ARG, FORMAT, s1.str);           \
-  }
-
-#endif
--- a/aten/src/THCUNN/doc/api_reference.md
+++ b/aten/src/THCUNN/doc/api_reference.md
@ -1,26 +0,0 @@
-# API docs
-
-This document describes the conventions behind the THCUNN API.
-
-### The API
-
-All functions provided by THCUNN are stored in `aten/src/THCUNN/generic/THCUNN.h`.
-Look at this file.
-
-### Note on function names
-
-Please remember, that because C doesn't support function overloading, functions taking different tensor types have different names. So e.g. for an Abs module, there are actually two updateOutput functions:
-
-* `void THNN_FloatAbs_updateOutput(...)`
-* `void THNN_DoubleAbs_updateOutput(...)`
-
-In these docs such function will be referred to as `void THCUNN_Abs_updateOutput(...)`, and it's up to developer to add a type prefix. `real` is an alias for that type.
-
-### Argument types
-
-Some arguments have additional tags placed in square brackets in their header declarations:
-
-* **[OUT]** - This is the output argument. It will be reshaped if needed.
-* **[OPTIONAL]** - This argument is optional and can be safely set to NULL
-* **[BUFFER]** - A buffer. `updateGradInput` and `accGradParameters` should get the same buffers that were used in `updateOutput` call.
-* **[MODIFIED]** - Some functions accept an `inplace` flag. If set to true, this argument might be modified (in addition to the output).
--- a/aten/src/THCUNN/doc/style_guidelines.md
+++ b/aten/src/THCUNN/doc/style_guidelines.md
@ -1,64 +0,0 @@
-## API design guidelines
-
-Functions should return `void`.
-
-All functions should accept arguments in the following order. `...` represent any module-specific parameters or buffers, disregarding whether they are used for writing or reading. Arguments in `...` below should be ordered like this:
-```
-[weight], [bias], [any buffers], [additional arguments], [optional arguments]
-```
-
-### Modules
-```
-updateOutput: state, input, output, ...
-updateGradInput: state, input, gradOutput, gradInput, ...
-accGradParameters: state, input, gradOutput, [gradWeight], [gradBias], ...
-```
-
-e.g.
-```C
-void THNN_(ClassNLLCriterion_updateGradInput)(
-           THCState *state,
-           THCTensor *input,
-           THCIndexTensor *target,
-           THCTensor *gradOutput,
-           THCTensor *gradInput,
-           int64_t reduction,
-           THCTensor *weights,
-           THCTensor *total_weight,
-           int64_t ignore_index)
-```
-
-### Criterions
-```
-updateOutput: state, input, target, output, ...
-updateGradInput: state, input, target, gradInput, ...
-```
-
-e.g.
-
-```C
-void THNN_(ClassNLLCriterion_updateOutput)(
-           THCState *state,
-           THCTensor *input,
-           THCIndexTensor *target,
-           THCTensor *output,
-           int64_t reduction,
-           THCTensor *weights,
-           THCTensor *total_weight,
-           int64_t ignore_index)
-```
-
-## Code style guide
-
-```C
-void THNN_(GatedLinear_updateOutput)(
-           THCState *state,
-           THCTensor *input,
-           THCTensor *output,
-           int dim)
-//<- 10 ->
-```
-
-All arguments should start on a new line after function name, and they should be indented using 10 spaces.
-
-Use 2 spaces for block indentation.
--- a/aten/src/THCUNN/generic/THCUNN.h
+++ b/aten/src/THCUNN/generic/THCUNN.h
@ -1,29 +0,0 @@
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "THCUNN/generic/THCUNN.h"
-#else
-
-#include <ATen/core/Reduction.h>
-#include <ATen/Generator.h>
-
-TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateOutput)(
-    THCState* state,
-    THCTensor* input,
-    THCIndexTensor* target,
-    THCTensor* output,
-    int64_t reduction,
-    int p,
-    THCTensor* weights, // [OPTIONAL]
-    accreal margin);
-
-TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateGradInput)(
-    THCState* state,
-    THCTensor* input,
-    THCIndexTensor* target,
-    THCTensor* gradOutput,
-    THCTensor* gradInput,
-    int64_t reduction,
-    int p,
-    THCTensor* weights, // [OPTIONAL]
-    accreal margin);
-
-#endif
--- a/setup.py
+++ b/setup.py
@ -1028,8 +1028,6 @@ if __name__ == '__main__':
                'include/THC/*.cuh',
                'include/THC/*.h*',
                'include/THC/generic/*.h',
-                'include/THCUNN/*.cuh',
-                'include/THCUNN/generic/*.h',
                'include/THH/*.cuh',
                'include/THH/*.h*',
                'include/THH/generic/*.h',
--- a/tools/README.md
+++ b/tools/README.md
@ -15,10 +15,6 @@ Modern infrastructure:
    to import arbitrary Python files in a script, without having to add
    them to the PYTHONPATH first.

-Legacy infrastructure (we should kill this):
-* [cwrap](cwrap) - Implementation of legacy code generation for THNN/THCUNN.
-  This is used by nnwrap.
-
 Build system pieces:

 * [setup_helpers](setup_helpers) - Helper code for searching for
--- a/tools/amd_build/build_amd.py
+++ b/tools/amd_build/build_amd.py
@ -81,12 +81,10 @@ includes = [
    "aten/src/ATen/native/sparse/cuda/*",
    "aten/src/ATen/native/quantized/cuda/*",
    "aten/src/THC/*",
-    "aten/src/THCUNN/*",
    "aten/src/ATen/test/*",
    # CMakeLists.txt isn't processed by default, but there are a few
    # we do want to handle, so explicitly specify them
    "aten/src/THC/CMakeLists.txt",
-    "aten/src/THCUNN/CMakeLists.txt",
    "torch/*",
    "tools/autograd/templates/python_variable_methods.cpp",
 ]
--- a/torch/utils/hipify/hipify_python.py
+++ b/torch/utils/hipify/hipify_python.py
@ -750,7 +750,6 @@ def preprocessor(
                or f.startswith("ATen/native/quantized/cuda")
                or f.startswith("ATen/native/sparse/cuda")
                or f.startswith("THC/")
-                or f.startswith("THCUNN/")
                or (f.startswith("THC") and not f.startswith("THCP"))
            ):
                return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension))