Kill THCUNN (#63429)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/63429

Test Plan: Imported from OSS

Reviewed By: mruberry

Differential Revision: D30441308

Pulled By: ngimel

fbshipit-source-id: 3ae342a2f8d5c7f8827b637c4055c5d1b0a1be26
This commit is contained in:
Peter Bell 2021-08-23 12:05:51 -07:00 committed by Facebook GitHub Bot
parent db1b27fa8d
commit 560cd88195
22 changed files with 2 additions and 376 deletions

View file

@ -409,21 +409,6 @@ filegroup(
],
)
# CUDA translation units under aten/src/THCUNN, listed explicitly by file.
# Referenced elsewhere in this build file by the label ":thcunn_srcs_cu".
filegroup(
name = "thcunn_srcs_cu",
srcs = [
"aten/src/THCUNN/BCECriterion.cu.cc",
"aten/src/THCUNN/ELU.cu.cc",
"aten/src/THCUNN/HardTanh.cu.cc",
"aten/src/THCUNN/LeakyReLU.cu.cc",
"aten/src/THCUNN/MultiMarginCriterion.cu.cc",
"aten/src/THCUNN/SoftMarginCriterion.cu.cc",
"aten/src/THCUNN/SoftPlus.cu.cc",
"aten/src/THCUNN/SoftShrink.cu.cc",
"aten/src/THCUNN/Tanh.cu.cc",
],
)
filegroup(
name = "aten_srcs_cu",
srcs = [
@ -573,8 +558,6 @@ cc_library(
"aten/src/THC/**/*.cpp",
"aten/src/THC/*.cuh",
"aten/src/THC/generic/*.cu.cc",
"aten/src/THCUNN/*.cuh",
"aten/src/THCUNN/generic/*.cu.cc",
],
exclude = [
"aten/src/ATen/Config.h",
@ -716,7 +699,6 @@ cu_library(
srcs = [
":aten_srcs_cu",
":thc_srcs_cu",
":thcunn_srcs_cu",
],
copts = ATEN_COPTS + torch_cuda_half_options,
visibility = ["//visibility:public"],

View file

@ -242,8 +242,7 @@ into the repo directory.
* [aten](aten) - C++ tensor library for PyTorch (no autograd support)
* [src](aten/src) - [README](aten/src/README.md)
* [TH](aten/src/TH)
[THC](aten/src/THC)
[THCUNN](aten/src/THCUNN) - Legacy library code from the original
[THC](aten/src/THC) - Legacy library code from the original
Torch. Try not to add things here; we're slowly porting these to
[native](aten/src/ATen/native).
* generic - Contains actual implementations of operators,

View file

@ -126,7 +126,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
PyTorch has minimal framework overhead. We integrate acceleration libraries
such as [Intel MKL](https://software.intel.com/mkl) and NVIDIA ([cuDNN](https://developer.nvidia.com/cudnn), [NCCL](https://developer.nvidia.com/nccl)) to maximize speed.
At the core, its CPU and GPU Tensor and neural network backends
(TH, THC, THNN, THCUNN) are mature and have been tested for years.
are mature and have been tested for years.
Hence, PyTorch is quite fast whether you run small or large neural networks.

View file

@ -80,21 +80,14 @@ if(USE_ROCM)
# ATen proper)
set(AT_CUDA_ENABLED 1)
add_subdirectory(src/THH)
add_subdirectory(src/THHUNN)
message("ROCm is enabled.")
elseif(USE_CUDA)
set(AT_CUDA_ENABLED 1)
add_subdirectory(src/THC)
add_subdirectory(src/THCUNN)
else()
message("disabling CUDA because USE_CUDA is set false")
set(AT_CUDA_ENABLED 0)
endif()
if(NOT USE_CUDA)
# src/THCUNN is only added as a subdirectory in the USE_CUDA branch above, so
# its own install rules do not run when USE_CUDA is off. Install the generic
# THCUNN header explicitly here so it is still shipped in that configuration.
install(FILES src/THCUNN/generic/THCUNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THCUNN/generic")
endif()
if(NOT USE_NNPACK)
set(AT_NNPACK_ENABLED 0)

View file

@ -282,7 +282,6 @@ bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides) {
return contig_if_nonempty;
}
// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
void check_dim_size(
const Tensor& tensor,
int64_t dim,

View file

@ -144,7 +144,6 @@ TORCH_API void* maybe_data_ptr(const TensorArg& tensor);
// on whether a subgeometry is contiguous.
TORCH_API bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides);
// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
TORCH_API void check_dim_size(
const Tensor& tensor,
int64_t dim,

View file

@ -7,7 +7,6 @@
#include <ATen/ExpandUtils.h>
#include <THC/THC.h>
#include <THC/THCTensor.hpp>
#include <THCUNN/THCUNN.h>
#undef THNN_
#undef THCIndexTensor_
#include <ATen/DeviceGuard.h>

View file

@ -2,9 +2,6 @@
#include <ATen/ATen.h>
// Contents of this file are copied from THCUNN/common.h for the ease of porting
// THCUNN functions into ATen.
namespace at { namespace cuda { namespace detail {
// CUDA: grid stride looping

View file

@ -7,7 +7,6 @@ multiple variants of the library, summarized here:
* TH = TorcH
* THC = TorcH Cuda
* THCS = TorcH Cuda Sparse (now defunct)
* THCUNN = TorcH CUda Neural Network (see cunn)
* THNN = TorcH Neural Network (now defunct)
* THS = TorcH Sparse (now defunct)

View file

@ -1,10 +0,0 @@
# Re-publish ATen_CUDA_SRCS to the parent scope unchanged; this directory does
# not append any sources of its own to the list.
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
PARENT_SCOPE)
# Append this directory to ATen_CUDA_INCLUDE and publish the result to the
# parent scope.
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
"${CMAKE_CURRENT_SOURCE_DIR}"
PARENT_SCOPE)
# Install only the *.h and *.cuh files from this directory tree. The source
# path has no trailing slash, so the directory itself (by its last path
# component) is created under ATEN_INSTALL_INCLUDE_SUBDIR.
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
DESTINATION ${ATEN_INSTALL_INCLUDE_SUBDIR}
FILES_MATCHING PATTERN "*.h" PATTERN "*.cuh")

View file

@ -1,26 +0,0 @@
# THCUNN
THCUNN is a library that gathers nn's C implementations of neural network modules. It has no Lua dependency and can therefore be used in any application that has a C FFI. Please note that it only contains quite low-level functions; most users will want to use ATen, which provides a C++ wrapper around these functions.
Looking to add an implementation? Consider writing an ATen native function
instead! See [../ATen/native](../ATen/native).
## Links
* [API reference](doc/api_reference.md)
* [Style guidelines](doc/style_guidelines.md)
## API
THCUNN is a purely functional library. It provides 2-3 functions for each module that perform the most important operations:
* **updateOutput** - applies the module to an input
* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
For information on argument types please check the [API reference](doc/api_reference.md).
## Developer docs
* [Style guidelines](doc/style_guidelines.md)

View file

@ -1,43 +0,0 @@
// Based on the simpleTemplates CUDA example
#ifndef THCUNN_SHAREDMEM_H
#define THCUNN_SHAREDMEM_H
// Typed accessor for an `extern __shared__` array. This primary template is
// the fallback for element types that have no specialization below.
template <typename T>
struct SharedMem {
__device__ T *getPointer()
{
// `error` is declared here but not defined anywhere in this header;
// presumably the intent is to make use with an unsupported T fail at
// build time rather than silently return NULL -- TODO confirm.
extern __device__ void error(void);
error();
return NULL;
}
};
// half: returns the unsized `extern __shared__` array s_half.
template <>
struct SharedMem<half>
{
__device__ half *getPointer() {
extern __shared__ half s_half[];
return s_half;
}
};
// float: returns the unsized `extern __shared__` array s_float.
template <>
struct SharedMem<float>
{
__device__ float *getPointer() {
extern __shared__ float s_float[];
return s_float;
}
};
// double: returns the unsized `extern __shared__` array s_double.
template <>
struct SharedMem<double>
{
__device__ double *getPointer() {
extern __shared__ double s_double[];
return s_double;
}
};
#endif

View file

@ -1,38 +0,0 @@
#ifndef THC_HALF_AUTO_NUMERICS_INC
#define THC_HALF_AUTO_NUMERICS_INC
#include <TH/THHalf.h>
#include <THC/THCNumerics.cuh>
// WARNING: THCNumerics is being deprecated. Read the comments and function usage
// in THCNumerics to learn about the deprecation
//
// Half numerics functions defined as free functions, so cunn code can be
// written generically, i.e. without excessive calling of THCNumerics<THHalf> functions.
// these functions should move to THCNumerics
// Half/half maximum: yields x when THCNumerics<THHalf>::ge(x, y) holds,
// otherwise y.
inline __host__ __device__ THHalf fmaxType(THHalf x, THHalf y) {
  if (THCNumerics<THHalf>::ge(x, y)) {
    return x;
  }
  return y;
}

// Mixed float/half maximum: y is converted to float first, and the result is
// a float.
inline __host__ __device__ float fmaxType(float x, THHalf y) {
  const float y_as_float = ScalarConvert<THHalf, float>::to(y);
  return fmaxf(x, y_as_float);
}

// Float maximum; forwards to fmaxf.
inline __host__ __device__ float fmaxType(float x, float y) {
  const float larger = fmaxf(x, y);
  return larger;
}

// Double maximum; forwards to fmax.
inline __host__ __device__ double fmaxType(double x, double y) {
  const double larger = fmax(x, y);
  return larger;
}

// Arithmetic helpers.
// Half power function; forwards to THCNumerics<THHalf>::pow.
inline __host__ __device__ THHalf pow(THHalf a, THHalf b) {
  const THHalf result = THCNumerics<THHalf>::pow(a, b);
  return result;
}
#endif

View file

@ -1,13 +0,0 @@
#include <THC/THC.h>
// Index tensors are spelled THCudaLongTensor; THCIndexTensor_(NAME) pastes
// NAME onto the THCudaLongTensor_ prefix.
#define THCIndexTensor THCudaLongTensor
#define THCIndexTensor_(NAME) THCudaLongTensor_ ## NAME
typedef int64_t THCIndex_t;
// Builds a symbol from THNN_, CReal and NAME via TH_CONCAT_3. CReal is not
// defined in this file -- presumably it is supplied per type by the
// THCGenerate* headers included below; TODO confirm.
#define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME)
// The generic header is included twice, each time followed by a different
// THCGenerate* header: first for the float types, then for BFloat16.
#include <THCUNN/generic/THCUNN.h>
#include <THC/THCGenerateFloatTypes.h>
#include <THCUNN/generic/THCUNN.h>
#include <THC/THCGenerateBFloat16Type.h>

View file

@ -1,83 +0,0 @@
#ifndef THCUNN_COMMON_H
#define THCUNN_COMMON_H

// GET_BLOCKS below uses std::numeric_limits; include <limits> here so this
// header does not rely on the includer having pulled it in first.
#include <limits>

// Asserts that THCTensor_(checkGPU) accepts the given arguments; the failure
// message tells the user to move all tensors to a single GPU.
#define THCUNN_assertSameGPU(...) THAssertMsg(THCTensor_(checkGPU)(__VA_ARGS__), \
  "Some of weight/gradient/input tensors are located on different GPUs. Please move them to a single one.")

// Use 1024 threads per block, which requires cuda sm_2x or above
const int CUDA_NUM_THREADS = 1024;

// CUDA: number of blocks for threads.
// Returns ceil(N / CUDA_NUM_THREADS) as an int, asserting that the block
// count fits in an int.
inline int GET_BLOCKS(const int64_t N)
{
  // Round up division for positive number
  auto block_num = N / CUDA_NUM_THREADS + (N % CUDA_NUM_THREADS == 0 ? 0 : 1);

  constexpr int64_t max_int = std::numeric_limits<int>::max();
  THAssertMsg(block_num <= max_int, "Can't schedule too many blocks on CUDA device");

  return static_cast<int>(block_num);
}

// Resizes index tensor I1 to the sizes of I2 when their sizes differ.
#define THCUNN_resizeAs_indices(STATE, I1, I2)              \
  if (!I1->sizes().equals(I2->sizes()))                     \
  {                                                         \
    THCudaLongTensor_resizeAs(STATE, I1, I2);               \
  }

// Raises THError when both tensors are non-NULL and their sizes differ.
#define THCUNN_check_shape(STATE, I1, I2)                   \
  if (I1 != NULL && I2 != NULL && !THCTensor_(isSameSizeAs)(STATE, I1, I2)) \
  {                                                         \
    THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, I1);       \
    THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2);       \
    THError(#I1 " and " #I2 " shapes do not match: "        \
            #I1 " %s, " #I2 " %s", s1.str, s2.str);         \
  }

// Same check as THCUNN_check_shape, for an index tensor I1 against a regular
// tensor I2 (no NULL check is performed here).
#define THCUNN_check_shape_indices(STATE, I1, I2)           \
  if (!I1->sizes().equals(I2->sizes()))                     \
  {                                                         \
    THCDescBuff s1 = THCIndexTensor_(sizeDesc)(STATE, I1);  \
    THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2);       \
    THError(#I1 " and " #I2 " shapes do not match: "        \
            #I1 " %s, " #I2 " %s", s1.str, s2.str);         \
  }

// Raises THError when both tensors are non-NULL and their element counts
// differ. The size descriptions are built from the STATE argument (previously
// this silently required a caller-scope variable literally named `state`).
#define THCUNN_check_nElement(STATE, I1, I2)                \
  if (I1 != NULL && I2 != NULL ) {                          \
    ptrdiff_t n1 = THCTensor_(nElement)(STATE, I1);         \
    ptrdiff_t n2 = THCTensor_(nElement)(STATE, I2);         \
    if (n1 != n2)                                           \
    {                                                       \
      THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, I1);     \
      THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2);     \
      THError(#I1 " and " #I2 " have different number of elements: " \
              #I1 "%s has %ld elements, while "             \
              #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \
    }                                                       \
  }

// Raises THError unless T has DIM dimensions and its DIM_SIZE-th size equals
// SIZE. Uses the STATE argument throughout.
#define THCUNN_check_dim_size(STATE, T, DIM, DIM_SIZE, SIZE) \
  if (THCTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \
      THCTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \
    THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, T);        \
    THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
            " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
  }

// Index-tensor variant of THCUNN_check_dim_size. Uses the STATE argument
// throughout.
#define THCUNN_check_dim_size_indices(STATE, T, DIM, DIM_SIZE, SIZE) \
  if (THCIndexTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \
      THCIndexTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \
    THCDescBuff s1 = THCIndexTensor_(sizeDesc)(STATE, T);   \
    THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
            " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
  }

// When COND is false, builds the size description of T and forwards it to
// THArgCheck as the argument for FORMAT. Note that COND is evaluated a second
// time inside THArgCheck. Uses the STATE argument for the size description.
#define THCUNN_argCheck(STATE, COND, ARG, T, FORMAT)        \
  if (!(COND)) {                                            \
    THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, T);        \
    THArgCheck(COND, ARG, FORMAT, s1.str);                  \
  }

#endif

View file

@ -1,26 +0,0 @@
# API docs
This document describes the conventions behind the THCUNN API.
### The API
All functions provided by THCUNN are stored in `aten/src/THCUNN/generic/THCUNN.h`.
Look at this file.
### Note on function names
Please remember that, because C doesn't support function overloading, functions taking different tensor types have different names. For example, for an Abs module there are actually two updateOutput functions:
* `void THNN_FloatAbs_updateOutput(...)`
* `void THNN_DoubleAbs_updateOutput(...)`
In these docs, such a function will be referred to as `void THCUNN_Abs_updateOutput(...)`, and it's up to the developer to add a type prefix. `real` is an alias for that type.
### Argument types
Some arguments have additional tags placed in square brackets in their header declarations:
* **[OUT]** - This is the output argument. It will be reshaped if needed.
* **[OPTIONAL]** - This argument is optional and can be safely set to NULL
* **[BUFFER]** - A buffer. `updateGradInput` and `accGradParameters` should get the same buffers that were used in `updateOutput` call.
* **[MODIFIED]** - Some functions accept an `inplace` flag. If set to true, this argument might be modified (in addition to the output).

View file

@ -1,64 +0,0 @@
## API design guidelines
Functions should return `void`.
All functions should accept arguments in the following order. `...` represents any module-specific parameters or buffers, regardless of whether they are used for writing or reading. Arguments in `...` below should be ordered like this:
```
[weight], [bias], [any buffers], [additional arguments], [optional arguments]
```
### Modules
```
updateOutput: state, input, output, ...
updateGradInput: state, input, gradOutput, gradInput, ...
accGradParameters: state, input, gradOutput, [gradWeight], [gradBias], ...
```
e.g.
```C
void THNN_(ClassNLLCriterion_updateGradInput)(
THCState *state,
THCTensor *input,
THCIndexTensor *target,
THCTensor *gradOutput,
THCTensor *gradInput,
int64_t reduction,
THCTensor *weights,
THCTensor *total_weight,
int64_t ignore_index)
```
### Criterions
```
updateOutput: state, input, target, output, ...
updateGradInput: state, input, target, gradInput, ...
```
e.g.
```C
void THNN_(ClassNLLCriterion_updateOutput)(
THCState *state,
THCTensor *input,
THCIndexTensor *target,
THCTensor *output,
int64_t reduction,
THCTensor *weights,
THCTensor *total_weight,
int64_t ignore_index)
```
## Code style guide
```C
void THNN_(GatedLinear_updateOutput)(
THCState *state,
THCTensor *input,
THCTensor *output,
int dim)
//<- 10 ->
```
All arguments should start on a new line after function name, and they should be indented using 10 spaces.
Use 2 spaces for block indentation.

View file

@ -1,29 +0,0 @@
// Two-mode header: when THC_GENERIC_FILE is not yet defined, this file only
// records its own path in that macro; otherwise it emits the declarations
// below.
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THCUNN/generic/THCUNN.h"
#else
#include <ATen/core/Reduction.h>
#include <ATen/Generator.h>
// Declaration of the MultiMarginCriterion updateOutput entry point.
// `weights` is tagged [OPTIONAL] below.
TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateOutput)(
THCState* state,
THCTensor* input,
THCIndexTensor* target,
THCTensor* output,
int64_t reduction,
int p,
THCTensor* weights, // [OPTIONAL]
accreal margin);
// Declaration of the MultiMarginCriterion updateGradInput entry point; takes
// the same arguments as updateOutput, with gradOutput/gradInput in place of
// output.
TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateGradInput)(
THCState* state,
THCTensor* input,
THCIndexTensor* target,
THCTensor* gradOutput,
THCTensor* gradInput,
int64_t reduction,
int p,
THCTensor* weights, // [OPTIONAL]
accreal margin);
#endif

View file

@ -1028,8 +1028,6 @@ if __name__ == '__main__':
'include/THC/*.cuh',
'include/THC/*.h*',
'include/THC/generic/*.h',
'include/THCUNN/*.cuh',
'include/THCUNN/generic/*.h',
'include/THH/*.cuh',
'include/THH/*.h*',
'include/THH/generic/*.h',

View file

@ -15,10 +15,6 @@ Modern infrastructure:
to import arbitrary Python files in a script, without having to add
them to the PYTHONPATH first.
Legacy infrastructure (we should kill this):
* [cwrap](cwrap) - Implementation of legacy code generation for THNN/THCUNN.
This is used by nnwrap.
Build system pieces:
* [setup_helpers](setup_helpers) - Helper code for searching for

View file

@ -81,12 +81,10 @@ includes = [
"aten/src/ATen/native/sparse/cuda/*",
"aten/src/ATen/native/quantized/cuda/*",
"aten/src/THC/*",
"aten/src/THCUNN/*",
"aten/src/ATen/test/*",
# CMakeLists.txt isn't processed by default, but there are a few
# we do want to handle, so explicitly specify them
"aten/src/THC/CMakeLists.txt",
"aten/src/THCUNN/CMakeLists.txt",
"torch/*",
"tools/autograd/templates/python_variable_methods.cpp",
]

View file

@ -750,7 +750,6 @@ def preprocessor(
or f.startswith("ATen/native/quantized/cuda")
or f.startswith("ATen/native/sparse/cuda")
or f.startswith("THC/")
or f.startswith("THCUNN/")
or (f.startswith("THC") and not f.startswith("THCP"))
):
return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension))