switch to rocThrust for thrust/cub APIs (#25620)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25620

Pull Request resolved: https://github.com/pytorch/pytorch/pull/25602

Enable rocThrust with hipCUB and rocPRIM for ROCm. They are the ROCm implementations of the thrust and cub APIs and replace the older hip-thrust and cub-hip packages going forward. ROCm 2.5 is the first release to contain the new packages as an option, as of 2.6 they will be the only available option.

Add hipification rules to correctly hipify thrust::cuda to thrust::hip and cub:: to hipcub:: going forward. Add hipification rules to hipify specific cub headers to the general hipcub header.

Infrastructure work to correctly find, include and link against the new packages. Add the macro definition to choose the HIP backend to Thrust.

Since include chains are now a little different from CUDA's Thrust, add includes for functionality used where applicable.

Skip four tests that fail with the new rocThrust for now.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/21864

Reviewed By: xw285cornell

Differential Revision: D16940768

Pulled By: bddppq

fbshipit-source-id: 3dba8a8f1763dd23d89eb0dd26d1db109973dbe5
This commit is contained in:
iotamudelta 2019-09-03 22:14:51 -07:00 committed by Facebook Github Bot
parent 68b9920c7c
commit 4fe857187c
19 changed files with 107 additions and 41 deletions

View file

@ -14,7 +14,7 @@ from dataclasses import dataclass
DOCKER_IMAGE_PATH_BASE = "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/"
DOCKER_IMAGE_VERSION = 287
DOCKER_IMAGE_VERSION = 301
@dataclass

View file

@ -1835,7 +1835,7 @@ workflows:
- master
- /ci-all\/.*/
build_environment: "caffe2-py2-gcc4.8-ubuntu14.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:301"
- caffe2_linux_test:
name: caffe2_py2_gcc4_8_ubuntu14_04_test
requires:
@ -1847,7 +1847,7 @@ workflows:
- master
- /ci-all\/.*/
build_environment: "caffe2-py2-gcc4.8-ubuntu14.04-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:301"
resource_class: large
- caffe2_linux_build:
name: caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build
@ -1859,7 +1859,7 @@ workflows:
- master
- /ci-all\/.*/
build_environment: "caffe2-py2-cuda9.0-cudnn7-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
- caffe2_linux_test:
name: caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_test
requires:
@ -1872,14 +1872,14 @@ workflows:
- /ci-all\/.*/
build_environment: "caffe2-py2-cuda9.0-cudnn7-ubuntu16.04-test"
use_cuda_docker_runtime: "1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
resource_class: gpu.medium
- caffe2_linux_build:
name: caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
- caffe2_linux_test:
name: caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_test
requires:
@ -1887,14 +1887,14 @@ workflows:
- caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build
build_environment: "caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-test"
use_cuda_docker_runtime: "1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
resource_class: gpu.medium
- caffe2_linux_build:
name: caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:301"
- caffe2_linux_test:
name: caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_test
requires:
@ -1902,35 +1902,35 @@ workflows:
- caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build
build_environment: "caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-test"
use_cuda_docker_runtime: "1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:301"
resource_class: gpu.medium
- caffe2_linux_build:
name: caffe2_py2_mkl_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-py2-mkl-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:301"
- caffe2_linux_test:
name: caffe2_py2_mkl_ubuntu16_04_test
requires:
- setup
- caffe2_py2_mkl_ubuntu16_04_build
build_environment: "caffe2-py2-mkl-ubuntu16.04-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:301"
resource_class: large
- caffe2_linux_build:
name: caffe2_onnx_py2_gcc5_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-onnx-py2-gcc5-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:301"
- caffe2_linux_test:
name: caffe2_onnx_py2_gcc5_ubuntu16_04_test
requires:
- setup
- caffe2_onnx_py2_gcc5_ubuntu16_04_build
build_environment: "caffe2-onnx-py2-gcc5-ubuntu16.04-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:301"
resource_class: large
- caffe2_linux_build:
name: caffe2_py2_clang3_8_ubuntu16_04_build
@ -1942,7 +1942,7 @@ workflows:
- master
- /ci-all\/.*/
build_environment: "caffe2-py2-clang3.8-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:301"
build_only: "1"
- caffe2_linux_build:
name: caffe2_py2_clang3_9_ubuntu16_04_build
@ -1954,35 +1954,35 @@ workflows:
- master
- /ci-all\/.*/
build_environment: "caffe2-py2-clang3.9-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:301"
build_only: "1"
- caffe2_linux_build:
name: caffe2_py2_clang7_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-py2-clang7-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang7-ubuntu16.04:301"
build_only: "1"
- caffe2_linux_build:
name: caffe2_onnx_py3_6_clang7_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-onnx-py3.6-clang7-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:301"
- caffe2_linux_test:
name: caffe2_onnx_py3_6_clang7_ubuntu16_04_test
requires:
- setup
- caffe2_onnx_py3_6_clang7_ubuntu16_04_build
build_environment: "caffe2-onnx-py3.6-clang7-ubuntu16.04-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:301"
resource_class: large
- caffe2_linux_build:
name: caffe2_py2_android_ubuntu16_04_build
requires:
- setup
build_environment: "caffe2-py2-android-ubuntu16.04-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:301"
build_only: "1"
- caffe2_linux_build:
name: caffe2_py2_cuda9_0_cudnn7_centos7_build
@ -1994,7 +1994,7 @@ workflows:
- master
- /ci-all\/.*/
build_environment: "caffe2-py2-cuda9.0-cudnn7-centos7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:301"
- caffe2_linux_test:
name: caffe2_py2_cuda9_0_cudnn7_centos7_test
requires:
@ -2007,7 +2007,7 @@ workflows:
- /ci-all\/.*/
build_environment: "caffe2-py2-cuda9.0-cudnn7-centos7-test"
use_cuda_docker_runtime: "1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:287"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:301"
resource_class: gpu.medium
- caffe2_macos_build:
name: caffe2_py2_ios_macos10_13_build

View file

@ -10,6 +10,7 @@
#include <THC/THCThrustAllocator.cuh>
#include <thrust/execution_policy.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/unique.h>
#include <ATen/native/cuda/EmbeddingBackwardKernel.cuh>

View file

@ -13,6 +13,7 @@
#include <thrust/execution_policy.h>
#include <thrust/unique.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/device_vector.h>
#include <ATen/native/cuda/EmbeddingBackwardKernel.cuh>

View file

@ -6,6 +6,7 @@
#include <tuple>
#include <iterator>
#include <thrust/adjacent_difference.h>
#include <thrust/unique.h>
#include <thrust/sort.h>
#include <thrust/scan.h>

View file

@ -2,6 +2,8 @@
#define THC_GENERIC_FILE "THC/generic/THCTensorMode.cu"
#else
#include <thrust/iterator/constant_iterator.h>
void THCTensor_(calculateMode)(THCState *state,
THCTensor *values,
THCudaLongTensor *indices,

View file

@ -2,6 +2,8 @@
#define THC_GENERIC_FILE "THCUNN/generic/LookupTable.cu"
#else
#include <thrust/iterator/constant_iterator.h>
void THNN_(LookupTable_accGradParameters)(
THCState *state,
THCIndexTensor *input,

View file

@ -257,6 +257,14 @@ const char* cublasGetErrorString(cublasStatus_t error) {
#ifdef __HIP_PLATFORM_HCC__
case rocblas_status_invalid_size:
return "rocblas_status_invalid_size";
case rocblas_status_perf_degraded:
return "rocblas_status_perf_degraded";
case rocblas_status_size_query_mismatch:
return "rocblas_status_size_query_mismatch";
case rocblas_status_size_increased:
return "rocblas_status_size_increased";
case rocblas_status_size_unchanged:
return "rocblas_status_size_unchanged";
#endif
}
// To suppress compiler warning.

View file

@ -6,6 +6,10 @@
#include "caffe2/operators/generate_proposals_op_util_nms.h"
#include "caffe2/operators/generate_proposals_op_util_nms_gpu.h"
#ifdef __HIP_PLATFORM_HCC__
#include <cfloat>
#endif
using caffe2::utils::RotatedBox;
namespace caffe2 {

View file

@ -2,6 +2,10 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/reduce_front_back_max_ops.h"
#ifdef __HIP_PLATFORM_HCC__
#include <cfloat>
#endif
namespace caffe2 {
/***

View file

@ -3,7 +3,15 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/rmac_regions_op.h"
#ifdef __HIP_PLATFORM_HCC__
#include <cfloat>
#endif
#ifdef __HIP_PLATFORM_HCC__
namespace rocprim {
#else
namespace cub {
#endif
template <typename KeyT, typename ValueT>
inline __host__ __device__ bool operator<(

View file

@ -21,6 +21,7 @@
#include <thrust/sort.h>
#include <thrust/system/cuda/execution_policy.h>
#include <thrust/unique.h>
#include <thrust/version.h>
#include "caffe2/core/context_gpu.h"
namespace caffe2 {

View file

@ -622,10 +622,8 @@ CAFFE2_CUDA_EXPORT void Gemm<at::Half, CUDAContext>(
N, // ldd
rocblas_datatype_f32_r, // compute type
rocblas_gemm_algo_standard, // rocblas_gemm_algo
0, // solution index, reserved for future use
0, // flags, reserved for future use
NULL, // size of workspace
NULL)); // workspace
0, // solution index, reserved for future use
0)); // flags, reserved for future use
#else
CUBLAS_ENFORCE(cublasSgemmEx(
context->cublas_handle(),
@ -1033,10 +1031,8 @@ CAFFE2_CUDA_EXPORT void GemmStridedBatched<at::Half, CUDAContext>(
batch_size,
rocblas_datatype_f32_r, // compute type
rocblas_gemm_algo_standard, // rocblas_gemm_algo
0, // solution index, reserved for future use
0, // flags, reserved for future use
NULL, // size of workspace
NULL)); // workspace
0, // solution index, reserved for future use
0)); // flags, reserved for future use
#else
CUBLAS_ENFORCE(cublasGemmStridedBatchedEx(
context->cublas_handle(),
@ -1178,10 +1174,8 @@ CAFFE2_CUDA_EXPORT void Gemv<at::Half, CUDAContext>(
ldc, // ldd
rocblas_datatype_f32_r, // compute type
rocblas_gemm_algo_standard, // rocblas_gemm_algo
0, // solution index, reserved for future use
0, // flags, reserved for future use
NULL, // size of workspace
NULL)); // workspace
0, // solution index, reserved for future use
0)); // flags, reserved for future use
#else
CUBLAS_ENFORCE(cublasSgemmEx(
context->cublas_handle(),

View file

@ -872,6 +872,7 @@ if(USE_ROCM)
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
if(CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND HIP_CXX_FLAGS -g)
@ -887,13 +888,12 @@ if(USE_ROCM)
endforeach()
set(Caffe2_HIP_INCLUDE
${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${thrust_INCLUDE_DIRS} $<INSTALL_INTERFACE:include> ${Caffe2_HIP_INCLUDE})
${thrust_INCLUDE_DIRS} ${hipcub_INCLUDE_DIRS} ${rocprim_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} $<INSTALL_INTERFACE:include> ${Caffe2_HIP_INCLUDE})
# This is needed for library added by hip_add_library (same for hip_add_executable)
hip_include_directories(${Caffe2_HIP_INCLUDE})
set(Caffe2_HIP_DEPENDENCY_LIBS
${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES})
${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${hipcub_LIBRARIES})
# Note [rocblas & rocfft cmake bug]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View file

@ -80,6 +80,27 @@ ELSE()
SET(MIOPEN_PATH $ENV{MIOPEN_PATH})
ENDIF()
# ROCPRIM_PATH
IF(NOT DEFINED ENV{ROCPRIM_PATH})
SET(ROCPRIM_PATH ${ROCM_PATH}/rocprim)
ELSE()
SET(ROCPRIM_PATH $ENV{ROCPRIM_PATH})
ENDIF()
# HIPCUB_PATH
IF(NOT DEFINED ENV{HIPCUB_PATH})
SET(HIPCUB_PATH ${ROCM_PATH}/hipcub)
ELSE()
SET(HIPCUB_PATH $ENV{HIPCUB_PATH})
ENDIF()
# ROCTHRUST_PATH
IF(NOT DEFINED ENV{ROCTHRUST_PATH})
SET(ROCTHRUST_PATH ${ROCM_PATH}/rocthrust)
ELSE()
SET(ROCTHRUST_PATH $ENV{ROCTHRUST_PATH})
ENDIF()
IF(NOT DEFINED ENV{PYTORCH_ROCM_ARCH})
SET(PYTORCH_ROCM_ARCH gfx803;gfx900;gfx906)
ELSE()
@ -124,6 +145,9 @@ IF(HIP_FOUND)
set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim)
set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub)
set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust)
find_package_and_print_version(rocrand REQUIRED)
find_package_and_print_version(hiprand REQUIRED)
@ -131,7 +155,10 @@ IF(HIP_FOUND)
find_package_and_print_version(miopen REQUIRED)
find_package_and_print_version(rocfft REQUIRED)
find_package_and_print_version(hipsparse REQUIRED)
find_package_and_print_version(rocprim REQUIRED)
find_package_and_print_version(hipcub REQUIRED)
find_package_and_print_version(rocthrust REQUIRED)
# TODO: hip_hcc has an interface include flag "-hc" which is only
# recognizable by hcc, but not gcc and clang. Right now in our
# setup, hcc is only used for linking, but it should be used to
@ -146,6 +173,4 @@ IF(HIP_FOUND)
set(hcc_INCLUDE_DIRS ${HCC_PATH}/include)
set(hsa_INCLUDE_DIRS ${HSA_PATH}/include)
set(thrust_INCLUDE_DIRS ${THRUST_PATH} ${THRUST_PATH}/thrust/system/cuda/detail/cub-hip)
ENDIF()

View file

@ -27,7 +27,8 @@ install_ubuntu() {
cxlactivitylogger \
hipsparse \
rocrand \
hip-thrust \
hipcub \
rocthrust \
rccl
}
@ -59,7 +60,8 @@ install_centos() {
hipsparse \
rocrand \
rccl \
hip-thrust
hipcub \
rocthrust
}
# Install Python packages depending on the base OS

View file

@ -273,6 +273,7 @@ class TestSparse(TestCase):
else:
return tensor.to_dense()
@skipIfRocm
def test_to_sparse(self):
shape = [10, 5, 19, 8]
max_nnz = 1

View file

@ -6857,6 +6857,7 @@ class _TestTorchMixin(torchtest):
test(u.mm(s.diag()).mm(v))
@skipIfNoLapack
@skipIfRocm
def test_det_logdet_slogdet(self):
self._test_det_logdet_slogdet(self, 'cpu')
@ -8970,6 +8971,7 @@ class _TestTorchMixin(torchtest):
self.assertEqual(tensor.std(), tensor.std(unbiased=True))
self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False))
@skipIfRocm
def test_structseq_repr(self):
a = torch.arange(250).reshape(5, 5, 10)
expected = """
@ -12368,6 +12370,7 @@ tensor([[[1., 1., 1., ..., 1., 1., 1.],
if torch.cuda.is_available():
run_test(torch.device('cuda'))
@skipIfRocm
def test_unique_dim(self):
self.assertFalse(hasattr(torch, 'unique_dim'))

View file

@ -277,6 +277,13 @@ CUDA_INCLUDE_MAP = collections.OrderedDict([
("cusparse.h", ("hipsparse.h", CONV_INCLUDE, API_RAND)),
("cufft.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
("cufftXt.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
("thrust/system/cuda/", ("thrust/system/hip/", CONV_INCLUDE, API_BLAS)),
("cub/util_allocator.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/block/block_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/cub.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/block/block_load.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/device/device_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
])
CUDA_IDENTIFIER_MAP = collections.OrderedDict([
@ -2174,6 +2181,8 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict([
("cufftDestroy", ("hipfftDestroy", CONV_MATH_FUNC, API_FFT)),
("cufftGetVersion", ("hipfftGetVersion", CONV_MATH_FUNC, API_FFT)),
("cufftGetProperty", ("hipfftGetProperty", CONV_MATH_FUNC, API_FFT, HIP_UNSUPPORTED)),
("thrust::cuda::", ("thrust::hip::", CONV_MATH_FUNC, API_BLAS)),
("cub::", ("hipcub::", CONV_MATH_FUNC, API_BLAS)),
])
CUDA_SPARSE_MAP = collections.OrderedDict([