mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
switch to rocThrust for thrust/cub APIs (#25620)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/25620 Pull Request resolved: https://github.com/pytorch/pytorch/pull/25602 Enable rocThrust with hipCUB and rocPRIM for ROCm. They are the ROCm implementations of the thrust and cub APIs and replace the older hip-thrust and cub-hip packages going forward. ROCm 2.5 is the first release to contain the new packages as an option, as of 2.6 they will be the only available option. Add hipification rules to correctly hipify thrust::cuda to thrust::hip and cub:: to hipcub:: going forward. Add hipification rules to hipify specific cub headers to the general hipcub header. Infrastructure work to correctly find, include and link against the new packages. Add the macro definition to choose the HIP backend to Thrust. Since include chains are now a little different from CUDA's Thrust, add includes for functionality used where applicable. Skip four tests that fail with the new rocThrust for now. Pull Request resolved: https://github.com/pytorch/pytorch/pull/21864 Reviewed By: xw285cornell Differential Revision: D16940768 Pulled By: bddppq fbshipit-source-id: 3dba8a8f1763dd23d89eb0dd26d1db109973dbe5
This commit is contained in:
parent
68b9920c7c
commit
4fe857187c
19 changed files with 107 additions and 41 deletions
|
|
@ -14,7 +14,7 @@ from dataclasses import dataclass
|
|||
|
||||
DOCKER_IMAGE_PATH_BASE = "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/"
|
||||
|
||||
DOCKER_IMAGE_VERSION = 287
|
||||
DOCKER_IMAGE_VERSION = 301
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -1835,7 +1835,7 @@ workflows:
|
|||
- master
|
||||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-gcc4.8-ubuntu14.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_py2_gcc4_8_ubuntu14_04_test
|
||||
requires:
|
||||
|
|
@ -1847,7 +1847,7 @@ workflows:
|
|||
- master
|
||||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-gcc4.8-ubuntu14.04-test"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:301"
|
||||
resource_class: large
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build
|
||||
|
|
@ -1859,7 +1859,7 @@ workflows:
|
|||
- master
|
||||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-cuda9.0-cudnn7-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_test
|
||||
requires:
|
||||
|
|
@ -1872,14 +1872,14 @@ workflows:
|
|||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-cuda9.0-cudnn7-ubuntu16.04-test"
|
||||
use_cuda_docker_runtime: "1"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
|
||||
resource_class: gpu.medium
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_test
|
||||
requires:
|
||||
|
|
@ -1887,14 +1887,14 @@ workflows:
|
|||
- caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build
|
||||
build_environment: "caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-test"
|
||||
use_cuda_docker_runtime: "1"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301"
|
||||
resource_class: gpu.medium
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_test
|
||||
requires:
|
||||
|
|
@ -1902,35 +1902,35 @@ workflows:
|
|||
- caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build
|
||||
build_environment: "caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-test"
|
||||
use_cuda_docker_runtime: "1"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:301"
|
||||
resource_class: gpu.medium
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_mkl_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-py2-mkl-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_py2_mkl_ubuntu16_04_test
|
||||
requires:
|
||||
- setup
|
||||
- caffe2_py2_mkl_ubuntu16_04_build
|
||||
build_environment: "caffe2-py2-mkl-ubuntu16.04-test"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:301"
|
||||
resource_class: large
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_onnx_py2_gcc5_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-onnx-py2-gcc5-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_onnx_py2_gcc5_ubuntu16_04_test
|
||||
requires:
|
||||
- setup
|
||||
- caffe2_onnx_py2_gcc5_ubuntu16_04_build
|
||||
build_environment: "caffe2-onnx-py2-gcc5-ubuntu16.04-test"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:301"
|
||||
resource_class: large
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_clang3_8_ubuntu16_04_build
|
||||
|
|
@ -1942,7 +1942,7 @@ workflows:
|
|||
- master
|
||||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-clang3.8-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:301"
|
||||
build_only: "1"
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_clang3_9_ubuntu16_04_build
|
||||
|
|
@ -1954,35 +1954,35 @@ workflows:
|
|||
- master
|
||||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-clang3.9-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:301"
|
||||
build_only: "1"
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_clang7_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-py2-clang7-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang7-ubuntu16.04:301"
|
||||
build_only: "1"
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_onnx_py3_6_clang7_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-onnx-py3.6-clang7-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_onnx_py3_6_clang7_ubuntu16_04_test
|
||||
requires:
|
||||
- setup
|
||||
- caffe2_onnx_py3_6_clang7_ubuntu16_04_build
|
||||
build_environment: "caffe2-onnx-py3.6-clang7-ubuntu16.04-test"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:301"
|
||||
resource_class: large
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_android_ubuntu16_04_build
|
||||
requires:
|
||||
- setup
|
||||
build_environment: "caffe2-py2-android-ubuntu16.04-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:301"
|
||||
build_only: "1"
|
||||
- caffe2_linux_build:
|
||||
name: caffe2_py2_cuda9_0_cudnn7_centos7_build
|
||||
|
|
@ -1994,7 +1994,7 @@ workflows:
|
|||
- master
|
||||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-cuda9.0-cudnn7-centos7-build"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:301"
|
||||
- caffe2_linux_test:
|
||||
name: caffe2_py2_cuda9_0_cudnn7_centos7_test
|
||||
requires:
|
||||
|
|
@ -2007,7 +2007,7 @@ workflows:
|
|||
- /ci-all\/.*/
|
||||
build_environment: "caffe2-py2-cuda9.0-cudnn7-centos7-test"
|
||||
use_cuda_docker_runtime: "1"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:287"
|
||||
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:301"
|
||||
resource_class: gpu.medium
|
||||
- caffe2_macos_build:
|
||||
name: caffe2_py2_ios_macos10_13_build
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include <THC/THCThrustAllocator.cuh>
|
||||
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
#include <thrust/unique.h>
|
||||
|
||||
#include <ATen/native/cuda/EmbeddingBackwardKernel.cuh>
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/unique.h>
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
#include <thrust/device_vector.h>
|
||||
|
||||
#include <ATen/native/cuda/EmbeddingBackwardKernel.cuh>
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <tuple>
|
||||
#include <iterator>
|
||||
#include <thrust/adjacent_difference.h>
|
||||
#include <thrust/unique.h>
|
||||
#include <thrust/sort.h>
|
||||
#include <thrust/scan.h>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
#define THC_GENERIC_FILE "THC/generic/THCTensorMode.cu"
|
||||
#else
|
||||
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
|
||||
void THCTensor_(calculateMode)(THCState *state,
|
||||
THCTensor *values,
|
||||
THCudaLongTensor *indices,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
#define THC_GENERIC_FILE "THCUNN/generic/LookupTable.cu"
|
||||
#else
|
||||
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
|
||||
void THNN_(LookupTable_accGradParameters)(
|
||||
THCState *state,
|
||||
THCIndexTensor *input,
|
||||
|
|
|
|||
|
|
@ -257,6 +257,14 @@ const char* cublasGetErrorString(cublasStatus_t error) {
|
|||
#ifdef __HIP_PLATFORM_HCC__
|
||||
case rocblas_status_invalid_size:
|
||||
return "rocblas_status_invalid_size";
|
||||
case rocblas_status_perf_degraded:
|
||||
return "rocblas_status_perf_degraded";
|
||||
case rocblas_status_size_query_mismatch:
|
||||
return "rocblas_status_size_query_mismatch";
|
||||
case rocblas_status_size_increased:
|
||||
return "rocblas_status_size_increased";
|
||||
case rocblas_status_size_unchanged:
|
||||
return "rocblas_status_size_unchanged";
|
||||
#endif
|
||||
}
|
||||
// To suppress compiler warning.
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@
|
|||
#include "caffe2/operators/generate_proposals_op_util_nms.h"
|
||||
#include "caffe2/operators/generate_proposals_op_util_nms_gpu.h"
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <cfloat>
|
||||
#endif
|
||||
|
||||
using caffe2::utils::RotatedBox;
|
||||
|
||||
namespace caffe2 {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@
|
|||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/reduce_front_back_max_ops.h"
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <cfloat>
|
||||
#endif
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
/***
|
||||
|
|
|
|||
|
|
@ -3,7 +3,15 @@
|
|||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/rmac_regions_op.h"
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <cfloat>
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
namespace rocprim {
|
||||
#else
|
||||
namespace cub {
|
||||
#endif
|
||||
|
||||
template <typename KeyT, typename ValueT>
|
||||
inline __host__ __device__ bool operator<(
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
#include <thrust/sort.h>
|
||||
#include <thrust/system/cuda/execution_policy.h>
|
||||
#include <thrust/unique.h>
|
||||
#include <thrust/version.h>
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
|
|
|||
|
|
@ -622,10 +622,8 @@ CAFFE2_CUDA_EXPORT void Gemm<at::Half, CUDAContext>(
|
|||
N, // ldd
|
||||
rocblas_datatype_f32_r, // compute type
|
||||
rocblas_gemm_algo_standard, // rocblas_gemm_algo
|
||||
0, // solution index, reserved for future use
|
||||
0, // flags, reserved for future use
|
||||
NULL, // size of workspace
|
||||
NULL)); // workspace
|
||||
0, // solution index, reserved for future use
|
||||
0)); // flags, reserved for future use
|
||||
#else
|
||||
CUBLAS_ENFORCE(cublasSgemmEx(
|
||||
context->cublas_handle(),
|
||||
|
|
@ -1033,10 +1031,8 @@ CAFFE2_CUDA_EXPORT void GemmStridedBatched<at::Half, CUDAContext>(
|
|||
batch_size,
|
||||
rocblas_datatype_f32_r, // compute type
|
||||
rocblas_gemm_algo_standard, // rocblas_gemm_algo
|
||||
0, // solution index, reserved for future use
|
||||
0, // flags, reserved for future use
|
||||
NULL, // size of workspace
|
||||
NULL)); // workspace
|
||||
0, // solution index, reserved for future use
|
||||
0)); // flags, reserved for future use
|
||||
#else
|
||||
CUBLAS_ENFORCE(cublasGemmStridedBatchedEx(
|
||||
context->cublas_handle(),
|
||||
|
|
@ -1178,10 +1174,8 @@ CAFFE2_CUDA_EXPORT void Gemv<at::Half, CUDAContext>(
|
|||
ldc, // ldd
|
||||
rocblas_datatype_f32_r, // compute type
|
||||
rocblas_gemm_algo_standard, // rocblas_gemm_algo
|
||||
0, // solution index, reserved for future use
|
||||
0, // flags, reserved for future use
|
||||
NULL, // size of workspace
|
||||
NULL)); // workspace
|
||||
0, // solution index, reserved for future use
|
||||
0)); // flags, reserved for future use
|
||||
#else
|
||||
CUBLAS_ENFORCE(cublasSgemmEx(
|
||||
context->cublas_handle(),
|
||||
|
|
|
|||
|
|
@ -872,6 +872,7 @@ if(USE_ROCM)
|
|||
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
|
||||
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
|
||||
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
|
||||
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
|
||||
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
list(APPEND HIP_CXX_FLAGS -g)
|
||||
|
|
@ -887,13 +888,12 @@ if(USE_ROCM)
|
|||
endforeach()
|
||||
|
||||
set(Caffe2_HIP_INCLUDE
|
||||
${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${thrust_INCLUDE_DIRS} $<INSTALL_INTERFACE:include> ${Caffe2_HIP_INCLUDE})
|
||||
|
||||
${thrust_INCLUDE_DIRS} ${hipcub_INCLUDE_DIRS} ${rocprim_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} $<INSTALL_INTERFACE:include> ${Caffe2_HIP_INCLUDE})
|
||||
# This is needed for library added by hip_add_library (same for hip_add_executable)
|
||||
hip_include_directories(${Caffe2_HIP_INCLUDE})
|
||||
|
||||
set(Caffe2_HIP_DEPENDENCY_LIBS
|
||||
${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES})
|
||||
${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${hipcub_LIBRARIES})
|
||||
|
||||
# Note [rocblas & rocfft cmake bug]
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
|
|||
|
|
@ -80,6 +80,27 @@ ELSE()
|
|||
SET(MIOPEN_PATH $ENV{MIOPEN_PATH})
|
||||
ENDIF()
|
||||
|
||||
# ROCPRIM_PATH
|
||||
IF(NOT DEFINED ENV{ROCPRIM_PATH})
|
||||
SET(ROCPRIM_PATH ${ROCM_PATH}/rocprim)
|
||||
ELSE()
|
||||
SET(ROCPRIM_PATH $ENV{ROCPRIM_PATH})
|
||||
ENDIF()
|
||||
|
||||
# HIPCUB_PATH
|
||||
IF(NOT DEFINED ENV{HIPCUB_PATH})
|
||||
SET(HIPCUB_PATH ${ROCM_PATH}/hipcub)
|
||||
ELSE()
|
||||
SET(HIPCUB_PATH $ENV{HIPCUB_PATH})
|
||||
ENDIF()
|
||||
|
||||
# ROCTHRUST_PATH
|
||||
IF(NOT DEFINED ENV{ROCTHRUST_PATH})
|
||||
SET(ROCTHRUST_PATH ${ROCM_PATH}/rocthrust)
|
||||
ELSE()
|
||||
SET(ROCTHRUST_PATH $ENV{ROCTHRUST_PATH})
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ENV{PYTORCH_ROCM_ARCH})
|
||||
SET(PYTORCH_ROCM_ARCH gfx803;gfx900;gfx906)
|
||||
ELSE()
|
||||
|
|
@ -124,6 +145,9 @@ IF(HIP_FOUND)
|
|||
set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
|
||||
set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
|
||||
set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
|
||||
set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim)
|
||||
set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub)
|
||||
set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust)
|
||||
|
||||
find_package_and_print_version(rocrand REQUIRED)
|
||||
find_package_and_print_version(hiprand REQUIRED)
|
||||
|
|
@ -131,7 +155,10 @@ IF(HIP_FOUND)
|
|||
find_package_and_print_version(miopen REQUIRED)
|
||||
find_package_and_print_version(rocfft REQUIRED)
|
||||
find_package_and_print_version(hipsparse REQUIRED)
|
||||
|
||||
find_package_and_print_version(rocprim REQUIRED)
|
||||
find_package_and_print_version(hipcub REQUIRED)
|
||||
find_package_and_print_version(rocthrust REQUIRED)
|
||||
|
||||
# TODO: hip_hcc has an interface include flag "-hc" which is only
|
||||
# recognizable by hcc, but not gcc and clang. Right now in our
|
||||
# setup, hcc is only used for linking, but it should be used to
|
||||
|
|
@ -146,6 +173,4 @@ IF(HIP_FOUND)
|
|||
set(hcc_INCLUDE_DIRS ${HCC_PATH}/include)
|
||||
set(hsa_INCLUDE_DIRS ${HSA_PATH}/include)
|
||||
|
||||
set(thrust_INCLUDE_DIRS ${THRUST_PATH} ${THRUST_PATH}/thrust/system/cuda/detail/cub-hip)
|
||||
|
||||
ENDIF()
|
||||
|
|
|
|||
|
|
@ -27,7 +27,8 @@ install_ubuntu() {
|
|||
cxlactivitylogger \
|
||||
hipsparse \
|
||||
rocrand \
|
||||
hip-thrust \
|
||||
hipcub \
|
||||
rocthrust \
|
||||
rccl
|
||||
}
|
||||
|
||||
|
|
@ -59,7 +60,8 @@ install_centos() {
|
|||
hipsparse \
|
||||
rocrand \
|
||||
rccl \
|
||||
hip-thrust
|
||||
hipcub \
|
||||
rocthrust
|
||||
}
|
||||
|
||||
# Install Python packages depending on the base OS
|
||||
|
|
|
|||
|
|
@ -273,6 +273,7 @@ class TestSparse(TestCase):
|
|||
else:
|
||||
return tensor.to_dense()
|
||||
|
||||
@skipIfRocm
|
||||
def test_to_sparse(self):
|
||||
shape = [10, 5, 19, 8]
|
||||
max_nnz = 1
|
||||
|
|
|
|||
|
|
@ -6857,6 +6857,7 @@ class _TestTorchMixin(torchtest):
|
|||
test(u.mm(s.diag()).mm(v))
|
||||
|
||||
@skipIfNoLapack
|
||||
@skipIfRocm
|
||||
def test_det_logdet_slogdet(self):
|
||||
self._test_det_logdet_slogdet(self, 'cpu')
|
||||
|
||||
|
|
@ -8970,6 +8971,7 @@ class _TestTorchMixin(torchtest):
|
|||
self.assertEqual(tensor.std(), tensor.std(unbiased=True))
|
||||
self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False))
|
||||
|
||||
@skipIfRocm
|
||||
def test_structseq_repr(self):
|
||||
a = torch.arange(250).reshape(5, 5, 10)
|
||||
expected = """
|
||||
|
|
@ -12368,6 +12370,7 @@ tensor([[[1., 1., 1., ..., 1., 1., 1.],
|
|||
if torch.cuda.is_available():
|
||||
run_test(torch.device('cuda'))
|
||||
|
||||
@skipIfRocm
|
||||
def test_unique_dim(self):
|
||||
self.assertFalse(hasattr(torch, 'unique_dim'))
|
||||
|
||||
|
|
|
|||
|
|
@ -277,6 +277,13 @@ CUDA_INCLUDE_MAP = collections.OrderedDict([
|
|||
("cusparse.h", ("hipsparse.h", CONV_INCLUDE, API_RAND)),
|
||||
("cufft.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
|
||||
("cufftXt.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
|
||||
("thrust/system/cuda/", ("thrust/system/hip/", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/util_allocator.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/block/block_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/cub.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/block/block_load.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/device/device_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
])
|
||||
|
||||
CUDA_IDENTIFIER_MAP = collections.OrderedDict([
|
||||
|
|
@ -2174,6 +2181,8 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict([
|
|||
("cufftDestroy", ("hipfftDestroy", CONV_MATH_FUNC, API_FFT)),
|
||||
("cufftGetVersion", ("hipfftGetVersion", CONV_MATH_FUNC, API_FFT)),
|
||||
("cufftGetProperty", ("hipfftGetProperty", CONV_MATH_FUNC, API_FFT, HIP_UNSUPPORTED)),
|
||||
("thrust::cuda::", ("thrust::hip::", CONV_MATH_FUNC, API_BLAS)),
|
||||
("cub::", ("hipcub::", CONV_MATH_FUNC, API_BLAS)),
|
||||
])
|
||||
|
||||
CUDA_SPARSE_MAP = collections.OrderedDict([
|
||||
|
|
|
|||
Loading…
Reference in a new issue