diff --git a/CMakeLists.txt b/CMakeLists.txt index c243652416e..9194e520bb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -288,7 +288,7 @@ option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the option(USE_XNNPACK "Use XNNPACK" ON) option(USE_ZMQ "Use ZMQ" OFF) option(USE_ZSTD "Use ZSTD" OFF) -option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF) +option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF) # Ensure that an ITT build is the default for x86 CPUs cmake_dependent_option( USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON diff --git a/build.bzl b/build.bzl index deb01aab23c..6490a7f3839 100644 --- a/build.bzl +++ b/build.bzl @@ -24,7 +24,7 @@ def define_targets(rules): "CAFFE2_USE_CUDNN", "USE_MKLDNN", "CAFFE2_USE_ITT", - "TORCH_DISABLE_GPU_ASSERTS", + "USE_ROCM_KERNEL_ASSERT", "EIGEN_MPL2_ONLY", ], ) diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt index 2d5fbf555c1..68396a654d2 100644 --- a/c10/CMakeLists.txt +++ b/c10/CMakeLists.txt @@ -18,6 +18,7 @@ set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in set(C10_USE_NUMA ${USE_NUMA}) set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) +set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) configure_file( ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h index 55171fa1a44..563625b296d 100644 --- a/c10/macros/Macros.h +++ b/c10/macros/Macros.h @@ -374,9 +374,7 @@ extern SYCL_EXTERNAL void __assert_fail( unsigned int line, const char* func); #else // __SYCL_DEVICE_ONLY__ -#if ( \ - defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \ - !defined(TORCH_DISABLE_GPU_ASSERTS)) +#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__))) // CUDA supports __assert_fail function which are common for both device // and host side code. __host__ __device__ @@ -393,18 +391,14 @@ __host__ __device__ unsigned int line, const char* function) noexcept __attribute__((__noreturn__)); -#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \ - !defined(TORCH_DISABLE_GPU_ASSERTS) -// ROCm supports __assert_fail only as a device side function. -__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail( - const char* assertion, - const char* file, - unsigned int line, - const char* function); -#endif // defined(__HIP_ARCH__) || defined(__HIP__) #endif // __SYCL_DEVICE_ONLY__ } #endif // NDEBUG +// ROCm disable kernel assert by default +#if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM) +#define CUDA_KERNEL_ASSERT(cond) +#define SYCL_KERNEL_ASSERT(cond) +#else #define CUDA_KERNEL_ASSERT(cond) \ if (C10_UNLIKELY(!(cond))) { \ __assert_fail( \ @@ -415,6 +409,7 @@ __device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail( __assert_fail( \ #cond, __FILE__, static_cast(__LINE__), __func__); \ } +#endif // C10_USE_ROCM_KERNEL_ASSERT and USE_ROCM #endif // __APPLE__ #ifdef __APPLE__ diff --git a/c10/macros/cmake_macros.h.in b/c10/macros/cmake_macros.h.in index a1e4fd8ce1b..76c185b5523 100644 --- a/c10/macros/cmake_macros.h.in +++ b/c10/macros/cmake_macros.h.in @@ -9,5 +9,6 @@ #cmakedefine C10_USE_GFLAGS #cmakedefine C10_USE_NUMA #cmakedefine C10_USE_MSVC_STATIC_RUNTIME +#cmakedefine C10_USE_ROCM_KERNEL_ASSERT #endif // C10_MACROS_CMAKE_MACROS_H_ diff --git a/c10/ovrsource_defs.bzl b/c10/ovrsource_defs.bzl index 391f3cbf698..0ca1f728631 100644 --- a/c10/ovrsource_defs.bzl +++ b/c10/ovrsource_defs.bzl @@ -104,6 +104,7 @@ def define_ovrsource_targets(): ("#cmakedefine C10_BUILD_SHARED_LIBS", ""), ("#cmakedefine C10_USE_NUMA", ""), ("#cmakedefine C10_USE_MSVC_STATIC_RUNTIME", ""), + ("#cmakedefine C10_USE_ROCM_KERNEL_ASSERT", ""), ] mobile_c10_cmake_defines = [ diff --git a/caffe2/core/macros.h.in b/caffe2/core/macros.h.in index 997752ede30..4a2fe0c946b 100644 --- a/caffe2/core/macros.h.in +++ b/caffe2/core/macros.h.in @@ -26,13 +26,13 @@ #cmakedefine CAFFE2_USE_NVTX #cmakedefine CAFFE2_USE_ITT #cmakedefine CAFFE2_USE_TRT -#cmakedefine TORCH_DISABLE_GPU_ASSERTS #ifndef EIGEN_MPL2_ONLY #cmakedefine EIGEN_MPL2_ONLY #endif // Useful build settings that are recorded in the compiled binary +// torch.__build__.show() #define CAFFE2_BUILD_STRINGS { \ {"TORCH_VERSION", "${TORCH_VERSION}"}, \ {"CXX_COMPILER", "${CMAKE_CXX_COMPILER}"}, \ @@ -68,5 +68,5 @@ {"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \ {"USE_ITT", "${CAFFE2_USE_ITT}"}, \ {"USE_TRT", "${CAFFE2_USE_TRT}"}, \ - {"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \ + {"USE_ROCM_KERNEL_ASSERT", "${USE_ROCM_KERNEL_ASSERT}"}, \ } diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 4ba9bd44f50..acc95842b63 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1192,16 +1192,6 @@ if(ANDROID) list(APPEND Caffe2_DEPENDENCY_LIBS log) endif() -# ---[ Kernel asserts -# Kernel asserts are enabled by default for CUDA and disabled for ROCm. -# For ROCm, it can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS -if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS) - message(STATUS "Forcefully enabling kernel asserts on ROCM") -elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS) - message(STATUS "Disabling kernel asserts for ROCm") - caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON) -endif() - # ---[ LLVM if(USE_LLVM) message(STATUS "Looking for LLVM in ${USE_LLVM}") @@ -1249,6 +1239,7 @@ if(USE_ROCM) caffe2_update_option(USE_SYSTEM_NCCL ON) endif() + list(APPEND HIP_CXX_FLAGS -fPIC) list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_AMD__=1) list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1) @@ -1291,6 +1282,15 @@ if(USE_ROCM) list(APPEND Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS roc::hipblas hip::hipfft hip::hiprand roc::hipsparse roc::hipsolver) + # ---[ Kernel asserts + # Kernel asserts is disabled for ROCm by default. + # It can be turned on by turning on the env USE_ROCM_KERNEL_ASSERT to the build system. + if(USE_ROCM_KERNEL_ASSERT) + message(STATUS "Enabling Kernel Assert for ROCm") + else() + message(STATUS "Disabling Kernel Assert for ROCm") + endif() + else() caffe2_update_option(USE_ROCM OFF) endif() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 0cb9aef3e62..9c05aac28be 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -198,5 +198,5 @@ function(caffe2_print_configuration_summary) # coreml message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}") message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}") - message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}") + message(STATUS " USE_ROCM_KERNEL_ASSERT : ${USE_ROCM_KERNEL_ASSERT}") endfunction() diff --git a/setup.py b/setup.py index 5e59c4a0986..86cad767c12 100644 --- a/setup.py +++ b/setup.py @@ -160,6 +160,9 @@ # USE_ZSTD # Enables use of ZSTD, if the libraries are found # +# USE_ROCM_KERNEL_ASSERT=1 +# Enable kernel assert in ROCm platform +# # Environment variables we respect (these environment variables are # conventional and are often understood/set by other software.) #