[CUDA] remove CUBLAS_TENSOR_OP_MATH mode (#19431)

This pull request replaces `CUBLAS_TENSOR_OP_MATH` with
`CUBLAS_DEFAULT_MATH`. The changes affect several files, including test
cases and a Python script for the AMD hipify process.

### Motivation and Context

CUBLAS_TENSOR_OP_MATH mode is deprecated:
https://docs.nvidia.com/cuda/cublas/index.html#cublasmath-t

On CUDA versions prior to 11, users had to set the math mode to
CUBLAS_TENSOR_OP_MATH manually in order to use tensor cores for FP16.
On CUDA 11 and CUDA 12, this is no longer required. Since the latest ORT
only supports CUDA >= 11, it is safe to remove CUBLAS_TENSOR_OP_MATH
from our code base.
This commit is contained in:
Tianlei Wu 2024-02-06 12:48:39 -08:00 committed by GitHub
parent 61b0e04b03
commit c4b49fb7bf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 7 additions and 10 deletions

View file

@ -1005,7 +1005,6 @@ Status LaunchLongformerAttentionKernel(
bool disable_compact_memory,
bool use_merged_qkv_weights,
bool use_half4) {
CublasMathModeSetter helper(device_prop, cublas, CUBLAS_TENSOR_OP_MATH);
size_t softmax_workspace_size = GetLongformerSoftmaxWorkspaceSize(element_size,
batch_size,
num_heads,

View file

@ -141,8 +141,7 @@ class HalfGemmOptions {
}
#else
cublasMath_t GetMathMode() const {
// CublasMathModeSetter will check whether device has tensor cores later.
return CUBLAS_TENSOR_OP_MATH;
return CUBLAS_DEFAULT_MATH;
}
cudaDataType GetComputeType() const {

View file

@ -17,7 +17,7 @@ TEST(CudaGemmOptions, TestDefaultOptions) {
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F);
#else
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
#endif
}
@ -30,7 +30,7 @@ TEST(CudaGemmOptions, TestCompute16F) {
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F);
#else
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
#endif
}
@ -43,7 +43,7 @@ TEST(CudaGemmOptions, NoReducedPrecision) {
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F);
#else
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
#endif
}
@ -56,7 +56,7 @@ TEST(CudaGemmOptions, Pedantic) {
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_PEDANTIC_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F_PEDANTIC);
#else
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
#endif
}
@ -69,7 +69,7 @@ TEST(CudaGemmOptions, Compute16F_Pedantic) {
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_PEDANTIC_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F_PEDANTIC);
#else
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
#endif
}
@ -82,7 +82,7 @@ TEST(CudaGemmOptions, Compute16F_NoReducedPrecision) {
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F);
#else
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
#endif
}

View file

@ -117,7 +117,6 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path):
s = s.replace("HIPBLAS_R_16F", "rocblas_datatype_f16_r")
s = s.replace("HIPBLAS_R_32F", "rocblas_datatype_f32_r")
s = s.replace("ROCBLAS_GEMM_DEFAULT_TENSOR_OP", "rocblas_gemm_algo_standard")
s = s.replace("ROCBLAS_TENSOR_OP_MATH", "0 /* CUBLAS_TENSOR_OP_MATH is deprecated */")
# compatible layer
s = s.replace("rocblas_gemm_strided_batched_ex", "_compat_rocblas_gemm_strided_batched_ex")