mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-04 04:07:22 +00:00
[CUDA] remove CUBLAS_TENSOR_OP_MATH mode (#19431)
This pull request replaces `CUBLAS_TENSOR_OP_MATH` with `CUBLAS_DEFAULT_MATH`. The changes affect several files, including test cases and a Python script for the AMD hipify process.

### Motivation and Context

The `CUBLAS_TENSOR_OP_MATH` mode is deprecated: https://docs.nvidia.com/cuda/cublas/index.html#cublasmath-t

On CUDA versions prior to 11, users were required to set the math mode to `CUBLAS_TENSOR_OP_MATH` manually in order to use tensor cores for FP16. On CUDA 11 and CUDA 12, this is no longer required. Since the latest ORT only supports CUDA >= 11, it is safe to remove `CUBLAS_TENSOR_OP_MATH` from our code base.
This commit is contained in:
parent
61b0e04b03
commit
c4b49fb7bf
4 changed files with 7 additions and 10 deletions
|
|
@ -1005,7 +1005,6 @@ Status LaunchLongformerAttentionKernel(
|
|||
bool disable_compact_memory,
|
||||
bool use_merged_qkv_weights,
|
||||
bool use_half4) {
|
||||
CublasMathModeSetter helper(device_prop, cublas, CUBLAS_TENSOR_OP_MATH);
|
||||
size_t softmax_workspace_size = GetLongformerSoftmaxWorkspaceSize(element_size,
|
||||
batch_size,
|
||||
num_heads,
|
||||
|
|
|
|||
|
|
@ -141,8 +141,7 @@ class HalfGemmOptions {
|
|||
}
|
||||
#else
|
||||
cublasMath_t GetMathMode() const {
|
||||
// CublasMathModeSetter will check whether device has tensor cores later.
|
||||
return CUBLAS_TENSOR_OP_MATH;
|
||||
return CUBLAS_DEFAULT_MATH;
|
||||
}
|
||||
|
||||
cudaDataType GetComputeType() const {
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ TEST(CudaGemmOptions, TestDefaultOptions) {
|
|||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F);
|
||||
#else
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
|
||||
#endif
|
||||
}
|
||||
|
|
@ -30,7 +30,7 @@ TEST(CudaGemmOptions, TestCompute16F) {
|
|||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F);
|
||||
#else
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
|
||||
#endif
|
||||
}
|
||||
|
|
@ -43,7 +43,7 @@ TEST(CudaGemmOptions, NoReducedPrecision) {
|
|||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F);
|
||||
#else
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
|
||||
#endif
|
||||
}
|
||||
|
|
@ -56,7 +56,7 @@ TEST(CudaGemmOptions, Pedantic) {
|
|||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_PEDANTIC_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F_PEDANTIC);
|
||||
#else
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
|
||||
#endif
|
||||
}
|
||||
|
|
@ -69,7 +69,7 @@ TEST(CudaGemmOptions, Compute16F_Pedantic) {
|
|||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_PEDANTIC_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F_PEDANTIC);
|
||||
#else
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
|
||||
#endif
|
||||
}
|
||||
|
|
@ -82,7 +82,7 @@ TEST(CudaGemmOptions, Compute16F_NoReducedPrecision) {
|
|||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F);
|
||||
#else
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
|
||||
EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
|
||||
EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -117,7 +117,6 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path):
|
|||
s = s.replace("HIPBLAS_R_16F", "rocblas_datatype_f16_r")
|
||||
s = s.replace("HIPBLAS_R_32F", "rocblas_datatype_f32_r")
|
||||
s = s.replace("ROCBLAS_GEMM_DEFAULT_TENSOR_OP", "rocblas_gemm_algo_standard")
|
||||
s = s.replace("ROCBLAS_TENSOR_OP_MATH", "0 /* CUBLAS_TENSOR_OP_MATH is deprecated */")
|
||||
|
||||
# compatible layer
|
||||
s = s.replace("rocblas_gemm_strided_batched_ex", "_compat_rocblas_gemm_strided_batched_ex")
|
||||
|
|
|
|||
Loading…
Reference in a new issue