diff --git a/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu
index f002394600..c9c66b73b3 100644
--- a/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu
@@ -1005,7 +1005,6 @@ Status LaunchLongformerAttentionKernel(
     bool disable_compact_memory,
     bool use_merged_qkv_weights,
     bool use_half4) {
-  CublasMathModeSetter helper(device_prop, cublas, CUBLAS_TENSOR_OP_MATH);
   size_t softmax_workspace_size = GetLongformerSoftmaxWorkspaceSize(element_size,
                                                                     batch_size,
                                                                     num_heads,
diff --git a/onnxruntime/core/providers/cuda/cuda_common.h b/onnxruntime/core/providers/cuda/cuda_common.h
index e9941ce743..41c999bace 100644
--- a/onnxruntime/core/providers/cuda/cuda_common.h
+++ b/onnxruntime/core/providers/cuda/cuda_common.h
@@ -141,8 +141,7 @@ class HalfGemmOptions {
   }
 #else
   cublasMath_t GetMathMode() const {
-    // CublasMathModeSetter will check whether device has tensor cores later.
-    return CUBLAS_TENSOR_OP_MATH;
+    return CUBLAS_DEFAULT_MATH;
   }
 
   cudaDataType GetComputeType() const {
diff --git a/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc b/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc
index 6cac23f144..4917701e51 100644
--- a/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc
+++ b/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc
@@ -17,7 +17,7 @@ TEST(CudaGemmOptions, TestDefaultOptions) {
   EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F);
 #else
-  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
+  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
 #endif
 }
@@ -30,7 +30,7 @@ TEST(CudaGemmOptions, TestCompute16F) {
   EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F);
 #else
-  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
+  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
 #endif
 }
@@ -43,7 +43,7 @@ TEST(CudaGemmOptions, NoReducedPrecision) {
   EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
   EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F);
 #else
-  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
+  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
 #endif
 }
@@ -56,7 +56,7 @@ TEST(CudaGemmOptions, Pedantic) {
   EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_PEDANTIC_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_32F_PEDANTIC);
 #else
-  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
+  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_32F);
 #endif
 }
@@ -69,7 +69,7 @@ TEST(CudaGemmOptions, Compute16F_Pedantic) {
   EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_PEDANTIC_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F_PEDANTIC);
 #else
-  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
+  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
 #endif
 }
@@ -82,7 +82,7 @@ TEST(CudaGemmOptions, Compute16F_NoReducedPrecision) {
   EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUBLAS_COMPUTE_16F);
 #else
-  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_TENSOR_OP_MATH);
+  EXPECT_EQ(gemm_options.GetMathMode(), CUBLAS_DEFAULT_MATH);
   EXPECT_EQ(gemm_options.GetComputeType(), CUDA_R_16F);
 #endif
 }
diff --git a/tools/ci_build/amd_hipify.py b/tools/ci_build/amd_hipify.py
index 8ea0481c9b..e286236ba6 100644
--- a/tools/ci_build/amd_hipify.py
+++ b/tools/ci_build/amd_hipify.py
@@ -117,7 +117,6 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path):
     s = s.replace("HIPBLAS_R_16F", "rocblas_datatype_f16_r")
     s = s.replace("HIPBLAS_R_32F", "rocblas_datatype_f32_r")
     s = s.replace("ROCBLAS_GEMM_DEFAULT_TENSOR_OP", "rocblas_gemm_algo_standard")
-    s = s.replace("ROCBLAS_TENSOR_OP_MATH", "0 /* CUBLAS_TENSOR_OP_MATH is deprecated */")
 
     # compatible layer
     s = s.replace("rocblas_gemm_strided_batched_ex", "_compat_rocblas_gemm_strided_batched_ex")