use __hmul2 instead of __hmul2_rn (#15852)

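### Description
Use `__hmul2` instead of `__hmul2_rn` in the `float * half2` multiplication operator in `onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu`.

### Motivation and Context
https://github.com/microsoft/onnxruntime/issues/15840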
2026-07-13 18:08:13 +00:00 · 2023-05-09 10:54:00 -07:00 · 2023-05-09 10:54:00 -07:00 · 475f661acd
commit 475f661acd
parent 34cb293c6b
1 changed file with 1 addition and 1 deletion
--- a/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu
@@ -156,7 +156,7 @@ struct TypeMapper : public V_vec_m_<T, size> {};
 // The following operator overriding is not common so we put it in anonymous namespace
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 530
 inline __device__ half2 operator*(const float a, const half2 b) {
-  return __hmul2_rn(__float2half2_rn(a), b);
+  return __hmul2(__float2half2_rn(a), b);
 }
 #else
 inline __device__ half2 operator*(const float a, const half2 b) {