From 475f661acdbabffdac4fc97d01922a623d5f7611 Mon Sep 17 00:00:00 2001
From: Ye Wang <52801275+wangyems@users.noreply.github.com>
Date: Tue, 9 May 2023 10:54:00 -0700
Subject: [PATCH] use __hmul2 instead of __hmul2_rn (#15852)

### Description
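In `relative_attn_bias_impl.cu`, the `float * half2` operator compiled for
`__CUDA_ARCH__ > 530` now calls `__hmul2` instead of `__hmul2_rn`. The float
operand is still converted with `__float2half2_rn` before the multiplication.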


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->

Related issue: https://github.com/microsoft/onnxruntime/issues/15840
---
 onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu b/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu
index b5c8dcca32..ebe87158d1 100644
--- a/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias_impl.cu
@@ -156,7 +156,7 @@ struct TypeMapper : public V_vec_m_<T, size> {};
 // The following operator overriding is not common so we put it in anonymous namespace
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 530
 inline __device__ half2 operator*(const float a, const half2 b) {
-  return __hmul2_rn(__float2half2_rn(a), b);
+  return __hmul2(__float2half2_rn(a), b);
 }
 #else
 inline __device__ half2 operator*(const float a, const half2 b) {