use __hmul2 instead of __hmul2_rn (#15852)

### Description
<!-- Describe your changes. -->



### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->

https://github.com/microsoft/onnxruntime/issues/15840
This commit is contained in:
Ye Wang 2023-05-09 10:54:00 -07:00 committed by GitHub
parent 34cb293c6b
commit 475f661acd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -156,7 +156,7 @@ struct TypeMapper : public V_vec_m_<T, size> {};
// The following operator overloading is not common, so we put it in an anonymous namespace
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 530
// Multiplies a half2 by a float scalar.
// `a` is first converted to a half2 via __float2half2_rn, and the result is
// multiplied with `b` using __hmul2.
// __hmul2 is used in place of __hmul2_rn; see
// https://github.com/microsoft/onnxruntime/issues/15840 for the motivation.
inline __device__ half2 operator*(const float a, const half2 b) {
  return __hmul2(__float2half2_rn(a), b);
}
#else
inline __device__ half2 operator*(const float a, const half2 b) {