diff --git a/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_common.cuh b/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_common.cuh index 849883ff6d..2dbd3009ab 100644 --- a/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_common.cuh +++ b/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_common.cuh @@ -74,7 +74,7 @@ __device__ inline char2 QuantizeHalf2Char2(const __half2 xy, const __half2 inver s2xy.s2.x = __half2short_rn(scaled_xy.x); s2xy.s2.y = __half2short_rn(scaled_xy.y); s2xy.u1 = __vmaxs2(__vmins2(s2xy.u1, 0x007F007F), 0xFF80FF80); - return char2{(char)s2xy.s2.x, (char)s2xy.s2.y}; + return char2{(signed char)s2xy.s2.x, (signed char)s2xy.s2.y}; } __device__ inline char4 QuantizeHalf4Char4(const __half4 val4, const __half2 inverse_scale2) { @@ -87,7 +87,7 @@ __device__ inline char4 QuantizeHalf4Char4(const __half4 val4, const __half2 inv shortzw.s2.y = __half2short_rn(__high2half(val4_zw)); shortxy.u1 = __vmaxs2(__vmins2(shortxy.u1, 0x007F007F), 0xFF80FF80); shortzw.u1 = __vmaxs2(__vmins2(shortzw.u1, 0x007F007F), 0xFF80FF80); - return char4{(char)shortxy.s2.x, (char)shortxy.s2.y, (char)shortzw.s2.x, (char)shortzw.s2.y}; + return char4{(signed char)shortxy.s2.x, (signed char)shortxy.s2.y, (signed char)shortzw.s2.x, (signed char)shortzw.s2.y}; } __device__ inline char4 QuantizeHalf4Char4Strict(const __half4 val4, const float inverse_scale) { @@ -98,7 +98,7 @@ __device__ inline char4 QuantizeHalf4Char4Strict(const __half4 val4, const float shortzw.s2.y = static_cast(__float2int_rn(__half2float(val4.zw.y) * inverse_scale)); shortxy.u1 = __vmaxs2(__vmins2(shortxy.u1, 0x007F007F), 0xFF80FF80); shortzw.u1 = __vmaxs2(__vmins2(shortzw.u1, 0x007F007F), 0xFF80FF80); - return char4{(char)shortxy.s2.x, (char)shortxy.s2.y, (char)shortzw.s2.x, (char)shortzw.s2.y}; + return char4{(signed char)shortxy.s2.x, (signed char)shortxy.s2.y, (signed char)shortzw.s2.x, (signed char)shortzw.s2.y}; } __device__ inline __half4 DeqantizeChar4Half4(const char4 ch4, const __half2 scale2) { diff --git a/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_qdq_impl.cu b/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_qdq_impl.cu index d93a4c55b2..e99b06fe89 100644 --- a/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_qdq_impl.cu +++ b/onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_qdq_impl.cu @@ -42,7 +42,7 @@ struct DequantizeVec { static __device__ inline QuantizedVecT Quantize(const float2 fpvals, const float inv_scale) { float dqvalx = fmaxf(fminf(127.0f, fpvals.x * inv_scale), -128.0f); float dqvaly = fmaxf(fminf(127.0f, fpvals.y * inv_scale), -128.0f); - return char2{static_cast(__float2int_rn(dqvalx)), static_cast(__float2int_rn(dqvaly))}; + return char2{static_cast(__float2int_rn(dqvalx)), static_cast(__float2int_rn(dqvaly))}; } static __device__ inline float2 Dequantize(const QuantizedVecT qvals, const float scale) { @@ -76,7 +76,7 @@ struct DequantizeVec<__half2> { s2xy.s2.x = __half2short_rn(xy.x); s2xy.s2.y = __half2short_rn(xy.y); s2xy.u1 = __vmaxs2(__vmins2(s2xy.u1, 0x007F007F), 0xFF80FF80); - return char2{(char)s2xy.s2.x, (char)s2xy.s2.y}; + return char2{(signed char)s2xy.s2.x, (signed char)s2xy.s2.y}; } static __device__ inline __half2 Dequantize(const QuantizedVecT qvals, const __half scale) {