diff --git a/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc b/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc index 9f7168b81f..5c5f1863f6 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc @@ -215,8 +215,8 @@ Status QAttention::Compute(OpKernelContext* context) const { BufferUniquePtr gemm_buffer(gemm_data, BufferDeleter(allocator)); auto Q = reinterpret_cast(gemm_data); - auto K = Q + batch_size * sequence_length * hidden_size; - auto V = K + batch_size * sequence_length * hidden_size; + auto K = Q + static_cast(batch_size) * sequence_length * hidden_size; + auto V = K + static_cast(batch_size) * sequence_length * hidden_size; T* QKV[3] = {Q, K, V}; { @@ -272,7 +272,7 @@ Status QAttention::Compute(OpKernelContext* context) const { gemm_params.BIsPacked = true; } else { gemm_params.B = weights_data + weights_offset; - gemm_params.ldb = 3 * hidden_size; + gemm_params.ldb = static_cast(3) * hidden_size; } gemm_params.ZeroPointB = nullptr != weight_zp_data ? weight_zp_data + weights_zp_offset : &weight_zp_default; gemm_params.PerColumnZeroPoints = is_weight_zp_per_column;