diff --git a/onnxruntime/contrib_ops/cpu/bert/attention.cc b/onnxruntime/contrib_ops/cpu/bert/attention.cc index 80a0ee8653..be0aae79eb 100644 --- a/onnxruntime/contrib_ops/cpu/bert/attention.cc +++ b/onnxruntime/contrib_ops/cpu/bert/attention.cc @@ -227,30 +227,16 @@ Status Attention::PrePack(const Tensor& weights, int input_idx, bool& is_pack template Status Attention::Compute(OpKernelContext* context) const { const Tensor* input = context->Input(0); - const Tensor* weights; + const Tensor* weights = packed_weights_ ? nullptr : context->Input(1); const Tensor* bias = context->Input(2); const Tensor* mask_index = context->Input(3); const Tensor* past = context->Input(4); - if (packed_weights_) { - weights = nullptr; - ORT_RETURN_IF_ERROR(CheckInputs(input->Shape(), - weight_shape_, - bias->Shape(), - mask_index, - past)); - } else { - weights = context->Input(1); - //Normally we don't check if an input is NULL, but this one is needed to make VC++ - //static analyzer happy - if (weights == nullptr) - return Status(common::ONNXRUNTIME, common::FAIL, "the second input cannot be NULL"); - ORT_RETURN_IF_ERROR(CheckInputs(input->Shape(), - weights->Shape(), - bias->Shape(), - mask_index, - past)); - } + ORT_RETURN_IF_ERROR(CheckInputs(input->Shape(), + weights ? weights->Shape() : weight_shape_, + bias->Shape(), + mask_index, + past)); const auto& shape = input->Shape().GetDims(); const int batch_size = static_cast(shape[0]); @@ -279,7 +265,7 @@ Status Attention::Compute(OpKernelContext* context) const { { const int loop_len = 3 * batch_size * num_heads_; const auto* input_data = input->template Data(); - const auto* weights_data = weights == nullptr ? nullptr : weights->template Data(); + const auto* weights_data = weights ? weights->template Data() : nullptr; const auto* bias_data = bias->template Data(); const double cost = diff --git a/onnxruntime/core/providers/cpu/math/matmul_integer.cc b/onnxruntime/core/providers/cpu/math/matmul_integer.cc index 81ea7c2fea..998b97565b 100644 --- a/onnxruntime/core/providers/cpu/math/matmul_integer.cc +++ b/onnxruntime/core/providers/cpu/math/matmul_integer.cc @@ -35,7 +35,7 @@ Status MatMulInteger::Compute(OpKernelContext* ctx) const { const Tensor* b = packed_b_ ? nullptr : ctx->Input(1); MatMulComputeHelper helper; - ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), packed_b_ ? b_shape_ : b->Shape())); + ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b ? b->Shape() : b_shape_)); Tensor* y = ctx->Output(0, helper.OutputShape()); // Bail out early if the output is going to be empty @@ -61,9 +61,9 @@ Status MatMulInteger::Compute(OpKernelContext* ctx) const { const auto* a_data = a->template Data(); auto* y_data = y->template MutableData(); - for (size_t i = 0; i < helper.OutputOffsets().size(); i++) { #ifdef MLAS_SUPPORTS_PACKED_GEMM_U8X8 - if (packed_b_) { + if (packed_b_) { + for (size_t i = 0; i < helper.OutputOffsets().size(); i++) { MlasGemm(static_cast(helper.M()), static_cast(helper.N()), static_cast(helper.K()), @@ -76,25 +76,33 @@ Status MatMulInteger::Compute(OpKernelContext* ctx) const { y_data + helper.OutputOffsets()[i], static_cast(helper.N()), thread_pool); - continue; } -#endif - const auto* b_data = static_cast(b->DataRaw()); - const bool b_is_signed = b->IsDataType(); - MlasGemm(static_cast(helper.M()), - static_cast(helper.N()), - static_cast(helper.K()), - a_data + helper.LeftOffsets()[i], - static_cast(helper.K()), - a_offset, - b_data + helper.RightOffsets()[i], - static_cast(helper.N()), - b_offset, - b_is_signed, - y_data + helper.OutputOffsets()[i], - static_cast(helper.N()), - thread_pool); + return Status::OK(); } +#endif + + if (b != nullptr) { + for (size_t i = 0; i < helper.OutputOffsets().size(); i++) { + const auto* b_data = static_cast(b->DataRaw()); + const bool b_is_signed = b->IsDataType(); + MlasGemm(static_cast(helper.M()), + static_cast(helper.N()), + static_cast(helper.K()), + a_data + helper.LeftOffsets()[i], + static_cast(helper.K()), + a_offset, + b_data + helper.RightOffsets()[i], + static_cast(helper.N()), + b_offset, + b_is_signed, + y_data + helper.OutputOffsets()[i], + static_cast(helper.N()), + thread_pool); + } + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input B should not be null."); + } + return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/math/matmul_integer_base.h b/onnxruntime/core/providers/cpu/math/matmul_integer_base.h index ad5bd7d2bd..50c99f6efb 100644 --- a/onnxruntime/core/providers/cpu/math/matmul_integer_base.h +++ b/onnxruntime/core/providers/cpu/math/matmul_integer_base.h @@ -46,7 +46,7 @@ class MatMulIntegerBase : public OpKernel { #endif protected: - bool b_is_signed_; + bool b_is_signed_{true}; TensorShape b_shape_; BufferUniquePtr packed_b_; }; diff --git a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc index 878277be32..e6ce0404af 100644 --- a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc +++ b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc @@ -80,7 +80,7 @@ ONNX_OPERATOR_KERNEL_EX( .TypeConstraint("T4", DataTypeImpl::GetTensorType()), QLinearConv); -} +} // namespace contrib #endif @@ -107,7 +107,7 @@ Status QLinearConv::PrePack(const Tensor& tensor, int input_idx, bool& is_packed const size_t output_channels = static_cast(shape[0]); const size_t group_input_channels = static_cast(shape[1]); const size_t kernel_size = - static_cast(std::accumulate(shape.data() + 2, shape.data() + rank, 1LL, std::multiplies())); + static_cast(std::accumulate(shape.data() + 2, shape.data() + rank, 1LL, std::multiplies())); const auto* Wdata = static_cast(tensor.DataRaw()); W_shape_ = shape; @@ -165,7 +165,7 @@ Status QLinearConv::PrePack(const Tensor& tensor, int input_idx, bool& is_packed Status QLinearConv::Compute(OpKernelContext* context) const { const Tensor* X = context->Input(0); const Tensor* W = is_W_packed_ ? nullptr : context->Input(3); - const auto& W_shape = is_W_packed_ ? W_shape_ : W->Shape(); + const auto& W_shape = W ? W->Shape() : W_shape_; const bool is_W_signed = (W != nullptr) ? W->IsDataType() : is_W_signed_; const int64_t N = X->Shape()[0]; @@ -285,7 +285,8 @@ Status QLinearConv::Compute(OpKernelContext* context) const { } #endif if (use_reordered_W) { - if (reordered_W_buffer_) { + if (W == nullptr) { + // Weight was constant and reordered. reordered_W = static_cast(reordered_W_buffer_.get()); } else { // Weight tensor was not constant or prepacking is disabled. @@ -402,7 +403,7 @@ Status QLinearConv::Compute(OpKernelContext* context) const { static_cast(kernel_rank), static_cast(col_buffer.get()) + group_id * col_buffer_size, X_zero_point_value); - } + } } } diff --git a/onnxruntime/test/mlas/unittest.cpp b/onnxruntime/test/mlas/unittest.cpp index 5a1365b0c2..15606b5b2b 100644 --- a/onnxruntime/test/mlas/unittest.cpp +++ b/onnxruntime/test/mlas/unittest.cpp @@ -714,7 +714,7 @@ private: for (size_t f = 0; f < M * N; f++) { if (C[f] != CReference[f]) { - printf("mismatch M=%zd, N=%zd, K=%zd, offa=%d, offb=%d!\n", M, N, K, offa, offb); + printf("mismatch M=%zd, N=%zd, K=%zd, offa=%d, offb=%d!\n", M, N, K, int(offa), int(offb)); break; } } @@ -921,7 +921,7 @@ private: for (size_t f = 0; f < M * N; f++) { // Sensitive to comparing positive/negative zero. if (C[f] != CReference[f]) { - printf("mismatch M=%zd, N=%zd, K=%zd, offa=%d, offb=%d! %f %f\n", M, N, K, offa, offb, C[f], CReference[f]); + printf("mismatch M=%zd, N=%zd, K=%zd, offa=%d, offb=%d! %f %f\n", M, N, K, int(offa), int(offb), C[f], CReference[f]); break; } }