diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
index e74add6a14..7ee572508f 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@@ -179,6 +179,58 @@ static Status AddBinaryOperator(int32_t op_type,
   return Status::OK();
 }
 
+static Status AddSqueezeOp(ModelBuilder& model_builder,
+                           const std::string& node_name,
+                           const std::string& input, const std::string& output,
+                           vector<int32_t> axes) ORT_MUST_USE_RESULT;
+static Status AddSqueezeOp(ModelBuilder& model_builder,
+                           const std::string& node_name,
+                           const std::string& input, const std::string& output,
+                           vector<int32_t> axes) {
+  if (model_builder.GetAndroidSdkVer() < 28) {
+    return ORT_MAKE_STATUS(
+        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetAndroidSdkVer());
+  }
+
+  auto& shaper(model_builder.GetShaper());
+  const auto& operand_indices(model_builder.GetOperandIndices());
+  const auto& operand_types(model_builder.GetOperandTypes());
+
+  const auto& input_shape(shaper[input]);
+  auto input_dims = input_shape.size();
+  for (auto& axis : axes) {
+    axis = static_cast<int32_t>(HandleNegativeAxis(axis, input_dims));
+  }
+
+  // The spec of ANEURALNETWORKS_SQUEEZE at
+  // https://developer.android.com/ndk/reference/group/neural-networks
+  // states that the axes (input 1 of ANEURALNETWORKS_SQUEEZE) are optional.
+  //
+  // However, the actual code of NNAPI requires the axes to be provided
+  // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/common/operations/Squeeze.cpp#31
+  if (axes.empty()) {  // Squeeze all
+    for (size_t i = 0; i < input_dims; i++) {
+      if (input_shape[i] == 1)
+        axes.push_back(static_cast<int32_t>(i));
+    }
+  }
+
+  const auto axes_name = model_builder.GetUniqueName(node_name + input + "_axes");
+  Shape axes_dimen = {static_cast<uint32_t>(axes.size())};
+  const OperandType axes_operand_type(Type::TENSOR_INT32, axes_dimen);
+  ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type));
+
+  std::vector<uint32_t> input_indices;
+  input_indices.push_back(operand_indices.at(input));      // input
+  input_indices.push_back(operand_indices.at(axes_name));  // axes
+
+  ORT_RETURN_IF_ERROR(shaper.Squeeze(input, axes, output));
+  const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
+  ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_SQUEEZE, input_indices,
+                                                 {output}, {output_operand_type}, {false}));
+  return Status::OK();
+}
+
 enum DataLayout {
   L_0231 = 0,
   L_1230 = 1,
@@ -1581,12 +1633,27 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   uint32_t bias_idx;
   bool has_bias = (op == "Gemm") && (input_defs.size() > 2);
   if (has_bias) {
-    bias_idx = operand_indices.at(input_defs[c_idx]->Name());
+    const auto& bias = input_defs[c_idx]->Name();
+    // We need to squeeze the input tensor to 1d if necessary
+    if (shaper[bias].size() > 1) {
+      std::string bias_squeezed = model_builder.GetUniqueName(node.Name() + op + "_bias_squeezed");
+      // Empty axes: squeeze every dimension of size 1
+      ORT_RETURN_IF_ERROR(AddSqueezeOp(model_builder, node.Name(),
+                                       bias, bias_squeezed,
+                                       {} /* axes */));
+      bias_idx = operand_indices.at(bias_squeezed);
+      LOGS_DEFAULT(VERBOSE) << "GemmOpBuilder - Operand [" << bias << "] squeezed from "
+                            << Shape2String(shaper[bias])
+                            << " to "
+                            << Shape2String(shaper[bias_squeezed]);
+    } else {
+      bias_idx = operand_indices.at(bias);
+    }
   } else {
     // No C supplied, we need a vector of 0
-    std::string bias = node.Name() + op + "_bias";
+    std::string bias = model_builder.GetUniqueName(node.Name() + op + "_bias");
     const auto& bias_type = operand_types.at(input2).type;
-    Shape bias_dimen = {shaper[input2][0]};
+    const Shape bias_dimen = {shaper[input2][0]};
     if (bias_type == Type::TENSOR_FLOAT32) {
       std::vector<float> buffer(bias_dimen[0], 0.f);
       OperandType bias_operand_type(Type::TENSOR_FLOAT32, bias_dimen);
@@ -1767,10 +1834,6 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 class SqueezeOpBuilder : public BaseOpBuilder {
  public:
   void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
-  static Status AddSqueezeOp(ModelBuilder& model_builder,
-                             const std::string& node_name,
-                             const std::string& input, const std::string& output,
-                             vector<int32_t> axes) ORT_MUST_USE_RESULT;
 
  private:
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
@@ -1783,49 +1846,6 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
   }
 }
 
-/* static */ Status SqueezeOpBuilder::AddSqueezeOp(ModelBuilder& model_builder,
-                                                   const std::string& node_name,
-                                                   const std::string& input, const std::string& output,
-                                                   vector<int32_t> axes) {
-  auto& shaper(model_builder.GetShaper());
-  const auto& operand_indices(model_builder.GetOperandIndices());
-  const auto& operand_types(model_builder.GetOperandTypes());
-
-  const auto& input_shape(shaper[input]);
-  auto input_dims = input_shape.size();
-  for (auto& axis : axes) {
-    axis = static_cast<int32_t>(HandleNegativeAxis(axis, input_dims));
-  }
-
-  // Despite the spec of ANEURALNETWORKS_SQUEEZE at
-  // https://developer.android.com/ndk/reference/group/neural-networks
-  // states, that the axes (input 1 of ANEURALNETWORKS_SQUEEZE) is optional.
-  //
-  // The actual code of NNAPI requires the axes to be provided
-  // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/common/operations/Squeeze.cpp#31
-  if (axes.empty()) {  // Squeeze all
-    for (size_t i = 0; i < input_dims; i++) {
-      if (input_shape[i] == 1)
-        axes.push_back(i);
-    }
-  }
-
-  const auto axes_name = model_builder.GetUniqueName(node_name + input + "_axes");
-  Shape axes_dimen = {static_cast<uint32_t>(axes.size())};
-  const OperandType axes_operand_type(Type::TENSOR_INT32, axes_dimen);
-  ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type));
-
-  std::vector<uint32_t> input_indices;
-  input_indices.push_back(operand_indices.at(input));      // input
-  input_indices.push_back(operand_indices.at(axes_name));  // axes
-
-  ORT_RETURN_IF_ERROR(shaper.Squeeze(input, axes, output));
-  const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
-  ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_SQUEEZE, input_indices,
-                                                 {output}, {output_operand_type}, {false}));
-  return Status::OK();
-}
-
 /* static */ vector<int32_t> SqueezeOpBuilder::GetAxes(ModelBuilder& model_builder, const Node& node) {
   vector<int32_t> axes;
   // Squeeze opset 13 use input as axes
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
index 5ada0499fa..da9e4c4bed 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@@ -719,6 +719,39 @@ bool GemmOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
   });
 }
 
+// Get the bias size (C) of Gemm op
+// ANEURALNETWORKS_FULLY_CONNECTED only supports 1d bias
+// Tests whether C of Gemm can be squeezed to 1d and, if so, returns that 1d size via `size`
+static bool GetBiasSize(const Shape& c_shape, int32_t android_sdk_ver, uint32_t& size) {
+  // TODO add support of scalar C for Gemm
+  size_t c_dim = c_shape.size();
+  if (c_dim == 0) {
+    LOGS_DEFAULT(VERBOSE) << "C of Gemm cannot be a scalar";
+    return false;
+  }
+
+  if (c_dim != 1 && android_sdk_ver < 28) {
+    LOGS_DEFAULT(VERBOSE) << "C of Gemm can only be 1d tensor for API level " << android_sdk_ver
+                          << " shape of C, " << Shape2String(c_shape);
+    return false;
+  }
+
+  if (c_dim != 1) {
+    // If C is a (2+)d tensor, it must have the format {1, 1, ..., 1, n}
+    // where every dimension except the last one is 1
+    for (size_t i = 0; i < c_dim - 1; ++i) {
+      if (c_shape[i] != 1) {
+        LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector or a tensor with only last dimension != 1"
+                              << " c_shape: " << Shape2String(c_shape);
+        return false;
+      }
+    }
+  }
+
+  size = c_shape[c_dim - 1];
+  return true;
+}
+
 int GemmOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
   const auto& op(node.OpType());
 
@@ -730,7 +763,7 @@ int GemmOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
 }
 
 bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
-                                             const OpSupportCheckParams& /* params */) const {
+                                             const OpSupportCheckParams& params) const {
   const auto& op_type = node.OpType();
   const auto input_defs(node.InputDefs());
   size_t a_idx = 0, b_idx = 1, c_idx = 2;  // A*B+C
@@ -774,7 +807,11 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
 
     if (!(transA == 0 && alpha == 1.f && beta == 1.f)) {
       LOGS_DEFAULT(VERBOSE) << "Only transA == 0, alpha == 1.0 "
-                            << "and beta == 1.0 is supported.";
+                            << "and beta == 1.0 is supported."
+                            << " transA " << transA
+                            << " transB " << transB
+                            << " alpha " << alpha
+                            << " beta " << beta;
       return false;
     }
 
@@ -788,9 +825,13 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
       if (!GetShape(*input_defs[c_idx], c_shape))
         return false;
 
-      if (c_shape.size() != 1 ||
-          c_shape[0] != (transB == 0 ? b_shape[1] : b_shape[0])) {
-        LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector of b_shape[0]"
+      uint32_t c_size = 0;
+      if (!GetBiasSize(c_shape, params.android_sdk_ver, c_size))
+        return false;
+
+      if (c_size != (transB == 0 ? b_shape[1] : b_shape[0])) {
+        LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector of b_shape["
+                              << (transB == 0 ? "1" : "0") << "]"
                               << " b_shape: " << Shape2String(b_shape)
                               << " c_shape: " << Shape2String(c_shape);
 
diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc
index ec4581177e..59f1376965 100644
--- a/onnxruntime/test/providers/cpu/math/gemm_test.cc
+++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc
@@ -140,6 +140,7 @@ TEST(GemmOpTest, GemmTransBIsInitializer) {
 }
 
 // NNAPI EP's GEMM only works as A*B', add case only B is transposed
+// Also test NNAPI EP's handling of non-1D bias (C of Gemm)
 TEST(GemmOpTest, GemmTransB) {
   OpTester test("Gemm");
 
@@ -152,7 +153,32 @@ TEST(GemmOpTest, GemmTransB) {
                        {1.0f, 2.0f, 3.0f, 4.0f,
                         -1.0f, -2.0f, -3.0f, -4.0f});
   test.AddInput<float>("B", {3, 4}, std::vector<float>(12, 1.0f));
-  test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
+  test.AddInput<float>("C", {1, 3}, std::vector<float>(3, 1.0f));
+  test.AddOutput<float>("Y", {2, 3},
+                        {11.0f, 11.0f, 11.0f,
+                         -9.0f, -9.0f, -9.0f});
+#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32)
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});  // OpenVINO: Temporarily disabled due to accuracy issues
+#else
+  test.Run();
+#endif
+}
+
+// NNAPI EP's GEMM only works as A*B', add case only B is transposed
+// Also test NNAPI EP's handling of non-1D bias (C of Gemm) which is broadcastable but not valid for NNAPI
+TEST(GemmOpTest, GemmTransB_1) {
+  OpTester test("Gemm");
+
+  test.AddAttribute("transA", (int64_t)0);
+  test.AddAttribute("transB", (int64_t)1);
+  test.AddAttribute("alpha", 1.0f);
+  test.AddAttribute("beta", 1.0f);
+
+  test.AddInput<float>("A", {2, 4},
+                       {1.0f, 2.0f, 3.0f, 4.0f,
+                        -1.0f, -2.0f, -3.0f, -4.0f});
+  test.AddInput<float>("B", {3, 4}, std::vector<float>(12, 1.0f));
+  test.AddInput<float>("C", {2, 1}, std::vector<float>(2, 1.0f));
   test.AddOutput<float>("Y", {2, 3},
                         {11.0f, 11.0f, 11.0f,
                          -9.0f, -9.0f, -9.0f});