mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
[NNAPI] Support non-1d tensor for C of Gemm op (#5982)
* Add support for non-1d tensor for C of Gemm
* check android api level before add squeeze
* Minor update
* Fix to accept c only in format of {1,1,...,1,n}
This commit is contained in:
parent
6846c665ff
commit
cdacee6696
3 changed files with 143 additions and 56 deletions
|
|
@ -179,6 +179,58 @@ static Status AddBinaryOperator(int32_t op_type,
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
static Status AddSqueezeOp(ModelBuilder& model_builder,
                           const std::string& node_name,
                           const std::string& input, const std::string& output,
                           vector<int32_t> axes) ORT_MUST_USE_RESULT;

// Adds an ANEURALNETWORKS_SQUEEZE operation to the NNAPI model under construction.
//
// model_builder  builder that owns the shaper, the operand tables and the model
// node_name      name of the ONNX node; only used to derive a unique name for the axes operand
// input          name of the already registered operand to squeeze
// output         name under which the squeezed operand is registered
// axes           axes to remove, negative values are normalized against the rank of `input`;
//                an empty list means "remove every dimension whose extent is 1".
//                Taken by value because the list is rewritten in place.
//
// Returns a FAIL status when the Android API level is below 28, otherwise the first
// error reported by the model builder / shaper, or Status::OK().
static Status AddSqueezeOp(ModelBuilder& model_builder,
                           const std::string& node_name,
                           const std::string& input, const std::string& output,
                           vector<int32_t> axes) {
  const auto sdk_ver = model_builder.GetAndroidSdkVer();
  if (sdk_ver < 28) {
    return ORT_MAKE_STATUS(
        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", sdk_ver);
  }

  auto& shaper(model_builder.GetShaper());
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());

  const auto& input_shape(shaper[input]);
  const auto rank = input_shape.size();

  // Map every (possibly negative) axis into the range handled by HandleNegativeAxis
  for (size_t k = 0; k < axes.size(); ++k) {
    axes[k] = static_cast<int32_t>(HandleNegativeAxis(axes[k], rank));
  }

  // Despite the spec of ANEURALNETWORKS_SQUEEZE at
  // https://developer.android.com/ndk/reference/group/neural-networks
  // states, that the axes (input 1 of ANEURALNETWORKS_SQUEEZE) is optional.
  //
  // The actual code of NNAPI requires the axes to be provided
  // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/common/operations/Squeeze.cpp#31
  //
  // So "squeeze all" is expanded here into the explicit list of extent-1 dimensions
  if (axes.empty()) {
    for (size_t dim = 0; dim < rank; ++dim) {
      if (input_shape[dim] == 1) {
        axes.push_back(static_cast<int32_t>(dim));
      }
    }
  }

  // Register the axes as a 1-d INT32 operand of the model.
  // NOTE(review): `axes` is a local copy; presumably AddOperandFromPersistMemoryBuffer
  // copies the data before this function returns - confirm against its implementation.
  const auto axes_name = model_builder.GetUniqueName(node_name + input + "_axes");
  const Shape axes_shape = {static_cast<uint32_t>(axes.size())};
  const OperandType axes_operand_type(Type::TENSOR_INT32, axes_shape);
  ORT_RETURN_IF_ERROR(
      model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type));

  // Operation inputs, in order: data operand, axes operand
  std::vector<uint32_t> squeeze_inputs;
  squeeze_inputs.push_back(operand_indices.at(input));
  squeeze_inputs.push_back(operand_indices.at(axes_name));

  // The shaper computes the output shape; the element type is inherited from the input
  ORT_RETURN_IF_ERROR(shaper.Squeeze(input, axes, output));
  const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
  ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_SQUEEZE, squeeze_inputs,
                                                 {output}, {output_operand_type}, {false}));
  return Status::OK();
}
|
||||
|
||||
enum DataLayout {
|
||||
L_0231 = 0,
|
||||
L_1230 = 1,
|
||||
|
|
@ -1581,12 +1633,27 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
|
|||
uint32_t bias_idx;
|
||||
bool has_bias = (op == "Gemm") && (input_defs.size() > 2);
|
||||
if (has_bias) {
|
||||
bias_idx = operand_indices.at(input_defs[c_idx]->Name());
|
||||
const auto& bias = input_defs[c_idx]->Name();
|
||||
// We need squeeze the input tensor to 1d if necessary
|
||||
if (shaper[bias].size() > 1) {
|
||||
std::string bias_squeezed = model_builder.GetUniqueName(node.Name() + op + "_bias_squeezed");
|
||||
// We will use squeeze all here
|
||||
ORT_RETURN_IF_ERROR(AddSqueezeOp(model_builder, node.Name(),
|
||||
bias, bias_squeezed,
|
||||
{} /* axes */));
|
||||
bias_idx = operand_indices.at(bias_squeezed);
|
||||
LOGS_DEFAULT(VERBOSE) << "GemmOpBuilder - Operand [" << bias << "] squeezed from "
|
||||
<< Shape2String(shaper[bias])
|
||||
<< " to "
|
||||
<< Shape2String(shaper[bias_squeezed]);
|
||||
} else {
|
||||
bias_idx = operand_indices.at(bias);
|
||||
}
|
||||
} else {
|
||||
// No C supplied, we need a vector of 0
|
||||
std::string bias = node.Name() + op + "_bias";
|
||||
std::string bias = model_builder.GetUniqueName(node.Name() + op + "_bias");
|
||||
const auto& bias_type = operand_types.at(input2).type;
|
||||
Shape bias_dimen = {shaper[input2][0]};
|
||||
const Shape& bias_dimen = {shaper[input2][0]};
|
||||
if (bias_type == Type::TENSOR_FLOAT32) {
|
||||
std::vector<float> buffer(bias_dimen[0], 0.f);
|
||||
OperandType bias_operand_type(Type::TENSOR_FLOAT32, bias_dimen);
|
||||
|
|
@ -1767,10 +1834,6 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
|
|||
class SqueezeOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
|
||||
static Status AddSqueezeOp(ModelBuilder& model_builder,
|
||||
const std::string& node_name,
|
||||
const std::string& input, const std::string& output,
|
||||
vector<int32_t> axes) ORT_MUST_USE_RESULT;
|
||||
|
||||
private:
|
||||
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
|
||||
|
|
@ -1783,49 +1846,6 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
|
|||
}
|
||||
}
|
||||
|
||||
// Adds an ANEURALNETWORKS_SQUEEZE operation which squeezes operand `input` into a new
// operand `output`.
//
// `axes` may contain negative values, they are normalized against the rank of `input`.
// An empty `axes` squeezes every dimension of extent 1. `node_name` only contributes
// to the unique name generated for the axes operand.
//
// Returns the first error reported by the model builder / shaper, or Status::OK().
/* static */ Status SqueezeOpBuilder::AddSqueezeOp(ModelBuilder& model_builder,
                                                   const std::string& node_name,
                                                   const std::string& input, const std::string& output,
                                                   vector<int32_t> axes) {
  auto& shaper(model_builder.GetShaper());
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());

  const auto& in_shape(shaper[input]);
  const auto num_dims = in_shape.size();

  // Normalize the requested axes in place
  for (auto it = axes.begin(); it != axes.end(); ++it) {
    *it = static_cast<int32_t>(HandleNegativeAxis(*it, num_dims));
  }

  // Despite the spec of ANEURALNETWORKS_SQUEEZE at
  // https://developer.android.com/ndk/reference/group/neural-networks
  // states, that the axes (input 1 of ANEURALNETWORKS_SQUEEZE) is optional.
  //
  // The actual code of NNAPI requires the axes to be provided
  // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/common/operations/Squeeze.cpp#31
  const bool squeeze_all = axes.empty();
  if (squeeze_all) {
    // Collect every dimension of extent 1 explicitly
    for (size_t d = 0; d < num_dims; ++d) {
      if (in_shape[d] != 1)
        continue;

      axes.push_back(static_cast<int32_t>(d));
    }
  }

  // The axes become a 1-d INT32 operand of the model
  const auto axes_name = model_builder.GetUniqueName(node_name + input + "_axes");
  const Shape axes_shape = {static_cast<uint32_t>(axes.size())};
  const OperandType axes_type(Type::TENSOR_INT32, axes_shape);
  ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_type));

  std::vector<uint32_t> op_inputs;
  op_inputs.push_back(operand_indices.at(input));      // data to squeeze
  op_inputs.push_back(operand_indices.at(axes_name));  // axes to remove

  // Output shape comes from the shaper, output element type is the one of the input
  ORT_RETURN_IF_ERROR(shaper.Squeeze(input, axes, output));
  const OperandType output_type(operand_types.at(input).type, shaper[output]);
  ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_SQUEEZE, op_inputs,
                                                 {output}, {output_type}, {false}));
  return Status::OK();
}
|
||||
|
||||
/* static */ vector<int32_t> SqueezeOpBuilder::GetAxes(ModelBuilder& model_builder, const Node& node) {
|
||||
vector<int32_t> axes;
|
||||
// Squeeze opset 13 use input as axes
|
||||
|
|
|
|||
|
|
@ -719,6 +719,39 @@ bool GemmOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
|
|||
});
|
||||
}
|
||||
|
||||
// Get the bias size (C) of Gemm op
|
||||
// ANEURALNETWORKS_FULLY_CONNECTED only supports 1d bias
|
||||
// Will test if C of Gemm can be squeezed and return the 1d vector size after squeeze
|
||||
static bool GetBiasSize(const Shape& c_shape, int32_t android_sdk_ver, uint32_t& size) {
|
||||
// TODO add support of scalar C for Gemm
|
||||
size_t c_dim = c_shape.size();
|
||||
if (c_dim == 0) {
|
||||
LOGS_DEFAULT(VERBOSE) << "C of Gemm cannot be a scalar";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (c_dim != 1 && android_sdk_ver < 28) {
|
||||
LOGS_DEFAULT(VERBOSE) << "C of Gemm can only be 1d tensor for API level " << android_sdk_ver
|
||||
<< " shape of C, " << Shape2String(c_shape);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (c_dim != 1) {
|
||||
// If C is a (2+)d tensor, it must have the format {1, 1, ..., 1, n}
|
||||
// where every except the last dimension should be 1
|
||||
for (size_t i = 0; i < c_dim - 1; ++i) {
|
||||
if (c_shape[i] != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector or a tensor with only last dimension != 1"
|
||||
<< " c_shape: " << Shape2String(c_shape);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size = c_shape[c_dim - 1];
|
||||
return true;
|
||||
}
|
||||
|
||||
int GemmOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
|
||||
const auto& op(node.OpType());
|
||||
|
||||
|
|
@ -730,7 +763,7 @@ int GemmOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
|
|||
}
|
||||
|
||||
bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
const OpSupportCheckParams& params) const {
|
||||
const auto& op_type = node.OpType();
|
||||
const auto input_defs(node.InputDefs());
|
||||
size_t a_idx = 0, b_idx = 1, c_idx = 2; // A*B+C
|
||||
|
|
@ -774,7 +807,11 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
|
||||
if (!(transA == 0 && alpha == 1.f && beta == 1.f)) {
|
||||
LOGS_DEFAULT(VERBOSE) << "Only transA == 0, alpha == 1.0 "
|
||||
<< "and beta == 1.0 is supported.";
|
||||
<< "and beta == 1.0 is supported."
|
||||
<< " transA " << transA
|
||||
<< " transB " << transB
|
||||
<< " alpha " << alpha
|
||||
<< " beta " << beta;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -788,9 +825,13 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
if (!GetShape(*input_defs[c_idx], c_shape))
|
||||
return false;
|
||||
|
||||
if (c_shape.size() != 1 ||
|
||||
c_shape[0] != (transB == 0 ? b_shape[1] : b_shape[0])) {
|
||||
LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector of b_shape[0]"
|
||||
uint32_t c_size;
|
||||
if (!GetBiasSize(c_shape, params.android_sdk_ver, c_size))
|
||||
return false;
|
||||
|
||||
if (c_size != (transB == 0 ? b_shape[1] : b_shape[0])) {
|
||||
LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector of b_shape["
|
||||
<< (transB == 0 ? "1" : "0") << "]"
|
||||
<< " b_shape: " << Shape2String(b_shape)
|
||||
<< " c_shape: " << Shape2String(c_shape);
|
||||
|
||||
|
|
|
|||
|
|
@ -140,6 +140,7 @@ TEST(GemmOpTest, GemmTransBIsInitializer) {
|
|||
}
|
||||
|
||||
// NNAPI EP's GEMM only works as A*B', add case only B is transposed
|
||||
// Also test NNAPI EP's handling of non-1D bias (C of Gemm)
|
||||
TEST(GemmOpTest, GemmTransB) {
|
||||
OpTester test("Gemm");
|
||||
|
||||
|
|
@ -152,7 +153,32 @@ TEST(GemmOpTest, GemmTransB) {
|
|||
{1.0f, 2.0f, 3.0f, 4.0f,
|
||||
-1.0f, -2.0f, -3.0f, -4.0f});
|
||||
test.AddInput<float>("B", {3, 4}, std::vector<float>(12, 1.0f));
|
||||
test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
|
||||
test.AddInput<float>("C", {1, 3}, std::vector<float>(3, 1.0f));
|
||||
test.AddOutput<float>("Y", {2, 3},
|
||||
{11.0f, 11.0f, 11.0f,
|
||||
-9.0f, -9.0f, -9.0f});
|
||||
#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32)
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues
|
||||
#else
|
||||
test.Run();
|
||||
#endif
|
||||
}
|
||||
|
||||
// NNAPI EP's GEMM only works as A*B', add case only B is transposed
|
||||
// Also test NNAPI EP's handling of non-1D bias (C of Gemm) which is broadcastable but not valid for NNAPI
|
||||
TEST(GemmOpTest, GemmTransB_1) {
|
||||
OpTester test("Gemm");
|
||||
|
||||
test.AddAttribute("transA", (int64_t)0);
|
||||
test.AddAttribute("transB", (int64_t)1);
|
||||
test.AddAttribute("alpha", 1.0f);
|
||||
test.AddAttribute("beta", 1.0f);
|
||||
|
||||
test.AddInput<float>("A", {2, 4},
|
||||
{1.0f, 2.0f, 3.0f, 4.0f,
|
||||
-1.0f, -2.0f, -3.0f, -4.0f});
|
||||
test.AddInput<float>("B", {3, 4}, std::vector<float>(12, 1.0f));
|
||||
test.AddInput<float>("C", {2, 1}, std::vector<float>(2, 1.0f));
|
||||
test.AddOutput<float>("Y", {2, 3},
|
||||
{11.0f, 11.0f, 11.0f,
|
||||
-9.0f, -9.0f, -9.0f});
|
||||
|
|
|
|||
Loading…
Reference in a new issue