[NNAPI] Support non-1d tensor for C of Gemm op (#5982)

* Add support for non-1d tensor for C of Gemm

* check android api level before add squeeze

* Minor update

* Fix to accept c only in format of {1,1,...,1,n}
This commit is contained in:
Guoyu Wang 2020-12-02 00:22:38 -08:00 committed by GitHub
parent 6846c665ff
commit cdacee6696
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 143 additions and 56 deletions

View file

@ -179,6 +179,58 @@ static Status AddBinaryOperator(int32_t op_type,
return Status::OK();
}
// Adds an ANEURALNETWORKS_SQUEEZE operation to the model, squeezing `input` into `output`.
//
//   model_builder  builder that receives the axes operand and the squeeze operation
//   node_name      combined with `input` to form the unique name of the axes operand
//   input/output   names of the operand to squeeze and of the resulting operand
//   axes           axes to squeeze (negative values are normalized via HandleNegativeAxis);
//                  an empty vector means "squeeze all", i.e. every dimension equal to 1
//
// Returns a FAIL status when the Android SDK version is below 28. Errors reported by the
// model builder or the shaper are handed back through ORT_RETURN_IF_ERROR.
static Status AddSqueezeOp(ModelBuilder& model_builder,
                           const std::string& node_name,
                           const std::string& input, const std::string& output,
                           vector<int32_t> axes) ORT_MUST_USE_RESULT;

static Status AddSqueezeOp(ModelBuilder& model_builder,
                           const std::string& node_name,
                           const std::string& input, const std::string& output,
                           vector<int32_t> axes) {
  // Squeeze is rejected outright for SDK versions older than 28.
  const auto sdk_ver = model_builder.GetAndroidSdkVer();
  if (sdk_ver < 28) {
    return ORT_MAKE_STATUS(
        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", sdk_ver);
  }

  auto& shape_tracker = model_builder.GetShaper();
  const auto& indices_by_name = model_builder.GetOperandIndices();
  const auto& types_by_name = model_builder.GetOperandTypes();

  const auto& in_shape = shape_tracker[input];
  const auto rank = in_shape.size();

  // Map every requested axis through HandleNegativeAxis (relative to the input rank).
  for (size_t k = 0; k < axes.size(); ++k) {
    axes[k] = static_cast<int32_t>(HandleNegativeAxis(axes[k], rank));
  }

  // The ANEURALNETWORKS_SQUEEZE spec at
  // https://developer.android.com/ndk/reference/group/neural-networks
  // says the axes (input 1 of ANEURALNETWORKS_SQUEEZE) are optional.
  //
  // The actual NNAPI implementation nevertheless requires them to be provided, see
  // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/common/operations/Squeeze.cpp#31
  // so "squeeze all" is spelled out here as the list of every dimension equal to 1.
  if (axes.empty()) {
    for (size_t dim = 0; dim < rank; ++dim) {
      if (in_shape[dim] != 1) {
        continue;
      }
      axes.push_back(static_cast<int32_t>(dim));
    }
  }

  // Register the axes as a 1-D INT32 operand under a unique name.
  const auto axes_name = model_builder.GetUniqueName(node_name + input + "_axes");
  const Shape axes_shape{static_cast<uint32_t>(axes.size())};
  const OperandType axes_operand_type(Type::TENSOR_INT32, axes_shape);
  ORT_RETURN_IF_ERROR(
      model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type));

  std::vector<uint32_t> input_indices;
  input_indices.push_back(indices_by_name.at(input));      // input
  input_indices.push_back(indices_by_name.at(axes_name));  // axes

  // Let the shaper compute the output shape; the output keeps the input's element type.
  ORT_RETURN_IF_ERROR(shape_tracker.Squeeze(input, axes, output));
  const OperandType output_operand_type(types_by_name.at(input).type, shape_tracker[output]);
  ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_SQUEEZE, input_indices,
                                                 {output}, {output_operand_type}, {false}));
  return Status::OK();
}
enum DataLayout {
L_0231 = 0,
L_1230 = 1,
@ -1581,12 +1633,27 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
uint32_t bias_idx;
bool has_bias = (op == "Gemm") && (input_defs.size() > 2);
if (has_bias) {
bias_idx = operand_indices.at(input_defs[c_idx]->Name());
const auto& bias = input_defs[c_idx]->Name();
// We need to squeeze the input tensor to 1d if necessary
if (shaper[bias].size() > 1) {
std::string bias_squeezed = model_builder.GetUniqueName(node.Name() + op + "_bias_squeezed");
// We will use squeeze all here
ORT_RETURN_IF_ERROR(AddSqueezeOp(model_builder, node.Name(),
bias, bias_squeezed,
{} /* axes */));
bias_idx = operand_indices.at(bias_squeezed);
LOGS_DEFAULT(VERBOSE) << "GemmOpBuilder - Operand [" << bias << "] squeezed from "
<< Shape2String(shaper[bias])
<< " to "
<< Shape2String(shaper[bias_squeezed]);
} else {
bias_idx = operand_indices.at(bias);
}
} else {
// No C supplied, we need a vector of 0
std::string bias = node.Name() + op + "_bias";
std::string bias = model_builder.GetUniqueName(node.Name() + op + "_bias");
const auto& bias_type = operand_types.at(input2).type;
Shape bias_dimen = {shaper[input2][0]};
const Shape& bias_dimen = {shaper[input2][0]};
if (bias_type == Type::TENSOR_FLOAT32) {
std::vector<float> buffer(bias_dimen[0], 0.f);
OperandType bias_operand_type(Type::TENSOR_FLOAT32, bias_dimen);
@ -1767,10 +1834,6 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
class SqueezeOpBuilder : public BaseOpBuilder {
public:
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
static Status AddSqueezeOp(ModelBuilder& model_builder,
const std::string& node_name,
const std::string& input, const std::string& output,
vector<int32_t> axes) ORT_MUST_USE_RESULT;
private:
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
@ -1783,49 +1846,6 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
}
}
// Adds an ANEURALNETWORKS_SQUEEZE operation to the model, squeezing `input` into `output`.
// `node_name` and `input` together form the unique name of the axes operand.
// `axes` lists the axes to squeeze (negative values are normalized via HandleNegativeAxis);
// an empty vector means "squeeze all", i.e. every dimension equal to 1.
// Errors reported by the model builder or the shaper are handed back through
// ORT_RETURN_IF_ERROR.
/* static */ Status SqueezeOpBuilder::AddSqueezeOp(ModelBuilder& model_builder,
                                                   const std::string& node_name,
                                                   const std::string& input, const std::string& output,
                                                   vector<int32_t> axes) {
  auto& shape_tracker = model_builder.GetShaper();
  const auto& indices_by_name = model_builder.GetOperandIndices();
  const auto& types_by_name = model_builder.GetOperandTypes();

  const auto& in_shape = shape_tracker[input];
  const auto rank = in_shape.size();

  // Map every requested axis through HandleNegativeAxis (relative to the input rank).
  for (size_t k = 0; k < axes.size(); ++k) {
    axes[k] = static_cast<int32_t>(HandleNegativeAxis(axes[k], rank));
  }

  // The ANEURALNETWORKS_SQUEEZE spec at
  // https://developer.android.com/ndk/reference/group/neural-networks
  // says the axes (input 1 of ANEURALNETWORKS_SQUEEZE) are optional.
  //
  // The actual NNAPI implementation nevertheless requires them to be provided, see
  // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/common/operations/Squeeze.cpp#31
  // so "squeeze all" is spelled out here as the list of every dimension equal to 1.
  if (axes.empty()) {
    for (size_t dim = 0; dim < rank; ++dim) {
      if (in_shape[dim] != 1) {
        continue;
      }
      axes.push_back(static_cast<int32_t>(dim));
    }
  }

  // Register the axes as a 1-D INT32 operand under a unique name.
  const auto axes_name = model_builder.GetUniqueName(node_name + input + "_axes");
  const Shape axes_shape{static_cast<uint32_t>(axes.size())};
  const OperandType axes_operand_type(Type::TENSOR_INT32, axes_shape);
  ORT_RETURN_IF_ERROR(
      model_builder.AddOperandFromPersistMemoryBuffer(axes_name, axes.data(), axes_operand_type));

  std::vector<uint32_t> input_indices;
  input_indices.push_back(indices_by_name.at(input));      // input
  input_indices.push_back(indices_by_name.at(axes_name));  // axes

  // Let the shaper compute the output shape; the output keeps the input's element type.
  ORT_RETURN_IF_ERROR(shape_tracker.Squeeze(input, axes, output));
  const OperandType output_operand_type(types_by_name.at(input).type, shape_tracker[output]);
  ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_SQUEEZE, input_indices,
                                                 {output}, {output_operand_type}, {false}));
  return Status::OK();
}
/* static */ vector<int32_t> SqueezeOpBuilder::GetAxes(ModelBuilder& model_builder, const Node& node) {
vector<int32_t> axes;
// Squeeze opset 13 use input as axes

View file

@ -719,6 +719,39 @@ bool GemmOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
});
}
// Get the bias size (C) of Gemm op
// ANEURALNETWORKS_FULLY_CONNECTED only supports 1d bias
// Will test if C of Gemm can be squeezed and return the 1d vector size after squeeze
static bool GetBiasSize(const Shape& c_shape, int32_t android_sdk_ver, uint32_t& size) {
// TODO add support of scalar C for Gemm
size_t c_dim = c_shape.size();
if (c_dim == 0) {
LOGS_DEFAULT(VERBOSE) << "C of Gemm cannot be a scalar";
return false;
}
if (c_dim != 1 && android_sdk_ver < 28) {
LOGS_DEFAULT(VERBOSE) << "C of Gemm can only be 1d tensor for API level " << android_sdk_ver
<< " shape of C, " << Shape2String(c_shape);
return false;
}
if (c_dim != 1) {
// If C is a (2+)d tensor, it must have the format {1, 1, ..., 1, n}
// where every except the last dimension should be 1
for (size_t i = 0; i < c_dim - 1; ++i) {
if (c_shape[i] != 1) {
LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector or a tensor with only last dimension != 1"
<< " c_shape: " << Shape2String(c_shape);
return false;
}
}
}
size = c_shape[c_dim - 1];
return true;
}
int GemmOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
const auto& op(node.OpType());
@ -730,7 +763,7 @@ int GemmOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
}
bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
const OpSupportCheckParams& /* params */) const {
const OpSupportCheckParams& params) const {
const auto& op_type = node.OpType();
const auto input_defs(node.InputDefs());
size_t a_idx = 0, b_idx = 1, c_idx = 2; // A*B+C
@ -774,7 +807,11 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
if (!(transA == 0 && alpha == 1.f && beta == 1.f)) {
LOGS_DEFAULT(VERBOSE) << "Only transA == 0, alpha == 1.0 "
<< "and beta == 1.0 is supported.";
<< "and beta == 1.0 is supported."
<< " transA " << transA
<< " transB " << transB
<< " alpha " << alpha
<< " beta " << beta;
return false;
}
@ -788,9 +825,13 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
if (!GetShape(*input_defs[c_idx], c_shape))
return false;
if (c_shape.size() != 1 ||
c_shape[0] != (transB == 0 ? b_shape[1] : b_shape[0])) {
LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector of b_shape[0]"
uint32_t c_size;
if (!GetBiasSize(c_shape, params.android_sdk_ver, c_size))
return false;
if (c_size != (transB == 0 ? b_shape[1] : b_shape[0])) {
LOGS_DEFAULT(VERBOSE) << "C of Gemm must be a vector of b_shape["
<< (transB == 0 ? "1" : "0") << "]"
<< " b_shape: " << Shape2String(b_shape)
<< " c_shape: " << Shape2String(c_shape);

View file

@ -140,6 +140,7 @@ TEST(GemmOpTest, GemmTransBIsInitializer) {
}
// NNAPI EP's GEMM only works as A*B', add case only B is transposed
// Also test NNAPI EP's handling of non-1D bias (C of Gemm)
TEST(GemmOpTest, GemmTransB) {
OpTester test("Gemm");
@ -152,7 +153,32 @@ TEST(GemmOpTest, GemmTransB) {
{1.0f, 2.0f, 3.0f, 4.0f,
-1.0f, -2.0f, -3.0f, -4.0f});
test.AddInput<float>("B", {3, 4}, std::vector<float>(12, 1.0f));
test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
test.AddInput<float>("C", {1, 3}, std::vector<float>(3, 1.0f));
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
#if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32)
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues
#else
test.Run();
#endif
}
// NNAPI EP's GEMM only works as A*B', add case only B is transposed
// Also test NNAPI EP's handling of non-1D bias (C of Gemm) which is broadcastable but not valid for NNAPI
TEST(GemmOpTest, GemmTransB_1) {
OpTester test("Gemm");
test.AddAttribute("transA", (int64_t)0);
test.AddAttribute("transB", (int64_t)1);
test.AddAttribute("alpha", 1.0f);
test.AddAttribute("beta", 1.0f);
test.AddInput<float>("A", {2, 4},
{1.0f, 2.0f, 3.0f, 4.0f,
-1.0f, -2.0f, -3.0f, -4.0f});
test.AddInput<float>("B", {3, 4}, std::vector<float>(12, 1.0f));
test.AddInput<float>("C", {2, 1}, std::vector<float>(2, 1.0f));
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});