[NNAPI QDQ] Add QDQ Resize support (#10442)

* Add NNAPI support of QDQ Resize

* minor update to UT

* fix build break

* fix android UT failure

* address cr comments
This commit is contained in:
Guoyu Wang 2022-02-01 18:14:58 -08:00 committed by GitHub
parent 91b8ad5ee7
commit 7318361645
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 146 additions and 36 deletions

View file

@ -68,6 +68,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
if (op_type == "Conv")
return QuantizedOpType::QDQConv;
else if (op_type == "Resize")
return QuantizedOpType::QDQResize;
} else {
// throw?
}

View file

@ -86,6 +86,7 @@ enum class QuantizedOpType : uint8_t {
// QLinearMul,
// QLinearReduceMean,
QDQConv,
QDQResize,
// TODO, add other QDQ NodeUnit types
};

View file

@ -2258,10 +2258,20 @@ class ResizeOpBuilder : public BaseOpBuilder {
private:
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder
};
// Tells whether `node_unit` should be built as a quantized (QDQ) Resize,
// i.e. whether GetQuantizedOpType classifies it as QuantizedOpType::QDQResize.
/* static */ bool ResizeOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
  const auto quantized_op_type = GetQuantizedOpType(node_unit);
  return quantized_op_type == QuantizedOpType::QDQResize;
}
void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
const auto& inputs = node_unit.Inputs();
if (IsQuantizedOp(node_unit)) {
AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // x_scale, x_zp
AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp
}
// We don't really use ROI here, so add them to skipped list
model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // ROI
@ -2296,6 +2306,15 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
}
}
// Check if the quantization scale and ZP is correct
if (IsQuantizedOp(node_unit)) {
float x_scale = 0.0f;
int32_t x_zero_point = 0;
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point));
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
}
bool is_linear_resize = helper.Get("mode", "nearest") == "linear";
int32_t operationCode = is_linear_resize ? ANEURALNETWORKS_RESIZE_BILINEAR

View file

@ -1466,8 +1466,14 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 11; }
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpSupportChecker
};
// Tells whether `node_unit` must pass the quantized (QDQ) Resize checks,
// i.e. whether GetQuantizedOpType classifies it as QuantizedOpType::QDQResize.
/* static */ bool ResizeOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
  const auto quantized_op_type = GetQuantizedOpType(node_unit);
  return quantized_op_type == QuantizedOpType::QDQResize;
}
bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const OpSupportCheckParams& params) const {
Shape input_shape;
@ -1587,6 +1593,34 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
}
}
}
if (IsQuantizedOp(node_unit)) {
// For QDQResize, we only support uint8 output now
// TODO, add int8 support to NNAPI, and maybe move all the output type check into a virtual function
// similar to HasSupportedInputsImpl
int32_t output_type;
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
return false;
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
LOGS_DEFAULT(VERBOSE) << "[Resize] output type: [" << output_type
<< "] is not supported for now";
return false;
}
// Check input scales and ZPs
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
return false;
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
return false;
// Check output scale and ZP
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
return false;
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
return false;
}
return true;
}

View file

@ -0,0 +1,40 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "qdq_test_utils.h"
namespace onnxruntime {
namespace test {
// Produces a callback that fills a ModelTestBuilder with a quantized Resize
// graph: uint8 input -> DequantizeLinear -> Resize -> QuantizeLinear -> output.
// The Resize node receives empty `roi` and `scales` initializers and a 1-D
// int64 `sizes` initializer holding `sizes_data`; `mode` and
// `coordinate_transformation_mode` are set as attributes on the Resize node.
GetQDQTestCaseFn BuildQDQResizeTestCase(
    const std::vector<int64_t>& input_shape,
    const std::vector<int64_t>& sizes_data,
    const std::string& mode,
    const std::string& coordinate_transformation_mode) {
  // Capture everything by value: the callback is returned to the caller and
  // is invoked after this function's arguments may no longer be alive.
  return [input_shape, sizes_data, mode, coordinate_transformation_mode](ModelTestBuilder& builder) {
    using U8Limits = std::numeric_limits<uint8_t>;
    // Quantization parameters shared by the DQ and the Q node.
    constexpr float kQuantScale = .003f;
    constexpr uint8_t kQuantZeroPoint = 1;

    // Graph input, created with the uint8 limits as its min/max bounds.
    auto* quantized_input = builder.MakeInput<uint8_t>(input_shape, U8Limits::min(), U8Limits::max());

    // Resize inputs: roi and scales are empty, sizes is the only populated one.
    auto* roi_arg = builder.MakeInitializer<float>({0}, {});
    auto* scales_arg = builder.MakeInitializer<float>({0}, {});
    auto* sizes_arg = builder.Make1DInitializer<int64_t>(sizes_data);
    auto* graph_output = builder.MakeOutput();

    // DequantizeLinear feeding the Resize node
    auto* dequantized = builder.MakeIntermediate();
    builder.AddDequantizeLinearNode<uint8_t>(quantized_input, kQuantScale, kQuantZeroPoint, dequantized);

    // Resize, configured through its two string attributes
    auto* resized = builder.MakeIntermediate();
    Node& resize = builder.AddNode("Resize", {dequantized, roi_arg, scales_arg, sizes_arg}, {resized});
    resize.AddAttribute("mode", mode);
    resize.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode);

    // QuantizeLinear producing the graph output
    builder.AddQuantizeLinearNode<uint8_t>(resized, kQuantScale, kQuantZeroPoint, graph_output);
  };
}
} // namespace test
} // namespace onnxruntime

View file

@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "graph_transform_test_builder.h"
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
@ -12,7 +14,7 @@
namespace onnxruntime {
namespace test {
using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
using GetQDQTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
template <typename T>
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
@ -24,10 +26,8 @@ AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp =
return dq_output;
}
// TODO: for now it just builds a conv qdq graph.
// can be modified and made it shared among different qdq test graphs associated with other operators
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
auto* output_arg = builder.MakeOutput();
@ -78,5 +78,10 @@ GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shap
};
}
// Returns a test-case callback that, given a ModelTestBuilder, builds a
// DequantizeLinear -> Resize -> QuantizeLinear graph on a uint8 input.
//   input_shape: shape of the uint8 graph input.
//   sizes_data:  values of the 1-D int64 `sizes` input of the Resize node.
//   mode:        value of the Resize "mode" attribute (defaults to "nearest").
//   coordinate_transformation_mode:
//                value of the Resize "coordinate_transformation_mode"
//                attribute (defaults to "half_pixel").
GetQDQTestCaseFn BuildQDQResizeTestCase(const std::vector<int64_t>& input_shape,
const std::vector<int64_t>& sizes_data,
const std::string& mode = "nearest",
const std::string& coordinate_transformation_mode = "half_pixel");
} // namespace test
} // namespace onnxruntime

View file

@ -633,27 +633,6 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
TEST(QDQTransformerTests, Resize) {
auto test_case = [&](const std::vector<int64_t>& input1_shape,
const std::vector<int64_t>& sizes_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input1_arg = builder.MakeInput<uint8_t>(input1_shape,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
auto* roi = builder.MakeInitializer<float>({0}, {});
auto* scales = builder.MakeInitializer<float>({0}, {});
auto* sizes = builder.MakeInitializer<int64_t>(sizes_shape, 1, 16);
auto* output_arg = builder.MakeOutput();
// add DQ
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);
// add Resize
auto* resize_output = builder.MakeIntermediate();
builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});
// add Q
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
};
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
auto op_to_count = CountOpsInGraph(session.GetGraph());
EXPECT_EQ(op_to_count["Resize"], 1);
@ -661,12 +640,14 @@ TEST(QDQTransformerTests, Resize) {
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
};
TransformerTester(build_test_case, check_matmul_graph,
TransformerTester(BuildQDQResizeTestCase(input1_shape, sizes_shape),
check_matmul_graph,
TransformerLevel::Level1,
TransformerLevel::Level2);
};
test_case({2, 13, 12, 37}, {4});
RandomValueGenerator rand_gen{optional<RandomValueGenerator::RandomSeedType>{2345}};
test_case({2, 13, 12, 37}, rand_gen.Uniform<int64_t>(std::vector<int64_t>{4}, 1, 16));
}
TEST(QDQTransformerTests, Resize_No_Fusion) {

View file

@ -239,14 +239,10 @@ TEST(NnapiExecutionProviderTest, TestNoShapeInputModel) {
<< "No node should be taken by the NNAPI EP";
}
#if defined(__ANDROID__)
TEST(NnapiExecutionProviderTest, TestQDQModel) {
onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char* test_description) {
onnxruntime::Model model(test_description, false, DefaultLoggingManager().DefaultLogger());
Graph& graph = model.MainGraph();
ModelTestBuilder helper(graph);
auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
{1, 1, 3, 3} /*weights_shape*/);
build_test_case(helper);
helper.SetGraphOutputs();
ASSERT_STATUS_OK(model.MainGraph().Resolve());
@ -255,13 +251,41 @@ TEST(NnapiExecutionProviderTest, TestQDQModel) {
std::string model_data;
model.ToProto().SerializeToString(&model_data);
#if defined(__ANDROID__)
RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
std::make_unique<NnapiExecutionProvider>(0),
helper.feeds_);
// TODO: can add test load only verification here later
#else
// test load only
SessionOptions so;
InferenceSessionWrapper session_object{so, GetEnvironment()};
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique<NnapiExecutionProvider>(0)));
ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
ASSERT_STATUS_OK(session_object.Initialize());
ASSERT_GT(CountAssignedNodes(session_object.GetGraph(), kNnapiExecutionProvider), 0)
<< "Some nodes should have been taken by the NNAPI EP";
#endif
}
// Runs a QDQ Conv graph (uint8 input, uint8 weights, int32 bias, uint8 output)
// through the shared QDQ model test helper.
TEST(NnapiExecutionProviderTest, TestQDQConv) {
  const std::vector<int64_t> input_shape{1, 1, 5, 5};
  const std::vector<int64_t> weights_shape{1, 1, 3, 3};

  // Template arguments, in order: InputType, WeightType, BiasType, OutputType.
  const GetQDQTestCaseFn build_conv_graph =
      BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>(input_shape, weights_shape);

  RunQDQModelTest(build_conv_graph, "nnapi_qdq_test_graph_conv");
}
// Runs a QDQ Resize graph through the shared QDQ model test helper.
TEST(NnapiExecutionProviderTest, TestQDQResize) {
  const std::vector<int64_t> input_shape{1, 3, 64, 64};
  const std::vector<int64_t> sizes_data{1, 3, 32, 32};

  // The default Resize settings are not supported by the NNAPI EP, so this
  // test explicitly asks for bi-linear mode with asymmetric coordinates.
  const GetQDQTestCaseFn build_resize_graph =
      BuildQDQResizeTestCase(input_shape, sizes_data, "linear", "asymmetric");

  RunQDQModelTest(build_resize_graph, "nnapi_qdq_test_graph_resize");
}
#endif // defined(__ANDROID__)
#endif // !(ORT_MINIMAL_BUILD)

View file

@ -35,6 +35,10 @@ static void VerifyOutputs(const std::vector<std::string>& output_names,
EXPECT_THAT(ltensor.DataAsSpan<int64_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<int64_t>()))
<< " mismatch for " << output_names[i];
break;
case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
EXPECT_THAT(ltensor.DataAsSpan<uint8_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<uint8_t>()))
<< " mismatch for " << output_names[i];
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
constexpr float abs_err = 1e-5f;