mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-23 22:13:38 +00:00
[NNAPI QDQ] Add QDQ Resize support (#10442)
* Add NNAPI support of QDQ Resize * minor update to UT * fix build break * fix android UT failure * address cr comments
This commit is contained in:
parent
91b8ad5ee7
commit
7318361645
9 changed files with 146 additions and 36 deletions
|
|
@ -68,6 +68,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
|
|||
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
|
||||
if (op_type == "Conv")
|
||||
return QuantizedOpType::QDQConv;
|
||||
else if (op_type == "Resize")
|
||||
return QuantizedOpType::QDQResize;
|
||||
} else {
|
||||
// throw?
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ enum class QuantizedOpType : uint8_t {
|
|||
// QLinearMul,
|
||||
// QLinearReduceMean,
|
||||
QDQConv,
|
||||
QDQResize,
|
||||
// TODO, add other QDQ NodeUnit types
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -2258,10 +2258,20 @@ class ResizeOpBuilder : public BaseOpBuilder {
|
|||
|
||||
private:
|
||||
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
|
||||
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder
|
||||
};
|
||||
|
||||
// Returns true when GetQuantizedOpType() classifies `node_unit` as QuantizedOpType::QDQResize,
// and false for every other classification.
/* static */ bool ResizeOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
|
||||
return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
|
||||
}
|
||||
|
||||
void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
|
||||
const auto& inputs = node_unit.Inputs();
|
||||
if (IsQuantizedOp(node_unit)) {
|
||||
AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // x_scale, x_zp
|
||||
AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp
|
||||
}
|
||||
|
||||
// We don't really use ROI here, so add them to skipped list
|
||||
model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // ROI
|
||||
|
||||
|
|
@ -2296,6 +2306,15 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
|
|||
}
|
||||
}
|
||||
|
||||
// Check if the quantization scale and ZP is correct
|
||||
if (IsQuantizedOp(node_unit)) {
|
||||
float x_scale = 0.0f;
|
||||
int32_t x_zero_point = 0;
|
||||
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
|
||||
initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point));
|
||||
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
|
||||
}
|
||||
|
||||
bool is_linear_resize = helper.Get("mode", "nearest") == "linear";
|
||||
|
||||
int32_t operationCode = is_linear_resize ? ANEURALNETWORKS_RESIZE_BILINEAR
|
||||
|
|
|
|||
|
|
@ -1466,8 +1466,14 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
|
|||
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 11; }
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
|
||||
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpSupportChecker
|
||||
};
|
||||
|
||||
// Returns true when GetQuantizedOpType() classifies `node_unit` as QuantizedOpType::QDQResize,
// and false for every other classification.
/* static */ bool ResizeOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
|
||||
return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
|
||||
}
|
||||
|
||||
bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
Shape input_shape;
|
||||
|
|
@ -1587,6 +1593,34 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IsQuantizedOp(node_unit)) {
|
||||
// For QDQResize, we only support uint8 output now
|
||||
// TODO, add int8 support to NNAPI, and maybe move all the output type check into a virtual function
|
||||
// similar to HasSupportedInputsImpl
|
||||
int32_t output_type;
|
||||
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
|
||||
return false;
|
||||
|
||||
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[Resize] output type: [" << output_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
40
onnxruntime/test/optimizer/qdq_test_utils.cc
Normal file
40
onnxruntime/test/optimizer/qdq_test_utils.cc
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "qdq_test_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
// Returns a callable that populates a ModelTestBuilder with a
// DequantizeLinear -> Resize -> QuantizeLinear graph.
//
// input_shape: shape of the uint8 graph input.
// sizes_data: values of the 1-D int64 `sizes` initializer fed to Resize.
// mode: value of the Resize "mode" attribute.
// coordinate_transformation_mode: value of the Resize "coordinate_transformation_mode" attribute.
//
// All four arguments are captured by value, so the returned callable does not refer back to
// the caller's objects.
GetQDQTestCaseFn BuildQDQResizeTestCase(
|
||||
const std::vector<int64_t>& input_shape,
|
||||
const std::vector<int64_t>& sizes_data,
|
||||
const std::string& mode,
|
||||
const std::string& coordinate_transformation_mode) {
|
||||
return [input_shape, sizes_data, mode, coordinate_transformation_mode](ModelTestBuilder& builder) {
|
||||
// uint8 input bounded by the full uint8 value range
|
auto* input1_arg = builder.MakeInput<uint8_t>(input_shape,
|
||||
std::numeric_limits<uint8_t>::min(),
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
// roi and scales are created with shape {0} and no data; the target size comes from `sizes`
auto* roi = builder.MakeInitializer<float>({0}, {});
|
||||
auto* scales = builder.MakeInitializer<float>({0}, {});
|
||||
auto* sizes = builder.Make1DInitializer<int64_t>(sizes_data);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
||||
// add DQ (scale .003f, zero point 1)
|
||||
auto* dq_output = builder.MakeIntermediate();
|
||||
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);
|
||||
|
||||
// add Resize, consuming the DQ output plus the roi/scales/sizes initializers
|
||||
auto* resize_output = builder.MakeIntermediate();
|
||||
Node& resize_node = builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});
|
||||
|
||||
resize_node.AddAttribute("mode", mode);
|
||||
resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode);
|
||||
|
||||
// add Q (same scale and zero point as the DQ above)
|
||||
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "graph_transform_test_builder.h"
|
||||
|
||||
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
|
||||
|
|
@ -12,7 +14,7 @@
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
|
||||
using GetQDQTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
|
||||
|
|
@ -24,10 +26,8 @@ AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp =
|
|||
return dq_output;
|
||||
}
|
||||
|
||||
// TODO: for now it just builds a conv qdq graph.
|
||||
// can be modified and made it shared among different qdq test graphs associated with other operators
|
||||
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
|
||||
GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
|
||||
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
|
||||
return [input_shape, weights_shape](ModelTestBuilder& builder) {
|
||||
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
|
@ -78,5 +78,10 @@ GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shap
|
|||
};
|
||||
}
|
||||
|
||||
GetQDQTestCaseFn BuildQDQResizeTestCase(const std::vector<int64_t>& input_shape,
|
||||
const std::vector<int64_t>& sizes_data,
|
||||
const std::string& mode = "nearest",
|
||||
const std::string& coordinate_transformation_mode = "half_pixel");
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -633,27 +633,6 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
|
|||
TEST(QDQTransformerTests, Resize) {
|
||||
auto test_case = [&](const std::vector<int64_t>& input1_shape,
|
||||
const std::vector<int64_t>& sizes_shape) {
|
||||
auto build_test_case = [&](ModelTestBuilder& builder) {
|
||||
auto* input1_arg = builder.MakeInput<uint8_t>(input1_shape,
|
||||
std::numeric_limits<uint8_t>::min(),
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
auto* roi = builder.MakeInitializer<float>({0}, {});
|
||||
auto* scales = builder.MakeInitializer<float>({0}, {});
|
||||
auto* sizes = builder.MakeInitializer<int64_t>(sizes_shape, 1, 16);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
||||
// add DQ
|
||||
auto* dq_output = builder.MakeIntermediate();
|
||||
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);
|
||||
|
||||
// add Resize
|
||||
auto* resize_output = builder.MakeIntermediate();
|
||||
builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});
|
||||
|
||||
// add Q
|
||||
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Resize"], 1);
|
||||
|
|
@ -661,12 +640,14 @@ TEST(QDQTransformerTests, Resize) {
|
|||
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph,
|
||||
TransformerTester(BuildQDQResizeTestCase(input1_shape, sizes_shape),
|
||||
check_matmul_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({2, 13, 12, 37}, {4});
|
||||
RandomValueGenerator rand_gen{optional<RandomValueGenerator::RandomSeedType>{2345}};
|
||||
test_case({2, 13, 12, 37}, rand_gen.Uniform<int64_t>(std::vector<int64_t>{4}, 1, 16));
|
||||
}
|
||||
|
||||
TEST(QDQTransformerTests, Resize_No_Fusion) {
|
||||
|
|
|
|||
|
|
@ -239,14 +239,10 @@ TEST(NnapiExecutionProviderTest, TestNoShapeInputModel) {
|
|||
<< "No node should be taken by the NNAPI EP";
|
||||
}
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
TEST(NnapiExecutionProviderTest, TestQDQModel) {
|
||||
onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
|
||||
static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char* test_description) {
|
||||
onnxruntime::Model model(test_description, false, DefaultLoggingManager().DefaultLogger());
|
||||
Graph& graph = model.MainGraph();
|
||||
ModelTestBuilder helper(graph);
|
||||
|
||||
auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
|
||||
{1, 1, 3, 3} /*weights_shape*/);
|
||||
build_test_case(helper);
|
||||
helper.SetGraphOutputs();
|
||||
ASSERT_STATUS_OK(model.MainGraph().Resolve());
|
||||
|
|
@ -255,13 +251,41 @@ TEST(NnapiExecutionProviderTest, TestQDQModel) {
|
|||
std::string model_data;
|
||||
model.ToProto().SerializeToString(&model_data);
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
|
||||
std::make_unique<NnapiExecutionProvider>(0),
|
||||
helper.feeds_);
|
||||
|
||||
// TODO: can add test load only verification here later
|
||||
#else
|
||||
// test load only
|
||||
SessionOptions so;
|
||||
InferenceSessionWrapper session_object{so, GetEnvironment()};
|
||||
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique<NnapiExecutionProvider>(0)));
|
||||
ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
|
||||
ASSERT_STATUS_OK(session_object.Initialize());
|
||||
ASSERT_GT(CountAssignedNodes(session_object.GetGraph(), kNnapiExecutionProvider), 0)
|
||||
<< "Some nodes should have been taken by the NNAPI EP";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Runs the shared QDQ model test on a QDQ Conv graph built with uint8 input/weight/output and
// int32 bias, a {1, 1, 5, 5} input and {1, 1, 3, 3} weights.
TEST(NnapiExecutionProviderTest, TestQDQConv) {
|
||||
RunQDQModelTest(BuildQDQConvTestCase<uint8_t /* InputType */,
|
||||
uint8_t /* WeightType */,
|
||||
int32_t /* BiasType */,
|
||||
uint8_t /* OutputType */>(
|
||||
{1, 1, 5, 5} /*input_shape*/,
|
||||
{1, 1, 3, 3} /*weights_shape*/),
|
||||
"nnapi_qdq_test_graph_conv");
|
||||
}
|
||||
|
||||
// Runs the shared QDQ model test on a QDQ Resize graph that resizes a {1, 3, 64, 64} input
// to sizes {1, 3, 32, 32}.
TEST(NnapiExecutionProviderTest, TestQDQResize) {
|
||||
// NNAPI EP does not support the default setting of Resize Op
|
||||
// Use bi-linear and asymmetric for NNAPI EP only
|
||||
RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
|
||||
{1, 3, 32, 32} /* sizes_data */,
|
||||
"linear" /* mode */,
|
||||
"asymmetric" /* coordinate_transformation_mode */),
|
||||
"nnapi_qdq_test_graph_resize");
|
||||
}
|
||||
#endif // defined(__ANDROID__)
|
||||
|
||||
#endif // !(ORT_MINIMAL_BUILD)
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,10 @@ static void VerifyOutputs(const std::vector<std::string>& output_names,
|
|||
EXPECT_THAT(ltensor.DataAsSpan<int64_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<int64_t>()))
|
||||
<< " mismatch for " << output_names[i];
|
||||
break;
|
||||
case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
|
||||
EXPECT_THAT(ltensor.DataAsSpan<uint8_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<uint8_t>()))
|
||||
<< " mismatch for " << output_names[i];
|
||||
break;
|
||||
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
|
||||
constexpr float abs_err = 1e-5f;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue