[NNAPI QDQ] Add QDQ Resize support (#10442)

* Add NNAPI support of QDQ Resize * minor update to UT * fix build break * fix android UT failure * address cr comments
2026-07-08 17:17:15 +00:00 · 2022-02-01 18:14:58 -08:00 · 2022-02-01 18:14:58 -08:00 · 7318361645
commit 7318361645
parent 91b8ad5ee7
9 changed files with 146 additions and 36 deletions
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@ -68,6 +68,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
  } else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
    if (op_type == "Conv")
      return QuantizedOpType::QDQConv;
+    else if (op_type == "Resize")
+      return QuantizedOpType::QDQResize;
  } else {
    // throw?
  }
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@ -86,6 +86,7 @@ enum class QuantizedOpType : uint8_t {
  // QLinearMul,
  // QLinearReduceMean,
  QDQConv,
+  QDQResize,
  // TODO, add other QDQ NodeUnit types
 };

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@ -2258,10 +2258,20 @@ class ResizeOpBuilder : public BaseOpBuilder {

 private:
  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
+  static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
 };

+/* static */ bool ResizeOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
+  return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
+}
+
 void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
  const auto& inputs = node_unit.Inputs();
+  if (IsQuantizedOp(node_unit)) {
+    AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param);               // x_scale, x_zp
+    AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param);  // y_scale, y_zp
+  }
+
  // We don't really use ROI here, so add them to skipped list
  model_builder.AddInitializerToSkip(inputs[1].node_arg.Name());  // ROI

@ -2296,6 +2306,15 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
    }
  }

+  // Check if the quantization scale and ZP is correct
+  if (IsQuantizedOp(node_unit)) {
+    float x_scale = 0.0f;
+    int32_t x_zero_point = 0;
+    ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
+        initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point));
+    ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
+  }
+
  bool is_linear_resize = helper.Get("mode", "nearest") == "linear";

  int32_t operationCode = is_linear_resize ? ANEURALNETWORKS_RESIZE_BILINEAR
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@ -1466,8 +1466,14 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
  int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 11; }

  bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
+  bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
+  static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
 };

+/* static */ bool ResizeOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
+  return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
+}
+
 bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
                                               const OpSupportCheckParams& params) const {
  Shape input_shape;
@ -1587,6 +1593,34 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
      }
    }
  }
+
+  if (IsQuantizedOp(node_unit)) {
+    // For QDQResize, we only support uint8 output now
+    // TODO, add int8 support to NNAPI, and maybe move all the output type check into a virtual function
+    // similar to HasSupportedInputsImpl
+    int32_t output_type;
+    if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
+      return false;
+
+    if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
+      LOGS_DEFAULT(VERBOSE) << "[Resize] output type: [" << output_type
+                            << "] is not supported for now";
+      return false;
+    }
+
+    // Check input scales and ZPs
+    if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
+      return false;
+    if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
+      return false;
+
+    // Check output scale and ZP
+    if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
+      return false;
+    if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
+      return false;
+  }
+
  return true;
 }

--- a/onnxruntime/test/optimizer/qdq_test_utils.cc
+++ b/onnxruntime/test/optimizer/qdq_test_utils.cc
@ -0,0 +1,40 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "qdq_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+GetQDQTestCaseFn BuildQDQResizeTestCase(
+    const std::vector<int64_t>& input_shape,
+    const std::vector<int64_t>& sizes_data,
+    const std::string& mode,
+    const std::string& coordinate_transformation_mode) {
+  return [input_shape, sizes_data, mode, coordinate_transformation_mode](ModelTestBuilder& builder) {
+    auto* input1_arg = builder.MakeInput<uint8_t>(input_shape,
+                                                  std::numeric_limits<uint8_t>::min(),
+                                                  std::numeric_limits<uint8_t>::max());
+    auto* roi = builder.MakeInitializer<float>({0}, {});
+    auto* scales = builder.MakeInitializer<float>({0}, {});
+    auto* sizes = builder.Make1DInitializer<int64_t>(sizes_data);
+    auto* output_arg = builder.MakeOutput();
+
+    // add DQ
+    auto* dq_output = builder.MakeIntermediate();
+    builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);
+
+    // add Resize
+    auto* resize_output = builder.MakeIntermediate();
+    Node& resize_node = builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});
+
+    resize_node.AddAttribute("mode", mode);
+    resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode);
+
+    // add Q
+    builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
+  };
+}
+
+}  // namespace test
+}  // namespace onnxruntime
--- a/onnxruntime/test/optimizer/qdq_test_utils.h
+++ b/onnxruntime/test/optimizer/qdq_test_utils.h
@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

+#pragma once
+
 #include "graph_transform_test_builder.h"

 #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
@ -12,7 +14,7 @@
 namespace onnxruntime {
 namespace test {

-using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
+using GetQDQTestCaseFn = std::function<void(ModelTestBuilder& builder)>;

 template <typename T>
 typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
@ -24,10 +26,8 @@ AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp =
  return dq_output;
 }

-// TODO: for now it just builds a conv qdq graph.
-// can be modified and made it shared among different qdq test graphs associated with other operators
 template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
-GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
+GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
  return [input_shape, weights_shape](ModelTestBuilder& builder) {
    auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
    auto* output_arg = builder.MakeOutput();
@ -78,5 +78,10 @@ GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shap
  };
 }

+GetQDQTestCaseFn BuildQDQResizeTestCase(const std::vector<int64_t>& input_shape,
+                                        const std::vector<int64_t>& sizes_data,
+                                        const std::string& mode = "nearest",
+                                        const std::string& coordinate_transformation_mode = "half_pixel");
+
 }  // namespace test
 }  // namespace onnxruntime
--- a/onnxruntime/test/optimizer/qdq_transformer_test.cc
+++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc
@ -633,27 +633,6 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
 TEST(QDQTransformerTests, Resize) {
  auto test_case = [&](const std::vector<int64_t>& input1_shape,
                       const std::vector<int64_t>& sizes_shape) {
-    auto build_test_case = [&](ModelTestBuilder& builder) {
-      auto* input1_arg = builder.MakeInput<uint8_t>(input1_shape,
-                                                    std::numeric_limits<uint8_t>::min(),
-                                                    std::numeric_limits<uint8_t>::max());
-      auto* roi = builder.MakeInitializer<float>({0}, {});
-      auto* scales = builder.MakeInitializer<float>({0}, {});
-      auto* sizes = builder.MakeInitializer<int64_t>(sizes_shape, 1, 16);
-      auto* output_arg = builder.MakeOutput();
-
-      // add DQ
-      auto* dq_output = builder.MakeIntermediate();
-      builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);
-
-      // add Resize
-      auto* resize_output = builder.MakeIntermediate();
-      builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});
-
-      // add Q
-      builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
-    };
-
    auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
      auto op_to_count = CountOpsInGraph(session.GetGraph());
      EXPECT_EQ(op_to_count["Resize"], 1);
@ -661,12 +640,14 @@ TEST(QDQTransformerTests, Resize) {
      EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
    };

-    TransformerTester(build_test_case, check_matmul_graph,
+    TransformerTester(BuildQDQResizeTestCase(input1_shape, sizes_shape),
+                      check_matmul_graph,
                      TransformerLevel::Level1,
                      TransformerLevel::Level2);
  };

-  test_case({2, 13, 12, 37}, {4});
+  RandomValueGenerator rand_gen{optional<RandomValueGenerator::RandomSeedType>{2345}};
+  test_case({2, 13, 12, 37}, rand_gen.Uniform<int64_t>(std::vector<int64_t>{4}, 1, 16));
 }

 TEST(QDQTransformerTests, Resize_No_Fusion) {
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@ -239,14 +239,10 @@ TEST(NnapiExecutionProviderTest, TestNoShapeInputModel) {
      << "No node should be taken by the NNAPI EP";
 }

-#if defined(__ANDROID__)
-TEST(NnapiExecutionProviderTest, TestQDQModel) {
-  onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
+static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char* test_description) {
+  onnxruntime::Model model(test_description, false, DefaultLoggingManager().DefaultLogger());
  Graph& graph = model.MainGraph();
  ModelTestBuilder helper(graph);
-
-  auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
-                                                                                  {1, 1, 3, 3} /*weights_shape*/);
  build_test_case(helper);
  helper.SetGraphOutputs();
  ASSERT_STATUS_OK(model.MainGraph().Resolve());
@ -255,13 +251,41 @@ TEST(NnapiExecutionProviderTest, TestQDQModel) {
  std::string model_data;
  model.ToProto().SerializeToString(&model_data);

+#if defined(__ANDROID__)
  RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
                            std::make_unique<NnapiExecutionProvider>(0),
                            helper.feeds_);
-
-  // TODO: can add test load only verfication here later
+#else
+  // test load only
+  SessionOptions so;
+  InferenceSessionWrapper session_object{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique<NnapiExecutionProvider>(0)));
+  ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
+  ASSERT_STATUS_OK(session_object.Initialize());
+  ASSERT_GT(CountAssignedNodes(session_object.GetGraph(), kNnapiExecutionProvider), 0)
+      << "Some nodes should have been taken by the NNAPI EP";
+#endif
+}
+
+TEST(NnapiExecutionProviderTest, TestQDQConv) {
+  RunQDQModelTest(BuildQDQConvTestCase<uint8_t /* InputType */,
+                                       uint8_t /* WeightType */,
+                                       int32_t /* BiasType */,
+                                       uint8_t /* OutputType */>(
+                      {1, 1, 5, 5} /*input_shape*/,
+                      {1, 1, 3, 3} /*weights_shape*/),
+                  "nnapi_qdq_test_graph_conv");
+}
+
+TEST(NnapiExecutionProviderTest, TestQDQResize) {
+  // NNAPI EP does not support the default setting of Resize Op
+  // Use bi-linear and asymmetric for NNAPI EP only
+  RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
+                                         {1, 3, 32, 32} /* sizes_data */,
+                                         "linear" /* mode */,
+                                         "asymmetric" /* coordinate_transformation_mode */),
+                  "nnapi_qdq_test_graph_resize");
 }
-#endif  // defined(__ANDROID__)

 #endif  // !(ORT_MINIMAL_BUILD)

--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@ -35,6 +35,10 @@ static void VerifyOutputs(const std::vector<std::string>& output_names,
        EXPECT_THAT(ltensor.DataAsSpan<int64_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<int64_t>()))
            << " mismatch for " << output_names[i];
        break;
+      case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
+        EXPECT_THAT(ltensor.DataAsSpan<uint8_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<uint8_t>()))
+            << " mismatch for " << output_names[i];
+        break;
      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
        constexpr float abs_err = 1e-5f;