From e3ff4a6bfaf1f8a739317fae006f736bd487bacc Mon Sep 17 00:00:00 2001
From: Edward Chen <18449977+edgchen1@users.noreply.github.com>
Date: Wed, 20 Apr 2022 08:53:29 -0700
Subject: [PATCH] Fix NNAPI EP error when handling external node adjacent to
 partition. (#11233)

Move a check for a graph output (for the partition) prior to iterating the downstream nodes to avoid trying to get a NodeUnit for a node that is outside of the partition.
---
 .../nnapi_builtin/builders/model_builder.cc   | 62 +++++++++++--------
 .../nnapi_builtin/builders/model_builder.h    |  1 +
 .../test/providers/nnapi/nnapi_basic_test.cc  | 16 +++++
 .../testdata/required_ops_and_types.config    |  5 ++
 4 files changed, 58 insertions(+), 26 deletions(-)
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
index 5757470e7c..5abe9b6041 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@@ -146,9 +146,9 @@ void ModelBuilder::PreprocessActivations() {
 }
 
 const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const {
-  // In theory, if node_unit_map_ is generated correctly, see PreprocessNodeUnits(), a NodeUnit can be
-  // found for any single node in the graph_viewer_, unless the given node is not from graph_viewer_
-  return *node_unit_map_.at(node);
+  const auto node_unit_it = node_unit_map_.find(node);
+  ORT_ENFORCE(node_unit_it != node_unit_map_.end(), "Node does not have corresponding NodeUnit.");
+  return *node_unit_it->second;
 }
 
 void ModelBuilder::PreprocessNodeUnits() {
@@ -620,13 +620,12 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
 }
 
 int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {
-  int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
   const auto& output_nodes = node_unit.GetOutputNodes();
   if (node_unit.GetOutputNodes().size() != 1) {
     LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
                           << "] type [" << node_unit.OpType()
                           << "], with " << output_nodes.size() << " output nodes";
-    return fuse_code;
+    return ANEURALNETWORKS_FUSED_NONE;
   }
 
   const auto& outputs = node_unit.Outputs();
@@ -634,42 +633,53 @@ int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {
     LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
                           << "] type [" << node_unit.OpType()
                           << "], with " << outputs.size() << " outputs";
-    return fuse_code;
+    return ANEURALNETWORKS_FUSED_NONE;
   }
 
   const NodeArg& output = outputs[0].node_arg;
-  const auto& output_node = *output_nodes[0];
+
+  // if output is a graph output, will add activation separately
+  if (const auto& graph_outputs = graph_viewer_.GetOutputs();
+      std::find(graph_outputs.cbegin(), graph_outputs.cend(), &output) != graph_outputs.cend()) {
+    return ANEURALNETWORKS_FUSED_NONE;
+  }
 
   // TODO, add support of activation fusion for quantized node group (qdq or qlinear)
   // We do not support activation fusion for quantized operators for now
   // (usually the activations are fused already in the quantization)
-  auto quant_op_type = GetQuantizedOpType(node_unit);
-  if (quant_op_type != QuantizedOpType::Unknown)
-    return fuse_code;
+  if (auto quant_op_type = GetQuantizedOpType(node_unit);
+      quant_op_type != QuantizedOpType::Unknown) {
+    return ANEURALNETWORKS_FUSED_NONE;
+  }
 
+  const auto& output_node = *output_nodes[0];
+  int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
+  bool fuse_code_assigned_from_activation = false;
   for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) {
     const auto& dst_node = it->GetNode();
     const auto* dst_input = dst_node.InputDefs()[it->GetDstArgIndex()];
-    const auto& dst_node_unit = GetNodeUnit(&dst_node);
-    if (Contains(activation_node_units_, &dst_node_unit)) {
-      if (&output == dst_input) {
-        fuse_code = activation_node_units_.at(&dst_node_unit);
-      }
-    } else {
-      // if there is any other non-relu node using the output
-      // will add relu separately
-      if (&output == dst_input)
-        return ANEURALNETWORKS_FUSED_NONE;
-    }
-  }
 
-  // if output is a graph output, will add activation separately
-  if (fuse_code != ANEURALNETWORKS_FUSED_NONE) {
-    const auto& graph_outputs = graph_viewer_.GetOutputs();
-    if (std::find(graph_outputs.cbegin(), graph_outputs.cend(), &output) != graph_outputs.cend()) {
+    if (&output != dst_input) {
+      continue;
+    }
+
+    const auto& dst_node_unit = GetNodeUnit(&dst_node);
+    auto activation_it = activation_node_units_.find(&dst_node_unit);
+    if (activation_it == activation_node_units_.end()) {
+      // output node is not a fusable activation
       return ANEURALNETWORKS_FUSED_NONE;
     }
 
+    if (fuse_code_assigned_from_activation) {
+      // don't overwrite a previously assigned fuse code, just don't fuse
+      return ANEURALNETWORKS_FUSED_NONE;
+    }
+
+    fuse_code = activation_it->second;
+    fuse_code_assigned_from_activation = true;
+  }
+
+  if (fuse_code != ANEURALNETWORKS_FUSED_NONE) {
     LOGS_DEFAULT(VERBOSE) << "Node [" << node_unit.Name() << "] type [" << node_unit.OpType()
                           << "], fused the output [" << output.Name() << "]";
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
index 76178f440e..1092f91772 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@@ -109,6 +109,7 @@ class ModelBuilder {
   const GraphViewer& GetGraphViewer() const { return graph_viewer_; }
 
   // Get the NodeUnit which contains the given node
+  // the given node must be in the underlying graph_viewer
   const NodeUnit& GetNodeUnit(const Node* node) const;
 
  private:
diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
index dd485636a0..45ca18cf78 100644
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -492,6 +492,22 @@ TEST(NnapiExecutionProviderTest, TestOrtFormatModel) {
 #endif
 }
 
+// test that NNAPI EP can process an activation node that is outside of its partition
+TEST(NnapiExecutionProviderTest, ActivationOutsideOfPartition) {
+  // model starts with Conv -> Relu
+  constexpr auto* model_file_name = ORT_TSTR("testdata/mnist.level1_opt.ort");
+  // stop NNAPI partitioning at Relu so NNAPI EP only takes first Conv
+  const auto nnapi_partitioning_stop_ops = "Relu";
+  SessionOptions so;
+  InferenceSessionWrapper session_object{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(
+      std::make_unique<NnapiExecutionProvider>(0, nnapi_partitioning_stop_ops)));
+  ASSERT_STATUS_OK(session_object.Load(model_file_name));
+  ASSERT_STATUS_OK(session_object.Initialize());
+  // expect one NNAPI partition
+  ASSERT_EQ(CountAssignedNodes(session_object.GetGraph(), kNnapiExecutionProvider), 1);
+}
+
 }  // namespace test
 }  // namespace onnxruntime
 #endif  // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
diff --git a/onnxruntime/test/testdata/required_ops_and_types.config b/onnxruntime/test/testdata/required_ops_and_types.config
index 17687906d7..3c2e3e9a9e 100644
--- a/onnxruntime/test/testdata/required_ops_and_types.config
+++ b/onnxruntime/test/testdata/required_ops_and_types.config
@@ -7,3 +7,8 @@ ai.onnx;8;MaxPool{"inputs": {"0": ["float"]}},Sum{"inputs": {"0": ["float"]}}
 ai.onnx;9;Cast{"inputs": {"0": ["float"]}, "outputs": {"0": ["bool"]}}
 ai.onnx;11;ArgMax{"inputs": {"0": ["float"]}},If,Loop
 ai.onnx.ml;1;ArrayFeatureExtractor,LinearClassifier,Normalizer,ZipMap
+
+# Note: The lines below were added manually.
+# TODO find a way to avoid manual modification of this file
+# also include Transpose added by layout transformation
+ai.onnx;1;Transpose