From 18015f0f556ba790bc49bb77eee4f29c2b293745 Mon Sep 17 00:00:00 2001
From: wejoncy <wejoncy@163.com>
Date: Tue, 7 Mar 2023 12:04:42 +0800
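Subject: [PATCH] Use gsl::span/InlinedVector for NNAPI target devices; rename GetEfficientFeatureLevel to GetEffectiveFeatureLevel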

---
 .../builders/impl/LRN_op_builder.cc           |  2 +-
 .../builders/impl/base_op_builder.cc          |  2 +-
 .../builders/impl/conv_op_builder.cc          |  2 +-
 .../builders/impl/depthtospace_op_builder.cc  |  2 +-
 .../builders/impl/gemm_op_builder.cc          |  4 +-
 .../builders/impl/pool_op_builder.cc          |  2 +-
 .../builders/impl/resize_op_builder.cc        |  2 +-
 .../builders/impl/slice_op_builder.cc         |  2 +-
 .../builders/impl/softmax_op_builder.cc       |  2 +-
 .../nnapi_builtin/builders/model_builder.cc   | 12 +++---
 .../nnapi_builtin/builders/model_builder.h    |  8 ++--
 .../builders/op_builder_helpers.cc            |  6 +--
 .../providers/nnapi/nnapi_builtin/model.cc    |  4 +-
 .../nnapi/nnapi_builtin/nnapi_api_helper.cc   | 43 ++++++-------------
 .../nnapi/nnapi_builtin/nnapi_api_helper.h    | 29 +++++++++++--
 .../nnapi_builtin/nnapi_execution_provider.h  |  3 +-
 16 files changed, 65 insertions(+), 60 deletions(-)

diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc
index cac1477d26..00bca40013 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc
@@ -45,7 +45,7 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
   const auto& operand_indices(model_builder.GetOperandIndices());
   const auto& operand_types(model_builder.GetOperandTypes());
   NodeAttrHelper helper(node_unit);
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
 
   auto input = node_unit.Inputs()[0].node_arg.Name();
   const auto& output = node_unit.Outputs()[0].node_arg.Name();
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc
index bb3befa886..7a18679329 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc
@@ -48,7 +48,7 @@ bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node
 
 Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
   OpSupportCheckParams params{
-      model_builder.GetEfficientFeatureLevel(),
+      model_builder.GetEffectiveFeatureLevel(),
       model_builder.UseNCHW(),
   };
   ORT_RETURN_IF_NOT(IsOpSupported(model_builder.GetInitializerTensors(), node_unit, params),
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc
index 9c64bf57af..5b8bbd338a 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc
@@ -245,7 +245,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   int32_t fuse_code = model_builder.FindActivation(node_unit);
   ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);
 
-  if (model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
+  if (model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
     ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
 
     // 1. NNAPI Grouped Conv does not support dilations
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc
index cd31abda2a..649f1e1cff 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc
@@ -39,7 +39,7 @@ Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   auto& shaper(model_builder.GetShaper());
   const auto& operand_indices(model_builder.GetOperandIndices());
   const auto& operand_types(model_builder.GetOperandTypes());
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
   NodeAttrHelper helper(node_unit);
 
   const auto& input = node_unit.Inputs()[0].node_arg.Name();
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc
index 5a2870e041..9b3003d472 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc
@@ -84,7 +84,7 @@ class GemmOpBuilder : public BaseOpBuilder {
 // Add operator related
 
 void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
-  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEfficientFeatureLevel())) {
+  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEffectiveFeatureLevel())) {
     // no initializers to skip for batch matmul
     return;
   }
@@ -125,7 +125,7 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
 }
 
 Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
-  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEfficientFeatureLevel())) {
+  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEffectiveFeatureLevel())) {
     return op_builder_helpers::BuildBatchMatMul(model_builder, node_unit);
   }
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc
index cb0b70367d..c14568aacc 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc
@@ -146,7 +146,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   ADD_SCALAR_OPERAND(model_builder, input_indices, kernel_shape[0]);
   ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);
 
-  if (model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {  // nchw only supported on api 29+
+  if (model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {  // nchw only supported on api 29+
     ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
   }
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc
index 1dd6833ce2..01e348caf1 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc
@@ -77,7 +77,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   const auto& initializers(model_builder.GetInitializerTensors());
   NodeAttrHelper helper(node_unit);
   const auto& inputs = node_unit.Inputs();
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
   const auto& output = node_unit.Outputs()[0].node_arg.Name();
 
   auto input = inputs[0].node_arg.Name();
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc
index 3e343b008a..903469d34e 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc
@@ -163,7 +163,7 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   if (std::all_of(compute_metadata.steps_.cbegin(),
                   compute_metadata.steps_.cend(),
                   [](int64_t i) { return i == 1; }) &&
-      model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
+      model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
     op_code = ANEURALNETWORKS_SLICE;
     // the nnapi size of the slice in this case is the output shape
     ORT_RETURN_IF_ERROR(AddOperand("sizes", param_dimen, compute_metadata.output_dims_));  // nnapi_sizes
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc
index 446a1eadce..b6007628fb 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc
@@ -62,7 +62,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
   auto& shaper(model_builder.GetShaper());
   const auto& operand_indices(model_builder.GetOperandIndices());
   const auto& operand_types(model_builder.GetOperandTypes());
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
   NodeAttrHelper helper(node_unit);
 
   auto input = node_unit.Inputs()[0].node_arg.Name();
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
index c86daa7865..c2c993aa50 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@@ -28,11 +28,11 @@ namespace onnxruntime {
 namespace nnapi {
 
 ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
-                           const std::vector<DeviceWrapper>& nnapi_target_devices)
+                           gsl::span<const DeviceWrapper> nnapi_target_devices)
     : nnapi_(nnapi_handle), graph_viewer_(graph_viewer), nnapi_model_{std::make_unique<Model>(nnapi_handle)},
       shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices),
-      nnapi_target_device_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
-  nnapi_model_->nnapi_target_device_feature_level_ = nnapi_target_device_feature_level_;
+      nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
+  nnapi_model_->nnapi_effective_feature_level_ = nnapi_effective_feature_level_;
 }
 
 // Scalar operand is copied into the model, no need to persist
@@ -367,10 +367,10 @@ Status ModelBuilder::AddNewNNAPIOperand(const OperandType& operand_type, uint32_
   index = next_index_++;
 
   if (operand_type.channelQuant) {
-    if (nnapi_target_device_feature_level_ < ANEURALNETWORKS_FEATURE_LEVEL_3) {
+    if (nnapi_effective_feature_level_ < ANEURALNETWORKS_FEATURE_LEVEL_3) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                              "Per-channel quantization is only supported on Android API level 29+,",
-                             " system NNAPI feature level: ", nnapi_target_device_feature_level_);
+                             " system NNAPI feature level: ", nnapi_effective_feature_level_);
     }
 
     RETURN_STATUS_ON_ERROR(nnapi_.ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
@@ -506,7 +506,7 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
       "on identifyInputsAndOutputs");
 
   // relax fp32tofp16 is only available on API 28+
-  if (use_fp16_ && nnapi_target_device_feature_level_ > ANEURALNETWORKS_FEATURE_LEVEL_1) {
+  if (use_fp16_ && nnapi_effective_feature_level_ > ANEURALNETWORKS_FEATURE_LEVEL_1) {
     RETURN_STATUS_ON_ERROR_WITH_NOTE(
         nnapi_.ANeuralNetworksModel_relaxComputationFloat32toFloat16(
             nnapi_model_->model_, true),
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
index 87179aa28a..ee660605ff 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@@ -31,7 +31,7 @@ class ModelBuilder {
   using Shape = Shaper::Shape;
 
   ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
-               const std::vector<DeviceWrapper>& nnapi_target_devices);
+               gsl::span<const DeviceWrapper> nnapi_target_devices);
 
   common::Status Compile(std::unique_ptr<Model>& model);
 
@@ -102,7 +102,7 @@ class ModelBuilder {
   // the given node must be in the underlying graph_viewer
   const NodeUnit& GetNodeUnit(const Node* node) const;
 
-  int32_t GetEfficientFeatureLevel() const { return nnapi_target_device_feature_level_; }
+  int32_t GetEffectiveFeatureLevel() const { return nnapi_effective_feature_level_; }
  private:
   const NnApi& nnapi_;
   const GraphViewer& graph_viewer_;
@@ -144,10 +144,10 @@ class ModelBuilder {
 
   std::unordered_set<std::string> unique_names_;
 
-  const std::vector<DeviceWrapper>& nnapi_target_devices_;
+  gsl::span<const DeviceWrapper> nnapi_target_devices_;
 
   // feature_level, to decide if we can run this node on NNAPI
-  int32_t nnapi_target_device_feature_level_ = 0;
+  int32_t nnapi_effective_feature_level_ = 0;
   // The number of nnapi operations in this model
   size_t num_nnapi_ops_ = 0;
   uint32_t next_index_ = 0;
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc
index 023de5a217..9832338c0c 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc
@@ -847,9 +847,9 @@ Status AddSqueezeOp(ModelBuilder& model_builder,
                     const std::string& node_name,
                     const std::string& input, const std::string& output,
                     std::vector<int32_t> axes) {
-  if (model_builder.GetEfficientFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
+  if (model_builder.GetEffectiveFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
     return ORT_MAKE_STATUS(
-        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetEfficientFeatureLevel());
+        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetEffectiveFeatureLevel());
   }
 
   auto& shaper(model_builder.GetShaper());
@@ -1013,7 +1013,7 @@ bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit
 
     // Now the dest node is Gemm/Matmul, we want to make sure it is supported
     OpSupportCheckParams params{
-        model_builder.GetEfficientFeatureLevel(),
+        model_builder.GetEffectiveFeatureLevel(),
         model_builder.UseNCHW(),
     };
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
index e93dbd3bde..d0e01f1f56 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
@@ -7,7 +7,7 @@
 #include "core/providers/common.h"
 #include "core/providers/nnapi/nnapi_builtin/builders/helper.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
-#include "nnapi_api_helper.h"
+#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
 
 #ifdef USENNAPISHAREDMEM
 #include <sys/mman.h>
@@ -90,7 +90,7 @@ size_t Model::GetMappedOutputIdx(const std::string& name) const {
 
 bool Model::SupportsDynamicOutputShape() const {
   // dynamic output shape is only supported on Android API level 29+ (ANEURALNETWORKS_FEATURE_LEVEL_3)
-  return nnapi_target_device_feature_level_ >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
+  return nnapi_effective_feature_level_ >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
 }
 
 Status Model::PrepareForExecution(std::unique_ptr<Execution>& execution) {
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.cc
index b13827cf72..a38c0a8528 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.cc
@@ -1,11 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "nnapi_api_helper.h"
-
 #include "core/common/inlined_containers_fwd.h"
 #include "core/providers/nnapi/nnapi_builtin/builders/model_builder.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
+#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
 #include "core/common/logging/logging.h"
 
 #ifdef __ANDROID__
@@ -15,23 +14,6 @@
 namespace onnxruntime {
 namespace nnapi {
 
-/**  How feature level works for NNAPI. refer to https://developer.android.com/ndk/reference/group/neural-networks
- *
- * NNAPI device feature level is closely related to NNAPI runtime feature level
-    (ANeuralNetworks_getRuntimeFeatureLevel), which indicates an NNAPI runtime feature level
-    (the most advanced NNAPI specification and features that the runtime implements).
-    An NNAPI device feature level is always less than or equal to the runtime feature level.
- *
- * On Android devices with API level 30 and older, the Android API level of the Android device
-    must be used for NNAPI runtime feature discovery.
-    Enum values in FeatureLevelCode from feature level 1 to 5 have their
-    corresponding Android API levels listed in their documentation,
-    and each such enum value equals the corresponding API level.
-    This allows using the Android API level as the feature level.
-    This mapping between enum value and Android API level does not exist for
-    feature levels after NNAPI feature level 5 and API levels after S (31).
-
- */
 static int32_t GetNNAPIRuntimeFeatureLevel(const NnApi& nnapi_handle) {
   int32_t runtime_level = static_cast<int32_t>(nnapi_handle.nnapi_runtime_feature_level);
 
@@ -51,12 +33,12 @@ static int32_t GetNNAPIRuntimeFeatureLevel(const NnApi& nnapi_handle) {
  * @return The max feature level support by a set of devices.
  *
  */
-static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_sets) {
+static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> devices) {
   int32_t target_feature_level = GetNNAPIRuntimeFeatureLevel(nnapi_handle);
 
   int64_t devices_feature_level = -1;
 
-  for (const auto &device : device_sets) {
+  for (const auto &device : devices) {
     // we want to op run on the device with the highest feature level so we can support more ops.
     // and we don't care which device runs them.
     devices_feature_level = std::max(device.feature_level, devices_feature_level);
@@ -75,7 +57,7 @@ static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, const st
 // get all target devices which satisfy the target_device_option
 // we will always put CPU device at the end if cpu is enabled
 Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_device_option,
-                        std::vector<DeviceWrapper>& device_sets) {
+                        InlinedVector<DeviceWrapper>& devices) {
   // GetTargetDevices is only supported when NNAPI runtime feature level >= ANEURALNETWORKS_FEATURE_LEVEL_3
   if (GetNNAPIRuntimeFeatureLevel(nnapi_handle) < ANEURALNETWORKS_FEATURE_LEVEL_3)
     return Status::OK();
@@ -110,9 +92,9 @@ Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_dev
     }
 
     if (device_is_cpu) {
-      cpu_index = static_cast<int32_t>(device_sets.size());
+      cpu_index = static_cast<int32_t>(devices.size());
     }
-    device_sets.push_back({device, std::string(device_name), device_type, curr_device_feature_level});
+    devices.push_back({device, std::string(device_name), device_type, curr_device_feature_level});
   }
 
   // put CPU device at the end
@@ -120,17 +102,16 @@ Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_dev
   // and nnapi internally skip the last device if it has already found one.
   // 2) we can easily exclude nnapi-reference when not strict excluding CPU.
   // 3) we can easily log the detail of how op was assigned on NNAPI devices which is helpful for debugging.
-  if (cpu_index != -1 && cpu_index != static_cast<int32_t>(device_sets.size()) - 1) {
-    std::swap(device_sets[device_sets.size() - 1], device_sets[cpu_index]);
+  if (cpu_index != -1 && cpu_index != static_cast<int32_t>(devices.size()) - 1) {
+    std::swap(devices[devices.size() - 1], devices[cpu_index]);
   }
 
   return Status::OK();
 }
 
-
-std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets) {
+std::string GetDeviceDescription(gsl::span<const DeviceWrapper> devices) {
   std::string nnapi_target_devices_detail;
-  for (const auto& device : device_sets) {
+  for (const auto& device : devices) {
     const auto device_detail = MakeString("[Name: [", device.name, "], Type [", device.type, "]], ");
     nnapi_target_devices_detail += device_detail + " ,";
   }
@@ -140,7 +121,7 @@ std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets)
 // Get devices-set first and then get the max feature level supported by all target devices
 // return -1 if failed.  It's not necessary to handle the error here, because level=-1 will refuse all ops
 int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_handle, TargetDeviceOption target_device_option) {
-  std::vector<DeviceWrapper> nnapi_target_devices;
+  InlinedVector<DeviceWrapper> nnapi_target_devices;
   if (auto st = GetTargetDevices(nnapi_handle, target_device_option, nnapi_target_devices); !st.IsOK()) {
     LOGS_DEFAULT(WARNING) << "GetTargetDevices failed for :" << st.ErrorMessage();
     return -1;
@@ -150,7 +131,7 @@ int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_h
 
 // get the max feature level supported by all target devices, If no devices are specified,
 // it will return the runtime feature level
-int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_handles) {
+int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> device_handles) {
   return GetDeviceFeatureLevelInternal(nnapi_handle, device_handles);
 }
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h
index c10ef7a95e..4501d56ad1 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h
@@ -34,12 +34,35 @@ enum class TargetDeviceOption : int8_t {
 };
 
 const char* const nnapi_cpu = ("nnapi-reference");
-int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_handles);
 
+/**  How feature levels work for NNAPI. Refer to https://developer.android.com/ndk/reference/group/neural-networks
+ *
+ * The NNAPI device feature level is closely related to the NNAPI runtime feature level
+    (ANeuralNetworks_getRuntimeFeatureLevel), which indicates the most advanced NNAPI
+    specification and features that the runtime implements.
+    An NNAPI device feature level is always less than or equal to the runtime feature level.
+ *
+ * On Android devices with API level 30 and older, the Android API level of the Android device
+    must be used for NNAPI runtime feature discovery.
+    Enum values in FeatureLevelCode from feature level 1 to 5 have their
+    corresponding Android API levels listed in their documentation,
+    and each such enum value equals the corresponding API level.
+    This allows using the Android API level as the feature level.
+    This mapping between enum value and Android API level does not exist for
+    feature levels after NNAPI feature level 5 and API levels after S (31).
+ *
+ */
+int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> device_handles);
+
+/**
+ * Get all target devices that satisfy the given target_device_option.
+ * The CPU device, if enabled, is always placed at the end of the list.
+ */
 Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_device_option,
-                        std::vector<DeviceWrapper>& nnapi_target_devices);
+                        InlinedVector<DeviceWrapper>& nnapi_target_devices);
+
 int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_handle, TargetDeviceOption target_device_option);
 
-std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets);
+std::string GetDeviceDescription(gsl::span<const DeviceWrapper> devices);
 }  // namespace nnapi
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
index fec3fca519..0eccffe13a 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "core/common/inlined_containers_fwd.h"
 #include "core/common/optional.h"
 #include "core/framework/execution_provider.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
@@ -45,7 +46,7 @@ class NnapiExecutionProvider : public IExecutionProvider {
 
   // nnapi handle for either Android NNAPI or x86 hooker.
   const nnapi::NnApi* nnapi_handle_ = nullptr;
-  std::vector<nnapi::DeviceWrapper> nnapi_target_devices_;
+  InlinedVector<nnapi::DeviceWrapper> nnapi_target_devices_;
   nnapi::TargetDeviceOption target_device_option_;
 };
 }  // namespace onnxruntime