use span

2026-07-04 04:07:22 +00:00 · 2023-03-07 12:04:42 +08:00 · 2023-03-07 12:04:42 +08:00 · 18015f0f55
commit 18015f0f55
parent 4ca84ac303
16 changed files with 65 additions and 60 deletions
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc
@ -45,7 +45,7 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());
  NodeAttrHelper helper(node_unit);
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();

  auto input = node_unit.Inputs()[0].node_arg.Name();
  const auto& output = node_unit.Outputs()[0].node_arg.Name();
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/base_op_builder.cc
@ -48,7 +48,7 @@ bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node

 Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
  OpSupportCheckParams params{
-      model_builder.GetEfficientFeatureLevel(),
+      model_builder.GetEffectiveFeatureLevel(),
      model_builder.UseNCHW(),
  };
  ORT_RETURN_IF_NOT(IsOpSupported(model_builder.GetInitializerTensors(), node_unit, params),
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc
@ -245,7 +245,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
  int32_t fuse_code = model_builder.FindActivation(node_unit);
  ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);

-  if (model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
+  if (model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);

    // 1. NNAPI Grouped Conv does not support dilations
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc
@ -39,7 +39,7 @@ Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
  auto& shaper(model_builder.GetShaper());
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
  NodeAttrHelper helper(node_unit);

  const auto& input = node_unit.Inputs()[0].node_arg.Name();
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc
@ -84,7 +84,7 @@ class GemmOpBuilder : public BaseOpBuilder {
 // Add operator related

 void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
-  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEfficientFeatureLevel())) {
+  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEffectiveFeatureLevel())) {
    // no initializers to skip for batch matmul
    return;
  }
@ -125,7 +125,7 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
 }

 Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
-  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEfficientFeatureLevel())) {
+  if (op_builder_helpers::IsSupportedBatchMatMul(node_unit, model_builder.GetEffectiveFeatureLevel())) {
    return op_builder_helpers::BuildBatchMatMul(model_builder, node_unit);
  }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc
@ -146,7 +146,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
  ADD_SCALAR_OPERAND(model_builder, input_indices, kernel_shape[0]);
  ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);

-  if (model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {  // nchw only supported on api 29+
+  if (model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {  // nchw only supported on api 29+
    ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
  }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc
@ -77,7 +77,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
  const auto& initializers(model_builder.GetInitializerTensors());
  NodeAttrHelper helper(node_unit);
  const auto& inputs = node_unit.Inputs();
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
  const auto& output = node_unit.Outputs()[0].node_arg.Name();

  auto input = inputs[0].node_arg.Name();
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc
@ -163,7 +163,7 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
  if (std::all_of(compute_metadata.steps_.cbegin(),
                  compute_metadata.steps_.cend(),
                  [](int64_t i) { return i == 1; }) &&
-      model_builder.GetEfficientFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
+      model_builder.GetEffectiveFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
    op_code = ANEURALNETWORKS_SLICE;
    // the nnapi size of the slice in this case is the output shape
    ORT_RETURN_IF_ERROR(AddOperand("sizes", param_dimen, compute_metadata.output_dims_));  // nnapi_sizes
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc
@ -62,7 +62,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
  auto& shaper(model_builder.GetShaper());
  const auto& operand_indices(model_builder.GetOperandIndices());
  const auto& operand_types(model_builder.GetOperandTypes());
-  const auto android_feature_level = model_builder.GetEfficientFeatureLevel();
+  const auto android_feature_level = model_builder.GetEffectiveFeatureLevel();
  NodeAttrHelper helper(node_unit);

  auto input = node_unit.Inputs()[0].node_arg.Name();
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@ -28,11 +28,11 @@ namespace onnxruntime {
 namespace nnapi {

 ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
-                           const std::vector<DeviceWrapper>& nnapi_target_devices)
+                           gsl::span<const DeviceWrapper> nnapi_target_devices)
    : nnapi_(nnapi_handle), graph_viewer_(graph_viewer), nnapi_model_{std::make_unique<Model>(nnapi_handle)},
      shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices),
-      nnapi_target_device_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
-  nnapi_model_->nnapi_target_device_feature_level_ = nnapi_target_device_feature_level_;
+      nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) {
+  nnapi_model_->nnapi_effective_feature_level_ = nnapi_effective_feature_level_;
 }

 // Scalar operand is copied into the model, no need to persist
@ -367,10 +367,10 @@ Status ModelBuilder::AddNewNNAPIOperand(const OperandType& operand_type, uint32_
  index = next_index_++;

  if (operand_type.channelQuant) {
-    if (nnapi_target_device_feature_level_ < ANEURALNETWORKS_FEATURE_LEVEL_3) {
+    if (nnapi_effective_feature_level_ < ANEURALNETWORKS_FEATURE_LEVEL_3) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Per-channel quantization is only supported on Android API level 29+,",
-                             " system NNAPI feature level: ", nnapi_target_device_feature_level_);
+                             " system NNAPI feature level: ", nnapi_effective_feature_level_);
    }

    RETURN_STATUS_ON_ERROR(nnapi_.ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
@ -506,7 +506,7 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
      "on identifyInputsAndOutputs");

  // relax fp32tofp16 is only available on API 28+
-  if (use_fp16_ && nnapi_target_device_feature_level_ > ANEURALNETWORKS_FEATURE_LEVEL_1) {
+  if (use_fp16_ && nnapi_effective_feature_level_ > ANEURALNETWORKS_FEATURE_LEVEL_1) {
    RETURN_STATUS_ON_ERROR_WITH_NOTE(
        nnapi_.ANeuralNetworksModel_relaxComputationFloat32toFloat16(
            nnapi_model_->model_, true),
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@ -31,7 +31,7 @@ class ModelBuilder {
  using Shape = Shaper::Shape;

  ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle,
-               const std::vector<DeviceWrapper>& nnapi_target_devices);
+               gsl::span<const DeviceWrapper> nnapi_target_devices);

  common::Status Compile(std::unique_ptr<Model>& model);

@ -102,7 +102,7 @@ class ModelBuilder {
  // the given node must be in the underlying graph_viewer
  const NodeUnit& GetNodeUnit(const Node* node) const;

-  int32_t GetEfficientFeatureLevel() const { return nnapi_target_device_feature_level_; }
+  int32_t GetEffectiveFeatureLevel() const { return nnapi_effective_feature_level_; }
 private:
  const NnApi& nnapi_;
  const GraphViewer& graph_viewer_;
@ -144,10 +144,10 @@ class ModelBuilder {

  std::unordered_set<std::string> unique_names_;

-  const std::vector<DeviceWrapper>& nnapi_target_devices_;
+  gsl::span<const DeviceWrapper> nnapi_target_devices_;

  // feature_level, to decide if we can run this node on NNAPI
-  int32_t nnapi_target_device_feature_level_ = 0;
+  int32_t nnapi_effective_feature_level_ = 0;
  // The number of nnapi operations in this model
  size_t num_nnapi_ops_ = 0;
  uint32_t next_index_ = 0;
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc
@ -847,9 +847,9 @@ Status AddSqueezeOp(ModelBuilder& model_builder,
                    const std::string& node_name,
                    const std::string& input, const std::string& output,
                    std::vector<int32_t> axes) {
-  if (model_builder.GetEfficientFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
+  if (model_builder.GetEffectiveFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) {
    return ORT_MAKE_STATUS(
-        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetEfficientFeatureLevel());
+        ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetEffectiveFeatureLevel());
  }

  auto& shaper(model_builder.GetShaper());
@ -1013,7 +1013,7 @@ bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit

    // Now the dest node is Gemm/Matmul, we want to make sure it is supported
    OpSupportCheckParams params{
-        model_builder.GetEfficientFeatureLevel(),
+        model_builder.GetEffectiveFeatureLevel(),
        model_builder.UseNCHW(),
    };

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
@ -7,7 +7,7 @@
 #include "core/providers/common.h"
 #include "core/providers/nnapi/nnapi_builtin/builders/helper.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
-#include "nnapi_api_helper.h"
+#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"

 #ifdef USENNAPISHAREDMEM
 #include <sys/mman.h>
@ -90,7 +90,7 @@ size_t Model::GetMappedOutputIdx(const std::string& name) const {

 bool Model::SupportsDynamicOutputShape() const {
  // dynamic output shape is only supported on Android API level 29+ (ANEURALNETWORKS_FEATURE_LEVEL_3)
-  return nnapi_target_device_feature_level_ >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
+  return nnapi_effective_feature_level_ >= ANEURALNETWORKS_FEATURE_LEVEL_3 && dynamic_output_buffer_size_ > 0;
 }

 Status Model::PrepareForExecution(std::unique_ptr<Execution>& execution) {
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.cc
@ -1,11 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

-#include "nnapi_api_helper.h"
-
 #include "core/common/inlined_containers_fwd.h"
 #include "core/providers/nnapi/nnapi_builtin/builders/model_builder.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
+#include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
 #include "core/common/logging/logging.h"

 #ifdef __ANDROID__
@ -15,23 +14,6 @@
 namespace onnxruntime {
 namespace nnapi {

-/**  How feature level works for NNAPI. refer to https://developer.android.com/ndk/reference/group/neural-networks
- *
- * NNAPI device feature level is closely related to NNAPI runtime feature level
-    (ANeuralNetworks_getRuntimeFeatureLevel), which indicates an NNAPI runtime feature level
-    (the most advanced NNAPI specification and features that the runtime implements).
-    An NNAPI device feature level is always less than or equal to the runtime feature level.
- *
- * On Android devices with API level 30 and older, the Android API level of the Android device
-    must be used for NNAPI runtime feature discovery.
-    Enum values in FeatureLevelCode from feature level 1 to 5 have their
-    corresponding Android API levels listed in their documentation,
-    and each such enum value equals the corresponding API level.
-    This allows using the Android API level as the feature level.
-    This mapping between enum value and Android API level does not exist for
-    feature levels after NNAPI feature level 5 and API levels after S (31).
-
- */
 static int32_t GetNNAPIRuntimeFeatureLevel(const NnApi& nnapi_handle) {
  int32_t runtime_level = static_cast<int32_t>(nnapi_handle.nnapi_runtime_feature_level);

@ -51,12 +33,12 @@ static int32_t GetNNAPIRuntimeFeatureLevel(const NnApi& nnapi_handle) {
 * @return The max feature level support by a set of devices.
 *
 */
-static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_sets) {
+static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> devices) {
  int32_t target_feature_level = GetNNAPIRuntimeFeatureLevel(nnapi_handle);

  int64_t devices_feature_level = -1;

-  for (const auto &device : device_sets) {
+  for (const auto &device : devices) {
    // we want to op run on the device with the highest feature level so we can support more ops.
    // and we don't care which device runs them.
    devices_feature_level = std::max(device.feature_level, devices_feature_level);
@ -75,7 +57,7 @@ static int32_t GetDeviceFeatureLevelInternal(const NnApi& nnapi_handle, const st
 // get all target devices which satisfy the target_device_option
 // we will always put CPU device at the end if cpu is enabled
 Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_device_option,
-                        std::vector<DeviceWrapper>& device_sets) {
+                        InlinedVector<DeviceWrapper>& devices) {
  // GetTargetDevices is only supported when NNAPI runtime feature level >= ANEURALNETWORKS_FEATURE_LEVEL_3
  if (GetNNAPIRuntimeFeatureLevel(nnapi_handle) < ANEURALNETWORKS_FEATURE_LEVEL_3)
    return Status::OK();
@ -110,9 +92,9 @@ Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_dev
    }

    if (device_is_cpu) {
-      cpu_index = static_cast<int32_t>(device_sets.size());
+      cpu_index = static_cast<int32_t>(devices.size());
    }
-    device_sets.push_back({device, std::string(device_name), device_type, curr_device_feature_level});
+    devices.push_back({device, std::string(device_name), device_type, curr_device_feature_level});
  }

  // put CPU device at the end
@ -120,17 +102,16 @@ Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_dev
  // and nnapi internally skip the last device if it has already found one.
  // 2) we can easily exclude nnapi-reference when not strict excluding CPU.
  // 3) we can easily log the detail of how op was assigned on NNAPI devices which is helpful for debugging.
-  if (cpu_index != -1 && cpu_index != static_cast<int32_t>(device_sets.size()) - 1) {
-    std::swap(device_sets[device_sets.size() - 1], device_sets[cpu_index]);
+  if (cpu_index != -1 && cpu_index != static_cast<int32_t>(devices.size()) - 1) {
+    std::swap(devices[devices.size() - 1], devices[cpu_index]);
  }

  return Status::OK();
 }

-
-std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets) {
+std::string GetDeviceDescription(gsl::span<const DeviceWrapper> devices) {
  std::string nnapi_target_devices_detail;
-  for (const auto& device : device_sets) {
+  for (const auto& device : devices) {
    const auto device_detail = MakeString("[Name: [", device.name, "], Type [", device.type, "]], ");
    nnapi_target_devices_detail += device_detail + " ,";
  }
@ -140,7 +121,7 @@ std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets)
 // Get devices-set first and then get the max feature level supported by all target devices
 // return -1 if failed.  It's not necessary to handle the error here, because level=-1 will refuse all ops
 int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_handle, TargetDeviceOption target_device_option) {
-  std::vector<DeviceWrapper> nnapi_target_devices;
+  InlinedVector<DeviceWrapper> nnapi_target_devices;
  if (auto st = GetTargetDevices(nnapi_handle, target_device_option, nnapi_target_devices); !st.IsOK()) {
    LOGS_DEFAULT(WARNING) << "GetTargetDevices failed for :" << st.ErrorMessage();
    return -1;
@ -150,7 +131,7 @@ int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_h

 // get the max feature level supported by all target devices, If no devices are specified,
 // it will return the runtime feature level
-int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_handles) {
+int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> device_handles) {
  return GetDeviceFeatureLevelInternal(nnapi_handle, device_handles);
 }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h
@ -34,12 +34,35 @@ enum class TargetDeviceOption : int8_t {
 };

 const char* const nnapi_cpu = ("nnapi-reference");
-int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, const std::vector<DeviceWrapper>& device_handles);

+/**  How feature levels work for NNAPI. Refer to https://developer.android.com/ndk/reference/group/neural-networks
+ *
+ * NNAPI device feature level is closely related to NNAPI runtime feature level
+    (ANeuralNetworks_getRuntimeFeatureLevel), which indicates an NNAPI runtime feature level
+    (the most advanced NNAPI specification and features that the runtime implements).
+    An NNAPI device feature level is always less than or equal to the runtime feature level.
+ *
+ * On Android devices with API level 30 and older, the Android API level of the Android device
+    must be used for NNAPI runtime feature discovery.
+    Enum values in FeatureLevelCode from feature level 1 to 5 have their
+    corresponding Android API levels listed in their documentation,
+    and each such enum value equals the corresponding API level.
+    This allows using the Android API level as the feature level.
+    This mapping between enum value and Android API level does not exist for
+    feature levels after NNAPI feature level 5 and API levels after S (31).
+
+ */
+int32_t GetNNAPIEffectiveFeatureLevel(const NnApi& nnapi_handle, gsl::span<const DeviceWrapper> device_handles);
+
+/**
+ * Get all NNAPI target devices (hardware accelerators and, if enabled, the CPU device) that satisfy the given target device option.
+ *
+*/
 Status GetTargetDevices(const NnApi& nnapi_handle, TargetDeviceOption target_device_option,
-                        std::vector<DeviceWrapper>& nnapi_target_devices);
+                        InlinedVector<DeviceWrapper>& nnapi_target_devices);
+
 int32_t GetNNAPIEffectiveFeatureLevelFromTargetDeviceOption(const NnApi& nnapi_handle, TargetDeviceOption target_device_option);

-std::string GetDeviceDescription(const std::vector<DeviceWrapper>& device_sets);
+std::string GetDeviceDescription(gsl::span<const DeviceWrapper> devices);
 }  // namespace nnapi
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
@ -3,6 +3,7 @@

 #pragma once

+#include "core/common/inlined_containers_fwd.h"
 #include "core/common/optional.h"
 #include "core/framework/execution_provider.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_api_helper.h"
@ -45,7 +46,7 @@ class NnapiExecutionProvider : public IExecutionProvider {

  // nnapi handle for either Android NNAPI or x86 hooker.
  const nnapi::NnApi* nnapi_handle_ = nullptr;
-  std::vector<nnapi::DeviceWrapper> nnapi_target_devices_;
+  InlinedVector<nnapi::DeviceWrapper> nnapi_target_devices_;
  nnapi::TargetDeviceOption target_device_option_;
 };
 }  // namespace onnxruntime