[NNAPI EP] Make NNAPI EP build on non-Android Platform (#5779)

* Make NNAPI EP build on non-Android Platform * minor updates * Address CR comments * Fix build issue using Windows, address CR comments * Fix linux build warnings * Fix for test failure * Fix for test failure * Fix model_tests failure
2026-07-21 19:18:55 +00:00 · 2020-11-15 17:04:45 -08:00 · 2020-11-15 17:04:45 -08:00 · c4818d36ed
commit c4818d36ed
parent 5b7dc5aeee
15 changed files with 157 additions and 51 deletions
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@ -657,16 +657,48 @@ if (onnxruntime_USE_OPENVINO)
 endif()

 if (onnxruntime_USE_NNAPI_BUILTIN)
-  add_definitions(-DUSE_NNAPI=1)
+  add_compile_definitions(USE_NNAPI=1)
+
+  # This is the minimum Android API Level required by ORT NNAPI EP to run
+  # ORT running on any host system with Android API level less than this will fall back to CPU EP
+  if(onnxruntime_NNAPI_MIN_API)
+    add_compile_definitions(ORT_NNAPI_MIN_API_LEVEL=${onnxruntime_NNAPI_MIN_API})
+  endif()
+
+  # This is the maximum Android API level supported in the ort model conversion for NNAPI EP
+  # Note: This is only for running NNAPI for ort format model conversion on a non-Android system, since we cannot
+  #       get the actual Android system version.
+  if(onnxruntime_NNAPI_HOST_API)
+    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+      message(FATAL_ERROR "onnxruntime_NNAPI_HOST_API should only be set for non-Android target")
+    endif()
+    add_compile_definitions(ORT_NNAPI_MAX_SUPPORTED_API_LEVEL=${onnxruntime_NNAPI_HOST_API})
+  endif()
+
  file(GLOB
    onnxruntime_providers_nnapi_cc_srcs_top CONFIGURE_DEPENDS
    "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc"
  )
-  file(GLOB_RECURSE
-    onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
-  )
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    file(GLOB_RECURSE
+      onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
+    )
+  else()
+    file(GLOB
+      onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
+    )
+  endif()
+
  set(onnxruntime_providers_nnapi_cc_srcs ${onnxruntime_providers_nnapi_cc_srcs_top} ${onnxruntime_providers_nnapi_cc_srcs_nested})
  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs})
  add_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs})
@ -677,6 +709,10 @@ if (onnxruntime_USE_NNAPI_BUILTIN)
  set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime")
  target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS})
  set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX)
+  # ignore the warning unknown-pragmas on "pragma region"
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_nnapi PRIVATE "-Wno-unknown-pragmas")
+  endif()
 endif()

 if (onnxruntime_USE_RKNPU)
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@ -236,7 +236,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
  dim_2 = std::accumulate(input_shape.cbegin() + axis, input_shape.cend(), 1, std::multiplies<int32_t>());
 }

-bool IsValidSupportedNodesVec(const std::vector<int>& supported_node_vec, const GraphViewer& graph_viewer) {
+bool IsValidSupportedNodesVec(const std::vector<size_t>& supported_node_vec, const GraphViewer& graph_viewer) {
  if (supported_node_vec.empty())
    return false;

@ -266,8 +266,8 @@ bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const Op
  }
 }

-std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
-  std::vector<std::vector<int>> supported_node_vecs;
+std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
+  std::vector<std::vector<size_t>> supported_node_vecs;
  if (params.android_sdk_ver < ORT_NNAPI_MIN_API_LEVEL) {
    LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because Android API level [" << params.android_sdk_ver
                          << "] is lower than minimal supported API level [" << ORT_NNAPI_MIN_API_LEVEL
@ -275,7 +275,7 @@ std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer,
    return supported_node_vecs;
  }

-  std::vector<int> supported_node_vec;
+  std::vector<size_t> supported_node_vec;
  const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
  for (size_t i = 0; i < node_indices.size(); i++) {
    const auto* node(graph_viewer.GetNode(node_indices[i]));
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@ -8,10 +8,19 @@
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"

 // This is the minimal Android API Level required by ORT NNAPI EP to run
+// ORT running on any host system with Android API level less than this will fall back to CPU EP
 #ifndef ORT_NNAPI_MIN_API_LEVEL
 #define ORT_NNAPI_MIN_API_LEVEL 27
 #endif

+// This is the maximum Android API level supported in the ort model conversion for NNAPI EP
+// Note: This is only for running NNAPI for ort format model conversion on a non-Android system, since we cannot
+//       get the actual Android system version.
+//       If running on an actual Android system, this value will be ignored
+#ifndef ORT_NNAPI_MAX_SUPPORTED_API_LEVEL
+#define ORT_NNAPI_MAX_SUPPORTED_API_LEVEL 30
+#endif
+
 namespace onnxruntime {

 using Shape = std::vector<uint32_t>;
@ -114,7 +123,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
 bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const OpSupportCheckParams& params);

 // Get a list of groups of supported nodes, each group represents a subgraph supported by NNAPI EP
-std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
+std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params);

 // Get string representation of a Shape
 std::string Shape2String(const std::vector<uint32_t>& shape);
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@ -17,9 +17,7 @@ using namespace android::nn::wrapper;
 using std::vector;

 ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer)
-    : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {
-  GetAllInitializers();
-}
+    : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {}

 int32_t ModelBuilder::GetAndroidSdkVer() const {
  return nnapi_ ? nnapi_->android_sdk_version : 0;
@ -105,12 +103,6 @@ Status ModelBuilder::GetTargetDevices() {
  return Status::OK();
 }

-void ModelBuilder::GetAllInitializers() {
-  for (const auto& pair : graph_viewer_.GetAllInitializedTensors()) {
-    initializers_.emplace(pair.first, *pair.second);
-  }
-}
-
 void ModelBuilder::PreprocessInitializers() {
  const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
  for (size_t i = 0; i < node_indices.size(); i++) {
@ -172,13 +164,14 @@ std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(con

 Status ModelBuilder::RegisterInitializers() {
  // First pass to get all the stats of the initializers
-  auto initializer_size = initializers_.size();
+  const auto& initializer_tensors(GetInitializerTensors());
+  auto initializer_size = initializer_tensors.size();
  std::vector<std::tuple<uint32_t, size_t, size_t>> initializers(initializer_size);
  size_t sizeAll = 0;

  int i = 0;
-  for (const auto& pair : initializers_) {
-    const auto& tensor = pair.second;
+  for (const auto& pair : initializer_tensors) {
+    const auto& tensor = *pair.second;
    const auto& name = tensor.name();
    if (Contains(skipped_initializers_, name))
      continue;
@ -221,8 +214,8 @@ Status ModelBuilder::RegisterInitializers() {

  // 2nd pass to copy all the initializers into shared memory
  size_t offset = 0;
-  for (const auto& pair : initializers_) {
-    const auto& tensor = pair.second;
+  for (const auto& pair : initializer_tensors) {
+    const auto& tensor = *pair.second;
    if (Contains(skipped_initializers_, tensor.name()))
      continue;

@ -254,7 +247,7 @@ Status ModelBuilder::RegisterModelInputs() {
      if (Contains(operands_, input_name))
        continue;

-      if (Contains(initializers_, input_name))
+      if (Contains(GetInitializerTensors(), input_name))
        continue;
    }

@ -566,7 +559,7 @@ void ModelBuilder::RegisterNHWCOperand(const std::string& name) {
  nhwc_operands_.insert(name);
 }

-bool ModelBuilder::IsOperandNHWC(const std::string& name) {
+bool ModelBuilder::IsOperandNHWC(const std::string& name) const {
  return Contains(nhwc_operands_, name);
 }

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@ -8,13 +8,13 @@
 #include <core/graph/graph_viewer.h>
 #include "core/providers/nnapi/nnapi_builtin/model.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h"
+#include "op_support_checker.h"
 #include "shaper.h"

 namespace onnxruntime {
 namespace nnapi {

 class IOpBuilder;
-class IOpSupportChecker;

 class ModelBuilder {
 public:
@ -96,7 +96,7 @@ class ModelBuilder {
  const GraphViewer& GetGraphViewer() const { return graph_viewer_; }

  void RegisterNHWCOperand(const std::string& name);
-  bool IsOperandNHWC(const std::string& name);
+  bool IsOperandNHWC(const std::string& name) const;

  // Get the operand transposed to nchw/nhwc from given nhwc/nchw operand, if it exists
  bool GetNCHWOperand(const std::string& nhwc_name, std::string& nchw_name);
@ -127,7 +127,6 @@ class ModelBuilder {
  std::unordered_set<std::string> operands_;
  std::unordered_set<std::string> fused_activations_;

-  std::unordered_map<std::string, const ONNX_NAMESPACE::TensorProto&> initializers_;
  std::unordered_set<std::string> skipped_initializers_;

  // All activation nodes (Relu, Relu1, Relu6) as a map <NodeIndex, activation_code>
@ -156,8 +155,7 @@ class ModelBuilder {
  Status Prepare() ORT_MUST_USE_RESULT;

  Status GetTargetDevices() ORT_MUST_USE_RESULT;
-  // Get names of all the initializers
-  void GetAllInitializers();
+
  // If a NNAPI operation will use initializers directly, we will add the initializers to the skip list
  void PreprocessInitializers();
  // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@ -950,13 +950,11 @@ bool ClipOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
  // TODO, support clip between 2 arbitrary numbers
  if ((min == 0.0f && max == 6.0f) || (min == -1.0f && max == 1.0f)) {
    return true;
-  } else {
-    LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is ["
-                          << min << ", " << max << "]";
-    return false;
  }

-  return true;
+  LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is ["
+                        << min << ", " << max << "]";
+  return false;
 }

 #pragma endregion
@ -1182,4 +1180,4 @@ const std::unordered_map<std::string, std::shared_ptr<IOpSupportChecker>>& GetOp
 #pragma endregion

 }  // namespace nnapi
-}  // namespace onnxruntime
+}  // namespace onnxruntime
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
@ -9,6 +9,11 @@ namespace onnxruntime {
 namespace nnapi {

 struct OpSupportCheckParams {
+  OpSupportCheckParams(int32_t android_sdk_ver, bool use_nchw)
+      : android_sdk_ver(android_sdk_ver),
+        use_nchw(use_nchw) {
+  }
+
  int32_t android_sdk_ver = 0;
  bool use_nchw = false;
 };
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
@ -7,6 +7,8 @@
 #include "core/platform/ort_mutex.h"
 #include "nnapi_lib/NeuralNetworksWrapper.h"

+struct NnApi;
+
 namespace onnxruntime {
 namespace nnapi {

--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
@ -3,13 +3,18 @@

 #include "nnapi_execution_provider.h"

-#include "model.h"
 #include "builders/helper.h"
-#include "builders/model_builder.h"
 #include "builders/op_support_checker.h"
 #include "core/framework/allocatormgr.h"
 #include "core/framework/compute_capability.h"
+#include "core/graph/graph_viewer.h"
 #include "core/session/onnxruntime_cxx_api.h"
+#include "nnapi_lib/nnapi_implementation.h"
+
+#ifdef __ANDROID__
+#include "model.h"
+#include "builders/model_builder.h"
+#endif

 namespace onnxruntime {

@ -53,9 +58,21 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
    }
  }

-  nnapi::ModelBuilder builder(graph_view);
+  // We need the Android system API level so that GetCapability gives the correct result
+  // for the target system.
+  // If we are actually running on an Android system, we can get the API level by querying the system.
+  // However, we also allow the NNAPI EP to run GetCapability for model conversion on a non-Android system,
+  // where the runtime API level cannot be queried, so it has to be specified using a compile definition.
+  int32_t android_sdk_ver;
+#ifdef __ANDROID__
+  const auto* _nnapi = NnApiImplementation();
+  android_sdk_ver = _nnapi->android_sdk_version;
+#else
+  android_sdk_ver = ORT_NNAPI_MAX_SUPPORTED_API_LEVEL;
+#endif
+
  nnapi::OpSupportCheckParams params{
-      builder.GetAndroidSdkVer(),
+      android_sdk_ver,
      !!(nnapi_flags_ & NNAPI_FLAG_USE_NCHW),
  };
  const auto supported_nodes_vector = GetSupportedNodes(graph_view, params);
@ -177,6 +194,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
  return result;
 }

+#ifdef __ANDROID__
 static Status GetOutputBuffer(Ort::CustomOpApi& ort,
                              OrtKernelContext* context,
                              const nnapi::Model& model,
@ -412,5 +430,22 @@ common::Status NnapiExecutionProvider::Compile(const std::vector<onnxruntime::No
    node_compute_funcs.push_back(compute_info);
  }
  return Status::OK();
-}  // namespace onnxruntime
+}
+#else
+common::Status NnapiExecutionProvider::Compile(const std::vector<onnxruntime::Node*>& fused_nodes,
+                                               std::vector<NodeComputeInfo>& node_compute_funcs) {
+  for (const auto* fused_node : fused_nodes) {
+    ORT_UNUSED_PARAMETER(fused_node);
+    NodeComputeInfo compute_info;
+    compute_info.create_state_func = [](ComputeContext* /*context*/, FunctionState* /*state*/) { return 0; };
+    compute_info.release_state_func = [](FunctionState /*state*/) {};
+    compute_info.compute_func = [](FunctionState /* state */, const OrtCustomOpApi* /* api */, OrtKernelContext* /* context */) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "Compute is not supported in this build.");
+    };
+    node_compute_funcs.push_back(compute_info);
+  }
+  return Status::OK();
+}
+#endif
+
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
@ -24,10 +24,12 @@ class NnapiExecutionProvider : public IExecutionProvider {
  unsigned long GetNNAPIFlags() const { return nnapi_flags_; }

 private:
-  std::unordered_map<std::string, std::unique_ptr<onnxruntime::nnapi::Model>> nnapi_models_;
-
  // The bit flags which define bool options for NNAPI EP, bits are defined as
  // NNAPIFlags in include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h
  const unsigned long nnapi_flags_;
+
+#ifdef __ANDROID__
+  std::unordered_map<std::string, std::unique_ptr<onnxruntime::nnapi::Model>> nnapi_models_;
+#endif
 };
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
@ -16,10 +16,11 @@
 // Provides C++ classes to more easily use the Neural Networks API.
 #ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
 #define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
-#include "nnapi_implementation.h"
 #include <vector>
 #include <numeric>

+#include "NeuralNetworksTypes.h"
+
 template <typename T>
 T Product(const std::vector<T>& v) {
  return static_cast<T>(
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@ -612,7 +612,8 @@ TEST_P(ModelTest, Run) {
 #ifdef USE_NUPHAR
  provider_names.push_back(ORT_TSTR("nuphar"));
 #endif
-#ifdef USE_NNAPI
+// For any non-Android system, NNAPI will only be used for ort model converter
+#if defined(USE_NNAPI) && defined(__ANDROID__)
  provider_names.push_back(ORT_TSTR("nnapi"));
 #endif
 #ifdef USE_RKNPU
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 #include "core/common/logging/logging.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h"
 #include "core/session/inference_session.h"
@ -11,8 +14,9 @@ using namespace ONNX_NAMESPACE;
 using namespace ::onnxruntime::logging;

 namespace onnxruntime {
-
 namespace test {
+
+#ifdef __ANDROID__
 void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
                   const std::vector<float>& expected_values) {
  ASSERT_EQ(1, fetches.size());
@ -22,6 +26,7 @@ void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64
  const std::vector<float> found(rtensor.template Data<float>(), rtensor.template Data<float>() + expected_values.size());
  ASSERT_EQ(expected_values, found);
 }
+#endif

 void RunAndVerifyOutputs(const std::string& model_file_name,
                         const char* log_id,
@ -45,10 +50,18 @@ void RunAndVerifyOutputs(const std::string& model_file_name,
  ASSERT_EQ(1, graph.NumberOfNodes());  // Make sure the graph has 1 fused node
  ASSERT_EQ(onnxruntime::kNnapiExecutionProvider, graph.Nodes().cbegin()->GetExecutionProviderType());

+// The execution can only be performed on Android
+#ifdef __ANDROID__
  // Now run and verify the result
  std::vector<OrtValue> fetches;
  ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &fetches));
  VerifyOutputs(fetches, expected_dims, expected_values);
+#else
+  ORT_UNUSED_PARAMETER(feeds);
+  ORT_UNUSED_PARAMETER(output_names);
+  ORT_UNUSED_PARAMETER(expected_dims);
+  ORT_UNUSED_PARAMETER(expected_values);
+#endif
 }

 // Since NNAPI EP handles Reshape and Flatten differently,
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@ -95,7 +95,9 @@ std::unique_ptr<IExecutionProvider> DefaultNupharExecutionProvider(bool allow_un
 }

 std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider() {
-#ifdef USE_NNAPI
+// For any non-Android system, NNAPI will only be used for ort model converter
+// Make it unavailable here, you can still manually append NNAPI EP to session for model conversion
+#if defined(USE_NNAPI) && defined(__ANDROID__)
  return CreateExecutionProviderFactory_Nnapi(0)->CreateProvider();
 #else
  return nullptr;
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@ -318,6 +318,9 @@ def parse_arguments():
        help="Build with OpenVINO for specific hardware.")
    parser.add_argument(
        "--use_nnapi", action='store_true', help="Build with NNAPI support.")
+    parser.add_argument(
+        "--nnapi_min_api", type=int,
+        help="Minimum Android API level to enable NNAPI, should be no less than 27")
    parser.add_argument(
        "--use_rknpu", action='store_true', help="Build with RKNPU.")
    parser.add_argument(
@ -817,10 +820,12 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
        cmake_args += ["-Donnxruntime_USE_PREINSTALLED_EIGEN=ON",
                       "-Deigen_SOURCE_PATH=" + args.eigen_path]

+    if args.nnapi_min_api:
+        cmake_args += ["-Donnxruntime_NNAPI_MIN_API=" + str(args.nnapi_min_api)]
+
    if args.android:
        cmake_args += [
-            "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path +
-            "/build/cmake/android.toolchain.cmake",
+            "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + "/build/cmake/android.toolchain.cmake",
            "-DANDROID_PLATFORM=android-" + str(args.android_api),
            "-DANDROID_ABI=" + str(args.android_abi)
        ]
@ -1830,6 +1835,12 @@ def main():
    if args.minimal_build and args.disable_ort_format_load:
        raise BuildError('Minimal build requires loading ORT format models.')

+    if args.nnapi_min_api:
+        if not args.use_nnapi:
+            raise BuildError("Using --nnapi_min_api requires --use_nnapi")
+        if args.nnapi_min_api < 27:
+            raise BuildError("--nnapi_min_api should be 27+")
+
    # Disabling unit tests for VAD-F as FPGA only supports
    # models with NCHW layout
    if args.use_openvino == "VAD-F_FP32":