diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
index 695cdc3efb..6b0d1de5d1 100644
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@@ -657,16 +657,48 @@ if (onnxruntime_USE_OPENVINO)
 endif()
 
 if (onnxruntime_USE_NNAPI_BUILTIN)
-  add_definitions(-DUSE_NNAPI=1)
+  add_compile_definitions(USE_NNAPI=1)
+
+  # This is the minimum Android API Level required by ORT NNAPI EP to run
+  # ORT running on any host system with Android API level less than this will fall back to CPU EP
+  if(onnxruntime_NNAPI_MIN_API)
+    add_compile_definitions(ORT_NNAPI_MIN_API_LEVEL=${onnxruntime_NNAPI_MIN_API})
+  endif()
+
+  # This is the maximum Android API level supported in the ort model conversion for NNAPI EP
+  # Note: This is only for running NNAPI for ort format model conversion on a non-Android system, since we cannot
+  #       get the actual Android system version there.
+  if(onnxruntime_NNAPI_HOST_API)
+    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+      message(FATAL_ERROR "onnxruntime_NNAPI_HOST_API should only be set for non-Android target")
+    endif()
+    add_compile_definitions(ORT_NNAPI_MAX_SUPPORTED_API_LEVEL=${onnxruntime_NNAPI_HOST_API})
+  endif()
+
   file(GLOB
     onnxruntime_providers_nnapi_cc_srcs_top CONFIGURE_DEPENDS
     "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc"
   )
-  file(GLOB_RECURSE
-    onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
-  )
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    file(GLOB_RECURSE
+      onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
+    )
+  else()
+    file(GLOB
+      onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
+    )
+  endif()
+
   set(onnxruntime_providers_nnapi_cc_srcs ${onnxruntime_providers_nnapi_cc_srcs_top} ${onnxruntime_providers_nnapi_cc_srcs_nested})
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs})
   add_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs})
@@ -677,6 +709,10 @@ if (onnxruntime_USE_NNAPI_BUILTIN)
   set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime")
   target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS})
   set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX)
+  # ignore the warning unknown-pragmas on "pragma region"
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_nnapi PRIVATE "-Wno-unknown-pragmas")
+  endif()
 endif()
 
 if (onnxruntime_USE_RKNPU)
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
index fc2af0aea7..2bb775ddb9 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -236,7 +236,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
   dim_2 = std::accumulate(input_shape.cbegin() + axis, input_shape.cend(), 1, std::multiplies<int32_t>());
 }
 
-bool IsValidSupportedNodesVec(const std::vector<int>& supported_node_vec, const GraphViewer& graph_viewer) {
+bool IsValidSupportedNodesVec(const std::vector<size_t>& supported_node_vec, const GraphViewer& graph_viewer) {
   if (supported_node_vec.empty())
     return false;
 
@@ -266,8 +266,8 @@ bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const Op
   }
 }
 
-std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
-  std::vector<std::vector<int>> supported_node_vecs;
+std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
+  std::vector<std::vector<size_t>> supported_node_vecs;
   if (params.android_sdk_ver < ORT_NNAPI_MIN_API_LEVEL) {
     LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because Android API level [" << params.android_sdk_ver
                           << "] is lower than minimal supported API level [" << ORT_NNAPI_MIN_API_LEVEL
@@ -275,7 +275,7 @@ std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer,
     return supported_node_vecs;
   }
 
-  std::vector<int> supported_node_vec;
+  std::vector<size_t> supported_node_vec;
   const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
   for (size_t i = 0; i < node_indices.size(); i++) {
     const auto* node(graph_viewer.GetNode(node_indices[i]));
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
index ae824c4dfb..4bbe194741 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@@ -8,10 +8,19 @@
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
 
 // This is the minimal Android API Level required by ORT NNAPI EP to run
+// ORT running on any host system with Android API level less than this will fall back to CPU EP
 #ifndef ORT_NNAPI_MIN_API_LEVEL
 #define ORT_NNAPI_MIN_API_LEVEL 27
 #endif
 
+// This is the maximum Android API level supported in the ort model conversion for NNAPI EP
+// Note: This is only for running NNAPI for ort format model conversion on a non-Android system, since we cannot
+//       get the actual Android system version there.
+//       If running on an actual Android system, this value will be ignored
+#ifndef ORT_NNAPI_MAX_SUPPORTED_API_LEVEL
+#define ORT_NNAPI_MAX_SUPPORTED_API_LEVEL 30
+#endif
+
 namespace onnxruntime {
 
 using Shape = std::vector<uint32_t>;
@@ -114,7 +123,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
 bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
 
 // Get a list of groups of supported nodes, each group represents a subgraph supported by NNAPI EP
-std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
+std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
 
 // Get string representation of a Shape
 std::string Shape2String(const std::vector<uint32_t>& shape);
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
index 22b0ebf674..5b908b1659 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@@ -17,9 +17,7 @@ using namespace android::nn::wrapper;
 using std::vector;
 
 ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer)
-    : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {
-  GetAllInitializers();
-}
+    : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {}
 
 int32_t ModelBuilder::GetAndroidSdkVer() const {
   return nnapi_ ? nnapi_->android_sdk_version : 0;
@@ -105,12 +103,6 @@ Status ModelBuilder::GetTargetDevices() {
   return Status::OK();
 }
 
-void ModelBuilder::GetAllInitializers() {
-  for (const auto& pair : graph_viewer_.GetAllInitializedTensors()) {
-    initializers_.emplace(pair.first, *pair.second);
-  }
-}
-
 void ModelBuilder::PreprocessInitializers() {
   const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
   for (size_t i = 0; i < node_indices.size(); i++) {
@@ -172,13 +164,14 @@ std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(con
 
 Status ModelBuilder::RegisterInitializers() {
   // First pass to get all the stats of the initializers
-  auto initializer_size = initializers_.size();
+  const auto& initializer_tensors(GetInitializerTensors());
+  auto initializer_size = initializer_tensors.size();
   std::vector<std::tuple<uint32_t, size_t, size_t>> initializers(initializer_size);
   size_t sizeAll = 0;
 
   int i = 0;
-  for (const auto& pair : initializers_) {
-    const auto& tensor = pair.second;
+  for (const auto& pair : initializer_tensors) {
+    const auto& tensor = *pair.second;
     const auto& name = tensor.name();
     if (Contains(skipped_initializers_, name))
       continue;
@@ -221,8 +214,8 @@ Status ModelBuilder::RegisterInitializers() {
 
   // 2nd pass to copy all the initializers into shared memory
   size_t offset = 0;
-  for (const auto& pair : initializers_) {
-    const auto& tensor = pair.second;
+  for (const auto& pair : initializer_tensors) {
+    const auto& tensor = *pair.second;
     if (Contains(skipped_initializers_, tensor.name()))
       continue;
 
@@ -254,7 +247,7 @@ Status ModelBuilder::RegisterModelInputs() {
       if (Contains(operands_, input_name))
         continue;
 
-      if (Contains(initializers_, input_name))
+      if (Contains(GetInitializerTensors(), input_name))
         continue;
     }
 
@@ -566,7 +559,7 @@ void ModelBuilder::RegisterNHWCOperand(const std::string& name) {
   nhwc_operands_.insert(name);
 }
 
-bool ModelBuilder::IsOperandNHWC(const std::string& name) {
+bool ModelBuilder::IsOperandNHWC(const std::string& name) const {
   return Contains(nhwc_operands_, name);
 }
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
index b5b54ff5d8..09a482138d 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@@ -8,13 +8,13 @@
 #include <core/graph/graph_viewer.h>
 #include "core/providers/nnapi/nnapi_builtin/model.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h"
+#include "op_support_checker.h"
 #include "shaper.h"
 
 namespace onnxruntime {
 namespace nnapi {
 
 class IOpBuilder;
-class IOpSupportChecker;
 
 class ModelBuilder {
  public:
@@ -96,7 +96,7 @@ class ModelBuilder {
   const GraphViewer& GetGraphViewer() const { return graph_viewer_; }
 
   void RegisterNHWCOperand(const std::string& name);
-  bool IsOperandNHWC(const std::string& name);
+  bool IsOperandNHWC(const std::string& name) const;
 
   // Get the operand transposed to nchw/nhwc from given nhwc/nchw operand, if it exists
   bool GetNCHWOperand(const std::string& nhwc_name, std::string& nchw_name);
@@ -127,7 +127,6 @@ class ModelBuilder {
   std::unordered_set<std::string> operands_;
   std::unordered_set<std::string> fused_activations_;
 
-  std::unordered_map<std::string, const ONNX_NAMESPACE::TensorProto&> initializers_;
   std::unordered_set<std::string> skipped_initializers_;
 
   // All activation nodes (Relu, Relu1, Relu6) as a map <NodeIndex, activation_code>
@@ -156,8 +155,7 @@ class ModelBuilder {
   Status Prepare() ORT_MUST_USE_RESULT;
 
   Status GetTargetDevices() ORT_MUST_USE_RESULT;
-  // Get names of all the initializers
-  void GetAllInitializers();
+
   // If a NNAPI operation will use initializers directly, we will add the initializers to the skip list
   void PreprocessInitializers();
   // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
index 5091f55674..91ce58597b 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@@ -950,13 +950,11 @@ bool ClipOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
   // TODO, support clip between 2 arbitrary numbers
   if ((min == 0.0f && max == 6.0f) || (min == -1.0f && max == 1.0f)) {
     return true;
-  } else {
-    LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is ["
-                          << min << ", " << max << "]";
-    return false;
   }
 
-  return true;
+  LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is ["
+                        << min << ", " << max << "]";
+  return false;
 }
 
 #pragma endregion
@@ -1182,4 +1180,4 @@ const std::unordered_map<std::string, std::shared_ptr<IOpSupportChecker>>& GetOp
 #pragma endregion
 
 }  // namespace nnapi
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
index ecce4084af..e3781fcb05 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h
@@ -9,6 +9,11 @@ namespace onnxruntime {
 namespace nnapi {
 
 struct OpSupportCheckParams {
+  OpSupportCheckParams(int32_t android_sdk_ver, bool use_nchw)
+      : android_sdk_ver(android_sdk_ver),
+        use_nchw(use_nchw) {
+  }
+
   int32_t android_sdk_ver = 0;
   bool use_nchw = false;
 };
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
index 3bfb09e54f..ea32f567c1 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
@@ -7,6 +7,8 @@
 #include "core/platform/ort_mutex.h"
 #include "nnapi_lib/NeuralNetworksWrapper.h"
 
+struct NnApi;
+
 namespace onnxruntime {
 namespace nnapi {
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
index 3db36e2839..a4d9c18ee2 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
@@ -3,13 +3,18 @@
 
 #include "nnapi_execution_provider.h"
 
-#include "model.h"
 #include "builders/helper.h"
-#include "builders/model_builder.h"
 #include "builders/op_support_checker.h"
 #include "core/framework/allocatormgr.h"
 #include "core/framework/compute_capability.h"
+#include "core/graph/graph_viewer.h"
 #include "core/session/onnxruntime_cxx_api.h"
+#include "nnapi_lib/nnapi_implementation.h"
+
+#ifdef __ANDROID__
+#include "model.h"
+#include "builders/model_builder.h"
+#endif
 
 namespace onnxruntime {
 
@@ -53,9 +58,21 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
     }
   }
 
-  nnapi::ModelBuilder builder(graph_view);
+  // We need the Android system API level to ensure that GetCapability gives the correct result
+  // for the system.
+  // If we are actually running on an Android system, we can get the API level by querying the system.
+  // However, we also allow the NNAPI EP to run GetCapability for model conversion on a non-Android system;
+  // there we cannot get the runtime system API level, so we have to specify it using a compile definition.
+  int32_t android_sdk_ver;
+#ifdef __ANDROID__
+  const auto* _nnapi = NnApiImplementation();
+  android_sdk_ver = _nnapi->android_sdk_version;
+#else
+  android_sdk_ver = ORT_NNAPI_MAX_SUPPORTED_API_LEVEL;
+#endif
+
   nnapi::OpSupportCheckParams params{
-      builder.GetAndroidSdkVer(),
+      android_sdk_ver,
       !!(nnapi_flags_ & NNAPI_FLAG_USE_NCHW),
   };
   const auto supported_nodes_vector = GetSupportedNodes(graph_view, params);
@@ -177,6 +194,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
   return result;
 }
 
+#ifdef __ANDROID__
 static Status GetOutputBuffer(Ort::CustomOpApi& ort,
                               OrtKernelContext* context,
                               const nnapi::Model& model,
@@ -412,5 +430,22 @@ common::Status NnapiExecutionProvider::Compile(const std::vector<onnxruntime::No
     node_compute_funcs.push_back(compute_info);
   }
   return Status::OK();
-}  // namespace onnxruntime
+}
+#else
+common::Status NnapiExecutionProvider::Compile(const std::vector<onnxruntime::Node*>& fused_nodes,
+                                               std::vector<NodeComputeInfo>& node_compute_funcs) {
+  for (const auto* fused_node : fused_nodes) {
+    ORT_UNUSED_PARAMETER(fused_node);
+    NodeComputeInfo compute_info;
+    compute_info.create_state_func = [](ComputeContext* /*context*/, FunctionState* /*state*/) { return 0; };
+    compute_info.release_state_func = [](FunctionState /*state*/) {};
+    compute_info.compute_func = [](FunctionState /* state */, const OrtCustomOpApi* /* api */, OrtKernelContext* /* context */) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "Compute is not supported in this build.");
+    };
+    node_compute_funcs.push_back(compute_info);
+  }
+  return Status::OK();
+}
+#endif
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
index 021645be58..ff72e93570 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h
@@ -24,10 +24,12 @@ class NnapiExecutionProvider : public IExecutionProvider {
   unsigned long GetNNAPIFlags() const { return nnapi_flags_; }
 
  private:
-  std::unordered_map<std::string, std::unique_ptr<onnxruntime::nnapi::Model>> nnapi_models_;
-
   // The bit flags which define bool options for NNAPI EP, bits are defined as
   // NNAPIFlags in include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h
   const unsigned long nnapi_flags_;
+
+#ifdef __ANDROID__
+  std::unordered_map<std::string, std::unique_ptr<onnxruntime::nnapi::Model>> nnapi_models_;
+#endif
 };
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
index c61fc458e9..c75e301e5d 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h
@@ -16,10 +16,11 @@
 // Provides C++ classes to more easily use the Neural Networks API.
 #ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
 #define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
-#include "nnapi_implementation.h"
 #include <vector>
 #include <numeric>
 
+#include "NeuralNetworksTypes.h"
+
 template <typename T>
 T Product(const std::vector<T>& v) {
   return static_cast<T>(
diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index a5360392e3..cfbd22be19 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -612,7 +612,8 @@ TEST_P(ModelTest, Run) {
 #ifdef USE_NUPHAR
   provider_names.push_back(ORT_TSTR("nuphar"));
 #endif
-#ifdef USE_NNAPI
+// For any non-Android system, NNAPI will only be used for ort model converter
+#if defined(USE_NNAPI) && defined(__ANDROID__)
   provider_names.push_back(ORT_TSTR("nnapi"));
 #endif
 #ifdef USE_RKNPU
diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
index f53ab7a244..3fa8ef5711 100644
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 #include "core/common/logging/logging.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h"
 #include "core/session/inference_session.h"
@@ -11,8 +14,9 @@ using namespace ONNX_NAMESPACE;
 using namespace ::onnxruntime::logging;
 
 namespace onnxruntime {
-
 namespace test {
+
+#ifdef __ANDROID__
 void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
                    const std::vector<float>& expected_values) {
   ASSERT_EQ(1, fetches.size());
@@ -22,6 +26,7 @@ void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64
   const std::vector<float> found(rtensor.template Data<float>(), rtensor.template Data<float>() + expected_values.size());
   ASSERT_EQ(expected_values, found);
 }
+#endif
 
 void RunAndVerifyOutputs(const std::string& model_file_name,
                          const char* log_id,
@@ -45,10 +50,18 @@ void RunAndVerifyOutputs(const std::string& model_file_name,
   ASSERT_EQ(1, graph.NumberOfNodes());  // Make sure the graph has 1 fused node
   ASSERT_EQ(onnxruntime::kNnapiExecutionProvider, graph.Nodes().cbegin()->GetExecutionProviderType());
 
+// The execution can only be performed on Android
+#ifdef __ANDROID__
   // Now run and verify the result
   std::vector<OrtValue> fetches;
   ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &fetches));
   VerifyOutputs(fetches, expected_dims, expected_values);
+#else
+  ORT_UNUSED_PARAMETER(feeds);
+  ORT_UNUSED_PARAMETER(output_names);
+  ORT_UNUSED_PARAMETER(expected_dims);
+  ORT_UNUSED_PARAMETER(expected_values);
+#endif
 }
 
 // Since NNAPI EP handles Reshape and Flatten differently,
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index e44ef0ee16..584001d788 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -95,7 +95,9 @@ std::unique_ptr<IExecutionProvider> DefaultNupharExecutionProvider(bool allow_un
 }
 
 std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider() {
-#ifdef USE_NNAPI
+// For any non-Android system, NNAPI will only be used for ort model converter
+// Make it unavailable here, you can still manually append NNAPI EP to session for model conversion
+#if defined(USE_NNAPI) && defined(__ANDROID__)
   return CreateExecutionProviderFactory_Nnapi(0)->CreateProvider();
 #else
   return nullptr;
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 7465872f0b..2e4490114d 100755
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -318,6 +318,9 @@ def parse_arguments():
         help="Build with OpenVINO for specific hardware.")
     parser.add_argument(
         "--use_nnapi", action='store_true', help="Build with NNAPI support.")
+    parser.add_argument(
+        "--nnapi_min_api", type=int,
+        help="Minimum Android API level to enable NNAPI, should be no less than 27")
     parser.add_argument(
         "--use_rknpu", action='store_true', help="Build with RKNPU.")
     parser.add_argument(
@@ -817,10 +820,12 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
         cmake_args += ["-Donnxruntime_USE_PREINSTALLED_EIGEN=ON",
                        "-Deigen_SOURCE_PATH=" + args.eigen_path]
 
+    if args.nnapi_min_api:
+        cmake_args += ["-Donnxruntime_NNAPI_MIN_API=" + str(args.nnapi_min_api)]
+
     if args.android:
         cmake_args += [
-            "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path +
-            "/build/cmake/android.toolchain.cmake",
+            "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + "/build/cmake/android.toolchain.cmake",
             "-DANDROID_PLATFORM=android-" + str(args.android_api),
             "-DANDROID_ABI=" + str(args.android_abi)
         ]
@@ -1830,6 +1835,12 @@ def main():
     if args.minimal_build and args.disable_ort_format_load:
         raise BuildError('Minimal build requires loading ORT format models.')
 
+    if args.nnapi_min_api:
+        if not args.use_nnapi:
+            raise BuildError("Using --nnapi_min_api requires --use_nnapi")
+        if args.nnapi_min_api < 27:
+            raise BuildError("--nnapi_min_api should be 27+")
+
     # Disabling unit tests for VAD-F as FPGA only supports
     # models with NCHW layout
     if args.use_openvino == "VAD-F_FP32":