diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 695cdc3efb..6b0d1de5d1 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -657,16 +657,48 @@ if (onnxruntime_USE_OPENVINO) endif() if (onnxruntime_USE_NNAPI_BUILTIN) - add_definitions(-DUSE_NNAPI=1) + add_compile_definitions(USE_NNAPI=1) + + # This is the minimum Android API Level required by ORT NNAPI EP to run + # ORT running on any host system with Android API level less than this will fall back to CPU EP + if(onnxruntime_NNAPI_MIN_API) + add_compile_definitions(ORT_NNAPI_MIN_API_LEVEL=${onnxruntime_NNAPI_MIN_API}) + endif() + + # This is the maximum Android API level supported in the ort model conversion for NNAPI EP + # Note: This is only for running NNAPI for ort format model conversion on non-Android system since we cannot + # get the actual Android system version. + if(onnxruntime_NNAPI_HOST_API) + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + message(FATAL_ERROR "onnxruntime_NNAPI_HOST_API should only be set for non-Android target") + endif() + add_compile_definitions(ORT_NNAPI_MAX_SUPPORTED_API_LEVEL=${onnxruntime_NNAPI_HOST_API}) + endif() + file(GLOB onnxruntime_providers_nnapi_cc_srcs_top CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc" ) - file(GLOB_RECURSE - onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h" - "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc" - ) + + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + file(GLOB_RECURSE + onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc" + ) + else() + file(GLOB + onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h" + 
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc" + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.h" + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.cc" + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h" + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc" + "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h" + ) + endif() + set(onnxruntime_providers_nnapi_cc_srcs ${onnxruntime_providers_nnapi_cc_srcs_top} ${onnxruntime_providers_nnapi_cc_srcs_nested}) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs}) add_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs}) @@ -677,6 +709,10 @@ if (onnxruntime_USE_NNAPI_BUILTIN) set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime") target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS}) set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX) + # ignore the warning unknown-pragmas on "pragma region" + if(NOT MSVC) + target_compile_options(onnxruntime_providers_nnapi PRIVATE "-Wno-unknown-pragmas") + endif() endif() if (onnxruntime_USE_RKNPU) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index fc2af0aea7..2bb775ddb9 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -236,7 +236,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t& dim_2 = std::accumulate(input_shape.cbegin() + axis, input_shape.cend(), 1, std::multiplies()); } -bool IsValidSupportedNodesVec(const std::vector& supported_node_vec, const GraphViewer& graph_viewer) { +bool 
IsValidSupportedNodesVec(const std::vector& supported_node_vec, const GraphViewer& graph_viewer) { if (supported_node_vec.empty()) return false; @@ -266,8 +266,8 @@ bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const Op } } -std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) { - std::vector> supported_node_vecs; +std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) { + std::vector> supported_node_vecs; if (params.android_sdk_ver < ORT_NNAPI_MIN_API_LEVEL) { LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because Android API level [" << params.android_sdk_ver << "] is lower than minimal supported API level [" << ORT_NNAPI_MIN_API_LEVEL @@ -275,7 +275,7 @@ std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, return supported_node_vecs; } - std::vector supported_node_vec; + std::vector supported_node_vec; const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder(); for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer.GetNode(node_indices[i])); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index ae824c4dfb..4bbe194741 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -8,10 +8,19 @@ #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h" // This is the minimal Android API Level required by ORT NNAPI EP to run +// ORT running on any host system with Android API level less than this will fall back to CPU EP #ifndef ORT_NNAPI_MIN_API_LEVEL #define ORT_NNAPI_MIN_API_LEVEL 27 #endif +// This is the maximum Android API level supported in the ort model conversion for NNAPI EP +// Note: This is only for running NNAPI for ort format model conversion on non-Android system since we 
cannot +// get the actual Android system version. +// If running on an actual Android system, this value will be ignored +#ifndef ORT_NNAPI_MAX_SUPPORTED_API_LEVEL +#define ORT_NNAPI_MAX_SUPPORTED_API_LEVEL 30 +#endif + namespace onnxruntime { using Shape = std::vector; @@ -114,7 +123,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t& bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const OpSupportCheckParams& params); // Get a list of groups of supported nodes, each group represents a subgraph supported by NNAPI EP -std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params); +std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params); // Get string representation of a Shape std::string Shape2String(const std::vector& shape); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 22b0ebf674..5b908b1659 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -17,9 +17,7 @@ using namespace android::nn::wrapper; using std::vector; ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer) - : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) { - GetAllInitializers(); -} + : nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {} int32_t ModelBuilder::GetAndroidSdkVer() const { return nnapi_ ? 
nnapi_->android_sdk_version : 0; @@ -105,12 +103,6 @@ Status ModelBuilder::GetTargetDevices() { return Status::OK(); } -void ModelBuilder::GetAllInitializers() { - for (const auto& pair : graph_viewer_.GetAllInitializedTensors()) { - initializers_.emplace(pair.first, *pair.second); - } -} - void ModelBuilder::PreprocessInitializers() { const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); for (size_t i = 0; i < node_indices.size(); i++) { @@ -172,13 +164,14 @@ std::unordered_map> GetAllQuantizedOpInputs(con Status ModelBuilder::RegisterInitializers() { // First pass to get all the stats of the initializers - auto initializer_size = initializers_.size(); + const auto& initializer_tensors(GetInitializerTensors()); + auto initializer_size = initializer_tensors.size(); std::vector> initializers(initializer_size); size_t sizeAll = 0; int i = 0; - for (const auto& pair : initializers_) { - const auto& tensor = pair.second; + for (const auto& pair : initializer_tensors) { + const auto& tensor = *pair.second; const auto& name = tensor.name(); if (Contains(skipped_initializers_, name)) continue; @@ -221,8 +214,8 @@ Status ModelBuilder::RegisterInitializers() { // 2nd pass to copy all the initializers into shared memory size_t offset = 0; - for (const auto& pair : initializers_) { - const auto& tensor = pair.second; + for (const auto& pair : initializer_tensors) { + const auto& tensor = *pair.second; if (Contains(skipped_initializers_, tensor.name())) continue; @@ -254,7 +247,7 @@ Status ModelBuilder::RegisterModelInputs() { if (Contains(operands_, input_name)) continue; - if (Contains(initializers_, input_name)) + if (Contains(GetInitializerTensors(), input_name)) continue; } @@ -566,7 +559,7 @@ void ModelBuilder::RegisterNHWCOperand(const std::string& name) { nhwc_operands_.insert(name); } -bool ModelBuilder::IsOperandNHWC(const std::string& name) { +bool ModelBuilder::IsOperandNHWC(const std::string& name) const { return Contains(nhwc_operands_, name); } 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index b5b54ff5d8..09a482138d 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -8,13 +8,13 @@ #include #include "core/providers/nnapi/nnapi_builtin/model.h" #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h" +#include "op_support_checker.h" #include "shaper.h" namespace onnxruntime { namespace nnapi { class IOpBuilder; -class IOpSupportChecker; class ModelBuilder { public: @@ -96,7 +96,7 @@ class ModelBuilder { const GraphViewer& GetGraphViewer() const { return graph_viewer_; } void RegisterNHWCOperand(const std::string& name); - bool IsOperandNHWC(const std::string& name); + bool IsOperandNHWC(const std::string& name) const; // Get the operand transposed to nchw/nhwc from given nhwc/nchw operand, if it exists bool GetNCHWOperand(const std::string& nhwc_name, std::string& nchw_name); @@ -127,7 +127,6 @@ class ModelBuilder { std::unordered_set operands_; std::unordered_set fused_activations_; - std::unordered_map initializers_; std::unordered_set skipped_initializers_; // All activation nodes (Relu, Relu1, Relu6) as a map @@ -156,8 +155,7 @@ class ModelBuilder { Status Prepare() ORT_MUST_USE_RESULT; Status GetTargetDevices() ORT_MUST_USE_RESULT; - // Get names of all the initializers - void GetAllInitializers(); + // If a NNAPI operation will use initializers directly, we will add the initializers to the skip list void PreprocessInitializers(); // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index 5091f55674..91ce58597b 100644 --- 
a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -950,13 +950,11 @@ bool ClipOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial // TODO, support clip between 2 arbitrary numbers if ((min == 0.0f && max == 6.0f) || (min == -1.0f && max == 1.0f)) { return true; - } else { - LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is [" - << min << ", " << max << "]"; - return false; } - return true; + LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is [" + << min << ", " << max << "]"; + return false; } #pragma endregion @@ -1182,4 +1180,4 @@ const std::unordered_map>& GetOp #pragma endregion } // namespace nnapi -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h index ecce4084af..e3781fcb05 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h @@ -9,6 +9,11 @@ namespace onnxruntime { namespace nnapi { struct OpSupportCheckParams { + OpSupportCheckParams(int32_t android_sdk_ver, bool use_nchw) + : android_sdk_ver(android_sdk_ver), + use_nchw(use_nchw) { + } + int32_t android_sdk_ver = 0; bool use_nchw = false; }; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h index 3bfb09e54f..ea32f567c1 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h @@ -7,6 +7,8 @@ #include "core/platform/ort_mutex.h" #include "nnapi_lib/NeuralNetworksWrapper.h" +struct NnApi; + namespace onnxruntime { namespace nnapi { 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc index 3db36e2839..a4d9c18ee2 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc @@ -3,13 +3,18 @@ #include "nnapi_execution_provider.h" -#include "model.h" #include "builders/helper.h" -#include "builders/model_builder.h" #include "builders/op_support_checker.h" #include "core/framework/allocatormgr.h" #include "core/framework/compute_capability.h" +#include "core/graph/graph_viewer.h" #include "core/session/onnxruntime_cxx_api.h" +#include "nnapi_lib/nnapi_implementation.h" + +#ifdef __ANDROID__ +#include "model.h" +#include "builders/model_builder.h" +#endif namespace onnxruntime { @@ -53,9 +58,21 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view } } - nnapi::ModelBuilder builder(graph_view); + // We need to get the Android system API level to ensure GetCapability gives the correct result + // based on the system. + // If we are actually running on an Android system, we can get the API level by querying the system. + // However, since we also allow the NNAPI EP to run GetCapability for model conversion on a non-Android system, + // where we cannot get the runtime system API level, we have to specify it using a compile definition. 
+ int32_t android_sdk_ver; +#ifdef __ANDROID__ + const auto* _nnapi = NnApiImplementation(); + android_sdk_ver = _nnapi ? _nnapi->android_sdk_version : 0;  // null handle => 0, same guard as ModelBuilder::GetAndroidSdkVer() +#else + android_sdk_ver = ORT_NNAPI_MAX_SUPPORTED_API_LEVEL; +#endif + nnapi::OpSupportCheckParams params{ - builder.GetAndroidSdkVer(), + android_sdk_ver, !!(nnapi_flags_ & NNAPI_FLAG_USE_NCHW), }; const auto supported_nodes_vector = GetSupportedNodes(graph_view, params); @@ -177,6 +194,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view return result; } +#ifdef __ANDROID__ static Status GetOutputBuffer(Ort::CustomOpApi& ort, OrtKernelContext* context, const nnapi::Model& model, @@ -412,5 +430,22 @@ common::Status NnapiExecutionProvider::Compile(const std::vector& fused_nodes, + std::vector& node_compute_funcs) { + for (const auto* fused_node : fused_nodes) { + ORT_UNUSED_PARAMETER(fused_node); + NodeComputeInfo compute_info; + compute_info.create_state_func = [](ComputeContext* /*context*/, FunctionState* /*state*/) { return 0; }; + compute_info.release_state_func = [](FunctionState /*state*/) {}; + compute_info.compute_func = [](FunctionState /* state */, const OrtCustomOpApi* /* api */, OrtKernelContext* /* context */) { + return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "Compute is not supported in this build."); + }; + node_compute_funcs.push_back(compute_info); + } + return Status::OK(); +} +#endif + } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h index 021645be58..ff72e93570 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h @@ -24,10 +24,12 @@ class NnapiExecutionProvider : public IExecutionProvider { unsigned long GetNNAPIFlags() const { return nnapi_flags_; } private: - std::unordered_map> nnapi_models_; - // The bit flags 
which define bool options for NNAPI EP, bits are defined as // NNAPIFlags in include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h const unsigned long nnapi_flags_; + +#ifdef __ANDROID__ + std::unordered_map> nnapi_models_; +#endif }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h index c61fc458e9..c75e301e5d 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h @@ -16,10 +16,11 @@ // Provides C++ classes to more easily use the Neural Networks API. #ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H #define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H -#include "nnapi_implementation.h" #include #include +#include "NeuralNetworksTypes.h" + template T Product(const std::vector& v) { return static_cast( diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index a5360392e3..cfbd22be19 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -612,7 +612,8 @@ TEST_P(ModelTest, Run) { #ifdef USE_NUPHAR provider_names.push_back(ORT_TSTR("nuphar")); #endif -#ifdef USE_NNAPI +// For any non-Android system, NNAPI will only be used for ort model converter +#if defined(USE_NNAPI) && defined(__ANDROID__) provider_names.push_back(ORT_TSTR("nnapi")); #endif #ifdef USE_RKNPU diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc index f53ab7a244..3fa8ef5711 100644 --- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc +++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "core/common/logging/logging.h" #include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h" #include "core/session/inference_session.h" @@ -11,8 +14,9 @@ using namespace ONNX_NAMESPACE; using namespace ::onnxruntime::logging; namespace onnxruntime { - namespace test { + +#ifdef __ANDROID__ void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, const std::vector& expected_values) { ASSERT_EQ(1, fetches.size()); @@ -22,6 +26,7 @@ void VerifyOutputs(const std::vector& fetches, const std::vector found(rtensor.template Data(), rtensor.template Data() + expected_values.size()); ASSERT_EQ(expected_values, found); } +#endif void RunAndVerifyOutputs(const std::string& model_file_name, const char* log_id, @@ -45,10 +50,18 @@ void RunAndVerifyOutputs(const std::string& model_file_name, ASSERT_EQ(1, graph.NumberOfNodes()); // Make sure the graph has 1 fused node ASSERT_EQ(onnxruntime::kNnapiExecutionProvider, graph.Nodes().cbegin()->GetExecutionProviderType()); +// The execution can only be performed on Android +#ifdef __ANDROID__ // Now run and verify the result std::vector fetches; ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &fetches)); VerifyOutputs(fetches, expected_dims, expected_values); +#else + ORT_UNUSED_PARAMETER(feeds); + ORT_UNUSED_PARAMETER(output_names); + ORT_UNUSED_PARAMETER(expected_dims); + ORT_UNUSED_PARAMETER(expected_values); +#endif } // Since NNAPI EP handles Reshape and Flatten differently, diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index e44ef0ee16..584001d788 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -95,7 +95,9 @@ std::unique_ptr DefaultNupharExecutionProvider(bool allow_un } std::unique_ptr DefaultNnapiExecutionProvider() { -#ifdef USE_NNAPI +// For any non - Android system, NNAPI will only be used for ort model converter +// Make it unavailable here, 
you can still manually append NNAPI EP to session for model conversion +#if defined(USE_NNAPI) && defined(__ANDROID__) return CreateExecutionProviderFactory_Nnapi(0)->CreateProvider(); #else return nullptr; diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 7465872f0b..2e4490114d 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -318,6 +318,9 @@ def parse_arguments(): help="Build with OpenVINO for specific hardware.") parser.add_argument( "--use_nnapi", action='store_true', help="Build with NNAPI support.") + parser.add_argument( + "--nnapi_min_api", type=int, + help="Minimum Android API level to enable NNAPI, should be no less than 27") parser.add_argument( "--use_rknpu", action='store_true', help="Build with RKNPU.") parser.add_argument( @@ -817,10 +820,12 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home cmake_args += ["-Donnxruntime_USE_PREINSTALLED_EIGEN=ON", "-Deigen_SOURCE_PATH=" + args.eigen_path] + if args.nnapi_min_api is not None:  # option was given explicitly (argparse default is None) + cmake_args += ["-Donnxruntime_NNAPI_MIN_API=" + str(args.nnapi_min_api)] + if args.android: cmake_args += [ - "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + - "/build/cmake/android.toolchain.cmake", + "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + "/build/cmake/android.toolchain.cmake", "-DANDROID_PLATFORM=android-" + str(args.android_api), "-DANDROID_ABI=" + str(args.android_abi) ] @@ -1830,6 +1835,12 @@ def main(): if args.minimal_build and args.disable_ort_format_load: raise BuildError('Minimal build requires loading ORT format models.') + if args.nnapi_min_api is not None:  # compare against None so that 0 is validated instead of silently skipped + if not args.use_nnapi: + raise BuildError("Using --nnapi_min_api requires --use_nnapi") + if args.nnapi_min_api < 27: + raise BuildError("--nnapi_min_api should be 27+") + # Disabling unit tests for VAD-F as FPGA only supports # models with NCHW layout if args.use_openvino == "VAD-F_FP32":