[NNAPI EP] Make NNAPI EP build on non-Android Platform (#5779)

* Make NNAPI EP build on non-Android Platform

* minor updates

* Address CR comments

* Fix build issue using Windows, address CR comments

* Fix linux build warnings

* Fix for test failure

* Fix for test failure

* Fix model_tests failure
This commit is contained in:
Guoyu Wang 2020-11-15 17:04:45 -08:00 committed by GitHub
parent 5b7dc5aeee
commit c4818d36ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 157 additions and 51 deletions

View file

@ -657,16 +657,48 @@ if (onnxruntime_USE_OPENVINO)
endif()
if (onnxruntime_USE_NNAPI_BUILTIN)
add_definitions(-DUSE_NNAPI=1)
add_compile_definitions(USE_NNAPI=1)
# This is the minimum Android API Level required by ORT NNAPI EP to run
# ORT running on any host system with Android API level less than this will fall back to CPU EP
if(onnxruntime_NNAPI_MIN_API)
add_compile_definitions(ORT_NNAPI_MIN_API_LEVEL=${onnxruntime_NNAPI_MIN_API})
endif()
# This is the maximum Android API level supported in the ORT model conversion for NNAPI EP
# Note: This is only used when running the NNAPI EP for ORT format model conversion on a non-Android system,
# since we cannot get the actual Android system version there.
if(onnxruntime_NNAPI_HOST_API)
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
message(FATAL_ERROR "onnxruntime_NNAPI_HOST_API should only be set for non-Android target")
endif()
add_compile_definitions(ORT_NNAPI_MAX_SUPPORTED_API_LEVEL=${onnxruntime_NNAPI_HOST_API})
endif()
file(GLOB
onnxruntime_providers_nnapi_cc_srcs_top CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc"
)
file(GLOB_RECURSE
onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
)
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
file(GLOB_RECURSE
onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
)
else()
file(GLOB
onnxruntime_providers_nnapi_cc_srcs_nested CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.h"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/helper.cc"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.h"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc"
"${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
)
endif()
set(onnxruntime_providers_nnapi_cc_srcs ${onnxruntime_providers_nnapi_cc_srcs_top} ${onnxruntime_providers_nnapi_cc_srcs_nested})
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs})
add_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs})
@ -677,6 +709,10 @@ if (onnxruntime_USE_NNAPI_BUILTIN)
set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime")
target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS})
set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX)
# ignore the warning unknown-pragmas on "pragma region"
if(NOT MSVC)
target_compile_options(onnxruntime_providers_nnapi PRIVATE "-Wno-unknown-pragmas")
endif()
endif()
if (onnxruntime_USE_RKNPU)

View file

@ -236,7 +236,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
dim_2 = std::accumulate(input_shape.cbegin() + axis, input_shape.cend(), 1, std::multiplies<int32_t>());
}
bool IsValidSupportedNodesVec(const std::vector<int>& supported_node_vec, const GraphViewer& graph_viewer) {
bool IsValidSupportedNodesVec(const std::vector<size_t>& supported_node_vec, const GraphViewer& graph_viewer) {
if (supported_node_vec.empty())
return false;
@ -266,8 +266,8 @@ bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const Op
}
}
std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
std::vector<std::vector<int>> supported_node_vecs;
std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
std::vector<std::vector<size_t>> supported_node_vecs;
if (params.android_sdk_ver < ORT_NNAPI_MIN_API_LEVEL) {
LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because Android API level [" << params.android_sdk_ver
<< "] is lower than minimal supported API level [" << ORT_NNAPI_MIN_API_LEVEL
@ -275,7 +275,7 @@ std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer,
return supported_node_vecs;
}
std::vector<int> supported_node_vec;
std::vector<size_t> supported_node_vec;
const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer.GetNode(node_indices[i]));

View file

@ -8,10 +8,19 @@
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
// This is the minimal Android API Level required by ORT NNAPI EP to run
// ORT running on any host system with Android API level less than this will fall back to CPU EP
#ifndef ORT_NNAPI_MIN_API_LEVEL
#define ORT_NNAPI_MIN_API_LEVEL 27
#endif
// This is the maximum Android API level supported in the ORT model conversion for NNAPI EP
// Note: This is only used when running the NNAPI EP for ORT format model conversion on a non-Android system,
// since we cannot get the actual Android system version there.
// If running on an actual Android system, this value will be ignored
#ifndef ORT_NNAPI_MAX_SUPPORTED_API_LEVEL
#define ORT_NNAPI_MAX_SUPPORTED_API_LEVEL 30
#endif
namespace onnxruntime {
using Shape = std::vector<uint32_t>;
@ -114,7 +123,7 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
// Get a list of groups of supported nodes, each group represents a subgraph supported by NNAPI EP
std::vector<std::vector<int>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
// Get string representation of a Shape
std::string Shape2String(const std::vector<uint32_t>& shape);

View file

@ -17,9 +17,7 @@ using namespace android::nn::wrapper;
using std::vector;
ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer)
: nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {
GetAllInitializers();
}
: nnapi_(NnApiImplementation()), graph_viewer_(graph_viewer) {}
int32_t ModelBuilder::GetAndroidSdkVer() const {
return nnapi_ ? nnapi_->android_sdk_version : 0;
@ -105,12 +103,6 @@ Status ModelBuilder::GetTargetDevices() {
return Status::OK();
}
void ModelBuilder::GetAllInitializers() {
for (const auto& pair : graph_viewer_.GetAllInitializedTensors()) {
initializers_.emplace(pair.first, *pair.second);
}
}
void ModelBuilder::PreprocessInitializers() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
@ -172,13 +164,14 @@ std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(con
Status ModelBuilder::RegisterInitializers() {
// First pass to get all the stats of the initializers
auto initializer_size = initializers_.size();
const auto& initializer_tensors(GetInitializerTensors());
auto initializer_size = initializer_tensors.size();
std::vector<std::tuple<uint32_t, size_t, size_t>> initializers(initializer_size);
size_t sizeAll = 0;
int i = 0;
for (const auto& pair : initializers_) {
const auto& tensor = pair.second;
for (const auto& pair : initializer_tensors) {
const auto& tensor = *pair.second;
const auto& name = tensor.name();
if (Contains(skipped_initializers_, name))
continue;
@ -221,8 +214,8 @@ Status ModelBuilder::RegisterInitializers() {
// 2nd pass to copy all the initializers into shared memory
size_t offset = 0;
for (const auto& pair : initializers_) {
const auto& tensor = pair.second;
for (const auto& pair : initializer_tensors) {
const auto& tensor = *pair.second;
if (Contains(skipped_initializers_, tensor.name()))
continue;
@ -254,7 +247,7 @@ Status ModelBuilder::RegisterModelInputs() {
if (Contains(operands_, input_name))
continue;
if (Contains(initializers_, input_name))
if (Contains(GetInitializerTensors(), input_name))
continue;
}
@ -566,7 +559,7 @@ void ModelBuilder::RegisterNHWCOperand(const std::string& name) {
nhwc_operands_.insert(name);
}
bool ModelBuilder::IsOperandNHWC(const std::string& name) {
bool ModelBuilder::IsOperandNHWC(const std::string& name) const {
return Contains(nhwc_operands_, name);
}

View file

@ -8,13 +8,13 @@
#include <core/graph/graph_viewer.h>
#include "core/providers/nnapi/nnapi_builtin/model.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h"
#include "op_support_checker.h"
#include "shaper.h"
namespace onnxruntime {
namespace nnapi {
class IOpBuilder;
class IOpSupportChecker;
class ModelBuilder {
public:
@ -96,7 +96,7 @@ class ModelBuilder {
const GraphViewer& GetGraphViewer() const { return graph_viewer_; }
void RegisterNHWCOperand(const std::string& name);
bool IsOperandNHWC(const std::string& name);
bool IsOperandNHWC(const std::string& name) const;
// Get the operand transposed to nchw/nhwc from given nhwc/nchw operand, if it exists
bool GetNCHWOperand(const std::string& nhwc_name, std::string& nchw_name);
@ -127,7 +127,6 @@ class ModelBuilder {
std::unordered_set<std::string> operands_;
std::unordered_set<std::string> fused_activations_;
std::unordered_map<std::string, const ONNX_NAMESPACE::TensorProto&> initializers_;
std::unordered_set<std::string> skipped_initializers_;
// All activation nodes (Relu, Relu1, Relu6) as a map <NodeIndex, activation_code>
@ -156,8 +155,7 @@ class ModelBuilder {
Status Prepare() ORT_MUST_USE_RESULT;
Status GetTargetDevices() ORT_MUST_USE_RESULT;
// Get names of all the initializers
void GetAllInitializers();
// If a NNAPI operation will use initializers directly, we will add the initializers to the skip list
void PreprocessInitializers();
// Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later

View file

@ -950,13 +950,11 @@ bool ClipOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
// TODO, support clip between 2 arbitrary numbers
if ((min == 0.0f && max == 6.0f) || (min == -1.0f && max == 1.0f)) {
return true;
} else {
LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is ["
<< min << ", " << max << "]";
return false;
}
return true;
LOGS_DEFAULT(VERBOSE) << "Clip only supports [min, max] = [0, 6] or [-1, 1], the input is ["
<< min << ", " << max << "]";
return false;
}
#pragma endregion
@ -1182,4 +1180,4 @@ const std::unordered_map<std::string, std::shared_ptr<IOpSupportChecker>>& GetOp
#pragma endregion
} // namespace nnapi
} // namespace onnxruntime
} // namespace onnxruntime

View file

@ -9,6 +9,11 @@ namespace onnxruntime {
namespace nnapi {
// Bundle of settings handed to the op support checks.
struct OpSupportCheckParams {
  // Both values must be supplied explicitly; there is no default constructor.
  OpSupportCheckParams(int32_t sdk_ver, bool nchw)
      : android_sdk_ver(sdk_ver), use_nchw(nchw) {}

  // Android API level the support checks are evaluated against
  int32_t android_sdk_ver = 0;
  // NCHW option for the checks (presumably mirrors the EP's NNAPI_FLAG_USE_NCHW flag -- confirm at call sites)
  bool use_nchw = false;
};

View file

@ -7,6 +7,8 @@
#include "core/platform/ort_mutex.h"
#include "nnapi_lib/NeuralNetworksWrapper.h"
struct NnApi;
namespace onnxruntime {
namespace nnapi {

View file

@ -3,13 +3,18 @@
#include "nnapi_execution_provider.h"
#include "model.h"
#include "builders/helper.h"
#include "builders/model_builder.h"
#include "builders/op_support_checker.h"
#include "core/framework/allocatormgr.h"
#include "core/framework/compute_capability.h"
#include "core/graph/graph_viewer.h"
#include "core/session/onnxruntime_cxx_api.h"
#include "nnapi_lib/nnapi_implementation.h"
#ifdef __ANDROID__
#include "model.h"
#include "builders/model_builder.h"
#endif
namespace onnxruntime {
@ -53,9 +58,21 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
}
}
nnapi::ModelBuilder builder(graph_view);
// We need the Android system API level to ensure GetCapability gives the correct result
// for the target system.
// If we are actually running on an Android system, we can get the API level by querying the system.
// However, we also allow the NNAPI EP to run GetCapability for model conversion on a non-Android system;
// there we cannot get the runtime system API level, so it has to be specified using a compile definition.
int32_t android_sdk_ver;
#ifdef __ANDROID__
const auto* _nnapi = NnApiImplementation();
android_sdk_ver = _nnapi->android_sdk_version;
#else
android_sdk_ver = ORT_NNAPI_MAX_SUPPORTED_API_LEVEL;
#endif
nnapi::OpSupportCheckParams params{
builder.GetAndroidSdkVer(),
android_sdk_ver,
!!(nnapi_flags_ & NNAPI_FLAG_USE_NCHW),
};
const auto supported_nodes_vector = GetSupportedNodes(graph_view, params);
@ -177,6 +194,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
return result;
}
#ifdef __ANDROID__
static Status GetOutputBuffer(Ort::CustomOpApi& ort,
OrtKernelContext* context,
const nnapi::Model& model,
@ -412,5 +430,22 @@ common::Status NnapiExecutionProvider::Compile(const std::vector<onnxruntime::No
node_compute_funcs.push_back(compute_info);
}
return Status::OK();
} // namespace onnxruntime
}
#else
// Placeholder Compile for builds where __ANDROID__ is not defined.
// One NodeComputeInfo is appended per fused node; its compute function always
// reports NOT_IMPLEMENTED. The fused nodes themselves are never inspected.
common::Status NnapiExecutionProvider::Compile(const std::vector<onnxruntime::Node*>& fused_nodes,
                                               std::vector<NodeComputeInfo>& node_compute_funcs) {
  NodeComputeInfo stub_info;
  stub_info.create_state_func = [](ComputeContext* /*context*/, FunctionState* /*state*/) { return 0; };
  stub_info.release_state_func = [](FunctionState /*state*/) {};
  stub_info.compute_func = [](FunctionState /*state*/, const OrtCustomOpApi* /*api*/, OrtKernelContext* /*context*/) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "Compute is not supported in this build.");
  };

  // Every fused node receives an identical copy of the stub.
  node_compute_funcs.insert(node_compute_funcs.end(), fused_nodes.size(), stub_info);

  return Status::OK();
}
#endif
} // namespace onnxruntime

View file

@ -24,10 +24,12 @@ class NnapiExecutionProvider : public IExecutionProvider {
unsigned long GetNNAPIFlags() const { return nnapi_flags_; }
private:
std::unordered_map<std::string, std::unique_ptr<onnxruntime::nnapi::Model>> nnapi_models_;
// The bit flags which define bool options for NNAPI EP, bits are defined as
// NNAPIFlags in include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h
const unsigned long nnapi_flags_;
#ifdef __ANDROID__
std::unordered_map<std::string, std::unique_ptr<onnxruntime::nnapi::Model>> nnapi_models_;
#endif
};
} // namespace onnxruntime

View file

@ -16,10 +16,11 @@
// Provides C++ classes to more easily use the Neural Networks API.
#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
#include "nnapi_implementation.h"
#include <vector>
#include <numeric>
#include "NeuralNetworksTypes.h"
template <typename T>
T Product(const std::vector<T>& v) {
return static_cast<T>(

View file

@ -612,7 +612,8 @@ TEST_P(ModelTest, Run) {
#ifdef USE_NUPHAR
provider_names.push_back(ORT_TSTR("nuphar"));
#endif
#ifdef USE_NNAPI
// For any non-Android system, NNAPI will only be used for ort model converter
#if defined(USE_NNAPI) && defined(__ANDROID__)
provider_names.push_back(ORT_TSTR("nnapi"));
#endif
#ifdef USE_RKNPU

View file

@ -1,3 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/common/logging/logging.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h"
#include "core/session/inference_session.h"
@ -11,8 +14,9 @@ using namespace ONNX_NAMESPACE;
using namespace ::onnxruntime::logging;
namespace onnxruntime {
namespace test {
#ifdef __ANDROID__
void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
const std::vector<float>& expected_values) {
ASSERT_EQ(1, fetches.size());
@ -22,6 +26,7 @@ void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64
const std::vector<float> found(rtensor.template Data<float>(), rtensor.template Data<float>() + expected_values.size());
ASSERT_EQ(expected_values, found);
}
#endif
void RunAndVerifyOutputs(const std::string& model_file_name,
const char* log_id,
@ -45,10 +50,18 @@ void RunAndVerifyOutputs(const std::string& model_file_name,
ASSERT_EQ(1, graph.NumberOfNodes()); // Make sure the graph has 1 fused node
ASSERT_EQ(onnxruntime::kNnapiExecutionProvider, graph.Nodes().cbegin()->GetExecutionProviderType());
// The execution can only be performed on Android
#ifdef __ANDROID__
// Now run and verify the result
std::vector<OrtValue> fetches;
ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &fetches));
VerifyOutputs(fetches, expected_dims, expected_values);
#else
ORT_UNUSED_PARAMETER(feeds);
ORT_UNUSED_PARAMETER(output_names);
ORT_UNUSED_PARAMETER(expected_dims);
ORT_UNUSED_PARAMETER(expected_values);
#endif
}
// Since NNAPI EP handles Reshape and Flatten differently,

View file

@ -95,7 +95,9 @@ std::unique_ptr<IExecutionProvider> DefaultNupharExecutionProvider(bool allow_un
}
std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider() {
#ifdef USE_NNAPI
// For any non-Android system, NNAPI will only be used for the ORT model converter.
// Make it unavailable here; you can still manually append the NNAPI EP to a session for model conversion.
#if defined(USE_NNAPI) && defined(__ANDROID__)
return CreateExecutionProviderFactory_Nnapi(0)->CreateProvider();
#else
return nullptr;

View file

@ -318,6 +318,9 @@ def parse_arguments():
help="Build with OpenVINO for specific hardware.")
parser.add_argument(
"--use_nnapi", action='store_true', help="Build with NNAPI support.")
parser.add_argument(
"--nnapi_min_api", type=int,
help="Minimum Android API level to enable NNAPI, should be no less than 27")
parser.add_argument(
"--use_rknpu", action='store_true', help="Build with RKNPU.")
parser.add_argument(
@ -817,10 +820,12 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
cmake_args += ["-Donnxruntime_USE_PREINSTALLED_EIGEN=ON",
"-Deigen_SOURCE_PATH=" + args.eigen_path]
if args.nnapi_min_api:
cmake_args += ["-Donnxruntime_NNAPI_MIN_API=" + str(args.nnapi_min_api)]
if args.android:
cmake_args += [
"-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path +
"/build/cmake/android.toolchain.cmake",
"-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + "/build/cmake/android.toolchain.cmake",
"-DANDROID_PLATFORM=android-" + str(args.android_api),
"-DANDROID_ABI=" + str(args.android_abi)
]
@ -1830,6 +1835,12 @@ def main():
if args.minimal_build and args.disable_ort_format_load:
raise BuildError('Minimal build requires loading ORT format models.')
if args.nnapi_min_api:
if not args.use_nnapi:
raise BuildError("Using --nnapi_min_api requires --use_nnapi")
if args.nnapi_min_api < 27:
raise BuildError("--nnapi_min_api should be 27+")
# Disabling unit tests for VAD-F as FPGA only supports
# models with NCHW layout
if args.use_openvino == "VAD-F_FP32":