diff --git a/.gitmodules b/.gitmodules index 23bdfa36ce..f9be021588 100644 --- a/.gitmodules +++ b/.gitmodules @@ -25,9 +25,6 @@ [submodule "cmake/external/eigen"] path = cmake/external/eigen url = https://gitlab.com/libeigen/eigen.git -[submodule "cmake/external/DNNLibrary"] - path = cmake/external/DNNLibrary - url = https://github.com/JDAI-CV/DNNLibrary [submodule "cmake/external/horovod"] path = cmake/external/horovod url = https://github.com/horovod/horovod.git diff --git a/BUILD.md b/BUILD.md index 219508df21..e43593fbbf 100644 --- a/BUILD.md +++ b/BUILD.md @@ -257,13 +257,13 @@ DNNL: `./build.sh --use_dnnl` #### Deprecation Notice | | | -| --- | --- | +| --- | --- | | Deprecation Begins | June 1, 2020 | | Removal Date | December 1, 2020 | Starting with the OpenVINO™ toolkit 2020.2 release, all of the features previously available through nGraph have been merged into the OpenVINO™ toolkit. As a result, all the features previously available through ONNX RT Execution Provider for nGraph have been merged with ONNX RT Execution Provider for OpenVINO™ toolkit. -Therefore, ONNX RT Execution Provider for **nGraph** will be deprecated starting June 1, 2020 and will be completely removed on December 1, 2020. Users are recommended to migrate to the ONNX RT Execution Provider for OpenVINO™ toolkit as the unified solution for all AI inferencing on Intel® hardware. +Therefore, ONNX RT Execution Provider for **nGraph** will be deprecated starting June 1, 2020 and will be completely removed on December 1, 2020. Users are recommended to migrate to the ONNX RT Execution Provider for OpenVINO™ toolkit as the unified solution for all AI inferencing on Intel® hardware. See more information on the nGraph Execution Provider [here](./docs/execution_providers/nGraph-ExecutionProvider.md). 
@@ -345,31 +345,6 @@ For more information on OpenVINO Execution Provider's ONNX Layer support, To --- -### Android NNAPI - -See more information on the NNAPI Execution Provider [here](./docs/execution_providers/NNAPI-ExecutionProvider.md). - -#### Prerequisites - -To build ONNX Runtime with the NN API EP, first install Android NDK (see [Android Build instructions](#android)) - -#### Build Instructions - -The basic build commands are below. There are also some other parameters for building the Android version. See [Android Build instructions](#android) for more details. - -##### Cross compiling on Windows - -```bash -./build.bat --android --android_sdk_path --android_ndk_path --use_dnnlibrary -``` - -##### Cross compiling on Linux - -```bash -./build.sh --android --android_sdk_path --android_ndk_path --use_dnnlibrary -``` ---- - ### NUPHAR See more information on the Nuphar Execution Provider [here](./docs/execution_providers/Nuphar-ExecutionProvider.md). @@ -983,7 +958,7 @@ Install an NDK version - NDK path in our example with this install would be `.../Android/ndk/21.1.6352462` - NOTE: If you install the ndk-bundle package the path will be `.../Android/ndk-bundle` as there's no version number -#### Build Instructions +#### Android Build Instructions ##### Cross compiling on Windows @@ -998,14 +973,23 @@ e.g. using the paths from our example ./build.bat --android --android_sdk_path .../Android --android_ndk_path .../Android/ndk/21.1.6352462 --android_abi arm64-v8a --android_api 27 --cmake_generator Ninja ``` -##### Cross compiling on Linux +##### Cross compiling on Linux and macOS ``` ./build.sh --android --android_sdk_path --android_ndk_path --android_abi --android_api ``` -Android Archive (AAR) files, which can be imported directly in Android Studio, will be generated in your_build_dir/java/build/outputs/aar. 
-If you want to use NNAPI Execution Provider on Android, see [docs/execution_providers/NNAPI-ExecutionProvider.md](/docs/execution_providers/NNAPI-ExecutionProvider.md). +##### Build Android Archive (AAR) + +Android Archive (AAR) files, which can be imported directly in Android Studio, will be generated in your_build_dir/java/build/outputs/aar when the above build commands are run with `--build_java`. + +#### Android NNAPI Execution Provider + +If you want to use NNAPI Execution Provider on Android, see [NNAPI Execution Provider](/docs/execution_providers/NNAPI-ExecutionProvider.md). + +##### Build Instructions + +The Android NNAPI Execution Provider can be built by adding `--use_nnapi` to the build commands in [Android Build instructions](#android-build-instructions). --- @@ -1014,7 +998,7 @@ If you want to use NNAPI Execution Provider on Android, see [docs/execution_prov See more information on the MIGraphX Execution Provider [here](./docs/execution_providers/MIGraphX-ExecutionProvider.md). #### Prerequisites -* Install [ROCM](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) +* Install [ROCM](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) * The MIGraphX execution provider for ONNX Runtime is built and tested with ROCM3.3 * Install [MIGraphX](https://github.com/ROCmSoftwarePlatform/AMDMIGraphX) * The path to MIGraphX installation must be provided via the `--migraphx_home parameter`. 
diff --git a/cgmanifests/cgmanifest.json b/cgmanifests/cgmanifest.json index 944f5b730b..eb4042a1ec 100644 --- a/cgmanifests/cgmanifest.json +++ b/cgmanifests/cgmanifest.json @@ -173,15 +173,6 @@ } } }, - { - "component": { - "type": "git", - "git": { - "commitHash": "647d4c3f4d47d9cf63fb90ec175c414a005adea7", - "repositoryUrl": "https://github.com/JDAI-CV/DNNLibrary.git" - } - } - }, { "component": { "Type": "other", @@ -308,4 +299,4 @@ } ], "Version": 1 -} +} \ No newline at end of file diff --git a/cgmanifests/submodules/cgmanifest.json b/cgmanifests/submodules/cgmanifest.json index 45466db2aa..916ba53cf6 100644 --- a/cgmanifests/submodules/cgmanifest.json +++ b/cgmanifests/submodules/cgmanifest.json @@ -1,126 +1,6 @@ { "Version": 1, "Registrations": [ - { - "component": { - "type": "git", - "git": { - "commitHash": "e17f11e966b2cce7d747799b76bb9843813d4b01", - "repositoryUrl": "https://github.com/JDAI-CV/DNNLibrary" - }, - "comments": "git submodule at cmake/external/DNNLibrary" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "9e7e8cbe9f675123dd41b7c62868acad39188cae", - "repositoryUrl": "https://github.com/google/flatbuffers" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/flatbuffers" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "8d7a107d68c127f3f494bb7807b796c8c5a97a82", - "repositoryUrl": "https://github.com/google/glog" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/glog" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "94d238d96e3fb3a7ba34f03c284b9ad3516163be", - "repositoryUrl": "https://github.com/onnx/onnx" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/onnx" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "e776aa0275e293707b6a0901e0e8d8a8a3679508", - "repositoryUrl": "https://github.com/google/benchmark.git" - }, - "comments": "git submodule at 
cmake/external/DNNLibrary/third_party/onnx/third_party/benchmark" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "a1041190c8b8ff0cd9e2f0752248ad5e3789ea0c", - "repositoryUrl": "https://github.com/pybind/pybind11.git" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/onnx/third_party/pybind11" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "6a00cbc4a9b8e68b71caf7f774b3f9c753ae84d5", - "repositoryUrl": "https://github.com/wjakob/clang-cindex-python3" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/onnx/third_party/pybind11/tools/clang" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "6973c3a5041636c1d8dc5f7f6c8c1f3c15bc63d6", - "repositoryUrl": "https://github.com/google/protobuf/" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/protobuf" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "5b7683f49e1e9223cf9927b24f6fd3d6bd82e3f8", - "repositoryUrl": "https://github.com/google/benchmark.git" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/protobuf/third_party/benchmark" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "c3bb0ee2a63279a803aaad956b9b26d74bf9e6e2", - "repositoryUrl": "https://github.com/google/googletest.git" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/protobuf/third_party/googletest" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "9bb3313162c0b856125e481ceece9d8faa567716", - "repositoryUrl": "https://github.com/pybind/pybind11" - }, - "comments": "git submodule at cmake/external/DNNLibrary/third_party/pybind11" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "6a00cbc4a9b8e68b71caf7f774b3f9c753ae84d5", - "repositoryUrl": "https://github.com/wjakob/clang-cindex-python3" - }, - "comments": "git submodule at 
cmake/external/DNNLibrary/third_party/pybind11/tools/clang" - } - }, { "component": { "type": "git", diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3a9cd9d383..374d28e3ee 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -50,7 +50,6 @@ option(onnxruntime_ENABLE_MEMLEAK_CHECKER "Experimental: Enable memory leak chec option(onnxruntime_USE_CUDA "Build with CUDA support" OFF) option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF) option(onnxruntime_USE_EIGEN_FOR_BLAS "Use eign for blas" ON) -option(onnxruntime_USE_NNAPI_DNNLIBRARY "Build with DNNLibrary for Android NNAPI support" OFF) option(onnxruntime_USE_NNAPI_BUILTIN "Build with builtin NNAPI lib for Android NNAPI support" OFF) option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF) option(onnxruntime_USE_DNNL "Build with DNNL support" OFF) @@ -199,10 +198,6 @@ if(onnxruntime_ENABLE_LTO) endif() endif() -if(onnxruntime_USE_NNAPI_BUILTIN AND onnxruntime_USE_NNAPI_DNNLIBRARY) - message(FATAL_ERROR "Please use only one of onnxruntime_USE_NNAPI_BUILTIN, onnxruntime_USE_NNAPI_DNNLIBRARY") -endif() - if(onnxruntime_DISABLE_RTTI) add_compile_definitions(ORT_NO_RTTI GOOGLE_PROTOBUF_NO_RTTI) if(MSVC) @@ -924,7 +919,7 @@ if (onnxruntime_USE_TENSORRT) # needs to link with stdc++fs in Linux if (NOT APPLE) list(APPEND onnxruntime_EXTERNAL_LIBRARIES stdc++fs) - endif() + endif() endif() endif() @@ -998,7 +993,7 @@ if (onnxruntime_ENABLE_TRAINING) if(NOT DEFINED onnxruntime_MPI_HOME) execute_process(COMMAND mpirun --version OUTPUT_VARIABLE MPIRUN_OUTPUT) else() - execute_process(COMMAND ${onnxruntime_MPI_HOME}/bin/mpirun --version OUTPUT_VARIABLE MPIRUN_OUTPUT) + execute_process(COMMAND ${onnxruntime_MPI_HOME}/bin/mpirun --version OUTPUT_VARIABLE MPIRUN_OUTPUT) endif(NOT DEFINED onnxruntime_MPI_HOME) string( REGEX MATCH "[0-9]+.[0-9]+.[0-9]" MPI_VERSION ${MPIRUN_OUTPUT}) message( STATUS "MPI Version: ${MPI_VERSION}") diff --git a/cmake/external/DNNLibrary b/cmake/external/DNNLibrary 
deleted file mode 160000 index e17f11e966..0000000000 --- a/cmake/external/DNNLibrary +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e17f11e966b2cce7d747799b76bb9843813d4b01 diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index dc10831d99..8cf84f32fb 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -76,7 +76,7 @@ endif() if (onnxruntime_USE_TENSORRT) target_compile_definitions(onnxruntime4j_jni PRIVATE USE_TENSORRT=1) endif() -if (onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) +if (onnxruntime_USE_NNAPI_BUILTIN) target_compile_definitions(onnxruntime4j_jni PRIVATE USE_NNAPI=1) endif() if (onnxruntime_USE_NUPHAR) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index a9e246dbca..65f74790c9 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,7 +59,7 @@ if(onnxruntime_USE_TENSORRT) set(PROVIDERS_TENSORRT onnxruntime_providers_tensorrt) list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt) endif() -if(onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) +if(onnxruntime_USE_NNAPI_BUILTIN) set(PROVIDERS_NNAPI onnxruntime_providers_nnapi) list(APPEND ONNXRUNTIME_PROVIDER_NAMES nnapi) endif() @@ -159,7 +159,7 @@ if (onnxruntime_ENABLE_TRAINING) if (onnxruntime_USE_HOROVOD) target_include_directories(onnxruntime_providers PRIVATE ${HOROVOD_INCLUDE_DIRS}) endif() - if (onnxruntime_USE_NCCL OR onnxruntime_USE_HOROVOD) + if (onnxruntime_USE_NCCL OR onnxruntime_USE_HOROVOD) target_include_directories(onnxruntime_providers PUBLIC ${MPI_INCLUDE_DIRS}) endif() endif() @@ -509,36 +509,8 @@ if (onnxruntime_USE_OPENVINO) endif() -if (onnxruntime_USE_NNAPI_DNNLIBRARY) +if (onnxruntime_USE_NNAPI_BUILTIN) add_definitions(-DUSE_NNAPI=1) - add_definitions(-DUSE_NNAPI_DNNLIBRARY=1) - option(DNN_READ_ONNX "" ON) - set(DNN_CUSTOM_PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) - option(DNN_CMAKE_INSTALL "" OFF) - 
option(DNN_BUILD_BIN "" OFF) - add_subdirectory(${REPO_ROOT}/cmake/external/DNNLibrary) - file(GLOB - onnxruntime_providers_nnapi_cc_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc" - "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_dnnlibrary/*.h" - "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_dnnlibrary/*.cc" - ) - source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs}) - add_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs}) - onnxruntime_add_include_to_target(onnxruntime_providers_nnapi onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite dnnlibrary::dnnlibrary) - target_link_libraries(onnxruntime_providers_nnapi dnnlibrary::dnnlibrary) - add_dependencies(onnxruntime_providers_nnapi - dnnlibrary::dnnlibrary - onnx ${onnxruntime_EXTERNAL_DEPENDENCIES}) - # Header files of DNNLibrary requires C++17, fortunately, all modern Android NDKs support C++17 - set_target_properties(onnxruntime_providers_nnapi PROPERTIES CXX_STANDARD 17) - set_target_properties(onnxruntime_providers_nnapi PROPERTIES CXX_STANDARD_REQUIRED ON) - set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime") - target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS}) - set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX) -elseif (onnxruntime_USE_NNAPI_BUILTIN) - add_definitions(-DUSE_NNAPI=1) - add_definitions(-DUSE_NNAPI_BUILTIN=1) file(GLOB onnxruntime_providers_nnapi_cc_srcs_top CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc" diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 12dd6e0053..113ec17851 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -207,7 +207,7 @@ if (onnxruntime_USE_NGRAPH) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_ngraph_src}) endif() -if 
(onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) +if (onnxruntime_USE_NNAPI_BUILTIN) file(GLOB_RECURSE onnxruntime_test_providers_nnapi_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/nnapi/*" ) @@ -303,7 +303,7 @@ if(onnxruntime_USE_OPENVINO) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_openvino) endif() -if(onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) +if(onnxruntime_USE_NNAPI_BUILTIN) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi) endif() @@ -393,7 +393,7 @@ if(onnxruntime_USE_TENSORRT) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_tensorrt) endif() -if(onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) +if(onnxruntime_USE_NNAPI_BUILTIN) list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/nnapi/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_nnapi) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi) @@ -692,9 +692,6 @@ add_test(NAME onnx_test_pytorch_operator if (CMAKE_SYSTEM_NAME STREQUAL "Android") list(APPEND android_shared_libs log android) - if (onnxruntime_USE_NNAPI_DNNLIBRARY) - list(APPEND android_shared_libs neuralnetworks) - endif() endif() #perf test runner diff --git a/docs/execution_providers/NNAPI-ExecutionProvider.md b/docs/execution_providers/NNAPI-ExecutionProvider.md index e6dd8452c8..59b8659a8f 100644 --- a/docs/execution_providers/NNAPI-ExecutionProvider.md +++ b/docs/execution_providers/NNAPI-ExecutionProvider.md @@ -1,14 +1,14 @@ # NNAPI Execution Provider -[Android Neural Networks API (NNAPI)](https://developer.android.com/ndk/guides/neuralnetworks) is a unified interface to CPU, GPU, and NN accelerators on Android. It is supported by onnxruntime via [DNNLibrary](https://github.com/JDAI-CV/DNNLibrary). 
+[Android Neural Networks API (NNAPI)](https://developer.android.com/ndk/guides/neuralnetworks) is a unified interface to CPU, GPU, and NN accelerators on Android. ## Minimum requirements -The NNAPI EP requires Android devices with Android 8.1 or higher. +The NNAPI EP requires Android devices with Android 8.1 or higher. It is recommended to use Android devices with Android 9 or higher to achieve optimal performance. ## Build NNAPI EP -For build instructions, please see the [BUILD page](../../BUILD.md#Android-NNAPI). +For build instructions, please see the [BUILD page](../../BUILD.md#android-nnapi-execution-provider). ## Using NNAPI EP in C/C++ @@ -27,11 +27,3 @@ session_object.RegisterExecutionProvider(std::make_unique<::onnxruntime::NnapiEx status = session_object.Load(model_file_name); ``` The C API details are [here](../C_API.md#c-api). - -## Performance - -![NNAPI EP on RK3399](./images/nnapi-ep-rk3399.png) - -![NNAPI EP on OnePlus 6T](./images/nnapi-ep-oneplus6t.png) - -![NNAPI EP on Huawei Honor V10](./images/nnapi-ep-huaweihonorv10.png) diff --git a/docs/execution_providers/images/nnapi-ep-huaweihonorv10.png b/docs/execution_providers/images/nnapi-ep-huaweihonorv10.png deleted file mode 100644 index f3c5cd2a0c..0000000000 Binary files a/docs/execution_providers/images/nnapi-ep-huaweihonorv10.png and /dev/null differ diff --git a/docs/execution_providers/images/nnapi-ep-oneplus6t.png b/docs/execution_providers/images/nnapi-ep-oneplus6t.png deleted file mode 100644 index 7d626a3572..0000000000 Binary files a/docs/execution_providers/images/nnapi-ep-oneplus6t.png and /dev/null differ diff --git a/docs/execution_providers/images/nnapi-ep-rk3399.png b/docs/execution_providers/images/nnapi-ep-rk3399.png deleted file mode 100644 index d83784fca5..0000000000 Binary files a/docs/execution_providers/images/nnapi-ep-rk3399.png and /dev/null differ diff --git a/onnxruntime/core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.cc 
b/onnxruntime/core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.cc deleted file mode 100644 index d80f51191c..0000000000 --- a/onnxruntime/core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.cc +++ /dev/null @@ -1,328 +0,0 @@ -// Copyright 2019 JD.com Inc. JD AI - -#include "nnapi_execution_provider.h" -#include "core/framework/allocatormgr.h" -#include "core/framework/compute_capability.h" -#include "core/session/onnxruntime_cxx_api.h" -#include "core/session/inference_session.h" -#include "core/graph/model.h" -#include "dnnlibrary/ModelBuilder.h" -#include "dnnlibrary/OnnxReader.h" -#include "tools/onnx2daq/OnnxConverter.h" - -namespace onnxruntime { - -constexpr const char* NNAPI = "Nnapi"; - -NnapiExecutionProvider::NnapiExecutionProvider() - : IExecutionProvider{onnxruntime::kNnapiExecutionProvider} { - DeviceAllocatorRegistrationInfo device_info( - {OrtMemTypeDefault, - [](int) { - return onnxruntime::make_unique(OrtMemoryInfo(NNAPI, OrtAllocatorType::OrtDeviceAllocator)); - }, - std::numeric_limits::max()}); - - InsertAllocator(CreateAllocator(device_info)); - - DeviceAllocatorRegistrationInfo cpu_memory_info( - {OrtMemTypeCPUOutput, - [](int) { - return onnxruntime::make_unique( - OrtMemoryInfo(NNAPI, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeCPUOutput)); - }, - std::numeric_limits::max()}); - - InsertAllocator(CreateAllocator(cpu_memory_info)); -} - -NnapiExecutionProvider::~NnapiExecutionProvider() {} - -std::vector> NnapiExecutionProvider::GetSupportedNodes(const ONNX_NAMESPACE::ModelProto& model_proto) const { - dnn::OnnxConverter converter; - const auto nodes = converter.GetSupportedNodes(model_proto); - if (!nodes) { - return {{}}; - } - return nodes.value(); -} - -std::vector> -NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph, - const std::vector& /*kernel_registries*/) const { - // This method is based on that of TRT EP - // Construct modelproto from graph - onnxruntime::Model 
model(graph.Name(), true, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), - graph.DomainToVersionMap(), std::vector(), *GetLogger()); - onnxruntime::Graph& graph_build = model.MainGraph(); - const std::vector& node_index = graph.GetNodesInTopologicalOrder(); - std::set all_node_inputs; - for (const auto& node : graph.Nodes()) { - std::vector inputs, outputs; - for (auto input : node.InputDefs()) { - auto& n_input = graph_build.GetOrCreateNodeArg(input->Name(), input->TypeAsProto()); - inputs.push_back(&n_input); - all_node_inputs.insert(&n_input); - } - for (auto output : node.OutputDefs()) { - auto& n_output = graph_build.GetOrCreateNodeArg(output->Name(), output->TypeAsProto()); - outputs.push_back(&n_output); - } - graph_build.AddNode(node.Name(), node.OpType(), node.Description(), inputs, outputs, &node.GetAttributes(), node.Domain()); - } - const auto graph_outputs = graph.GetOutputs(); - //Add initializer to graph - const auto& init_tensors = graph.GetAllInitializedTensors(); - for (const auto& tensor : init_tensors) { - graph_build.AddInitializedTensor(*(tensor.second)); - } - - ORT_ENFORCE(graph_build.Resolve().IsOK()); - ONNX_NAMESPACE::ModelProto model_proto = model.ToProto(); - model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - - const auto supported_nodes_vector = GetSupportedNodes(model_proto); - - std::unique_ptr sub_graph = onnxruntime::make_unique(); - - // Find inputs, initializers and outputs for each supported subgraph - std::vector> result; - - int counter = 0; - - for (const auto& group : supported_nodes_vector) { - if (!group.empty()) { - std::unordered_set node_set; - node_set.reserve(group.size()); - for (const auto& index : group) { - node_set.insert(node_index[index]); - } - std::unique_ptr sub_graph = onnxruntime::make_unique(); - // Find inputs and outputs of the subgraph - std::unordered_map fused_inputs, fused_outputs, fused_outputs_to_add; - std::unordered_set erased; - int input_order = 0; - int 
output_order = 0; - - for (const auto& index : group) { - sub_graph->nodes.push_back(node_index[index]); - const auto& node = graph.GetNode(node_index[index]); - - for (const auto& input : node->InputDefs()) { - const auto& it = fused_outputs.find(input); - - if (it != fused_outputs.end()) { - fused_outputs.erase(it); - erased.insert(input); - } - //only when input is neither in output list nor erased list, add the input to input list - else if (erased.find(input) == erased.end()) { - fused_inputs[input] = input_order++; - } - } - - // For output searching, there is a special case: - // If node's OutputEdges are more than its outputs, meaning certain output is used more than once, - // if the output is connected to nodes that don't belong to the subgraph, the output need to be added - // to the output list - if (node->GetOutputEdgesCount() > node->OutputDefs().size()) { - for (auto it = node->OutputEdgesBegin(), end = node->OutputEdgesEnd(); it != end; ++it) { - const auto& node_idx = it->GetNode().Index(); - const auto& output = (it->GetNode()).InputDefs()[it->GetDstArgIndex()]; - - if (node_set.find(node_idx) != node_set.end()) { - const auto& iter = fused_inputs.find(output); - - if (iter != fused_inputs.end()) { - fused_inputs.erase(iter); - erased.insert(output); - } else if (erased.find(output) == erased.end()) { - fused_outputs[output] = output_order++; - } - } else { - fused_outputs_to_add[output] = output_order++; - } - } - } else { - for (const auto& output : node->OutputDefs()) { - const auto& it = fused_inputs.find(output); - - if (it != fused_inputs.end()) { - fused_inputs.erase(it); - erased.insert(output); - } - // only when output is neither in input list nor erased list, add the output to output list - else if (erased.find(output) == erased.end()) { - fused_outputs[output] = output_order++; - } - } - } - } - - fused_outputs.insert(fused_outputs_to_add.begin(), fused_outputs_to_add.end()); - - // Sort inputs and outputs by the order they were added 
- std::multimap inputs, outputs; - - for (auto it = fused_inputs.begin(), end = fused_inputs.end(); it != end; ++it) { - inputs.insert(std::pair(it->second, it->first)); - } - - for (auto it = fused_outputs.begin(), end = fused_outputs.end(); it != end; ++it) { - for (const auto& x : all_node_inputs) { - if (x->Name() == it->first->Name()) { - outputs.insert(std::pair(it->second, it->first)); - break; - } - } - if (std::find(graph_outputs.begin(), graph_outputs.end(), it->first) != graph_outputs.end()) { - outputs.insert(std::pair(it->second, it->first)); - } - } - - // Assign inputs and outputs to subgraph's meta_def - auto meta_def = onnxruntime::make_unique<::onnxruntime::IndexedSubGraph::MetaDef>(); - meta_def->name = "NNAPI_" + std::to_string(counter++); - meta_def->domain = kMSDomain; - - for (const auto& input : inputs) { - meta_def->inputs.push_back(input.second->Name()); - } - - for (const auto& output : outputs) { - meta_def->outputs.push_back(output.second->Name()); - } - - meta_def->since_version = 1; - sub_graph->SetMetaDef(meta_def); - - result.push_back(onnxruntime::make_unique(std::move(sub_graph))); - } - } - - return result; -} - -common::Status NnapiExecutionProvider::Compile(const std::vector& fused_nodes, - std::vector& node_compute_funcs) { - for (const auto* fused_node : fused_nodes) { - // Reconstruct graph proto from fused node's function body - const auto* func_body = fused_node->GetFunctionBody(); - if (!func_body) { - return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Function body is empty"); - } - const Graph& graph_body = func_body->Body(); - onnxruntime::Model model(graph_body.Name(), true, ModelMetaData(), PathString(), - IOnnxRuntimeOpSchemaRegistryList(), graph_body.DomainToVersionMap(), - std::vector(), *GetLogger()); - ONNX_NAMESPACE::ModelProto model_proto = model.ToProto(); - *(model_proto.mutable_graph()) = graph_body.ToGraphProto(); - model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - - 
dnn::OnnxReader onnx_reader; - dnn::ModelBuilder model_builder; - onnx_reader.ReadOnnx(model_proto, model_builder); - model_builder.AllowFp16(true); - auto dnn_model = model_builder.Compile(model_builder.PREFERENCE_SUSTAINED_SPEED); - dnn_models_.emplace(fused_node->Name(), std::move(dnn_model)); - - NodeComputeInfo compute_info; - compute_info.create_state_func = [&](ComputeContext* context, FunctionState* state) { - *state = dnn_models_[context->node_name].get(); - return 0; - }; - - compute_info.release_state_func = [](FunctionState state) { - // the `state` is a dnn::model managed by unique_ptr - ORT_UNUSED_PARAMETER(state); - }; - - compute_info.compute_func = [](FunctionState state, const OrtCustomOpApi* api, OrtKernelContext* context) { - Ort::CustomOpApi ort{*api}; - dnn::Model* model = reinterpret_cast(state); - const size_t num_inputs = ort.KernelContext_GetInputCount(context); - const size_t num_outputs = ort.KernelContext_GetOutputCount(context); - ORT_ENFORCE(model->GetInputs().size() <= num_inputs, "Inconsistent input sizes"); - ORT_ENFORCE(model->GetOutputs().size() == num_outputs, "Inconsistent output sizes"); - // Maintain the created nhwc buffers so that they can be deleted after inferencing - std::vector nhwc_inputs; - std::vector>> nhwc_outputs; - for (size_t i = 0; i < num_outputs; i++) { - const auto output_name = model->GetOutputs()[i]; - const auto output_shape = model->GetShape(output_name); - std::vector int64_output_shape(output_shape.begin(), output_shape.end()); - if (int64_output_shape.size() == 4) { - // NHWC to NCHW - std::swap(int64_output_shape[1], int64_output_shape[3]); - std::swap(int64_output_shape[2], int64_output_shape[3]); - float* nhwc_output = new float[model->GetSize(output_name)]; - model->SetOutputBuffer(i, nhwc_output); - nhwc_outputs.push_back(std::make_tuple(i, nhwc_output, int64_output_shape)); - } else { - auto* output_tensor = ort.KernelContext_GetOutput(context, i, int64_output_shape.data(), 
int64_output_shape.size()); - model->SetOutputBuffer(i, ort.GetTensorMutableData(output_tensor)); - } - } - std::vector inputs; - for (size_t i = 0; i < model->GetInputs().size(); i++) { - const OrtValue* input_tensor = ort.KernelContext_GetInput(context, i); - float* input = const_cast(ort.GetTensorData(input_tensor)); - - const auto tensor_info = ort.GetTensorTypeAndShape(input_tensor); - const auto& tensor_shape = ort.GetTensorShape(tensor_info); - - if (tensor_shape.size() == 4) { - // Transpose nchw -> nhwc manually - const int N = tensor_shape[0], C = tensor_shape[1], H = tensor_shape[2], W = tensor_shape[3]; - float* nhwc_input = new float[N * C * H * W]; - for (int n = 0; n < N; n++) { - for (int c = 0; c < C; c++) { - for (int h = 0; h < H; h++) { - for (int w = 0; w < W; w++) { - nhwc_input[n * H * W * C + h * W * C + w * C + c] = input[n * C * H * W + c * H * W + h * W + w]; - } - } - } - } - inputs.push_back(nhwc_input); - nhwc_inputs.push_back(nhwc_input); - } else { - inputs.push_back(input); - } - ort.ReleaseTensorTypeAndShapeInfo(tensor_info); - } - model->Predict(inputs); - // Transpose nhwc -> nchw manually - for (size_t i = 0; i < nhwc_outputs.size(); i++) { - const auto output = nhwc_outputs[i]; - size_t index; - float* nhwc_data; - std::vector nchw_shape; - std::tie(index, nhwc_data, nchw_shape) = output; - auto* output_tensor = ort.KernelContext_GetOutput(context, index, nchw_shape.data(), nchw_shape.size()); - const int N = nchw_shape[0], C = nchw_shape[1], H = nchw_shape[2], W = nchw_shape[3]; - float* nchw_output = ort.GetTensorMutableData(output_tensor); - for (int n = 0; n < N; n++) { - for (int c = 0; c < C; c++) { - for (int h = 0; h < H; h++) { - for (int w = 0; w < W; w++) { - nchw_output[n * C * H * W + c * H * W + h * W + w] = nhwc_data[n * H * W * C + h * W * C + w * C + c]; - } - } - } - } - } - for (auto nhwc_input : nhwc_inputs) { - delete[] nhwc_input; - } - for (auto nhwc_output : nhwc_outputs) { - delete[] 
std::get<1>(nhwc_output); - } - return Status::OK(); - }; - - node_compute_funcs.push_back(compute_info); - } - return Status::OK(); -} -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.h b/onnxruntime/core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.h deleted file mode 100644 index a03eb5c7f4..0000000000 --- a/onnxruntime/core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 JD.com Inc. JD AI - -#pragma once - -#include "core/framework/execution_provider.h" -#include "core/graph/onnx_protobuf.h" -#include "dnnlibrary/Model.h" - -namespace onnxruntime { -class NnapiExecutionProvider : public IExecutionProvider { - public: - NnapiExecutionProvider(); - virtual ~NnapiExecutionProvider(); - - std::vector> - GetCapability(const onnxruntime::GraphViewer& graph, - const std::vector& /*kernel_registries*/) const override; - common::Status Compile(const std::vector& fused_nodes, - std::vector& node_compute_funcs) override; - - private: - std::unordered_map> dnn_models_; - std::vector> GetSupportedNodes(const ONNX_NAMESPACE::ModelProto& model_proto) const; -}; -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_provider_factory.cc b/onnxruntime/core/providers/nnapi/nnapi_provider_factory.cc index 1b50a5bdb4..11f3bf67d8 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_provider_factory.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_provider_factory.cc @@ -2,12 +2,7 @@ #include "core/providers/nnapi/nnapi_provider_factory.h" #include "core/session/abi_session_options_impl.h" - -#ifdef USE_NNAPI_DNNLIBRARY -#include "nnapi_dnnlibrary/nnapi_execution_provider.h" -#elif USE_NNAPI_BUILTIN #include "nnapi_builtin/nnapi_execution_provider.h" -#endif using namespace onnxruntime; @@ -33,5 +28,3 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Nnapi, _In_ OrtSess 
options->provider_factories.push_back(onnxruntime::CreateExecutionProviderFactory_Nnapi()); return nullptr; } - - diff --git a/onnxruntime/test/framework/test_utils.h b/onnxruntime/test/framework/test_utils.h index 431ea52ef8..56184b2262 100644 --- a/onnxruntime/test/framework/test_utils.h +++ b/onnxruntime/test/framework/test_utils.h @@ -22,11 +22,7 @@ #include "core/providers/openvino/openvino_execution_provider.h" #endif #ifdef USE_NNAPI -# ifdef USE_NNAPI_DNNLIBRARY -# include "core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.h" -# elif USE_NNAPI_BUILTIN -# include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h" -# endif +#include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h" #endif #ifdef USE_RKNPU #include "core/providers/rknpu/rknpu_execution_provider.h" diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index db82906abe..3864461149 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -344,7 +344,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { #ifdef USE_NNAPI Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Nnapi(sf)); #else - fprintf(stderr, "DNNLibrary/NNAPI is not supported in this build"); + fprintf(stderr, "NNAPI is not supported in this build"); return -1; #endif } diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc index 1354a81cdc..877303f921 100644 --- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc +++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc @@ -1,14 +1,9 @@ +#include "core/common/logging/logging.h" +#include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h" #include "core/session/inference_session.h" +#include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" #include "test/framework/test_utils.h" -#include "gtest/gtest.h" -#include "core/common/logging/logging.h" - -#ifdef USE_NNAPI_DNNLIBRARY -#include 
"core/providers/nnapi/nnapi_dnnlibrary/nnapi_execution_provider.h" -#elif USE_NNAPI_BUILTIN -#include "core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.h" -#endif using namespace std; using namespace ONNX_NAMESPACE; diff --git a/samples/c_cxx/CMakeLists.txt b/samples/c_cxx/CMakeLists.txt index d4b37dcba6..078c53c0f9 100644 --- a/samples/c_cxx/CMakeLists.txt +++ b/samples/c_cxx/CMakeLists.txt @@ -15,7 +15,6 @@ endif() #onnxruntime providers option(onnxruntime_USE_CUDA "Build with CUDA support" OFF) option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF) -option(onnxruntime_USE_NNAPI_DNNLIBRARY "Build with DNNLibrary for Android NNAPI support" OFF) option(onnxruntime_USE_NNAPI_BUILTIN "Build with builtin NNAPI lib for Android NNAPI support" OFF) option(onnxruntime_USE_DNNL "Build with DNNL support" OFF) option(onnxruntime_USE_NGRAPH "Build with nGraph support" OFF) @@ -56,7 +55,7 @@ endif() if(onnxruntime_USE_OPENVINO) add_definitions(-DUSE_OPENVINO) endif() -if(onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) +if(onnxruntime_USE_NNAPI_BUILTIN) add_definitions(-DUSE_NNAPI) endif() if(onnxruntime_USE_DNNL) diff --git a/server/CMakeLists.txt b/server/CMakeLists.txt index 1712fdb1ef..cd2ba56871 100755 --- a/server/CMakeLists.txt +++ b/server/CMakeLists.txt @@ -8,7 +8,6 @@ project(onnxruntime C CXX) option(onnxruntime_USE_CUDA "Build with CUDA support" OFF) option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF) -option(onnxruntime_USE_NNAPI_DNNLIBRARY "Build with DNNLibrary for Android NNAPI support" OFF) option(onnxruntime_USE_NNAPI_BUILTIN "Build with builtin NNAPI lib for Android NNAPI support" OFF) option(onnxruntime_USE_DNNL "Build with DNNL support" OFF) option(onnxruntime_USE_NGRAPH "Build with nGraph support" OFF) @@ -24,7 +23,7 @@ endif() if(onnxruntime_USE_OPENVINO) add_definitions(-DUSE_OPENVINO=1) endif() -if(onnxruntime_USE_NNAPI_DNNLIBRARY OR onnxruntime_USE_NNAPI_BUILTIN) 
+if(onnxruntime_USE_NNAPI_BUILTIN) add_definitions(-DUSE_NNAPI=1) endif() if(onnxruntime_USE_DNNL) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index e5ca353a38..f5ab736535 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -257,8 +257,6 @@ def parse_arguments(): choices=["CPU_FP32", "GPU_FP32", "GPU_FP16", "VAD-M_FP16", "MYRIAD_FP16", "VAD-F_FP32"], help="Build with OpenVINO for specific hardware.") - parser.add_argument( - "--use_dnnlibrary", action='store_true', help="Build with DNNLibrary.") parser.add_argument( "--use_nnapi", action='store_true', help="Build with NNAPI support.") parser.add_argument( @@ -601,14 +599,13 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home "ON" if args.use_openvino == "VAD-F_FP32" else "OFF"), "-Donnxruntime_USE_OPENVINO_BINARY=" + ( "ON" if args.use_openvino else "OFF"), - "-Donnxruntime_USE_NNAPI_DNNLIBRARY=" + ("ON" if args.use_dnnlibrary else "OFF"), "-Donnxruntime_USE_NNAPI_BUILTIN=" + ("ON" if args.use_nnapi else "OFF"), "-Donnxruntime_USE_RKNPU=" + ("ON" if args.use_rknpu else "OFF"), "-Donnxruntime_USE_OPENMP=" + ( "ON" if args.use_openmp and not ( - args.use_dnnlibrary or args.use_mklml or args.use_ngraph or + args.use_nnapi or args.use_mklml or args.use_ngraph or args.android or (args.ios and is_macOS()) - or args.use_rknpu or args.use_nnapi) + or args.use_rknpu) else "OFF"), "-Donnxruntime_USE_TVM=" + ("ON" if args.use_tvm else "OFF"), "-Donnxruntime_USE_LLVM=" + ("ON" if args.use_llvm else "OFF"), @@ -707,12 +704,6 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home "-Deigen_SOURCE_PATH=" + args.eigen_path] if args.android: - if args.use_dnnlibrary and args.use_nnapi: - raise BuildError( - "Only one of --use_dnnlibrary and --use_nnapi " + - "can be enabled" - ) - cmake_args += [ "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + "/build/cmake/android.toolchain.cmake", @@ -1176,7 +1167,7 @@ def 
run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs): adb_push('onnx_test_runner', '/data/local/tmp/', cwd=cwd) adb_shell( 'cd /data/local/tmp && /data/local/tmp/onnxruntime_test_all') - if args.use_dnnlibrary or args.use_nnapi: + if args.use_nnapi: adb_shell( 'cd /data/local/tmp && /data/local/tmp/onnx_test_runner -e nnapi /data/local/tmp/test') # noqa else: diff --git a/tools/ci_build/github/linux/run_build.sh b/tools/ci_build/github/linux/run_build.sh index a8800dba2c..2c0ba57c10 100755 --- a/tools/ci_build/github/linux/run_build.sh +++ b/tools/ci_build/github/linux/run_build.sh @@ -23,7 +23,7 @@ if [ $BUILD_OS = "android" ]; then pushd /onnxruntime_src mkdir build-android && cd build-android if [ $BUILD_DEVICE = "nnapi" ]; then - cmake -DCMAKE_TOOLCHAIN_FILE=/android-ndk/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_USE_NNAPI=ON ../cmake + cmake -DCMAKE_TOOLCHAIN_FILE=/android-ndk/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_USE_NNAPI_BUILTIN=ON ../cmake else cmake -DCMAKE_TOOLCHAIN_FILE=/android-ndk/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc ../cmake fi