From 381fee47ab7c6ef04ec2d152e4cd2650497b24a6 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Mon, 20 Apr 2020 13:35:28 +0530 Subject: [PATCH] Added support to build onnxruntime with ACL (#3586) * Added support to build onnxruntime with ACL * Added ACL build instructions --- BUILD.md | 21 +++++++++++++++++ cmake/CMakeLists.txt | 2 +- cmake/onnxruntime_providers.cmake | 1 + onnxruntime/core/providers/acl/nn/conv.cc | 2 +- tools/ci_build/build.py | 28 +++++++++++++++++++---- 5 files changed, 48 insertions(+), 6 deletions(-) diff --git a/BUILD.md b/BUILD.md index 5cf420c44c..172e0fd3c6 100644 --- a/BUILD.md +++ b/BUILD.md @@ -418,6 +418,27 @@ onnxruntime_perf_test onnxruntime_test_all ``` +#### Build Instructions (Jetson Nano) + +1. Build ACL Library (skip if already built) +``` +cd ~ +git clone https://github.com/Arm-software/ComputeLibrary.git +cd ComputeLibrary +sudo apt install scons +sudo apt install g++-arm-linux-gnueabihf +scons -j8 arch=arm64-v8a Werror=1 debug=0 asserts=0 neon=1 opencl=1 examples=1 build=native +``` +2. Set the environment variables for the ACL include directories and the shared library path. +``` +export CPATH=~/ComputeLibrary/include/:~/ComputeLibrary/ +export LD_LIBRARY_PATH=~/ComputeLibrary/build/ +``` +3. 
Build onnxruntime with --use_acl flag +``` +./build.sh --use_acl +``` + --- ## Options diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index f738038bb9..55519e5a2a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -487,7 +487,7 @@ if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 endif() endif() - list(APPEND onnxruntime_EXTERNAL_LIBRARIES arm_compute acl arm_compute_graph arm_compute_core) + list(APPEND onnxruntime_EXTERNAL_LIBRARIES arm_compute arm_compute_graph arm_compute_core) endif() # MKLML diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 880d8b4958..06a738625a 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -482,6 +482,7 @@ if (onnxruntime_USE_ACL) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_acl_cc_srcs}) add_library(onnxruntime_providers_acl ${onnxruntime_providers_acl_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_acl onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf) + target_link_libraries(onnxruntime_providers_acl -L$ENV{LD_LIBRARY_PATH}) add_dependencies(onnxruntime_providers_acl ${onnxruntime_EXTERNAL_DEPENDENCIES}) set_target_properties(onnxruntime_providers_acl PROPERTIES FOLDER "ONNXRuntime") target_include_directories(onnxruntime_providers_acl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${ACL_INCLUDE_DIR}) diff --git a/onnxruntime/core/providers/acl/nn/conv.cc b/onnxruntime/core/providers/acl/nn/conv.cc index c7ccaf5f04..f95d6cd6ef 100644 --- a/onnxruntime/core/providers/acl/nn/conv.cc +++ b/onnxruntime/core/providers/acl/nn/conv.cc @@ -208,7 +208,7 @@ Status Conv::Compute(OpKernelContext* context) const { if(optimizable) { //optimized depthwise convolution #if defined(ACL_1902) || defined(ACL_1905) - auto layer = std::make_shared(); + auto layer = std::make_shared(); #endif #ifdef ACL_1908 auto layer = std::make_shared(); diff 
--git a/tools/ci_build/build.py b/tools/ci_build/build.py index 2b2754419b..8e8188708d 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -293,6 +293,10 @@ def parse_arguments(): parser.add_argument( "--enable_lto", action='store_true', help="Enable Link Time Optimization") + parser.add_argument( + "--use_acl", nargs="?", const="ACL_1905", + choices=["ACL_1902", "ACL_1905", "ACL_1908"], + help="Build with ACL for ARM architectures.") return parser.parse_args() @@ -479,7 +483,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, "OFF" if args.skip_winml_tests else "ON"), "-Donnxruntime_GENERATE_TEST_REPORTS=ON", "-Donnxruntime_DEV_MODE=" + ( - "OFF" if args.android else "ON"), + "OFF" if args.android or args.use_acl else "ON"), "-DPYTHON_EXECUTABLE=" + sys.executable, "-Donnxruntime_USE_CUDA=" + ("ON" if args.use_cuda else "OFF"), "-Donnxruntime_CUDNN_HOME=" + (cudnn_home if args.use_cuda else ""), @@ -551,6 +555,13 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, "-Donnxruntime_USE_TELEMETRY=" + ( "ON" if args.use_telemetry else "OFF"), "-Donnxruntime_ENABLE_LTO=" + ("ON" if args.enable_lto else "OFF"), + "-Donnxruntime_USE_ACL=" + ("ON" if args.use_acl else "OFF"), + "-Donnxruntime_USE_ACL_1902=" + ( + "ON" if args.use_acl == "ACL_1902" else "OFF"), + "-Donnxruntime_USE_ACL_1905=" + ( + "ON" if args.use_acl == "ACL_1905" else "OFF"), + "-Donnxruntime_USE_ACL_1908=" + ( + "ON" if args.use_acl == "ACL_1908" else "OFF"), ] if args.winml_root_namespace_override: @@ -1165,9 +1176,10 @@ def nuphar_run_python_tests(build_dir, configs): cwd=cwd, dll_path=dll_path) -def build_python_wheel(source_dir, build_dir, configs, use_cuda, use_ngraph, - use_dnnl, use_tensorrt, use_openvino, use_nuphar, - wheel_name_suffix, nightly_build=False): +def build_python_wheel( + source_dir, build_dir, configs, use_cuda, use_ngraph, use_dnnl, + use_tensorrt, use_openvino, use_nuphar, wheel_name_suffix, use_acl, + 
nightly_build=False): for config in configs: cwd = get_config_build_dir(build_dir, config) if is_windows(): @@ -1190,6 +1202,8 @@ def build_python_wheel(source_dir, build_dir, configs, use_cuda, use_ngraph, args.append('--use_nuphar') if wheel_name_suffix: args.append('--wheel_name_suffix={}'.format(wheel_name_suffix)) + elif use_acl: + args.append('--use_acl') run_subprocess(args, cwd=cwd) @@ -1483,6 +1497,11 @@ def main(): build_dir, configs, onnx_test_data_dir, 'dml', args.enable_multi_device_test, False, 1) + if args.use_acl: + run_onnx_tests( + build_dir, configs, onnx_test_data_dir, 'acl', + args.enable_multi_device_test, False, 1, 1) + # Run some models are disabled to keep memory utilization # under control. if args.use_dnnl: @@ -1516,6 +1535,7 @@ def main(): args.use_openvino, args.use_nuphar, args.wheel_name_suffix, + args.use_acl, nightly_build=nightly_build, )