From bd215b79a2aeeb5e0b77feaaed67cc7e3ac98ead Mon Sep 17 00:00:00 2001 From: Andrews548 <32704142+Andrews548@users.noreply.github.com> Date: Fri, 4 Sep 2020 06:44:27 +0300 Subject: [PATCH] ACL v20.02 (#4981) * Add ACL version 20.02 * fix loging typo * check depthwise operation based on group param * Generate ArmNN runtime inside class constructor * Update to the latest ONNX operation set * Update BUILD.md Co-authored-by: Andrei-Alexandru --- BUILD.md | 2 +- cmake/CMakeLists.txt | 9 +++++-- .../providers/acl/acl_execution_provider.cc | 15 ++++++++--- onnxruntime/core/providers/acl/math/gemm.cc | 17 +++++++++++++ onnxruntime/core/providers/acl/nn/conv.cc | 17 +++++++------ onnxruntime/core/providers/acl/nn/pool.cc | 25 ++++++++++++++++--- .../core/providers/acl/tensor/concat.cc | 8 ++++++ .../providers/armnn/activation/activations.cc | 2 +- .../providers/armnn/activation/activations.h | 9 ++++--- .../armnn/armnn_execution_provider.cc | 6 +++++ onnxruntime/core/providers/armnn/math/gemm.h | 5 ++-- .../core/providers/armnn/nn/batch_norm.cc | 2 +- .../core/providers/armnn/nn/batch_norm.h | 1 + onnxruntime/core/providers/armnn/nn/conv.cc | 2 +- onnxruntime/core/providers/armnn/nn/conv.h | 9 ++++--- onnxruntime/core/providers/armnn/nn/pool.cc | 20 +++++++++++++-- onnxruntime/core/providers/armnn/nn/pool.h | 6 +++-- .../core/providers/armnn/tensor/concat.cc | 8 ++++++ tools/ci_build/build.py | 4 ++- 19 files changed, 132 insertions(+), 35 deletions(-) diff --git a/BUILD.md b/BUILD.md index 3b807f9572..59370834e9 100644 --- a/BUILD.md +++ b/BUILD.md @@ -468,7 +468,7 @@ alias cmake="/usr/bin/cmake -DCMAKE_TOOLCHAIN_FILE=$OECORE_NATIVE_SYSROOT/usr/sh cmake ../onnxruntime-arm-upstream/cmake -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_RUN_ONNX_TESTS=OFF -Donnxruntime_GENERATE_TEST_REPORTS=ON -Donnxruntime_DEV_MODE=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -Donnxruntime_USE_CUDA=OFF -Donnxruntime_USE_NSYNC=OFF -Donnxruntime_CUDNN_HOME= -Donnxruntime_USE_JEMALLOC=OFF -Donnxruntime_ENABLE_PYTHON=OFF -Donnxruntime_BUILD_CSHARP=OFF -Donnxruntime_BUILD_SHARED_LIB=ON -Donnxruntime_USE_EIGEN_FOR_BLAS=ON -Donnxruntime_USE_OPENBLAS=OFF -Donnxruntime_USE_ACL=ON -Donnxruntime_USE_DNNL=OFF -Donnxruntime_USE_MKLML=OFF -Donnxruntime_USE_OPENMP=ON -Donnxruntime_USE_TVM=OFF -Donnxruntime_USE_LLVM=OFF -Donnxruntime_ENABLE_MICROSOFT_INTERNAL=OFF -Donnxruntime_USE_BRAINSLICE=OFF -Donnxruntime_USE_NUPHAR=OFF -Donnxruntime_USE_EIGEN_THREADPOOL=OFF -Donnxruntime_BUILD_UNIT_TESTS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ``` The ```-Donnxruntime_USE_ACL=ON``` option will use, by default, the 19.05 version of the Arm Compute Library. To set the right version you can use: -```-Donnxruntime_USE_ACL_1902=ON```, ```-Donnxruntime_USE_ACL_1905=ON``` or ```-Donnxruntime_USE_ACL_1908=ON```; +```-Donnxruntime_USE_ACL_1902=ON```, ```-Donnxruntime_USE_ACL_1905=ON```, ```-Donnxruntime_USE_ACL_1908=ON``` or ```-Donnxruntime_USE_ACL_2002=ON```; 2. Build ONNX Runtime library, test and performance application: ``` diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 7c85de67e3..c93b3c4645 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -97,6 +97,7 @@ option(onnxruntime_USE_ACL "Build with ACL support" OFF) option(onnxruntime_USE_ACL_1902 "Build with ACL version 1902 support" OFF) option(onnxruntime_USE_ACL_1905 "Build with ACL version 1905 support" OFF) option(onnxruntime_USE_ACL_1908 "Build with ACL version 1908 support" OFF) +option(onnxruntime_USE_ACL_2002 "Build with ACL version 2002 support" OFF) option(onnxruntime_USE_ARMNN "Build with ArmNN support" OFF) option(onnxruntime_ARMNN_RELU_USE_CPU "Use the CPU implementation for the Relu operator for the ArmNN EP" ON) option(onnxruntime_ARMNN_BN_USE_CPU "Use the CPU implementation for the Batch Normalization operator for the ArmNN EP" ON) @@ -597,7 +598,7 @@ endfunction() set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto) # ACL -if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 OR onnxruntime_USE_ACL_1908) +if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 OR onnxruntime_USE_ACL_1908 OR onnxruntime_USE_ACL_2002) set(onnxruntime_USE_ACL ON) if(onnxruntime_USE_ACL_1902) add_definitions(-DACL_1902=1) @@ -605,7 +606,11 @@ if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 if(onnxruntime_USE_ACL_1908) add_definitions(-DACL_1908=1) else() - add_definitions(-DACL_1905=1) + if(onnxruntime_USE_ACL_2002) + add_definitions(-DACL_2002=1) + else() + add_definitions(-DACL_1905=1) + endif() endif() endif() diff --git a/onnxruntime/core/providers/acl/acl_execution_provider.cc b/onnxruntime/core/providers/acl/acl_execution_provider.cc index 474773da6b..af726ee3c9 100644 --- a/onnxruntime/core/providers/acl/acl_execution_provider.cc +++ b/onnxruntime/core/providers/acl/acl_execution_provider.cc @@ -19,41 +19,50 @@ namespace acl { // Forward declarations of op kernels class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 6, Relu); -class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, Gemm); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 8, Gemm); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 9, 10, Gemm); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Gemm); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, Conv); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 12, MaxPool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalAveragePool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalMaxPool); -// Opset 10 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, AveragePool); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 4, 10, Concat); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Concat); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kMSDomain, 1, float, FusedConv); static void RegisterACLKernels(KernelRegistry& kernel_registry) { kernel_registry.Register(BuildKernelCreateInfo()); - kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); // Opset 10 kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); } diff --git a/onnxruntime/core/providers/acl/math/gemm.cc b/onnxruntime/core/providers/acl/math/gemm.cc index 1b064c502b..795cf4d308 100755 --- a/onnxruntime/core/providers/acl/math/gemm.cc +++ b/onnxruntime/core/providers/acl/math/gemm.cc @@ -13,7 +13,24 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( Gemm, kOnnxDomain, 7, + 8, + kAclExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Gemm); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Gemm, + kOnnxDomain, 9, + 10, + kAclExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Gemm); + +ONNX_OPERATOR_KERNEL_EX( + Gemm, + kOnnxDomain, + 11, kAclExecutionProvider, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Gemm); diff --git a/onnxruntime/core/providers/acl/nn/conv.cc b/onnxruntime/core/providers/acl/nn/conv.cc index 59c5ae1190..664c248008 100644 --- a/onnxruntime/core/providers/acl/nn/conv.cc +++ b/onnxruntime/core/providers/acl/nn/conv.cc @@ -154,7 +154,7 @@ Status Conv::Compute(OpKernelContext* context) const { const arm_compute::DataLayout data_layout = tconv.in->info()->data_layout(); const int idx_channel = arm_compute::get_data_layout_dimension_index(data_layout, arm_compute::DataLayoutDimension::CHANNEL); - bool isDepthwise = (1 == tconv.k->info()->tensor_shape()[idx_channel]); + bool isDepthwise = (conv_attrs_.group > 1 && conv_attrs_.group == tconv.in->info()->tensor_shape()[idx_channel]); tconv.isDepthwiseCPU = isDepthwise; std::vector aclStrides(2); @@ -208,29 +208,32 @@ Status Conv::Compute(OpKernelContext* context) const { tconv.in->info()->data_type(), 1 /* depth multiplier */, tconv.in->info()->data_layout()); -#endif -#if defined(ACL_1905) || defined(ACL_1908) +#elif defined(ACL_1905) || defined(ACL_1908) bool optimizable = arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(), tconv.k->info(), aclPadStride, 1 /* depth multiplier */, arm_compute::Size2D(aclDilation0, dilations[0])); +#elif defined(ACL_2002) + bool optimizable = false; #endif + if (optimizable) { LOGS_DEFAULT(VERBOSE) << "ACL optimized depthwise convolution"; #if defined(ACL_1902) || defined(ACL_1905) auto layer = std::make_shared(); -#endif -#ifdef ACL_1908 +#elif defined(ACL_1908) auto layer = std::make_shared(); +#elif defined(ACL_2002) + auto layer = std::make_shared(); #endif + #ifdef ACL_1902 layer->configure(tconv.in.get(), tconv.k.get(), (B != nullptr) ? tconv.b.get() : nullptr, tconv.out.get(), aclPadStride, 1 /* depth multiplier */, acl_activ_enabled ? arm_compute::ActivationLayerInfo(acl_activ_func, conv_attrs_.alpha) : arm_compute::ActivationLayerInfo()); -#endif -#if defined(ACL_1905) || defined(ACL_1908) +#elif defined(ACL_1905) || defined(ACL_1908) || defined(ACL_2002) layer->configure(tconv.in.get(), tconv.k.get(), (B != nullptr) ? tconv.b.get() : nullptr, tconv.out.get(), aclPadStride, 1 /* depth multiplier */, acl_activ_enabled ? arm_compute::ActivationLayerInfo(acl_activ_func, conv_attrs_.alpha) : arm_compute::ActivationLayerInfo(), diff --git a/onnxruntime/core/providers/acl/nn/pool.cc b/onnxruntime/core/providers/acl/nn/pool.cc index 73b7c081ce..69d272c72d 100644 --- a/onnxruntime/core/providers/acl/nn/pool.cc +++ b/onnxruntime/core/providers/acl/nn/pool.cc @@ -163,13 +163,13 @@ Status Pool::Compute(OpKernelContext* context) const { } arm_compute::PoolingType pool_type; - if (PoolBase::op_name_ == "GlobalAveragePool" || PoolBase::op_name_ == "AveragePool") + if (PoolBase::op_name_ == "GlobalAveragePool" || PoolBase::op_name_ == "AveragePool") { pool_type = arm_compute::PoolingType::AVG; LOGS_DEFAULT(VERBOSE) << "AveragePool"; - else if (PoolBase::op_name_ == "GlobalMaxPool" || PoolBase::op_name_ == "MaxPool") + } else if (PoolBase::op_name_ == "GlobalMaxPool" || PoolBase::op_name_ == "MaxPool") { pool_type = arm_compute::PoolingType::MAX; LOGS_DEFAULT(VERBOSE) << "MaxPool"; - else { + } else { LOGS_DEFAULT(WARNING) << "Pooling operation not supported in ArmNN; defaulting to cpu implementation"; return onnxruntime::Pool::Compute(context); } @@ -250,5 +250,22 @@ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ MaxPoolV8); +ONNX_OPERATOR_KERNEL_EX( + MaxPool, + kOnnxDomain, + 12, + kAclExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + MaxPoolV8); + +ONNX_OPERATOR_KERNEL_EX( + AveragePool, + kOnnxDomain, + 11, + kAclExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Pool); + + } // namespace acl -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/acl/tensor/concat.cc b/onnxruntime/core/providers/acl/tensor/concat.cc index 246a341e40..9aad25f0b0 100644 --- a/onnxruntime/core/providers/acl/tensor/concat.cc +++ b/onnxruntime/core/providers/acl/tensor/concat.cc @@ -131,5 +131,13 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Concat); +ONNX_OPERATOR_KERNEL_EX( + Concat, + kOnnxDomain, + 11, + kAclExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Concat); + } // namespace acl } // namespace onnxruntime diff --git a/onnxruntime/core/providers/armnn/activation/activations.cc b/onnxruntime/core/providers/armnn/activation/activations.cc index b76a2bf3b4..499e3d44aa 100644 --- a/onnxruntime/core/providers/armnn/activation/activations.cc +++ b/onnxruntime/core/providers/armnn/activation/activations.cc @@ -19,7 +19,7 @@ template thread_local std::map Relu::reluLayers; template -armnn::IRuntimePtr Relu::run = Relu::initRuntime(); +armnn::IRuntimePtr Relu::run = armnn::IRuntimePtr(nullptr, nullptr); template Status Relu::Compute(OpKernelContext* context) const { diff --git a/onnxruntime/core/providers/armnn/activation/activations.h b/onnxruntime/core/providers/armnn/activation/activations.h index 7d0f0dc617..e8585cc2ac 100644 --- a/onnxruntime/core/providers/armnn/activation/activations.h +++ b/onnxruntime/core/providers/armnn/activation/activations.h @@ -23,8 +23,9 @@ template class Relu : public OpKernel { public: explicit Relu(const OpKernelInfo& info) : OpKernel(info) { - provider_ = (const_cast( + provider_ = (const_cast( static_cast(info.GetExecutionProvider()))); + run = Relu::initRuntime(); } ~Relu() { @@ -34,11 +35,11 @@ class Relu : public OpKernel { Status Compute(OpKernelContext* context) const override; static armnn::IRuntimePtr initRuntime(){ - if (Relu::run) - return std::move(Relu::run); + if(Relu::run) + return std::move(Relu::run); armnn::IRuntime::CreationOptions options; return std::move(armnn::IRuntime::Create(options)); - } + } private: static thread_local std::map reluLayers; diff --git a/onnxruntime/core/providers/armnn/armnn_execution_provider.cc b/onnxruntime/core/providers/armnn/armnn_execution_provider.cc index f43e8a905f..67218da12c 100644 --- a/onnxruntime/core/providers/armnn/armnn_execution_provider.cc +++ b/onnxruntime/core/providers/armnn/armnn_execution_provider.cc @@ -31,8 +31,10 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, float, AveragePool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, AveragePool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 12, MaxPool); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool); @@ -41,6 +43,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization); #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 4, 10, Concat); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, Concat); static void RegisterArmNNKernels(KernelRegistry& kernel_registry) { #ifdef RELU_ARMNN @@ -56,8 +59,10 @@ static void RegisterArmNNKernels(KernelRegistry& kernel_registry) { kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); @@ -66,6 +71,7 @@ static void RegisterArmNNKernels(KernelRegistry& kernel_registry) { kernel_registry.Register(BuildKernelCreateInfo()); #endif kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); } std::shared_ptr GetArmNNKernelRegistry() { diff --git a/onnxruntime/core/providers/armnn/math/gemm.h b/onnxruntime/core/providers/armnn/math/gemm.h index 9c4ad47b9f..e491dc86bb 100644 --- a/onnxruntime/core/providers/armnn/math/gemm.h +++ b/onnxruntime/core/providers/armnn/math/gemm.h @@ -28,6 +28,7 @@ class Gemm : public onnxruntime::Gemm { ORT_ENFORCE(info.GetAttr("alpha", &alpha_).IsOK()); ORT_ENFORCE(info.GetAttr("beta", &beta_).IsOK()); + run = Gemm::initRuntime(); } Status Compute(OpKernelContext* context) const override { @@ -165,7 +166,7 @@ class Gemm : public onnxruntime::Gemm { } static armnn::IRuntimePtr initRuntime(){ - if (Gemm::run) + if(Gemm::run) return std::move(Gemm::run); armnn::IRuntime::CreationOptions options; return std::move(armnn::IRuntime::Create(options)); @@ -186,7 +187,7 @@ template thread_local std::map onnxruntime::armnn_ep::Gemm::gemmLayers; template -armnn::IRuntimePtr Gemm::run = Gemm::initRuntime(); +armnn::IRuntimePtr Gemm::run = armnn::IRuntimePtr(nullptr, nullptr); } // namespace armnn_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/armnn/nn/batch_norm.cc b/onnxruntime/core/providers/armnn/nn/batch_norm.cc index 0204edd0c7..26215c3ebd 100755 --- a/onnxruntime/core/providers/armnn/nn/batch_norm.cc +++ b/onnxruntime/core/providers/armnn/nn/batch_norm.cc @@ -24,7 +24,7 @@ template thread_local std::map BatchNorm::batchNormLayers; template -armnn::IRuntimePtr BatchNorm::run = BatchNorm::initRuntime(); +armnn::IRuntimePtr BatchNorm::run = armnn::IRuntimePtr(nullptr, nullptr); template Status BatchNorm::Compute(OpKernelContext* context) const { diff --git a/onnxruntime/core/providers/armnn/nn/batch_norm.h b/onnxruntime/core/providers/armnn/nn/batch_norm.h index 8b613b91a5..7681a371c0 100755 --- a/onnxruntime/core/providers/armnn/nn/batch_norm.h +++ b/onnxruntime/core/providers/armnn/nn/batch_norm.h @@ -28,6 +28,7 @@ class BatchNorm final : public OpKernel { provider_ = (const_cast( dynamic_cast(info.GetExecutionProvider()))); + run = BatchNorm::initRuntime(); } ~BatchNorm() { diff --git a/onnxruntime/core/providers/armnn/nn/conv.cc b/onnxruntime/core/providers/armnn/nn/conv.cc index 4fe0168763..56f487ed8c 100644 --- a/onnxruntime/core/providers/armnn/nn/conv.cc +++ b/onnxruntime/core/providers/armnn/nn/conv.cc @@ -24,7 +24,7 @@ template thread_local std::map Conv::convLayers; template -armnn::IRuntimePtr Conv::run = Conv::initRuntime(); +armnn::IRuntimePtr Conv::run = armnn::IRuntimePtr(nullptr, nullptr); armnn::Convolution2dDescriptor createConvDescriptor(std::vector pads, std::vector dilations, std::vector strides, bool biasEnabled) { std::vector armnnStrides(2); diff --git a/onnxruntime/core/providers/armnn/nn/conv.h b/onnxruntime/core/providers/armnn/nn/conv.h index 3db77c9c25..72035b7fa2 100644 --- a/onnxruntime/core/providers/armnn/nn/conv.h +++ b/onnxruntime/core/providers/armnn/nn/conv.h @@ -23,6 +23,7 @@ class Conv : public onnxruntime::Conv { explicit Conv(const OpKernelInfo& info) : onnxruntime::Conv(info), conv_attrs_(info) { provider_ = (const_cast( static_cast(info.GetExecutionProvider()))); + run = Conv::initRuntime(); } ~Conv() { @@ -32,10 +33,10 @@ class Conv : public onnxruntime::Conv { Status Compute(OpKernelContext* context) const override; static armnn::IRuntimePtr initRuntime(){ - if (Conv::run) - return std::move(Conv::run); - armnn::IRuntime::CreationOptions options; - return std::move(armnn::IRuntime::Create(options)); + if(Conv::run) + return std::move(Conv::run); + armnn::IRuntime::CreationOptions options; + return std::move(armnn::IRuntime::Create(options)); } protected: diff --git a/onnxruntime/core/providers/armnn/nn/pool.cc b/onnxruntime/core/providers/armnn/nn/pool.cc index 681b155c42..c22af36451 100644 --- a/onnxruntime/core/providers/armnn/nn/pool.cc +++ b/onnxruntime/core/providers/armnn/nn/pool.cc @@ -22,13 +22,13 @@ template thread_local std::map Pool::poolLayers; template -armnn::IRuntimePtr Pool::run = Pool::initRuntime(); +armnn::IRuntimePtr Pool::run = armnn::IRuntimePtr(nullptr, nullptr); template thread_local std::map MaxPoolV8::maxPoolLayers; template -armnn::IRuntimePtr MaxPoolV8::run = MaxPoolV8::initRuntime(); +armnn::IRuntimePtr MaxPoolV8::run = armnn::IRuntimePtr(nullptr, nullptr); armnn::Pooling2dDescriptor createDescriptor(std::vector pads, std::vector strides, std::vector kernel_shape, armnn::PoolingAlgorithm pool_type, onnxruntime::PoolAttributes pool_attrs){ @@ -323,6 +323,22 @@ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ MaxPoolV8); +ONNX_OPERATOR_KERNEL_EX( + MaxPool, + kOnnxDomain, + 12, + kArmNNExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + MaxPoolV8); + +ONNX_OPERATOR_KERNEL_EX( + AveragePool, + kOnnxDomain, + 11, + kArmNNExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Pool); + } // namespace armnn_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/armnn/nn/pool.h b/onnxruntime/core/providers/armnn/nn/pool.h index c621cf7749..fb827a033e 100644 --- a/onnxruntime/core/providers/armnn/nn/pool.h +++ b/onnxruntime/core/providers/armnn/nn/pool.h @@ -23,6 +23,7 @@ class Pool final : public onnxruntime::Pool { explicit Pool(const OpKernelInfo& info) : onnxruntime::Pool(info) { provider_ = (const_cast( static_cast(info.GetExecutionProvider()))); + run = Pool::initRuntime(); } ~Pool() { @@ -32,7 +33,7 @@ class Pool final : public onnxruntime::Pool { Status Compute(OpKernelContext* context) const override; static armnn::IRuntimePtr initRuntime(){ - if (Pool::run) + if(Pool::run) return std::move(Pool::run); armnn::IRuntime::CreationOptions options; return std::move(armnn::IRuntime::Create(options)); @@ -50,6 +51,7 @@ class MaxPoolV8 final : public onnxruntime::MaxPoolV8 { explicit MaxPoolV8(const OpKernelInfo& info) : onnxruntime::MaxPoolV8(info) { provider_ = (const_cast( static_cast(info.GetExecutionProvider()))); + run = MaxPoolV8::initRuntime(); } ~MaxPoolV8() { @@ -59,7 +61,7 @@ class MaxPoolV8 final : public onnxruntime::MaxPoolV8 { Status Compute(OpKernelContext* context) const override; static armnn::IRuntimePtr initRuntime(){ - if (MaxPoolV8::run) + if(MaxPoolV8::run) return std::move(MaxPoolV8::run); armnn::IRuntime::CreationOptions options; return std::move(armnn::IRuntime::Create(options)); diff --git a/onnxruntime/core/providers/armnn/tensor/concat.cc b/onnxruntime/core/providers/armnn/tensor/concat.cc index ad3d9c653f..e6af286f3f 100644 --- a/onnxruntime/core/providers/armnn/tensor/concat.cc +++ b/onnxruntime/core/providers/armnn/tensor/concat.cc @@ -145,5 +145,13 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Concat); +ONNX_OPERATOR_KERNEL_EX( + Concat, + kOnnxDomain, + 11, + kArmNNExecutionProvider, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Concat); + } // namespace armnn_ep } // namespace onnxruntime diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 329a6f1d43..8f7e7e3ed1 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -341,7 +341,7 @@ def parse_arguments(): help="Enable Link Time Optimization") parser.add_argument( "--use_acl", nargs="?", const="ACL_1905", - choices=["ACL_1902", "ACL_1905", "ACL_1908"], + choices=["ACL_1902", "ACL_1905", "ACL_1908", "ACL_2002"], help="Build with ACL for ARM architectures.") parser.add_argument( "--use_armnn", action='store_true', @@ -641,6 +641,8 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home "ON" if args.use_acl == "ACL_1905" else "OFF"), "-Donnxruntime_USE_ACL_1908=" + ( "ON" if args.use_acl == "ACL_1908" else "OFF"), + "-Donnxruntime_USE_ACL_2002=" + ( + "ON" if args.use_acl == "ACL_2002" else "OFF"), "-Donnxruntime_USE_ARMNN=" + ( "ON" if args.use_armnn else "OFF"), "-Donnxruntime_ARMNN_RELU_USE_CPU=" + (