ACL v20.02 (#4981)

* Add ACL version 20.02

* fix logging typo

* check depthwise operation based on group param

* Generate ArmNN runtime inside class constructor

* Update to the latest ONNX operation set

* Update BUILD.md

Co-authored-by: Andrei-Alexandru <andrei-alexandru.avram@nxp.com>
This commit is contained in:
Andrews548 2020-09-04 06:44:27 +03:00 committed by GitHub
parent 73456f10cd
commit bd215b79a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 132 additions and 35 deletions

View file

@ -468,7 +468,7 @@ alias cmake="/usr/bin/cmake -DCMAKE_TOOLCHAIN_FILE=$OECORE_NATIVE_SYSROOT/usr/sh
cmake ../onnxruntime-arm-upstream/cmake -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_RUN_ONNX_TESTS=OFF -Donnxruntime_GENERATE_TEST_REPORTS=ON -Donnxruntime_DEV_MODE=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -Donnxruntime_USE_CUDA=OFF -Donnxruntime_USE_NSYNC=OFF -Donnxruntime_CUDNN_HOME= -Donnxruntime_USE_JEMALLOC=OFF -Donnxruntime_ENABLE_PYTHON=OFF -Donnxruntime_BUILD_CSHARP=OFF -Donnxruntime_BUILD_SHARED_LIB=ON -Donnxruntime_USE_EIGEN_FOR_BLAS=ON -Donnxruntime_USE_OPENBLAS=OFF -Donnxruntime_USE_ACL=ON -Donnxruntime_USE_DNNL=OFF -Donnxruntime_USE_MKLML=OFF -Donnxruntime_USE_OPENMP=ON -Donnxruntime_USE_TVM=OFF -Donnxruntime_USE_LLVM=OFF -Donnxruntime_ENABLE_MICROSOFT_INTERNAL=OFF -Donnxruntime_USE_BRAINSLICE=OFF -Donnxruntime_USE_NUPHAR=OFF -Donnxruntime_USE_EIGEN_THREADPOOL=OFF -Donnxruntime_BUILD_UNIT_TESTS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
```
The ```-Donnxruntime_USE_ACL=ON``` option will use, by default, the 19.05 version of the Arm Compute Library. To set the right version you can use:
```-Donnxruntime_USE_ACL_1902=ON```, ```-Donnxruntime_USE_ACL_1905=ON``` or ```-Donnxruntime_USE_ACL_1908=ON```;
```-Donnxruntime_USE_ACL_1902=ON```, ```-Donnxruntime_USE_ACL_1905=ON```, ```-Donnxruntime_USE_ACL_1908=ON``` or ```-Donnxruntime_USE_ACL_2002=ON```;
2. Build ONNX Runtime library, test and performance application:
```

View file

@ -97,6 +97,7 @@ option(onnxruntime_USE_ACL "Build with ACL support" OFF)
option(onnxruntime_USE_ACL_1902 "Build with ACL version 1902 support" OFF)
option(onnxruntime_USE_ACL_1905 "Build with ACL version 1905 support" OFF)
option(onnxruntime_USE_ACL_1908 "Build with ACL version 1908 support" OFF)
option(onnxruntime_USE_ACL_2002 "Build with ACL version 2002 support" OFF)
option(onnxruntime_USE_ARMNN "Build with ArmNN support" OFF)
option(onnxruntime_ARMNN_RELU_USE_CPU "Use the CPU implementation for the Relu operator for the ArmNN EP" ON)
option(onnxruntime_ARMNN_BN_USE_CPU "Use the CPU implementation for the Batch Normalization operator for the ArmNN EP" ON)
@ -597,7 +598,7 @@ endfunction()
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto)
# ACL
if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 OR onnxruntime_USE_ACL_1908)
if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 OR onnxruntime_USE_ACL_1908 OR onnxruntime_USE_ACL_2002)
set(onnxruntime_USE_ACL ON)
if(onnxruntime_USE_ACL_1902)
add_definitions(-DACL_1902=1)
@ -605,7 +606,11 @@ if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905
if(onnxruntime_USE_ACL_1908)
add_definitions(-DACL_1908=1)
else()
add_definitions(-DACL_1905=1)
if(onnxruntime_USE_ACL_2002)
add_definitions(-DACL_2002=1)
else()
add_definitions(-DACL_1905=1)
endif()
endif()
endif()

View file

@ -19,41 +19,50 @@ namespace acl {
// Forward declarations of op kernels
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 6, Relu);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, Gemm);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 8, Gemm);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 9, 10, Gemm);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Gemm);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, Conv);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 12, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalAveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalMaxPool);
// Opset 10
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, AveragePool);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization);
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 4, 10, Concat);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Concat);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kMSDomain, 1, float, FusedConv);
// Adds one KernelCreateInfo per ACL-backed operator to `kernel_registry`.
// Every class named here is forward-declared above via the matching
// ONNX_OPERATOR_*_KERNEL_CLASS_NAME macro; the numeric arguments are the
// opset version (or start/end version pair for the VERSIONED variants).
static void RegisterACLKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 6, Relu)>());
// NOTE(review): the 7-9 Gemm entry below overlaps the 7-8 and 9-10 entries
// that follow it. It looks like the pre-change line of this diff hunk rather
// than a live registration -- confirm against the actual file.
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, Gemm)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 8, Gemm)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 9, 10, Gemm)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Gemm)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, Conv)>());
// Pooling kernels; the versioned entries are typed on float.
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 12, MaxPool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalAveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalMaxPool)>());
// Opset 10 (AveragePool 10-10), followed by the opset 11 AveragePool entry.
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 4, 10, Concat)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Concat)>());
// FusedConv is the only entry registered under kMSDomain instead of kOnnxDomain.
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kMSDomain, 1, float, FusedConv)>());
}

View file

@ -13,7 +13,24 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
Gemm,
kOnnxDomain,
7,
8,
kAclExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Gemm<float>);
// Versioned ACL Gemm kernel definition with start version 9 and end version 10
// in kOnnxDomain. Type parameter "T" is constrained to float tensors and the
// kernel is implemented by Gemm<float>.
ONNX_OPERATOR_VERSIONED_KERNEL_EX(
Gemm,
kOnnxDomain,
9,
10,
kAclExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Gemm<float>);
// ACL Gemm kernel definition for version 11 in kOnnxDomain (no end version
// given). Type parameter "T" is constrained to float tensors and the kernel
// reuses the same Gemm<float> implementation as the versioned definitions.
ONNX_OPERATOR_KERNEL_EX(
Gemm,
kOnnxDomain,
11,
kAclExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Gemm<float>);

View file

@ -154,7 +154,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
const arm_compute::DataLayout data_layout = tconv.in->info()->data_layout();
const int idx_channel = arm_compute::get_data_layout_dimension_index(data_layout, arm_compute::DataLayoutDimension::CHANNEL);
bool isDepthwise = (1 == tconv.k->info()->tensor_shape()[idx_channel]);
bool isDepthwise = (conv_attrs_.group > 1 && conv_attrs_.group == tconv.in->info()->tensor_shape()[idx_channel]);
tconv.isDepthwiseCPU = isDepthwise;
std::vector<int64_t> aclStrides(2);
@ -208,29 +208,32 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
tconv.in->info()->data_type(),
1 /* depth multiplier */,
tconv.in->info()->data_layout());
#endif
#if defined(ACL_1905) || defined(ACL_1908)
#elif defined(ACL_1905) || defined(ACL_1908)
bool optimizable =
arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(),
tconv.k->info(),
aclPadStride,
1 /* depth multiplier */,
arm_compute::Size2D(aclDilation0, dilations[0]));
#elif defined(ACL_2002)
bool optimizable = false;
#endif
if (optimizable) {
LOGS_DEFAULT(VERBOSE) << "ACL optimized depthwise convolution";
#if defined(ACL_1902) || defined(ACL_1905)
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer3x3>();
#endif
#ifdef ACL_1908
#elif defined(ACL_1908)
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayerOptimized>();
#elif defined(ACL_2002)
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer>();
#endif
#ifdef ACL_1902
layer->configure(tconv.in.get(), tconv.k.get(), (B != nullptr) ? tconv.b.get() : nullptr, tconv.out.get(),
aclPadStride, 1 /* depth multiplier */,
acl_activ_enabled ? arm_compute::ActivationLayerInfo(acl_activ_func, conv_attrs_.alpha) : arm_compute::ActivationLayerInfo());
#endif
#if defined(ACL_1905) || defined(ACL_1908)
#elif defined(ACL_1905) || defined(ACL_1908) || defined(ACL_2002)
layer->configure(tconv.in.get(), tconv.k.get(), (B != nullptr) ? tconv.b.get() : nullptr, tconv.out.get(),
aclPadStride, 1 /* depth multiplier */,
acl_activ_enabled ? arm_compute::ActivationLayerInfo(acl_activ_func, conv_attrs_.alpha) : arm_compute::ActivationLayerInfo(),

View file

@ -163,13 +163,13 @@ Status Pool<T, PoolType>::Compute(OpKernelContext* context) const {
}
arm_compute::PoolingType pool_type;
if (PoolBase::op_name_ == "GlobalAveragePool" || PoolBase::op_name_ == "AveragePool")
if (PoolBase::op_name_ == "GlobalAveragePool" || PoolBase::op_name_ == "AveragePool") {
pool_type = arm_compute::PoolingType::AVG;
LOGS_DEFAULT(VERBOSE) << "AveragePool";
else if (PoolBase::op_name_ == "GlobalMaxPool" || PoolBase::op_name_ == "MaxPool")
} else if (PoolBase::op_name_ == "GlobalMaxPool" || PoolBase::op_name_ == "MaxPool") {
pool_type = arm_compute::PoolingType::MAX;
LOGS_DEFAULT(VERBOSE) << "MaxPool";
else {
} else {
LOGS_DEFAULT(WARNING) << "Pooling operation not supported in ArmNN; defaulting to cpu implementation";
return onnxruntime::Pool<T, PoolType>::Compute(context);
}
@ -250,5 +250,22 @@ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), \
MaxPoolV8<float>);
// ACL MaxPool kernel definition for version 12 in kOnnxDomain. Type parameter
// "T" is constrained to float tensors; the kernel is implemented by
// MaxPoolV8<float>, the same class used by the preceding versioned MaxPool
// definition.
ONNX_OPERATOR_KERNEL_EX(
MaxPool,
kOnnxDomain,
12,
kAclExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
MaxPoolV8<float>);
// ACL AveragePool kernel definition for version 11 in kOnnxDomain. Type
// parameter "T" is constrained to float tensors; the kernel is implemented by
// the generic Pool template instantiated as Pool<float, AveragePool>.
ONNX_OPERATOR_KERNEL_EX(
AveragePool,
kOnnxDomain,
11,
kAclExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Pool<float, AveragePool>);
} // namespace acl
} // namespace onnxruntime
} // namespace onnxruntime

View file

@ -131,5 +131,13 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Concat<float>);
// ACL Concat kernel definition for version 11 in kOnnxDomain. Type parameter
// "T" is constrained to float tensors; the kernel is implemented by
// Concat<float>, as is the versioned Concat definition just above.
ONNX_OPERATOR_KERNEL_EX(
Concat,
kOnnxDomain,
11,
kAclExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Concat<float>);
} // namespace acl
} // namespace onnxruntime

View file

@ -19,7 +19,7 @@ template <typename T>
thread_local std::map<OpKernel*, armnn::NetworkId> Relu<T>::reluLayers;
template <typename T>
armnn::IRuntimePtr Relu<T>::run = Relu<T>::initRuntime();
armnn::IRuntimePtr Relu<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
template <typename T>
Status Relu<T>::Compute(OpKernelContext* context) const {

View file

@ -23,8 +23,9 @@ template <typename T>
class Relu : public OpKernel {
public:
explicit Relu(const OpKernelInfo& info) : OpKernel(info) {
provider_ = (const_cast<ArmNNExecutionProvider*>(
provider_ = (const_cast<ArmNNExecutionProvider*>(
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
run = Relu<T>::initRuntime();
}
~Relu() {
@ -34,11 +35,11 @@ class Relu : public OpKernel {
Status Compute(OpKernelContext* context) const override;
// Returns the ArmNN runtime handle for Relu kernels.
//
// If the static `Relu::run` handle is already populated, ownership is moved
// out to the caller; the kernel constructor assigns the result straight back
// to `run`, so an existing runtime is reused rather than recreated. Otherwise
// a new runtime is created with default CreationOptions.
static armnn::IRuntimePtr initRuntime(){
  // `run` is a static lvalue, so it has to be moved explicitly.
  // NOTE(review): this leaves `Relu::run` empty until the caller reassigns it.
  if (Relu::run)
    return std::move(Relu::run);
  armnn::IRuntime::CreationOptions options;
  // Create() already yields a temporary; wrapping it in std::move would only
  // inhibit copy elision.
  return armnn::IRuntime::Create(options);
}
}
private:
static thread_local std::map<OpKernel*, armnn::NetworkId> reluLayers;

View file

@ -31,8 +31,10 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11,
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, float, AveragePool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, AveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 12, MaxPool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool);
@ -41,6 +43,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization);
#endif
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 4, 10, Concat);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, Concat);
static void RegisterArmNNKernels(KernelRegistry& kernel_registry) {
#ifdef RELU_ARMNN
@ -56,8 +59,10 @@ static void RegisterArmNNKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, float, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, AveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 12, MaxPool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool)>());
@ -66,6 +71,7 @@ static void RegisterArmNNKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization)>());
#endif
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 4, 10, Concat)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, Concat)>());
}
std::shared_ptr<KernelRegistry> GetArmNNKernelRegistry() {

View file

@ -28,6 +28,7 @@ class Gemm : public onnxruntime::Gemm<T> {
ORT_ENFORCE(info.GetAttr<float>("alpha", &alpha_).IsOK());
ORT_ENFORCE(info.GetAttr<float>("beta", &beta_).IsOK());
run = Gemm<T>::initRuntime();
}
Status Compute(OpKernelContext* context) const override {
@ -165,7 +166,7 @@ class Gemm : public onnxruntime::Gemm<T> {
}
static armnn::IRuntimePtr initRuntime(){
if (Gemm::run)
if(Gemm::run)
return std::move(Gemm::run);
armnn::IRuntime::CreationOptions options;
return std::move(armnn::IRuntime::Create(options));
@ -186,7 +187,7 @@ template <typename T>
thread_local std::map<OpKernel*, armnn::NetworkId> onnxruntime::armnn_ep::Gemm<T>::gemmLayers;
template <typename T>
armnn::IRuntimePtr Gemm<T>::run = Gemm<T>::initRuntime();
armnn::IRuntimePtr Gemm<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
} // namespace armnn_ep
} // namespace onnxruntime

View file

@ -24,7 +24,7 @@ template <typename T>
thread_local std::map<OpKernel*, armnn::NetworkId> BatchNorm<T>::batchNormLayers;
template <typename T>
armnn::IRuntimePtr BatchNorm<T>::run = BatchNorm<T>::initRuntime();
armnn::IRuntimePtr BatchNorm<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
template <typename T>
Status BatchNorm<T>::Compute(OpKernelContext* context) const {

View file

@ -28,6 +28,7 @@ class BatchNorm final : public OpKernel {
provider_ = (const_cast<ArmNNExecutionProvider*>(
dynamic_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
run = BatchNorm<T>::initRuntime();
}
~BatchNorm() {

View file

@ -24,7 +24,7 @@ template <typename T>
thread_local std::map<OpKernel*, armnn::NetworkId> Conv<T>::convLayers;
template <typename T>
armnn::IRuntimePtr Conv<T>::run = Conv<T>::initRuntime();
armnn::IRuntimePtr Conv<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
armnn::Convolution2dDescriptor createConvDescriptor(std::vector<int64_t> pads, std::vector<int64_t> dilations, std::vector<int64_t> strides, bool biasEnabled) {
std::vector<int64_t> armnnStrides(2);

View file

@ -23,6 +23,7 @@ class Conv : public onnxruntime::Conv<T> {
explicit Conv(const OpKernelInfo& info) : onnxruntime::Conv<T>(info), conv_attrs_(info) {
provider_ = (const_cast<ArmNNExecutionProvider*>(
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
run = Conv<T>::initRuntime();
}
~Conv() {
@ -32,10 +33,10 @@ class Conv : public onnxruntime::Conv<T> {
Status Compute(OpKernelContext* context) const override;
// Returns the ArmNN runtime handle for Conv kernels.
//
// If the static `Conv::run` handle is already populated, ownership is moved
// out to the caller; the kernel constructor assigns the result straight back
// to `run`, so an existing runtime is reused rather than recreated. Otherwise
// a new runtime is created with default CreationOptions.
static armnn::IRuntimePtr initRuntime(){
  // `run` is a static lvalue, so it has to be moved explicitly.
  // NOTE(review): this leaves `Conv::run` empty until the caller reassigns it.
  if (Conv::run)
    return std::move(Conv::run);
  armnn::IRuntime::CreationOptions options;
  // Create() already yields a temporary; wrapping it in std::move would only
  // inhibit copy elision.
  return armnn::IRuntime::Create(options);
}
protected:

View file

@ -22,13 +22,13 @@ template <typename T, typename PoolType>
thread_local std::map<OpKernel*, armnn::NetworkId> Pool<T, PoolType>::poolLayers;
template <typename T, typename PoolType>
armnn::IRuntimePtr Pool<T, PoolType>::run = Pool<T, PoolType>::initRuntime();
armnn::IRuntimePtr Pool<T, PoolType>::run = armnn::IRuntimePtr(nullptr, nullptr);
template <typename T>
thread_local std::map<OpKernel*, armnn::NetworkId> MaxPoolV8<T>::maxPoolLayers;
template <typename T>
armnn::IRuntimePtr MaxPoolV8<T>::run = MaxPoolV8<T>::initRuntime();
armnn::IRuntimePtr MaxPoolV8<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
armnn::Pooling2dDescriptor createDescriptor(std::vector<int64_t> pads, std::vector<int64_t> strides, std::vector<int64_t> kernel_shape, armnn::PoolingAlgorithm pool_type, onnxruntime::PoolAttributes pool_attrs){
@ -323,6 +323,22 @@ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), \
MaxPoolV8<float>);
// ArmNN MaxPool kernel definition for version 12 in kOnnxDomain. Type
// parameter "T" is constrained to float tensors; the kernel is implemented by
// MaxPoolV8<float>, the same class used by the preceding versioned MaxPool
// definition.
ONNX_OPERATOR_KERNEL_EX(
MaxPool,
kOnnxDomain,
12,
kArmNNExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
MaxPoolV8<float>);
// ArmNN AveragePool kernel definition for version 11 in kOnnxDomain. Type
// parameter "T" is constrained to float tensors; the kernel is implemented by
// Pool<float, AveragePool>.
// NOTE(review): the ArmNN kernel registry also declares and registers a typed
// (float) version-11 AveragePool kernel next to this untyped one -- confirm
// the two definitions do not collide at registration time.
ONNX_OPERATOR_KERNEL_EX(
AveragePool,
kOnnxDomain,
11,
kArmNNExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Pool<float, AveragePool>);
} // namespace armnn_ep
} // namespace onnxruntime

View file

@ -23,6 +23,7 @@ class Pool final : public onnxruntime::Pool<T, PoolType> {
explicit Pool(const OpKernelInfo& info) : onnxruntime::Pool<T, PoolType>(info) {
provider_ = (const_cast<ArmNNExecutionProvider*>(
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
run = Pool<T, PoolType>::initRuntime();
}
~Pool() {
@ -32,7 +33,7 @@ class Pool final : public onnxruntime::Pool<T, PoolType> {
Status Compute(OpKernelContext* context) const override;
static armnn::IRuntimePtr initRuntime(){
if (Pool::run)
if(Pool::run)
return std::move(Pool::run);
armnn::IRuntime::CreationOptions options;
return std::move(armnn::IRuntime::Create(options));
@ -50,6 +51,7 @@ class MaxPoolV8 final : public onnxruntime::MaxPoolV8 {
explicit MaxPoolV8(const OpKernelInfo& info) : onnxruntime::MaxPoolV8(info) {
provider_ = (const_cast<ArmNNExecutionProvider*>(
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
run = MaxPoolV8<T>::initRuntime();
}
~MaxPoolV8() {
@ -59,7 +61,7 @@ class MaxPoolV8 final : public onnxruntime::MaxPoolV8 {
Status Compute(OpKernelContext* context) const override;
static armnn::IRuntimePtr initRuntime(){
if (MaxPoolV8::run)
if(MaxPoolV8::run)
return std::move(MaxPoolV8::run);
armnn::IRuntime::CreationOptions options;
return std::move(armnn::IRuntime::Create(options));

View file

@ -145,5 +145,13 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Concat<float>);
// ArmNN Concat kernel definition for version 11 in kOnnxDomain. Type parameter
// "T" is constrained to float tensors; the kernel is implemented by
// Concat<float>, as is the versioned Concat definition just above.
ONNX_OPERATOR_KERNEL_EX(
Concat,
kOnnxDomain,
11,
kArmNNExecutionProvider,
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Concat<float>);
} // namespace armnn_ep
} // namespace onnxruntime

View file

@ -341,7 +341,7 @@ def parse_arguments():
help="Enable Link Time Optimization")
parser.add_argument(
"--use_acl", nargs="?", const="ACL_1905",
choices=["ACL_1902", "ACL_1905", "ACL_1908"],
choices=["ACL_1902", "ACL_1905", "ACL_1908", "ACL_2002"],
help="Build with ACL for ARM architectures.")
parser.add_argument(
"--use_armnn", action='store_true',
@ -641,6 +641,8 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
"ON" if args.use_acl == "ACL_1905" else "OFF"),
"-Donnxruntime_USE_ACL_1908=" + (
"ON" if args.use_acl == "ACL_1908" else "OFF"),
"-Donnxruntime_USE_ACL_2002=" + (
"ON" if args.use_acl == "ACL_2002" else "OFF"),
"-Donnxruntime_USE_ARMNN=" + (
"ON" if args.use_armnn else "OFF"),
"-Donnxruntime_ARMNN_RELU_USE_CPU=" + (