mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-23 22:13:38 +00:00
ACL v20.02 (#4981)
* Add ACL version 20.02 * fix logging typo * check depthwise operation based on group param * Generate ArmNN runtime inside class constructor * Update to the latest ONNX operation set * Update BUILD.md Co-authored-by: Andrei-Alexandru <andrei-alexandru.avram@nxp.com>
This commit is contained in:
parent
73456f10cd
commit
bd215b79a2
19 changed files with 132 additions and 35 deletions
2
BUILD.md
2
BUILD.md
|
|
@ -468,7 +468,7 @@ alias cmake="/usr/bin/cmake -DCMAKE_TOOLCHAIN_FILE=$OECORE_NATIVE_SYSROOT/usr/sh
|
|||
cmake ../onnxruntime-arm-upstream/cmake -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_RUN_ONNX_TESTS=OFF -Donnxruntime_GENERATE_TEST_REPORTS=ON -Donnxruntime_DEV_MODE=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -Donnxruntime_USE_CUDA=OFF -Donnxruntime_USE_NSYNC=OFF -Donnxruntime_CUDNN_HOME= -Donnxruntime_USE_JEMALLOC=OFF -Donnxruntime_ENABLE_PYTHON=OFF -Donnxruntime_BUILD_CSHARP=OFF -Donnxruntime_BUILD_SHARED_LIB=ON -Donnxruntime_USE_EIGEN_FOR_BLAS=ON -Donnxruntime_USE_OPENBLAS=OFF -Donnxruntime_USE_ACL=ON -Donnxruntime_USE_DNNL=OFF -Donnxruntime_USE_MKLML=OFF -Donnxruntime_USE_OPENMP=ON -Donnxruntime_USE_TVM=OFF -Donnxruntime_USE_LLVM=OFF -Donnxruntime_ENABLE_MICROSOFT_INTERNAL=OFF -Donnxruntime_USE_BRAINSLICE=OFF -Donnxruntime_USE_NUPHAR=OFF -Donnxruntime_USE_EIGEN_THREADPOOL=OFF -Donnxruntime_BUILD_UNIT_TESTS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
|
||||
```
|
||||
The ```-Donnxruntime_USE_ACL=ON``` option will use, by default, the 19.05 version of the Arm Compute Library. To set the right version you can use:
|
||||
```-Donnxruntime_USE_ACL_1902=ON```, ```-Donnxruntime_USE_ACL_1905=ON``` or ```-Donnxruntime_USE_ACL_1908=ON```;
|
||||
```-Donnxruntime_USE_ACL_1902=ON```, ```-Donnxruntime_USE_ACL_1905=ON```, ```-Donnxruntime_USE_ACL_1908=ON``` or ```-Donnxruntime_USE_ACL_2002=ON```;
|
||||
|
||||
2. Build ONNX Runtime library, test and performance application:
|
||||
```
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@ option(onnxruntime_USE_ACL "Build with ACL support" OFF)
|
|||
option(onnxruntime_USE_ACL_1902 "Build with ACL version 1902 support" OFF)
|
||||
option(onnxruntime_USE_ACL_1905 "Build with ACL version 1905 support" OFF)
|
||||
option(onnxruntime_USE_ACL_1908 "Build with ACL version 1908 support" OFF)
|
||||
option(onnxruntime_USE_ACL_2002 "Build with ACL version 2002 support" OFF)
|
||||
option(onnxruntime_USE_ARMNN "Build with ArmNN support" OFF)
|
||||
option(onnxruntime_ARMNN_RELU_USE_CPU "Use the CPU implementation for the Relu operator for the ArmNN EP" ON)
|
||||
option(onnxruntime_ARMNN_BN_USE_CPU "Use the CPU implementation for the Batch Normalization operator for the ArmNN EP" ON)
|
||||
|
|
@ -597,7 +598,7 @@ endfunction()
|
|||
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto)
|
||||
|
||||
# ACL
|
||||
if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 OR onnxruntime_USE_ACL_1908)
|
||||
if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905 OR onnxruntime_USE_ACL_1908 OR onnxruntime_USE_ACL_2002)
|
||||
set(onnxruntime_USE_ACL ON)
|
||||
if(onnxruntime_USE_ACL_1902)
|
||||
add_definitions(-DACL_1902=1)
|
||||
|
|
@ -605,7 +606,11 @@ if (onnxruntime_USE_ACL OR onnxruntime_USE_ACL_1902 OR onnxruntime_USE_ACL_1905
|
|||
if(onnxruntime_USE_ACL_1908)
|
||||
add_definitions(-DACL_1908=1)
|
||||
else()
|
||||
add_definitions(-DACL_1905=1)
|
||||
if(onnxruntime_USE_ACL_2002)
|
||||
add_definitions(-DACL_2002=1)
|
||||
else()
|
||||
add_definitions(-DACL_1905=1)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -19,41 +19,50 @@ namespace acl {
|
|||
|
||||
// Forward declarations of op kernels
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 6, Relu);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, Gemm);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 8, Gemm);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 9, 10, Gemm);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Gemm);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, Conv);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 12, MaxPool);
|
||||
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalAveragePool);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalMaxPool);
|
||||
|
||||
// Opset 10
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, AveragePool);
|
||||
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 4, 10, Concat);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Concat);
|
||||
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kMSDomain, 1, float, FusedConv);
|
||||
|
||||
static void RegisterACLKernels(KernelRegistry& kernel_registry) {
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 6, Relu)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, Gemm)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 8, Gemm)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 9, 10, Gemm)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Gemm)>());
|
||||
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, Conv)>());
|
||||
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 12, MaxPool)>());
|
||||
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalAveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 1, 8, float, GlobalMaxPool)>());
|
||||
|
||||
// Opset 10
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, AveragePool)>());
|
||||
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 4, 10, Concat)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kAclExecutionProvider, kOnnxDomain, 11, Concat)>());
|
||||
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kAclExecutionProvider, kMSDomain, 1, float, FusedConv)>());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,24 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
|
|||
Gemm,
|
||||
kOnnxDomain,
|
||||
7,
|
||||
8,
|
||||
kAclExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Gemm<float>);
|
||||
|
||||
ONNX_OPERATOR_VERSIONED_KERNEL_EX(
|
||||
Gemm,
|
||||
kOnnxDomain,
|
||||
9,
|
||||
10,
|
||||
kAclExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Gemm<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
Gemm,
|
||||
kOnnxDomain,
|
||||
11,
|
||||
kAclExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Gemm<float>);
|
||||
|
|
|
|||
|
|
@ -154,7 +154,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
|
||||
const arm_compute::DataLayout data_layout = tconv.in->info()->data_layout();
|
||||
const int idx_channel = arm_compute::get_data_layout_dimension_index(data_layout, arm_compute::DataLayoutDimension::CHANNEL);
|
||||
bool isDepthwise = (1 == tconv.k->info()->tensor_shape()[idx_channel]);
|
||||
bool isDepthwise = (conv_attrs_.group > 1 && conv_attrs_.group == tconv.in->info()->tensor_shape()[idx_channel]);
|
||||
tconv.isDepthwiseCPU = isDepthwise;
|
||||
|
||||
std::vector<int64_t> aclStrides(2);
|
||||
|
|
@ -208,29 +208,32 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
tconv.in->info()->data_type(),
|
||||
1 /* depth multiplier */,
|
||||
tconv.in->info()->data_layout());
|
||||
#endif
|
||||
#if defined(ACL_1905) || defined(ACL_1908)
|
||||
#elif defined(ACL_1905) || defined(ACL_1908)
|
||||
bool optimizable =
|
||||
arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(),
|
||||
tconv.k->info(),
|
||||
aclPadStride,
|
||||
1 /* depth multiplier */,
|
||||
arm_compute::Size2D(aclDilation0, dilations[0]));
|
||||
#elif defined(ACL_2002)
|
||||
bool optimizable = false;
|
||||
#endif
|
||||
|
||||
if (optimizable) {
|
||||
LOGS_DEFAULT(VERBOSE) << "ACL optimized depthwise convolution";
|
||||
#if defined(ACL_1902) || defined(ACL_1905)
|
||||
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer3x3>();
|
||||
#endif
|
||||
#ifdef ACL_1908
|
||||
#elif defined(ACL_1908)
|
||||
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayerOptimized>();
|
||||
#elif defined(ACL_2002)
|
||||
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer>();
|
||||
#endif
|
||||
|
||||
#ifdef ACL_1902
|
||||
layer->configure(tconv.in.get(), tconv.k.get(), (B != nullptr) ? tconv.b.get() : nullptr, tconv.out.get(),
|
||||
aclPadStride, 1 /* depth multiplier */,
|
||||
acl_activ_enabled ? arm_compute::ActivationLayerInfo(acl_activ_func, conv_attrs_.alpha) : arm_compute::ActivationLayerInfo());
|
||||
#endif
|
||||
#if defined(ACL_1905) || defined(ACL_1908)
|
||||
#elif defined(ACL_1905) || defined(ACL_1908) || defined(ACL_2002)
|
||||
layer->configure(tconv.in.get(), tconv.k.get(), (B != nullptr) ? tconv.b.get() : nullptr, tconv.out.get(),
|
||||
aclPadStride, 1 /* depth multiplier */,
|
||||
acl_activ_enabled ? arm_compute::ActivationLayerInfo(acl_activ_func, conv_attrs_.alpha) : arm_compute::ActivationLayerInfo(),
|
||||
|
|
|
|||
|
|
@ -163,13 +163,13 @@ Status Pool<T, PoolType>::Compute(OpKernelContext* context) const {
|
|||
}
|
||||
|
||||
arm_compute::PoolingType pool_type;
|
||||
if (PoolBase::op_name_ == "GlobalAveragePool" || PoolBase::op_name_ == "AveragePool")
|
||||
if (PoolBase::op_name_ == "GlobalAveragePool" || PoolBase::op_name_ == "AveragePool") {
|
||||
pool_type = arm_compute::PoolingType::AVG;
|
||||
LOGS_DEFAULT(VERBOSE) << "AveragePool";
|
||||
else if (PoolBase::op_name_ == "GlobalMaxPool" || PoolBase::op_name_ == "MaxPool")
|
||||
} else if (PoolBase::op_name_ == "GlobalMaxPool" || PoolBase::op_name_ == "MaxPool") {
|
||||
pool_type = arm_compute::PoolingType::MAX;
|
||||
LOGS_DEFAULT(VERBOSE) << "MaxPool";
|
||||
else {
|
||||
} else {
|
||||
LOGS_DEFAULT(WARNING) << "Pooling operation not supported in ArmNN; defaulting to cpu implementation";
|
||||
return onnxruntime::Pool<T, PoolType>::Compute(context);
|
||||
}
|
||||
|
|
@ -250,5 +250,22 @@ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), \
|
||||
MaxPoolV8<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
MaxPool,
|
||||
kOnnxDomain,
|
||||
12,
|
||||
kAclExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
MaxPoolV8<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
AveragePool,
|
||||
kOnnxDomain,
|
||||
11,
|
||||
kAclExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Pool<float, AveragePool>);
|
||||
|
||||
|
||||
} // namespace acl
|
||||
} // namespace onnxruntime
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -131,5 +131,13 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Concat<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
Concat,
|
||||
kOnnxDomain,
|
||||
11,
|
||||
kAclExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Concat<float>);
|
||||
|
||||
} // namespace acl
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ template <typename T>
|
|||
thread_local std::map<OpKernel*, armnn::NetworkId> Relu<T>::reluLayers;
|
||||
|
||||
template <typename T>
|
||||
armnn::IRuntimePtr Relu<T>::run = Relu<T>::initRuntime();
|
||||
armnn::IRuntimePtr Relu<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
|
||||
|
||||
template <typename T>
|
||||
Status Relu<T>::Compute(OpKernelContext* context) const {
|
||||
|
|
|
|||
|
|
@ -23,8 +23,9 @@ template <typename T>
|
|||
class Relu : public OpKernel {
|
||||
public:
|
||||
explicit Relu(const OpKernelInfo& info) : OpKernel(info) {
|
||||
provider_ = (const_cast<ArmNNExecutionProvider*>(
|
||||
provider_ = (const_cast<ArmNNExecutionProvider*>(
|
||||
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
|
||||
run = Relu<T>::initRuntime();
|
||||
}
|
||||
|
||||
~Relu() {
|
||||
|
|
@ -34,11 +35,11 @@ class Relu : public OpKernel {
|
|||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
static armnn::IRuntimePtr initRuntime(){
|
||||
if (Relu::run)
|
||||
return std::move(Relu::run);
|
||||
if(Relu::run)
|
||||
return std::move(Relu::run);
|
||||
armnn::IRuntime::CreationOptions options;
|
||||
return std::move(armnn::IRuntime::Create(options));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static thread_local std::map<OpKernel*, armnn::NetworkId> reluLayers;
|
||||
|
|
|
|||
|
|
@ -31,8 +31,10 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11,
|
|||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, float, AveragePool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, AveragePool);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 12, MaxPool);
|
||||
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool);
|
||||
|
|
@ -41,6 +43,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain
|
|||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization);
|
||||
#endif
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 4, 10, Concat);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, Concat);
|
||||
|
||||
static void RegisterArmNNKernels(KernelRegistry& kernel_registry) {
|
||||
#ifdef RELU_ARMNN
|
||||
|
|
@ -56,8 +59,10 @@ static void RegisterArmNNKernels(KernelRegistry& kernel_registry) {
|
|||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, float, AveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, AveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 8, 11, float, MaxPool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 12, MaxPool)>());
|
||||
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool)>());
|
||||
|
|
@ -66,6 +71,7 @@ static void RegisterArmNNKernels(KernelRegistry& kernel_registry) {
|
|||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 7, 9, BatchNormalization)>());
|
||||
#endif
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 4, 10, Concat)>());
|
||||
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kArmNNExecutionProvider, kOnnxDomain, 11, Concat)>());
|
||||
}
|
||||
|
||||
std::shared_ptr<KernelRegistry> GetArmNNKernelRegistry() {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ class Gemm : public onnxruntime::Gemm<T> {
|
|||
|
||||
ORT_ENFORCE(info.GetAttr<float>("alpha", &alpha_).IsOK());
|
||||
ORT_ENFORCE(info.GetAttr<float>("beta", &beta_).IsOK());
|
||||
run = Gemm<T>::initRuntime();
|
||||
}
|
||||
|
||||
Status Compute(OpKernelContext* context) const override {
|
||||
|
|
@ -165,7 +166,7 @@ class Gemm : public onnxruntime::Gemm<T> {
|
|||
}
|
||||
|
||||
static armnn::IRuntimePtr initRuntime(){
|
||||
if (Gemm::run)
|
||||
if(Gemm::run)
|
||||
return std::move(Gemm::run);
|
||||
armnn::IRuntime::CreationOptions options;
|
||||
return std::move(armnn::IRuntime::Create(options));
|
||||
|
|
@ -186,7 +187,7 @@ template <typename T>
|
|||
thread_local std::map<OpKernel*, armnn::NetworkId> onnxruntime::armnn_ep::Gemm<T>::gemmLayers;
|
||||
|
||||
template <typename T>
|
||||
armnn::IRuntimePtr Gemm<T>::run = Gemm<T>::initRuntime();
|
||||
armnn::IRuntimePtr Gemm<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
|
||||
|
||||
} // namespace armnn_ep
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ template <typename T>
|
|||
thread_local std::map<OpKernel*, armnn::NetworkId> BatchNorm<T>::batchNormLayers;
|
||||
|
||||
template <typename T>
|
||||
armnn::IRuntimePtr BatchNorm<T>::run = BatchNorm<T>::initRuntime();
|
||||
armnn::IRuntimePtr BatchNorm<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
|
||||
|
||||
template <typename T>
|
||||
Status BatchNorm<T>::Compute(OpKernelContext* context) const {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ class BatchNorm final : public OpKernel {
|
|||
|
||||
provider_ = (const_cast<ArmNNExecutionProvider*>(
|
||||
dynamic_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
|
||||
run = BatchNorm<T>::initRuntime();
|
||||
}
|
||||
|
||||
~BatchNorm() {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ template <typename T>
|
|||
thread_local std::map<OpKernel*, armnn::NetworkId> Conv<T>::convLayers;
|
||||
|
||||
template <typename T>
|
||||
armnn::IRuntimePtr Conv<T>::run = Conv<T>::initRuntime();
|
||||
armnn::IRuntimePtr Conv<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
|
||||
|
||||
armnn::Convolution2dDescriptor createConvDescriptor(std::vector<int64_t> pads, std::vector<int64_t> dilations, std::vector<int64_t> strides, bool biasEnabled) {
|
||||
std::vector<int64_t> armnnStrides(2);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class Conv : public onnxruntime::Conv<T> {
|
|||
explicit Conv(const OpKernelInfo& info) : onnxruntime::Conv<T>(info), conv_attrs_(info) {
|
||||
provider_ = (const_cast<ArmNNExecutionProvider*>(
|
||||
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
|
||||
run = Conv<T>::initRuntime();
|
||||
}
|
||||
|
||||
~Conv() {
|
||||
|
|
@ -32,10 +33,10 @@ class Conv : public onnxruntime::Conv<T> {
|
|||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
static armnn::IRuntimePtr initRuntime(){
|
||||
if (Conv::run)
|
||||
return std::move(Conv::run);
|
||||
armnn::IRuntime::CreationOptions options;
|
||||
return std::move(armnn::IRuntime::Create(options));
|
||||
if(Conv::run)
|
||||
return std::move(Conv::run);
|
||||
armnn::IRuntime::CreationOptions options;
|
||||
return std::move(armnn::IRuntime::Create(options));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
|||
|
|
@ -22,13 +22,13 @@ template <typename T, typename PoolType>
|
|||
thread_local std::map<OpKernel*, armnn::NetworkId> Pool<T, PoolType>::poolLayers;
|
||||
|
||||
template <typename T, typename PoolType>
|
||||
armnn::IRuntimePtr Pool<T, PoolType>::run = Pool<T, PoolType>::initRuntime();
|
||||
armnn::IRuntimePtr Pool<T, PoolType>::run = armnn::IRuntimePtr(nullptr, nullptr);
|
||||
|
||||
template <typename T>
|
||||
thread_local std::map<OpKernel*, armnn::NetworkId> MaxPoolV8<T>::maxPoolLayers;
|
||||
|
||||
template <typename T>
|
||||
armnn::IRuntimePtr MaxPoolV8<T>::run = MaxPoolV8<T>::initRuntime();
|
||||
armnn::IRuntimePtr MaxPoolV8<T>::run = armnn::IRuntimePtr(nullptr, nullptr);
|
||||
|
||||
armnn::Pooling2dDescriptor createDescriptor(std::vector<int64_t> pads, std::vector<int64_t> strides, std::vector<int64_t> kernel_shape, armnn::PoolingAlgorithm pool_type, onnxruntime::PoolAttributes pool_attrs){
|
||||
|
||||
|
|
@ -323,6 +323,22 @@ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), \
|
||||
MaxPoolV8<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
MaxPool,
|
||||
kOnnxDomain,
|
||||
12,
|
||||
kArmNNExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
MaxPoolV8<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
AveragePool,
|
||||
kOnnxDomain,
|
||||
11,
|
||||
kArmNNExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Pool<float, AveragePool>);
|
||||
|
||||
} // namespace armnn_ep
|
||||
} // namespace onnxruntime
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class Pool final : public onnxruntime::Pool<T, PoolType> {
|
|||
explicit Pool(const OpKernelInfo& info) : onnxruntime::Pool<T, PoolType>(info) {
|
||||
provider_ = (const_cast<ArmNNExecutionProvider*>(
|
||||
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
|
||||
run = Pool<T, PoolType>::initRuntime();
|
||||
}
|
||||
|
||||
~Pool() {
|
||||
|
|
@ -32,7 +33,7 @@ class Pool final : public onnxruntime::Pool<T, PoolType> {
|
|||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
static armnn::IRuntimePtr initRuntime(){
|
||||
if (Pool::run)
|
||||
if(Pool::run)
|
||||
return std::move(Pool::run);
|
||||
armnn::IRuntime::CreationOptions options;
|
||||
return std::move(armnn::IRuntime::Create(options));
|
||||
|
|
@ -50,6 +51,7 @@ class MaxPoolV8 final : public onnxruntime::MaxPoolV8 {
|
|||
explicit MaxPoolV8(const OpKernelInfo& info) : onnxruntime::MaxPoolV8(info) {
|
||||
provider_ = (const_cast<ArmNNExecutionProvider*>(
|
||||
static_cast<const ArmNNExecutionProvider*>(info.GetExecutionProvider())));
|
||||
run = MaxPoolV8<T>::initRuntime();
|
||||
}
|
||||
|
||||
~MaxPoolV8() {
|
||||
|
|
@ -59,7 +61,7 @@ class MaxPoolV8 final : public onnxruntime::MaxPoolV8 {
|
|||
Status Compute(OpKernelContext* context) const override;
|
||||
|
||||
static armnn::IRuntimePtr initRuntime(){
|
||||
if (MaxPoolV8::run)
|
||||
if(MaxPoolV8::run)
|
||||
return std::move(MaxPoolV8::run);
|
||||
armnn::IRuntime::CreationOptions options;
|
||||
return std::move(armnn::IRuntime::Create(options));
|
||||
|
|
|
|||
|
|
@ -145,5 +145,13 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Concat<float>);
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
Concat,
|
||||
kOnnxDomain,
|
||||
11,
|
||||
kArmNNExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Concat<float>);
|
||||
|
||||
} // namespace armnn_ep
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -341,7 +341,7 @@ def parse_arguments():
|
|||
help="Enable Link Time Optimization")
|
||||
parser.add_argument(
|
||||
"--use_acl", nargs="?", const="ACL_1905",
|
||||
choices=["ACL_1902", "ACL_1905", "ACL_1908"],
|
||||
choices=["ACL_1902", "ACL_1905", "ACL_1908", "ACL_2002"],
|
||||
help="Build with ACL for ARM architectures.")
|
||||
parser.add_argument(
|
||||
"--use_armnn", action='store_true',
|
||||
|
|
@ -641,6 +641,8 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
|
|||
"ON" if args.use_acl == "ACL_1905" else "OFF"),
|
||||
"-Donnxruntime_USE_ACL_1908=" + (
|
||||
"ON" if args.use_acl == "ACL_1908" else "OFF"),
|
||||
"-Donnxruntime_USE_ACL_2002=" + (
|
||||
"ON" if args.use_acl == "ACL_2002" else "OFF"),
|
||||
"-Donnxruntime_USE_ARMNN=" + (
|
||||
"ON" if args.use_armnn else "OFF"),
|
||||
"-Donnxruntime_ARMNN_RELU_USE_CPU=" + (
|
||||
|
|
|
|||
Loading…
Reference in a new issue