From cf92497c160b3a72ae663ab4dd9cd818331b62db Mon Sep 17 00:00:00 2001 From: gwang-msft <62914304+gwang-msft@users.noreply.github.com> Date: Wed, 15 Jul 2020 00:21:42 -0700 Subject: [PATCH] Nnapi, add auto_pad support for Conv/GlobalAveragePool/AveragePool/GlobalMaxPool/MaxPool operators (#4499) * Split ComputePadAndOutputShape into ComputePad and ComputeOutputShape * update NNAPI conv output shape compute to use shared ComputeOutputShape * use references instead of pointers for ComputePadAndOutputShape * nnapi conv support auto_pad * add logging operator support by target devices * update InferOutputShape/ComputePadAndOutputShape/ComputePad to use force_symmetric_auto_padding as param instead of template * make log op support for target devices optional * add auto_pad support to pool operators * ignore GetTargetDevices if using all devices * fix some typos in padding calculation * fix a bug of compute padding difference between conv and pool ops * addressed CR comments, removed NNAPI device logging and moved nnapi ep autopad handling into a shared function * change helper functions to static --- onnxruntime/contrib_ops/cpu/nchwc_ops.cc | 2 +- onnxruntime/core/providers/acl/nn/conv.cc | 32 +- onnxruntime/core/providers/armnn/nn/conv.cc | 427 +++++++++--------- onnxruntime/core/providers/common.h | 26 +- onnxruntime/core/providers/cpu/nn/conv.cc | 4 +- .../core/providers/cpu/nn/conv_attributes.h | 30 +- .../core/providers/cpu/nn/conv_integer.cc | 2 +- .../core/providers/cpu/nn/qlinearconv.cc | 2 +- onnxruntime/core/providers/cuda/nn/conv.cc | 4 +- .../nnapi_builtin/builders/op_builder.cc | 163 +++++-- .../nnapi/nnapi_builtin/builders/shaper.cc | 38 +- .../nnapi/nnapi_builtin/builders/shaper.h | 6 + 12 files changed, 419 insertions(+), 317 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/nchwc_ops.cc b/onnxruntime/contrib_ops/cpu/nchwc_ops.cc index 925d8b5eb1..e81a04d587 100644 --- a/onnxruntime/contrib_ops/cpu/nchwc_ops.cc +++ 
b/onnxruntime/contrib_ops/cpu/nchwc_ops.cc @@ -152,7 +152,7 @@ Status NchwcConv::Compute(OpKernelContext* context) const { std::vector Y_dims; Y_dims.insert(Y_dims.begin(), {X_shape[0], W_shape[0]}); TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims)); auto* Y = context->Output(0, Y_dims); auto* y_data = Y->template MutableData(); diff --git a/onnxruntime/core/providers/acl/nn/conv.cc b/onnxruntime/core/providers/acl/nn/conv.cc index 4fb119ed39..386119416c 100644 --- a/onnxruntime/core/providers/acl/nn/conv.cc +++ b/onnxruntime/core/providers/acl/nn/conv.cc @@ -60,7 +60,7 @@ Status Conv::Compute(OpKernelContext* context) const { ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this); if (it != Conv::convLayers.end()) { pConv = &it->second; - if(pConv->isDepthwiseCPU == true) { + if (pConv->isDepthwiseCPU == true) { Status s = onnxruntime::Conv::Compute(context); return s; } @@ -103,7 +103,7 @@ Status Conv::Compute(OpKernelContext* context) const { std::vector Y_dims; Y_dims.insert(Y_dims.begin(), {N, M}); TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims)); Tensor* Y = context->Output(0, TensorShape(Y_dims)); LOGS_DEFAULT(VERBOSE) << "Y " << Y->Shape().ToString().c_str() << std::endl; @@ -127,7 +127,6 @@ Status Conv::Compute(OpKernelContext* context) const { } if (it == Conv::convLayers.end()) { - auto mm_layer = ACLCreateMemoryManager(); ACLNEConv tconv; @@ -192,21 +191,21 @@ Status Conv::Compute(OpKernelContext* context) const { // in the configure function for NEDepthwiseConvolutionLayer3x3, there is a separation 
based on the optimization #ifdef ACL_1902 bool optimizable = - arm_compute::NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(tconv.in->info()->tensor_shape(), - aclPadStride, - tconv.in->info()->data_type(), - 1 /* depth multiplier */, - tconv.in->info()->data_layout()); + arm_compute::NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(tconv.in->info()->tensor_shape(), + aclPadStride, + tconv.in->info()->data_type(), + 1 /* depth multiplier */, + tconv.in->info()->data_layout()); #endif #if defined(ACL_1905) || defined(ACL_1908) bool optimizable = - arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(), - tconv.k->info(), - aclPadStride, - 1 /* depth multiplier */, - arm_compute::Size2D(aclDilation0, dilations[0])); + arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(), + tconv.k->info(), + aclPadStride, + 1 /* depth multiplier */, + arm_compute::Size2D(aclDilation0, dilations[0])); #endif - if(optimizable) { + if (optimizable) { //optimized depthwise convolution #if defined(ACL_1902) || defined(ACL_1905) auto layer = std::make_shared(); @@ -234,9 +233,9 @@ Status Conv::Compute(OpKernelContext* context) const { ret = Conv::convLayers.insert(std::pair((OpKernel*)this, tconv)); return s; } -#endif //DEPTHWISE_CPU +#endif //DEPTHWISE_CPU } else { - if(tconv.k->info()->tensor_shape()[0] == 1 && tconv.k->info()->tensor_shape()[1] == 1) { + if (tconv.k->info()->tensor_shape()[0] == 1 && tconv.k->info()->tensor_shape()[1] == 1) { //pointwise convolution Status s = onnxruntime::Conv::Compute(context); return s; @@ -291,7 +290,6 @@ Status Conv::Compute(OpKernelContext* context) const { pConv->b->allocator()->free(); pConv->out->allocator()->free(); - return Status::OK(); } #else diff --git a/onnxruntime/core/providers/armnn/nn/conv.cc b/onnxruntime/core/providers/armnn/nn/conv.cc index f793d5acba..f5e24e7dd2 100644 --- 
a/onnxruntime/core/providers/armnn/nn/conv.cc +++ b/onnxruntime/core/providers/armnn/nn/conv.cc @@ -26,258 +26,253 @@ thread_local std::map Conv::convLayers; template armnn::IRuntimePtr Conv::run = Conv::initRuntime(); -armnn::Convolution2dDescriptor createConvDescriptor(std::vector pads, std::vector dilations, std::vector strides, bool biasEnabled){ +armnn::Convolution2dDescriptor createConvDescriptor(std::vector pads, std::vector dilations, std::vector strides, bool biasEnabled) { + std::vector armnnStrides(2); + armnnStrides[0] = (strides.size() == 2) ? strides[1] : 1; + armnnStrides[1] = strides[0]; - std::vector armnnStrides(2); - armnnStrides[0] = (strides.size() == 2) ? strides[1] : 1; - armnnStrides[1] = strides[0]; + std::vector armnnDilations(2); + armnnDilations[0] = (dilations.size() == 2) ? dilations[1] : 1; + armnnDilations[1] = dilations[0]; - std::vector armnnDilations(2); - armnnDilations[0] = (dilations.size() == 2) ? dilations[1] : 1; - armnnDilations[1] = dilations[0]; - - std::vector armnnPads(4); - if (pads.size() == 2) { - if (strides.size() == 1) { - armnnPads[0] = 0; - armnnPads[1] = 0; - armnnPads[2] = pads[1]; - armnnPads[3] = pads[0]; - } else { - armnnPads[0] = pads[1]; - armnnPads[1] = pads[0]; - armnnPads[2] = pads[1]; - armnnPads[3] = pads[0]; - } + std::vector armnnPads(4); + if (pads.size() == 2) { + if (strides.size() == 1) { + armnnPads[0] = 0; + armnnPads[1] = 0; + armnnPads[2] = pads[1]; + armnnPads[3] = pads[0]; } else { armnnPads[0] = pads[1]; - armnnPads[1] = pads[3]; - armnnPads[2] = pads[0]; - armnnPads[3] = pads[2]; + armnnPads[1] = pads[0]; + armnnPads[2] = pads[1]; + armnnPads[3] = pads[0]; } + } else { + armnnPads[0] = pads[1]; + armnnPads[1] = pads[3]; + armnnPads[2] = pads[0]; + armnnPads[3] = pads[2]; + } - armnn::Convolution2dDescriptor convolutionDescriptor; - convolutionDescriptor.m_PadLeft = armnnPads[0]; - convolutionDescriptor.m_PadRight = armnnPads[1]; - convolutionDescriptor.m_PadTop = armnnPads[2]; - 
convolutionDescriptor.m_PadBottom = armnnPads[3]; - convolutionDescriptor.m_StrideX = armnnStrides[0]; - convolutionDescriptor.m_StrideY = armnnStrides[1]; - convolutionDescriptor.m_DilationX = armnnDilations[0]; - convolutionDescriptor.m_DilationY = armnnDilations[1]; - convolutionDescriptor.m_BiasEnabled = biasEnabled; - convolutionDescriptor.m_DataLayout = armnn::DataLayout::NCHW; + armnn::Convolution2dDescriptor convolutionDescriptor; + convolutionDescriptor.m_PadLeft = armnnPads[0]; + convolutionDescriptor.m_PadRight = armnnPads[1]; + convolutionDescriptor.m_PadTop = armnnPads[2]; + convolutionDescriptor.m_PadBottom = armnnPads[3]; + convolutionDescriptor.m_StrideX = armnnStrides[0]; + convolutionDescriptor.m_StrideY = armnnStrides[1]; + convolutionDescriptor.m_DilationX = armnnDilations[0]; + convolutionDescriptor.m_DilationY = armnnDilations[1]; + convolutionDescriptor.m_BiasEnabled = biasEnabled; + convolutionDescriptor.m_DataLayout = armnn::DataLayout::NCHW; - return convolutionDescriptor; + return convolutionDescriptor; } -armnn::DepthwiseConvolution2dDescriptor createDepthwiseDescriptor(armnn::Convolution2dDescriptor convolutionDescriptor){ +armnn::DepthwiseConvolution2dDescriptor createDepthwiseDescriptor(armnn::Convolution2dDescriptor convolutionDescriptor) { + armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor; + depthwiseDescriptor.m_PadLeft = convolutionDescriptor.m_PadLeft; + depthwiseDescriptor.m_PadRight = convolutionDescriptor.m_PadRight; + depthwiseDescriptor.m_PadTop = convolutionDescriptor.m_PadTop; + depthwiseDescriptor.m_PadBottom = convolutionDescriptor.m_PadBottom; + depthwiseDescriptor.m_StrideX = convolutionDescriptor.m_StrideX; + depthwiseDescriptor.m_StrideY = convolutionDescriptor.m_StrideY; + depthwiseDescriptor.m_DilationX = convolutionDescriptor.m_DilationX; + depthwiseDescriptor.m_DilationY = convolutionDescriptor.m_DilationY; + depthwiseDescriptor.m_BiasEnabled = convolutionDescriptor.m_BiasEnabled; + 
depthwiseDescriptor.m_DataLayout = convolutionDescriptor.m_DataLayout; - armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor; - depthwiseDescriptor.m_PadLeft = convolutionDescriptor.m_PadLeft; - depthwiseDescriptor.m_PadRight = convolutionDescriptor.m_PadRight; - depthwiseDescriptor.m_PadTop = convolutionDescriptor.m_PadTop; - depthwiseDescriptor.m_PadBottom = convolutionDescriptor.m_PadBottom; - depthwiseDescriptor.m_StrideX = convolutionDescriptor.m_StrideX; - depthwiseDescriptor.m_StrideY = convolutionDescriptor.m_StrideY; - depthwiseDescriptor.m_DilationX = convolutionDescriptor.m_DilationX; - depthwiseDescriptor.m_DilationY = convolutionDescriptor.m_DilationY; - depthwiseDescriptor.m_BiasEnabled = convolutionDescriptor.m_BiasEnabled; - depthwiseDescriptor.m_DataLayout = convolutionDescriptor.m_DataLayout; - - return depthwiseDescriptor; + return depthwiseDescriptor; } template Status Conv::Compute(OpKernelContext* context) const { size_t num_inputs = OpKernel::Node().InputDefs().size(); const Tensor* X = context->Input(0); - const Tensor* W = context->Input(1); - const Tensor* B = num_inputs == 3 ? context->Input(2) : nullptr; + const Tensor* W = context->Input(1); + const Tensor* B = num_inputs == 3 ? 
context->Input(2) : nullptr; - const int64_t N = X->Shape()[0]; - const int64_t M = W->Shape()[0]; + const int64_t N = X->Shape()[0]; + const int64_t M = W->Shape()[0]; - if (X->Shape().NumDimensions() != PREF_DIM) { + if (X->Shape().NumDimensions() != PREF_DIM) { + Status s = onnxruntime::Conv::Compute(context); + return s; + } + + ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W)); + + std::vector kernel_shape; + ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape)); + + std::vector pads(conv_attrs_.pads); + if (pads.empty()) { + pads.resize(kernel_shape.size() * 2, 0); + } + std::vector dilations(conv_attrs_.dilations); + if (dilations.empty()) { + dilations.resize(kernel_shape.size(), 1); + } + std::vector strides(conv_attrs_.strides); + if (strides.empty()) { + strides.resize(kernel_shape.size(), 1); + } + + std::vector Y_dims; + Y_dims.insert(Y_dims.begin(), {N, M}); + TensorShape input_shape = X->Shape().Slice(2); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims)); + Tensor* Y = context->Output(0, TensorShape(Y_dims)); + + bool biasEnabled = B != nullptr; + + const T* x_data = X->template Data(); + const T* k_data = W->template Data(); + + const T* b_data; + if (biasEnabled) { + b_data = B->template Data(); + } + + T* y_data = Y->template MutableData(); + + armnn::NetworkId* pNetworkId; + ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this); + if (it == Conv::convLayers.end()) { + armnn::NetworkId networkId; + armnn::INetworkPtr myNetwork = armnn::INetwork::Create(); + + armnn::Convolution2dDescriptor convolutionDescriptor = createConvDescriptor(pads, dilations, strides, biasEnabled); + + armnn::IConnectableLayer* convolution_armnn; + armnn::TensorShape inputShape = ArmNNTensorShape(X->Shape()); + armnn::TensorShape weightShape = ArmNNTensorShape(W->Shape()); + + if (weightShape[2] == 1 && weightShape[3] == 1) { Status s = 
onnxruntime::Conv::Compute(context); return s; } - ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W)); + if (conv_attrs_.group > 1) { + if (conv_attrs_.group == inputShape[1]) { + // depthwise convolution + armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor = createDepthwiseDescriptor(convolutionDescriptor); - std::vector kernel_shape; - ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape)); - - std::vector pads(conv_attrs_.pads); - if (pads.empty()) { - pads.resize(kernel_shape.size() * 2, 0); - } - std::vector dilations(conv_attrs_.dilations); - if (dilations.empty()) { - dilations.resize(kernel_shape.size(), 1); - } - std::vector strides(conv_attrs_.strides); - if (strides.empty()) { - strides.resize(kernel_shape.size(), 1); - } - - std::vector Y_dims; - Y_dims.insert(Y_dims.begin(), {N, M}); - TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); - Tensor* Y = context->Output(0, TensorShape(Y_dims)); - - bool biasEnabled = B != nullptr; - - const T* x_data = X->template Data(); - const T* k_data = W->template Data(); - - const T* b_data; - if (biasEnabled) { - b_data = B->template Data(); - } - - T* y_data = Y->template MutableData(); - - armnn::NetworkId* pNetworkId; - ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this); - if (it == Conv::convLayers.end()) { - - armnn::NetworkId networkId; - armnn::INetworkPtr myNetwork = armnn::INetwork::Create(); - - armnn::Convolution2dDescriptor convolutionDescriptor = createConvDescriptor(pads, dilations, strides, biasEnabled); - - armnn::IConnectableLayer *convolution_armnn; - armnn::TensorShape inputShape = ArmNNTensorShape(X->Shape()); - armnn::TensorShape weightShape = ArmNNTensorShape(W->Shape()); - - if (weightShape[2] == 1 && weightShape[3] == 1) { - Status s = onnxruntime::Conv::Compute(context); - return s; - } - - if (conv_attrs_.group > 1) { - - if 
(conv_attrs_.group == inputShape[1]) { - // depthwise convolution - armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor = createDepthwiseDescriptor(convolutionDescriptor); - - weightShape[1] = weightShape[0]; - weightShape[0] = 1; - armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32); - armnn::ConstTensor weights(weightsInfo, k_data); - - if (biasEnabled) { - armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32); - armnn::ConstTensor bias(biasDesc, b_data); - convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor, - weights, - armnn::Optional(bias), - "depthwise_convolution_armnn"); - } else { - convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor, - weights, - armnn::EmptyOptional(), - "depthwise_convolution_armnn"); - } - } else { - // NCHWc convolution - Status s = onnxruntime::Conv::Compute(context); - return s; - } - } else { - // normal convolution + weightShape[1] = weightShape[0]; + weightShape[0] = 1; armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32); armnn::ConstTensor weights(weightsInfo, k_data); if (biasEnabled) { armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32); armnn::ConstTensor bias(biasDesc, b_data); - convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor, - weights, - armnn::Optional(bias), - "convolution_armnn"); + convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor, + weights, + armnn::Optional(bias), + "depthwise_convolution_armnn"); } else { - convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor, - weights, - armnn::EmptyOptional(), - "convolution_armnn"); + convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor, + weights, + armnn::EmptyOptional(), + "depthwise_convolution_armnn"); } + } else { + // NCHWc convolution + Status s = onnxruntime::Conv::Compute(context); + return s; } - - 
bool armnn_activ_enabled = false; - armnn::ActivationDescriptor desc; - desc.m_A = conv_attrs_.alpha; - - if (activation_type == "Relu") { - desc.m_Function = armnn::ActivationFunction::ReLu; - armnn_activ_enabled = true; - } else if (activation_type == "LeakyRelu") { - desc.m_Function = armnn::ActivationFunction::LeakyReLu; - armnn_activ_enabled = true; - } else if (activation_type == "Tanh") { - desc.m_Function = armnn::ActivationFunction::TanH; - armnn_activ_enabled = true; - } else if (activation_type == "Sigmoid") { - desc.m_Function = armnn::ActivationFunction::Sigmoid; - armnn_activ_enabled = true; - } else if (!activation_type.empty()) { - ORT_NOT_IMPLEMENTED("Not implemented fused activation: ", activation_type); - } - - armnn::IConnectableLayer* activation = myNetwork->AddActivationLayer(desc, "activation_armnn"); - - armnn::IConnectableLayer *InputLayer = myNetwork->AddInputLayer(0); - armnn::IConnectableLayer *OutputLayer = myNetwork->AddOutputLayer(0); - - InputLayer->GetOutputSlot(0).Connect(convolution_armnn->GetInputSlot(0)); - if (armnn_activ_enabled) { - convolution_armnn->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); - activation->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); - } - else { - convolution_armnn->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); - } - - //Set the tensors in the network. 
- armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32); - InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); - - armnn::TensorInfo outputTensorInfo(ArmNNTensorShape(Y->Shape()), armnn::DataType::Float32); - convolution_armnn->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); - - if (armnn_activ_enabled) { - activation->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); - } - - // Optimise ArmNN network - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*myNetwork, {armnn::Compute::CpuAcc}, Conv::run->GetDeviceSpec()); - - if (optNet == nullptr) { - return onnxruntime::Conv::Compute(context); - } - - // Load graph into runtime - Conv::run->LoadNetwork(networkId, std::move(optNet)); - - std::pair ret; - ret = Conv::convLayers.insert(std::pair((OpKernel*)this, networkId)); - pNetworkId = &ret.first->second; - } else { - pNetworkId = &it->second; + // normal convolution + armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32); + armnn::ConstTensor weights(weightsInfo, k_data); + + if (biasEnabled) { + armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32); + armnn::ConstTensor bias(biasDesc, b_data); + convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor, + weights, + armnn::Optional(bias), + "convolution_armnn"); + } else { + convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor, + weights, + armnn::EmptyOptional(), + "convolution_armnn"); + } } - armnn::InputTensors inputTensors{{0, armnn::ConstTensor(Conv::run->GetInputTensorInfo(*pNetworkId, 0), - x_data)}}; - armnn::OutputTensors outputTensors{{0, armnn::Tensor(Conv::run->GetOutputTensorInfo(*pNetworkId, 0), - y_data)}}; + bool armnn_activ_enabled = false; + armnn::ActivationDescriptor desc; + desc.m_A = conv_attrs_.alpha; - // Execute network - Conv::run->EnqueueWorkload(*pNetworkId, inputTensors, outputTensors); + if (activation_type == "Relu") { + desc.m_Function = 
armnn::ActivationFunction::ReLu; + armnn_activ_enabled = true; + } else if (activation_type == "LeakyRelu") { + desc.m_Function = armnn::ActivationFunction::LeakyReLu; + armnn_activ_enabled = true; + } else if (activation_type == "Tanh") { + desc.m_Function = armnn::ActivationFunction::TanH; + armnn_activ_enabled = true; + } else if (activation_type == "Sigmoid") { + desc.m_Function = armnn::ActivationFunction::Sigmoid; + armnn_activ_enabled = true; + } else if (!activation_type.empty()) { + ORT_NOT_IMPLEMENTED("Not implemented fused activation: ", activation_type); + } - return Status::OK(); + armnn::IConnectableLayer* activation = myNetwork->AddActivationLayer(desc, "activation_armnn"); + + armnn::IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0); + armnn::IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0); + + InputLayer->GetOutputSlot(0).Connect(convolution_armnn->GetInputSlot(0)); + if (armnn_activ_enabled) { + convolution_armnn->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); + activation->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); + } else { + convolution_armnn->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); + } + + //Set the tensors in the network. 
+ armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32); + InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + armnn::TensorInfo outputTensorInfo(ArmNNTensorShape(Y->Shape()), armnn::DataType::Float32); + convolution_armnn->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + if (armnn_activ_enabled) { + activation->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + } + + // Optimise ArmNN network + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*myNetwork, {armnn::Compute::CpuAcc}, Conv::run->GetDeviceSpec()); + + if (optNet == nullptr) { + return onnxruntime::Conv::Compute(context); + } + + // Load graph into runtime + Conv::run->LoadNetwork(networkId, std::move(optNet)); + + std::pair ret; + ret = Conv::convLayers.insert(std::pair((OpKernel*)this, networkId)); + pNetworkId = &ret.first->second; + + } else { + pNetworkId = &it->second; + } + + armnn::InputTensors inputTensors{{0, armnn::ConstTensor(Conv::run->GetInputTensorInfo(*pNetworkId, 0), + x_data)}}; + armnn::OutputTensors outputTensors{{0, armnn::Tensor(Conv::run->GetOutputTensorInfo(*pNetworkId, 0), + y_data)}}; + + // Execute network + Conv::run->EnqueueWorkload(*pNetworkId, inputTensors, outputTensors); + + return Status::OK(); } ONNX_OPERATOR_VERSIONED_KERNEL_EX( @@ -296,5 +291,5 @@ ONNX_OPERATOR_KERNEL_EX( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Conv); -} // namespace armnn +} // namespace armnn_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/common.h b/onnxruntime/core/providers/common.h index d192b01cf8..51903ca01f 100644 --- a/onnxruntime/core/providers/common.h +++ b/onnxruntime/core/providers/common.h @@ -70,11 +70,11 @@ inline AutoPadType StringToAutoPadType(const std::string& str) { // helper function -template -Status ComputePad(const int64_t in_dim, - const int64_t stride, const int64_t kernel, const int64_t dilation, - AutoPadType pad_type, - int64_t& pad_head, int64_t& pad_tail) { +inline Status 
ComputePad(const int64_t in_dim, + const int64_t stride, const int64_t kernel, const int64_t dilation, + AutoPadType pad_type, + int64_t& pad_head, int64_t& pad_tail, + bool force_symmetric_auto_padding = false) { switch (pad_type) { case AutoPadType::NOTSET: break; @@ -91,7 +91,7 @@ Status ComputePad(const int64_t in_dim, int64_t legacy_target_size = (in_dim + stride - 1) / stride; int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_dim; // make sure padding is symmetric - if (ForceSymmetricAutoPadding) { + if (force_symmetric_auto_padding) { // Inlining math::roundUpPow2() from util/math.h to avoid bringing in the transitive dependencies. pad_needed = (pad_needed + 1) & ~1; } @@ -117,14 +117,14 @@ inline int64_t ComputeOutputShape(const int64_t in_dim, return static_cast(static_cast(in_dim + pad_head + pad_tail - dkernel) / stride + 1); } -template -Status ComputePadAndOutputShape(const int64_t in_dim, - const int64_t stride, const int64_t kernel, const int64_t dilation, - AutoPadType pad_type, - int64_t& pad_head, int64_t& pad_tail, - int64_t& out_dim) { +inline Status ComputePadAndOutputShape(const int64_t in_dim, + const int64_t stride, const int64_t kernel, const int64_t dilation, + AutoPadType pad_type, + int64_t& pad_head, int64_t& pad_tail, + int64_t& out_dim, + bool force_symmetric_auto_padding = false) { ORT_RETURN_IF_ERROR( - ComputePad(in_dim, stride, kernel, dilation, pad_type, pad_head, pad_tail)); + ComputePad(in_dim, stride, kernel, dilation, pad_type, pad_head, pad_tail, force_symmetric_auto_padding)); out_dim = ComputeOutputShape(in_dim, stride, kernel, dilation, pad_head, pad_tail); return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/nn/conv.cc b/onnxruntime/core/providers/cpu/nn/conv.cc index 08645b55cb..4581d1f83a 100644 --- a/onnxruntime/core/providers/cpu/nn/conv.cc +++ b/onnxruntime/core/providers/cpu/nn/conv.cc @@ -50,7 +50,7 @@ Status Conv::Compute(OpKernelContext* context) const { std::vector Y_dims({N, 
M}); TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims)); Tensor* Y = context->Output(0, Y_dims); TensorShape output_shape = Y->Shape().Slice(2); @@ -188,7 +188,7 @@ Status Conv::Compute(OpKernelContext* context) const { std::vector Y_dims({N, M}); TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims)); Tensor* Y = context->Output(0, TensorShape(Y_dims)); TensorShape output_shape = Y->Shape().Slice(2); diff --git a/onnxruntime/core/providers/cpu/nn/conv_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_attributes.h index 2ed503deca..24d19f8cd2 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/conv_attributes.h @@ -104,34 +104,34 @@ struct ConvAttributes { return Status::OK(); } - template Status InferOutputShape(const TensorShape& input_shape, const std::vector& kernel_shape, const std::vector& strides_p, const std::vector& dilations_p, - std::vector* pads_p, - std::vector* output_shape) const { + std::vector& pads_p, + std::vector& output_shape, + bool force_symmetric_auto_padding = false) const { size_t rank = input_shape.NumDimensions(); for (size_t dim = 0; dim < rank; ++dim) { if (dim >= strides_p.size() || dim >= kernel_shape.size() || - dim >= dilations_p.size() || dim >= pads_p->size() || - rank + dim >= pads_p->size()) { + dim >= dilations_p.size() || dim >= pads_p.size() || + rank + dim >= pads_p.size()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Out of bound access to array"); } int64_t dim_size = 0; - ORT_RETURN_IF_ERROR(ComputePadAndOutputShape( - 
input_shape[dim], - strides_p[dim], - kernel_shape[dim], - dilations_p[dim], - auto_pad, - pads_p->at(dim), - pads_p->at(input_shape.NumDimensions() + dim), - dim_size)); + ORT_RETURN_IF_ERROR(ComputePadAndOutputShape(input_shape[dim], + strides_p[dim], + kernel_shape[dim], + dilations_p[dim], + auto_pad, + pads_p.at(dim), + pads_p.at(input_shape.NumDimensions() + dim), + dim_size, + force_symmetric_auto_padding)); if (dim_size <= 0) { return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid input shape: " + input_shape.ToString()); } - output_shape->push_back(dim_size); + output_shape.push_back(dim_size); } return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/nn/conv_integer.cc b/onnxruntime/core/providers/cpu/nn/conv_integer.cc index bc7e68aa05..10ef6a3133 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_integer.cc +++ b/onnxruntime/core/providers/cpu/nn/conv_integer.cc @@ -71,7 +71,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const { std::vector Y_dims({N, M}); TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims)); Tensor* Y = context->Output(0, TensorShape(Y_dims)); TensorShape output_shape = Y->Shape().Slice(2); diff --git a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc index 7098a8748e..98799c5833 100644 --- a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc +++ b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc @@ -97,7 +97,7 @@ Status QLinearConv::Compute(OpKernelContext* context) const { std::vector Y_dims({N, M}); TensorShape input_shape = X->Shape().Slice(2); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, 
kernel_shape, strides, dilations, pads, Y_dims)); Tensor* Y = context->Output(0, TensorShape(Y_dims)); TensorShape output_shape = Y->Shape().Slice(2); diff --git a/onnxruntime/core/providers/cuda/nn/conv.cc b/onnxruntime/core/providers/cuda/nn/conv.cc index 0cccf3b41d..c29b39a259 100644 --- a/onnxruntime/core/providers/cuda/nn/conv.cc +++ b/onnxruntime/core/providers/cuda/nn/conv.cc @@ -89,8 +89,8 @@ Status Conv::ComputeInternal(OpKernelContext* context) const { std::vector y_dims; y_dims.insert(y_dims.begin(), {N, M}); - ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(x_shape.Slice(2), kernel_shape, - strides, dilations, &pads, &y_dims)); + ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(x_shape.Slice(2), kernel_shape, + strides, dilations, pads, y_dims, true)); s_.y_dims = y_dims; Tensor* Y = context->Output(0, TensorShape(s_.y_dims)); y_data = reinterpret_cast(Y->template MutableData()); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 695537efdd..dcb87c1f75 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -3,6 +3,7 @@ #include #include +#include #include #include "helper.h" @@ -258,6 +259,67 @@ uint32_t AddInitializerTransposed(ModelBuilder& model_builder, return operand_idx; } +static vector ComputeConvPads( + const Shape& input_dimen, + const uint32_t weight_size_y, const uint32_t weight_size_x, + const std::vector& onnx_pads, const std::vector& onnx_strides, const std::vector& onnx_dilations, + AutoPadType auto_pad_type, bool nchw) { + const int32_t input_size_y = nchw ? input_dimen[2] : input_dimen[1]; + const int32_t input_size_x = nchw ? 
input_dimen[3] : input_dimen[2]; + const int32_t stride_y = onnx_strides[0]; + const int32_t stride_x = onnx_strides[1]; + const int32_t dilation_y = onnx_dilations[0]; + const int32_t dilation_x = onnx_dilations[1]; + + int64_t padding_top = onnx_pads[0]; + int64_t padding_bottom = onnx_pads[2]; + int64_t padding_left = onnx_pads[1]; + int64_t padding_right = onnx_pads[3]; + + ORT_THROW_IF_ERROR(ComputePad(input_size_y, + stride_y, weight_size_y, dilation_y, + auto_pad_type, + padding_top, padding_bottom)); + ORT_THROW_IF_ERROR(ComputePad(input_size_x, + stride_x, weight_size_x, dilation_x, + auto_pad_type, + padding_left, padding_right)); + + return {static_cast(padding_top), static_cast(padding_left), + static_cast(padding_bottom), static_cast(padding_right)}; +} + +static void HandleAutoPad(const Shape& input_shape, + const uint32_t weight_size_y, + const uint32_t weight_size_x, + const vector& onnx_strides, + const vector& onnx_dilations, + AutoPadType auto_pad_type, + bool use_nchw, + vector& onnx_pads, + int32_t& nnapi_padding_code, + bool& use_auto_pad) { + if (auto_pad_type != AutoPadType::NOTSET) { + onnx_pads = ComputeConvPads(input_shape, weight_size_y, weight_size_x, + onnx_pads, onnx_strides, onnx_dilations, + auto_pad_type, use_nchw); + + if (AutoPadType::VALID == auto_pad_type || AutoPadType::SAME_UPPER == auto_pad_type) { + use_auto_pad = true; + nnapi_padding_code = (AutoPadType::VALID == auto_pad_type) ? 
ANEURALNETWORKS_PADDING_VALID + : ANEURALNETWORKS_PADDING_SAME; + } + } else { + const auto same_upper_pads = ComputeConvPads(input_shape, weight_size_y, weight_size_x, + onnx_pads, onnx_strides, onnx_dilations, + AutoPadType::SAME_UPPER, use_nchw); + if (onnx_pads == same_upper_pads) { + use_auto_pad = true; + nnapi_padding_code = ANEURALNETWORKS_PADDING_SAME; + } + } +} + #pragma endregion helpers #pragma region op_base @@ -765,11 +827,6 @@ bool PoolOpBuilder::IsOpSupportedImpl(ModelBuilder& /* model_builder */, const N return false; } - if (helper.Get("auto_pad", "NOTSET") != "NOTSET") { - LOGS_DEFAULT(VERBOSE) << "auto_pad is not supported"; - return false; - } - if (helper.Get("kernel_shape", std::vector{1, 1}).size() != 2) { LOGS_DEFAULT(VERBOSE) << "Only pooling 2d is supported"; return false; @@ -841,33 +898,54 @@ void PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod op_type = ANEURALNETWORKS_MAX_POOL_2D; vector onnx_pads, onnx_strides, kernel_shape; + bool use_auto_pad = false; + int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID; + const auto& input_shape = shaper[input]; if (op == "AveragePool" || op == "MaxPool") { + const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET")); kernel_shape = helper.Get("kernel_shape", vector{0, 0}); onnx_strides = helper.Get("strides", vector{1, 1}); onnx_pads = helper.Get("pads", vector{0, 0, 0, 0}); + const auto weight_size_y = static_cast(kernel_shape[0]); + const auto weight_size_x = static_cast(kernel_shape[1]); + HandleAutoPad(input_shape, weight_size_y, weight_size_x, + onnx_strides, {1, 1} /* onnx_dilations */, + auto_pad_type, use_nchw, + onnx_pads, nnapi_padding_code, use_auto_pad); } else { // (op == "GlobalAveragePool" || op == "GlobalMaxPool") + use_auto_pad = true; + nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID; onnx_strides = vector{1, 1}; onnx_pads = vector{0, 0, 0, 0}; - if (model_builder.UseNCHW()) - kernel_shape = 
vector{static_cast(shaper[input][2]), - static_cast(shaper[input][3])}; - else - kernel_shape = vector{static_cast(shaper[input][1]), - static_cast(shaper[input][2])}; + if (use_nchw) { + kernel_shape = vector{static_cast(input_shape[2]), + static_cast(input_shape[3])}; + } else { + kernel_shape = vector{static_cast(input_shape[1]), + static_cast(input_shape[2])}; + } } int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); std::vector input_indices; input_indices.push_back(operand_indices.at(input)); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2])); + + if (use_auto_pad) { + input_indices.push_back(model_builder.AddOperandFromScalar(nnapi_padding_code)); + } else { + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2])); + } + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[1])); input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[0])); input_indices.push_back(model_builder.AddOperandFromScalar(kernel_shape[1])); input_indices.push_back(model_builder.AddOperandFromScalar(kernel_shape[0])); input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code)); + + // TODO support API 28 input_indices.push_back(model_builder.AddOperandFromScalar(use_nchw)); shaper.Pool(input, @@ -899,10 +977,6 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod bool ConvOpBuilder::IsOpSupportedImpl(ModelBuilder& model_builder, const Node& node) { NodeAttrHelper helper(node); - if 
(helper.Get("auto_pad", "NOTSET") != "NOTSET") { - LOGS_DEFAULT(VERBOSE) << "SAME_LOWER auto_pad is not supported"; - return false; - } const auto group = helper.Get("group", 1); const auto weight_name = node.InputDefs()[1]->Name(); @@ -937,7 +1011,7 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod // onnx pads are in the order top, left, bottom, right // while nnapi pads is in the order left, right, top, bottom - const auto onnx_pads = helper.Get("pads", vector{0, 0, 0, 0}); + auto onnx_pads = helper.Get("pads", vector{0, 0, 0, 0}); // onnx dilations is in the order height, width // while nnapi dilations are in the order width, height @@ -968,15 +1042,11 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod const auto& weight_tensor = initializers.at(weight); bool depthwise_conv2d = (weight_tensor.dims()[1] == 1); - std::vector input_indices; - input_indices.push_back(operand_indices.at(input)); - + // Pre-process weights if (conv2d) { - input_indices.push_back(AddInitializerInNewLayout( - model_builder, weight, L_0231)); + AddInitializerInNewLayout(model_builder, weight, L_0231); } else { // depthwise_conv2d - input_indices.push_back(AddInitializerInNewLayout( - model_builder, weight, L_1230)); + AddInitializerInNewLayout(model_builder, weight, L_1230); } bool hasBias = (node.InputDefs().size() >= 3); @@ -1007,23 +1077,50 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod } } + const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET")); + bool use_auto_pad = false; + int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_SAME; + const auto& input_shape = shaper[input]; + const auto& kernel_shape = shaper[weight]; + const auto weight_size_y = kernel_shape[1]; + const auto weight_size_x = kernel_shape[2]; + HandleAutoPad(input_shape, weight_size_y, weight_size_x, + onnx_strides, onnx_dilations, + auto_pad_type, use_nchw, + onnx_pads, 
nnapi_padding_code, use_auto_pad); + + std::vector input_indices; + input_indices.push_back(operand_indices.at(input)); + input_indices.push_back(operand_indices.at(weight)); input_indices.push_back(bias_idx_val); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2])); + + if (use_auto_pad) { + input_indices.push_back(model_builder.AddOperandFromScalar(nnapi_padding_code)); + } else { + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2])); + } + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[1])); input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[0])); + if (!conv2d && depthwise_conv2d) { int32_t depthwiseMultiplier = shaper[weight][3] / group; input_indices.push_back(model_builder.AddOperandFromScalar(depthwiseMultiplier)); } + int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code)); + // TODO support API 28 input_indices.push_back(model_builder.AddOperandFromScalar(use_nchw)); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[1])); - input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[0])); + + if (onnx_dilations[1] != 1 || onnx_dilations[0] != 1) { + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[1])); + input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[0])); + } int32_t operationCode; if (conv2d) { diff --git 
a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc index 0400c2e8d8..b716fa93b0 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc @@ -4,6 +4,9 @@ #include "helper.h" #include "shaper.h" +namespace onnxruntime { +namespace nnapi { + using std::string; using std::vector; @@ -45,8 +48,8 @@ void Shaper::Conv(const std::string& input_name, const vector& onnx_dilations, bool nchw, const std::string& output_name) { - Shape input_dimen = shape_map_.at(input_name); - Shape weight_dimen = shape_map_.at(weight_name); // num_output, height, width, num_input + const Shape& input_dimen = shape_map_.at(input_name); + const Shape& weight_dimen = shape_map_.at(weight_name); // num_output, height, width, num_input const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1]; const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2]; @@ -88,8 +91,8 @@ void Shaper::DepthwiseConv(const std::string& input_name, const std::vector& onnx_dilations, bool nchw, const std::string& output_name) { - Shape input_dimen = shape_map_.at(input_name); - Shape weight_dimen = shape_map_.at(weight_name); // 1, height, width, num_output + const Shape& input_dimen = shape_map_.at(input_name); + const Shape& weight_dimen = shape_map_.at(weight_name); // 1, height, width, num_output const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1]; const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2]; @@ -130,11 +133,11 @@ void Shaper::Pool(const std::string& input_name, const std::vector& kernel_shape, bool nchw, const std::string& output_name) { - auto input_dimen = shape_map_.at(input_name); + const Shape& input_dimen = shape_map_.at(input_name); const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1]; const auto input_size_x = nchw ? 
input_dimen[3] : input_dimen[2]; - int32_t weight_size_y = kernel_shape[0]; - int32_t weight_size_x = kernel_shape[1]; + const auto weight_size_y = kernel_shape[0]; + const auto weight_size_x = kernel_shape[1]; uint32_t output_size_y, output_size_x; std::tie(output_size_y, output_size_x) = @@ -167,7 +170,7 @@ void Shaper::Pool(const std::string& input_name, void Shaper::Reshape(const std::string& input_name, const std::vector& shape, const std::string& output_name) { - auto input_dimen = shape_map_.at(input_name); + const Shape& input_dimen = shape_map_.at(input_name); int64_t input_size = Product(input_dimen); std::vector output_dimen(shape.size()); @@ -209,7 +212,7 @@ void Shaper::Reshape(const std::string& input_name, void Shaper::Transpose(const std::string& input_name, const std::vector& perm, const std::string& output_name) { - auto input_dimen = shape_map_.at(input_name); + const Shape& input_dimen = shape_map_.at(input_name); ORT_ENFORCE(perm.size() == input_dimen.size(), "Invalid perm is given!"); @@ -231,8 +234,8 @@ void Shaper::Transpose(const std::string& input_name, void Shaper::Eltwise(const std::string& input1_name, const std::string& input2_name, const std::string& output_name) { - auto& shape1 = shape_map_.at(input1_name); - auto& shape2 = shape_map_.at(input2_name); + const Shape& shape1 = shape_map_.at(input1_name); + const Shape& shape2 = shape_map_.at(input2_name); // broadcasting support bool shape1IsBigger = shape1.size() >= shape2.size(); @@ -283,8 +286,8 @@ void Shaper::Identity(const std::string& input_name, void Shaper::FC(const std::string& input1_name, const std::string& input2_name, const std::string& output_name) { // Currently we only support A*B'+C - auto input1_dimen = shape_map_.at(input1_name); - Shape input2_dimen = shape_map_.at(input2_name); // num_units, input_size + const Shape& input1_dimen = shape_map_.at(input1_name); + const Shape& input2_dimen = shape_map_.at(input2_name); // num_units, input_size Shape 
output_dimen{input1_dimen[0], input2_dimen[0]}; shape_map_[output_name] = output_dimen; @@ -301,7 +304,7 @@ void Shaper::Concat(const std::vector& input_names, const std::string& output_name) { std::vector dimens; for (const auto& input_name : input_names) { - auto& dimen = shape_map_.at(input_name); + const Shape& dimen = shape_map_.at(input_name); if (!dimens.empty()) { for (size_t i = 0; i < dimens[0].size(); i++) { if ((int32_t)i == axis) @@ -332,7 +335,7 @@ void Shaper::Concat(const std::vector& input_names, void Shaper::Squeeze(const std::string& input_name, const std::vector& axes, const std::string& output_name) { - std::vector input_dimen = shape_map_.at(input_name); + const Shape& input_dimen = shape_map_.at(input_name); int32_t input_size = input_dimen.size(); size_t axes_size = axes.size(); std::unordered_set axes_to_be_squeezed; @@ -372,7 +375,7 @@ void Shaper::UpdateShape(const std::string& name, const Shape& new_shape) { ORT_ENFORCE(shaper_finalized_, "Cannot UpdateShape while shaper is not finalized"); - const auto& old_shape = shape_map_.at(name); + const Shape& old_shape = shape_map_.at(name); if (old_shape != new_shape) { if (Product(old_shape) != 0) ORT_THROW("The shape should be same size or old shape has size 0 (dynamic shape)"); @@ -404,3 +407,6 @@ std::string Shape2String(const Shaper::Shape& shape) { os << "]"; return os.str(); } + +} // namespace nnapi +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h index 634f02d1a4..b18d1f4318 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h @@ -4,6 +4,9 @@ #include #include +namespace onnxruntime { +namespace nnapi { + class Shaper { public: using Shape = std::vector; @@ -69,3 +72,6 @@ class Shaper { }; std::string Shape2String(const Shaper::Shape& shape); 
+ +} // namespace nnapi +} // namespace onnxruntime \ No newline at end of file