mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-04 23:59:56 +00:00
NNAPI: add auto_pad support for Conv/GlobalAveragePool/AveragePool/GlobalMaxPool/MaxPool operators (#4499)
* Split ComputePadAndOutputShape into ComputePad and ComputeOutputShape
* Update NNAPI conv output shape computation to use the shared ComputeOutputShape
* Switch from pointers to references for ComputePadAndOutputShape
* NNAPI conv: support auto_pad
* Add logging of operator support by target devices
* Update InferOutputShape/ComputePadAndOutputShape/ComputePad to take force_symmetric_auto_padding as a parameter instead of a template argument
* Make logging of op support for target devices optional
* Add auto_pad support to pool operators
* Ignore GetTargetDevices if using all devices
* Fix some typos in the padding calculation
* Fix a bug related to the difference in padding computation between conv and pool ops
* Addressed CR comments, removed NNAPI device logging, and moved NNAPI EP auto_pad handling into a shared function
* Change helper functions to static
This commit is contained in:
parent
34f73fa1aa
commit
cf92497c16
12 changed files with 419 additions and 317 deletions
|
|
@ -152,7 +152,7 @@ Status NchwcConv::Compute(OpKernelContext* context) const {
|
|||
std::vector<int64_t> Y_dims;
|
||||
Y_dims.insert(Y_dims.begin(), {X_shape[0], W_shape[0]});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
auto* Y = context->Output(0, Y_dims);
|
||||
auto* y_data = Y->template MutableData<float>();
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this);
|
||||
if (it != Conv::convLayers.end()) {
|
||||
pConv = &it->second;
|
||||
if(pConv->isDepthwiseCPU == true) {
|
||||
if (pConv->isDepthwiseCPU == true) {
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
}
|
||||
|
|
@ -103,7 +103,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
std::vector<int64_t> Y_dims;
|
||||
Y_dims.insert(Y_dims.begin(), {N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
Tensor* Y = context->Output(0, TensorShape(Y_dims));
|
||||
LOGS_DEFAULT(VERBOSE) << "Y " << Y->Shape().ToString().c_str() << std::endl;
|
||||
|
||||
|
|
@ -127,7 +127,6 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
}
|
||||
|
||||
if (it == Conv::convLayers.end()) {
|
||||
|
||||
auto mm_layer = ACLCreateMemoryManager();
|
||||
|
||||
ACLNEConv tconv;
|
||||
|
|
@ -192,21 +191,21 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
// in the configure function for NEDepthwiseConvolutionLayer3x3, there is a separation based on the optimization
|
||||
#ifdef ACL_1902
|
||||
bool optimizable =
|
||||
arm_compute::NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(tconv.in->info()->tensor_shape(),
|
||||
aclPadStride,
|
||||
tconv.in->info()->data_type(),
|
||||
1 /* depth multiplier */,
|
||||
tconv.in->info()->data_layout());
|
||||
arm_compute::NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(tconv.in->info()->tensor_shape(),
|
||||
aclPadStride,
|
||||
tconv.in->info()->data_type(),
|
||||
1 /* depth multiplier */,
|
||||
tconv.in->info()->data_layout());
|
||||
#endif
|
||||
#if defined(ACL_1905) || defined(ACL_1908)
|
||||
bool optimizable =
|
||||
arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(),
|
||||
tconv.k->info(),
|
||||
aclPadStride,
|
||||
1 /* depth multiplier */,
|
||||
arm_compute::Size2D(aclDilation0, dilations[0]));
|
||||
arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(),
|
||||
tconv.k->info(),
|
||||
aclPadStride,
|
||||
1 /* depth multiplier */,
|
||||
arm_compute::Size2D(aclDilation0, dilations[0]));
|
||||
#endif
|
||||
if(optimizable) {
|
||||
if (optimizable) {
|
||||
//optimized depthwise convolution
|
||||
#if defined(ACL_1902) || defined(ACL_1905)
|
||||
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer3x3>();
|
||||
|
|
@ -234,9 +233,9 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
ret = Conv::convLayers.insert(std::pair<OpKernel*, ACLNEConv>((OpKernel*)this, tconv));
|
||||
return s;
|
||||
}
|
||||
#endif //DEPTHWISE_CPU
|
||||
#endif //DEPTHWISE_CPU
|
||||
} else {
|
||||
if(tconv.k->info()->tensor_shape()[0] == 1 && tconv.k->info()->tensor_shape()[1] == 1) {
|
||||
if (tconv.k->info()->tensor_shape()[0] == 1 && tconv.k->info()->tensor_shape()[1] == 1) {
|
||||
//pointwise convolution
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
|
|
@ -291,7 +290,6 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
pConv->b->allocator()->free();
|
||||
pConv->out->allocator()->free();
|
||||
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -26,258 +26,253 @@ thread_local std::map<OpKernel*, armnn::NetworkId> Conv<T>::convLayers;
|
|||
template <typename T>
|
||||
armnn::IRuntimePtr Conv<T>::run = Conv<T>::initRuntime();
|
||||
|
||||
armnn::Convolution2dDescriptor createConvDescriptor(std::vector<int64_t> pads, std::vector<int64_t> dilations, std::vector<int64_t> strides, bool biasEnabled){
|
||||
armnn::Convolution2dDescriptor createConvDescriptor(std::vector<int64_t> pads, std::vector<int64_t> dilations, std::vector<int64_t> strides, bool biasEnabled) {
|
||||
std::vector<int64_t> armnnStrides(2);
|
||||
armnnStrides[0] = (strides.size() == 2) ? strides[1] : 1;
|
||||
armnnStrides[1] = strides[0];
|
||||
|
||||
std::vector<int64_t> armnnStrides(2);
|
||||
armnnStrides[0] = (strides.size() == 2) ? strides[1] : 1;
|
||||
armnnStrides[1] = strides[0];
|
||||
std::vector<int64_t> armnnDilations(2);
|
||||
armnnDilations[0] = (dilations.size() == 2) ? dilations[1] : 1;
|
||||
armnnDilations[1] = dilations[0];
|
||||
|
||||
std::vector<int64_t> armnnDilations(2);
|
||||
armnnDilations[0] = (dilations.size() == 2) ? dilations[1] : 1;
|
||||
armnnDilations[1] = dilations[0];
|
||||
|
||||
std::vector<int64_t> armnnPads(4);
|
||||
if (pads.size() == 2) {
|
||||
if (strides.size() == 1) {
|
||||
armnnPads[0] = 0;
|
||||
armnnPads[1] = 0;
|
||||
armnnPads[2] = pads[1];
|
||||
armnnPads[3] = pads[0];
|
||||
} else {
|
||||
armnnPads[0] = pads[1];
|
||||
armnnPads[1] = pads[0];
|
||||
armnnPads[2] = pads[1];
|
||||
armnnPads[3] = pads[0];
|
||||
}
|
||||
std::vector<int64_t> armnnPads(4);
|
||||
if (pads.size() == 2) {
|
||||
if (strides.size() == 1) {
|
||||
armnnPads[0] = 0;
|
||||
armnnPads[1] = 0;
|
||||
armnnPads[2] = pads[1];
|
||||
armnnPads[3] = pads[0];
|
||||
} else {
|
||||
armnnPads[0] = pads[1];
|
||||
armnnPads[1] = pads[3];
|
||||
armnnPads[2] = pads[0];
|
||||
armnnPads[3] = pads[2];
|
||||
armnnPads[1] = pads[0];
|
||||
armnnPads[2] = pads[1];
|
||||
armnnPads[3] = pads[0];
|
||||
}
|
||||
} else {
|
||||
armnnPads[0] = pads[1];
|
||||
armnnPads[1] = pads[3];
|
||||
armnnPads[2] = pads[0];
|
||||
armnnPads[3] = pads[2];
|
||||
}
|
||||
|
||||
armnn::Convolution2dDescriptor convolutionDescriptor;
|
||||
convolutionDescriptor.m_PadLeft = armnnPads[0];
|
||||
convolutionDescriptor.m_PadRight = armnnPads[1];
|
||||
convolutionDescriptor.m_PadTop = armnnPads[2];
|
||||
convolutionDescriptor.m_PadBottom = armnnPads[3];
|
||||
convolutionDescriptor.m_StrideX = armnnStrides[0];
|
||||
convolutionDescriptor.m_StrideY = armnnStrides[1];
|
||||
convolutionDescriptor.m_DilationX = armnnDilations[0];
|
||||
convolutionDescriptor.m_DilationY = armnnDilations[1];
|
||||
convolutionDescriptor.m_BiasEnabled = biasEnabled;
|
||||
convolutionDescriptor.m_DataLayout = armnn::DataLayout::NCHW;
|
||||
armnn::Convolution2dDescriptor convolutionDescriptor;
|
||||
convolutionDescriptor.m_PadLeft = armnnPads[0];
|
||||
convolutionDescriptor.m_PadRight = armnnPads[1];
|
||||
convolutionDescriptor.m_PadTop = armnnPads[2];
|
||||
convolutionDescriptor.m_PadBottom = armnnPads[3];
|
||||
convolutionDescriptor.m_StrideX = armnnStrides[0];
|
||||
convolutionDescriptor.m_StrideY = armnnStrides[1];
|
||||
convolutionDescriptor.m_DilationX = armnnDilations[0];
|
||||
convolutionDescriptor.m_DilationY = armnnDilations[1];
|
||||
convolutionDescriptor.m_BiasEnabled = biasEnabled;
|
||||
convolutionDescriptor.m_DataLayout = armnn::DataLayout::NCHW;
|
||||
|
||||
return convolutionDescriptor;
|
||||
return convolutionDescriptor;
|
||||
}
|
||||
|
||||
armnn::DepthwiseConvolution2dDescriptor createDepthwiseDescriptor(armnn::Convolution2dDescriptor convolutionDescriptor){
|
||||
armnn::DepthwiseConvolution2dDescriptor createDepthwiseDescriptor(armnn::Convolution2dDescriptor convolutionDescriptor) {
|
||||
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor;
|
||||
depthwiseDescriptor.m_PadLeft = convolutionDescriptor.m_PadLeft;
|
||||
depthwiseDescriptor.m_PadRight = convolutionDescriptor.m_PadRight;
|
||||
depthwiseDescriptor.m_PadTop = convolutionDescriptor.m_PadTop;
|
||||
depthwiseDescriptor.m_PadBottom = convolutionDescriptor.m_PadBottom;
|
||||
depthwiseDescriptor.m_StrideX = convolutionDescriptor.m_StrideX;
|
||||
depthwiseDescriptor.m_StrideY = convolutionDescriptor.m_StrideY;
|
||||
depthwiseDescriptor.m_DilationX = convolutionDescriptor.m_DilationX;
|
||||
depthwiseDescriptor.m_DilationY = convolutionDescriptor.m_DilationY;
|
||||
depthwiseDescriptor.m_BiasEnabled = convolutionDescriptor.m_BiasEnabled;
|
||||
depthwiseDescriptor.m_DataLayout = convolutionDescriptor.m_DataLayout;
|
||||
|
||||
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor;
|
||||
depthwiseDescriptor.m_PadLeft = convolutionDescriptor.m_PadLeft;
|
||||
depthwiseDescriptor.m_PadRight = convolutionDescriptor.m_PadRight;
|
||||
depthwiseDescriptor.m_PadTop = convolutionDescriptor.m_PadTop;
|
||||
depthwiseDescriptor.m_PadBottom = convolutionDescriptor.m_PadBottom;
|
||||
depthwiseDescriptor.m_StrideX = convolutionDescriptor.m_StrideX;
|
||||
depthwiseDescriptor.m_StrideY = convolutionDescriptor.m_StrideY;
|
||||
depthwiseDescriptor.m_DilationX = convolutionDescriptor.m_DilationX;
|
||||
depthwiseDescriptor.m_DilationY = convolutionDescriptor.m_DilationY;
|
||||
depthwiseDescriptor.m_BiasEnabled = convolutionDescriptor.m_BiasEnabled;
|
||||
depthwiseDescriptor.m_DataLayout = convolutionDescriptor.m_DataLayout;
|
||||
|
||||
return depthwiseDescriptor;
|
||||
return depthwiseDescriptor;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Status Conv<T>::Compute(OpKernelContext* context) const {
|
||||
size_t num_inputs = OpKernel::Node().InputDefs().size();
|
||||
const Tensor* X = context->Input<Tensor>(0);
|
||||
const Tensor* W = context->Input<Tensor>(1);
|
||||
const Tensor* B = num_inputs == 3 ? context->Input<Tensor>(2) : nullptr;
|
||||
const Tensor* W = context->Input<Tensor>(1);
|
||||
const Tensor* B = num_inputs == 3 ? context->Input<Tensor>(2) : nullptr;
|
||||
|
||||
const int64_t N = X->Shape()[0];
|
||||
const int64_t M = W->Shape()[0];
|
||||
const int64_t N = X->Shape()[0];
|
||||
const int64_t M = W->Shape()[0];
|
||||
|
||||
if (X->Shape().NumDimensions() != PREF_DIM) {
|
||||
if (X->Shape().NumDimensions() != PREF_DIM) {
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W));
|
||||
|
||||
std::vector<int64_t> kernel_shape;
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape));
|
||||
|
||||
std::vector<int64_t> pads(conv_attrs_.pads);
|
||||
if (pads.empty()) {
|
||||
pads.resize(kernel_shape.size() * 2, 0);
|
||||
}
|
||||
std::vector<int64_t> dilations(conv_attrs_.dilations);
|
||||
if (dilations.empty()) {
|
||||
dilations.resize(kernel_shape.size(), 1);
|
||||
}
|
||||
std::vector<int64_t> strides(conv_attrs_.strides);
|
||||
if (strides.empty()) {
|
||||
strides.resize(kernel_shape.size(), 1);
|
||||
}
|
||||
|
||||
std::vector<int64_t> Y_dims;
|
||||
Y_dims.insert(Y_dims.begin(), {N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
Tensor* Y = context->Output(0, TensorShape(Y_dims));
|
||||
|
||||
bool biasEnabled = B != nullptr;
|
||||
|
||||
const T* x_data = X->template Data<T>();
|
||||
const T* k_data = W->template Data<T>();
|
||||
|
||||
const T* b_data;
|
||||
if (biasEnabled) {
|
||||
b_data = B->template Data<T>();
|
||||
}
|
||||
|
||||
T* y_data = Y->template MutableData<T>();
|
||||
|
||||
armnn::NetworkId* pNetworkId;
|
||||
ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this);
|
||||
if (it == Conv::convLayers.end()) {
|
||||
armnn::NetworkId networkId;
|
||||
armnn::INetworkPtr myNetwork = armnn::INetwork::Create();
|
||||
|
||||
armnn::Convolution2dDescriptor convolutionDescriptor = createConvDescriptor(pads, dilations, strides, biasEnabled);
|
||||
|
||||
armnn::IConnectableLayer* convolution_armnn;
|
||||
armnn::TensorShape inputShape = ArmNNTensorShape(X->Shape());
|
||||
armnn::TensorShape weightShape = ArmNNTensorShape(W->Shape());
|
||||
|
||||
if (weightShape[2] == 1 && weightShape[3] == 1) {
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W));
|
||||
if (conv_attrs_.group > 1) {
|
||||
if (conv_attrs_.group == inputShape[1]) {
|
||||
// depthwise convolution
|
||||
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor = createDepthwiseDescriptor(convolutionDescriptor);
|
||||
|
||||
std::vector<int64_t> kernel_shape;
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape));
|
||||
|
||||
std::vector<int64_t> pads(conv_attrs_.pads);
|
||||
if (pads.empty()) {
|
||||
pads.resize(kernel_shape.size() * 2, 0);
|
||||
}
|
||||
std::vector<int64_t> dilations(conv_attrs_.dilations);
|
||||
if (dilations.empty()) {
|
||||
dilations.resize(kernel_shape.size(), 1);
|
||||
}
|
||||
std::vector<int64_t> strides(conv_attrs_.strides);
|
||||
if (strides.empty()) {
|
||||
strides.resize(kernel_shape.size(), 1);
|
||||
}
|
||||
|
||||
std::vector<int64_t> Y_dims;
|
||||
Y_dims.insert(Y_dims.begin(), {N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
Tensor* Y = context->Output(0, TensorShape(Y_dims));
|
||||
|
||||
bool biasEnabled = B != nullptr;
|
||||
|
||||
const T* x_data = X->template Data<T>();
|
||||
const T* k_data = W->template Data<T>();
|
||||
|
||||
const T* b_data;
|
||||
if (biasEnabled) {
|
||||
b_data = B->template Data<T>();
|
||||
}
|
||||
|
||||
T* y_data = Y->template MutableData<T>();
|
||||
|
||||
armnn::NetworkId* pNetworkId;
|
||||
ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this);
|
||||
if (it == Conv::convLayers.end()) {
|
||||
|
||||
armnn::NetworkId networkId;
|
||||
armnn::INetworkPtr myNetwork = armnn::INetwork::Create();
|
||||
|
||||
armnn::Convolution2dDescriptor convolutionDescriptor = createConvDescriptor(pads, dilations, strides, biasEnabled);
|
||||
|
||||
armnn::IConnectableLayer *convolution_armnn;
|
||||
armnn::TensorShape inputShape = ArmNNTensorShape(X->Shape());
|
||||
armnn::TensorShape weightShape = ArmNNTensorShape(W->Shape());
|
||||
|
||||
if (weightShape[2] == 1 && weightShape[3] == 1) {
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
}
|
||||
|
||||
if (conv_attrs_.group > 1) {
|
||||
|
||||
if (conv_attrs_.group == inputShape[1]) {
|
||||
// depthwise convolution
|
||||
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor = createDepthwiseDescriptor(convolutionDescriptor);
|
||||
|
||||
weightShape[1] = weightShape[0];
|
||||
weightShape[0] = 1;
|
||||
armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32);
|
||||
armnn::ConstTensor weights(weightsInfo, k_data);
|
||||
|
||||
if (biasEnabled) {
|
||||
armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32);
|
||||
armnn::ConstTensor bias(biasDesc, b_data);
|
||||
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
|
||||
weights,
|
||||
armnn::Optional<armnn::ConstTensor>(bias),
|
||||
"depthwise_convolution_armnn");
|
||||
} else {
|
||||
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
|
||||
weights,
|
||||
armnn::EmptyOptional(),
|
||||
"depthwise_convolution_armnn");
|
||||
}
|
||||
} else {
|
||||
// NCHWc convolution
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
// normal convolution
|
||||
weightShape[1] = weightShape[0];
|
||||
weightShape[0] = 1;
|
||||
armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32);
|
||||
armnn::ConstTensor weights(weightsInfo, k_data);
|
||||
|
||||
if (biasEnabled) {
|
||||
armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32);
|
||||
armnn::ConstTensor bias(biasDesc, b_data);
|
||||
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
|
||||
weights,
|
||||
armnn::Optional<armnn::ConstTensor>(bias),
|
||||
"convolution_armnn");
|
||||
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
|
||||
weights,
|
||||
armnn::Optional<armnn::ConstTensor>(bias),
|
||||
"depthwise_convolution_armnn");
|
||||
} else {
|
||||
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
|
||||
weights,
|
||||
armnn::EmptyOptional(),
|
||||
"convolution_armnn");
|
||||
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
|
||||
weights,
|
||||
armnn::EmptyOptional(),
|
||||
"depthwise_convolution_armnn");
|
||||
}
|
||||
} else {
|
||||
// NCHWc convolution
|
||||
Status s = onnxruntime::Conv<T>::Compute(context);
|
||||
return s;
|
||||
}
|
||||
|
||||
bool armnn_activ_enabled = false;
|
||||
armnn::ActivationDescriptor desc;
|
||||
desc.m_A = conv_attrs_.alpha;
|
||||
|
||||
if (activation_type == "Relu") {
|
||||
desc.m_Function = armnn::ActivationFunction::ReLu;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (activation_type == "LeakyRelu") {
|
||||
desc.m_Function = armnn::ActivationFunction::LeakyReLu;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (activation_type == "Tanh") {
|
||||
desc.m_Function = armnn::ActivationFunction::TanH;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (activation_type == "Sigmoid") {
|
||||
desc.m_Function = armnn::ActivationFunction::Sigmoid;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (!activation_type.empty()) {
|
||||
ORT_NOT_IMPLEMENTED("Not implemented fused activation: ", activation_type);
|
||||
}
|
||||
|
||||
armnn::IConnectableLayer* activation = myNetwork->AddActivationLayer(desc, "activation_armnn");
|
||||
|
||||
armnn::IConnectableLayer *InputLayer = myNetwork->AddInputLayer(0);
|
||||
armnn::IConnectableLayer *OutputLayer = myNetwork->AddOutputLayer(0);
|
||||
|
||||
InputLayer->GetOutputSlot(0).Connect(convolution_armnn->GetInputSlot(0));
|
||||
if (armnn_activ_enabled) {
|
||||
convolution_armnn->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
|
||||
activation->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
|
||||
}
|
||||
else {
|
||||
convolution_armnn->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
|
||||
}
|
||||
|
||||
//Set the tensors in the network.
|
||||
armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
|
||||
InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
|
||||
|
||||
armnn::TensorInfo outputTensorInfo(ArmNNTensorShape(Y->Shape()), armnn::DataType::Float32);
|
||||
convolution_armnn->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
|
||||
|
||||
if (armnn_activ_enabled) {
|
||||
activation->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
|
||||
}
|
||||
|
||||
// Optimise ArmNN network
|
||||
armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*myNetwork, {armnn::Compute::CpuAcc}, Conv::run->GetDeviceSpec());
|
||||
|
||||
if (optNet == nullptr) {
|
||||
return onnxruntime::Conv<T>::Compute(context);
|
||||
}
|
||||
|
||||
// Load graph into runtime
|
||||
Conv::run->LoadNetwork(networkId, std::move(optNet));
|
||||
|
||||
std::pair<ConvLayersIterator, bool> ret;
|
||||
ret = Conv::convLayers.insert(std::pair<OpKernel*, armnn::NetworkId>((OpKernel*)this, networkId));
|
||||
pNetworkId = &ret.first->second;
|
||||
|
||||
} else {
|
||||
pNetworkId = &it->second;
|
||||
// normal convolution
|
||||
armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32);
|
||||
armnn::ConstTensor weights(weightsInfo, k_data);
|
||||
|
||||
if (biasEnabled) {
|
||||
armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32);
|
||||
armnn::ConstTensor bias(biasDesc, b_data);
|
||||
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
|
||||
weights,
|
||||
armnn::Optional<armnn::ConstTensor>(bias),
|
||||
"convolution_armnn");
|
||||
} else {
|
||||
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
|
||||
weights,
|
||||
armnn::EmptyOptional(),
|
||||
"convolution_armnn");
|
||||
}
|
||||
}
|
||||
|
||||
armnn::InputTensors inputTensors{{0, armnn::ConstTensor(Conv::run->GetInputTensorInfo(*pNetworkId, 0),
|
||||
x_data)}};
|
||||
armnn::OutputTensors outputTensors{{0, armnn::Tensor(Conv::run->GetOutputTensorInfo(*pNetworkId, 0),
|
||||
y_data)}};
|
||||
bool armnn_activ_enabled = false;
|
||||
armnn::ActivationDescriptor desc;
|
||||
desc.m_A = conv_attrs_.alpha;
|
||||
|
||||
// Execute network
|
||||
Conv::run->EnqueueWorkload(*pNetworkId, inputTensors, outputTensors);
|
||||
if (activation_type == "Relu") {
|
||||
desc.m_Function = armnn::ActivationFunction::ReLu;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (activation_type == "LeakyRelu") {
|
||||
desc.m_Function = armnn::ActivationFunction::LeakyReLu;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (activation_type == "Tanh") {
|
||||
desc.m_Function = armnn::ActivationFunction::TanH;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (activation_type == "Sigmoid") {
|
||||
desc.m_Function = armnn::ActivationFunction::Sigmoid;
|
||||
armnn_activ_enabled = true;
|
||||
} else if (!activation_type.empty()) {
|
||||
ORT_NOT_IMPLEMENTED("Not implemented fused activation: ", activation_type);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
armnn::IConnectableLayer* activation = myNetwork->AddActivationLayer(desc, "activation_armnn");
|
||||
|
||||
armnn::IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
|
||||
armnn::IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
|
||||
|
||||
InputLayer->GetOutputSlot(0).Connect(convolution_armnn->GetInputSlot(0));
|
||||
if (armnn_activ_enabled) {
|
||||
convolution_armnn->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
|
||||
activation->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
|
||||
} else {
|
||||
convolution_armnn->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
|
||||
}
|
||||
|
||||
//Set the tensors in the network.
|
||||
armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
|
||||
InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
|
||||
|
||||
armnn::TensorInfo outputTensorInfo(ArmNNTensorShape(Y->Shape()), armnn::DataType::Float32);
|
||||
convolution_armnn->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
|
||||
|
||||
if (armnn_activ_enabled) {
|
||||
activation->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
|
||||
}
|
||||
|
||||
// Optimise ArmNN network
|
||||
armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*myNetwork, {armnn::Compute::CpuAcc}, Conv::run->GetDeviceSpec());
|
||||
|
||||
if (optNet == nullptr) {
|
||||
return onnxruntime::Conv<T>::Compute(context);
|
||||
}
|
||||
|
||||
// Load graph into runtime
|
||||
Conv::run->LoadNetwork(networkId, std::move(optNet));
|
||||
|
||||
std::pair<ConvLayersIterator, bool> ret;
|
||||
ret = Conv::convLayers.insert(std::pair<OpKernel*, armnn::NetworkId>((OpKernel*)this, networkId));
|
||||
pNetworkId = &ret.first->second;
|
||||
|
||||
} else {
|
||||
pNetworkId = &it->second;
|
||||
}
|
||||
|
||||
armnn::InputTensors inputTensors{{0, armnn::ConstTensor(Conv::run->GetInputTensorInfo(*pNetworkId, 0),
|
||||
x_data)}};
|
||||
armnn::OutputTensors outputTensors{{0, armnn::Tensor(Conv::run->GetOutputTensorInfo(*pNetworkId, 0),
|
||||
y_data)}};
|
||||
|
||||
// Execute network
|
||||
Conv::run->EnqueueWorkload(*pNetworkId, inputTensors, outputTensors);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
ONNX_OPERATOR_VERSIONED_KERNEL_EX(
|
||||
|
|
@ -296,5 +291,5 @@ ONNX_OPERATOR_KERNEL_EX(
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Conv<float>);
|
||||
|
||||
} // namespace armnn
|
||||
} // namespace armnn_ep
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -70,11 +70,11 @@ inline AutoPadType StringToAutoPadType(const std::string& str) {
|
|||
|
||||
// helper function
|
||||
|
||||
template <bool ForceSymmetricAutoPadding>
|
||||
Status ComputePad(const int64_t in_dim,
|
||||
const int64_t stride, const int64_t kernel, const int64_t dilation,
|
||||
AutoPadType pad_type,
|
||||
int64_t& pad_head, int64_t& pad_tail) {
|
||||
inline Status ComputePad(const int64_t in_dim,
|
||||
const int64_t stride, const int64_t kernel, const int64_t dilation,
|
||||
AutoPadType pad_type,
|
||||
int64_t& pad_head, int64_t& pad_tail,
|
||||
bool force_symmetric_auto_padding = false) {
|
||||
switch (pad_type) {
|
||||
case AutoPadType::NOTSET:
|
||||
break;
|
||||
|
|
@ -91,7 +91,7 @@ Status ComputePad(const int64_t in_dim,
|
|||
int64_t legacy_target_size = (in_dim + stride - 1) / stride;
|
||||
int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_dim;
|
||||
// make sure padding is symmetric
|
||||
if (ForceSymmetricAutoPadding) {
|
||||
if (force_symmetric_auto_padding) {
|
||||
// Inlining math::roundUpPow2() from util/math.h to avoid bringing in the transitive dependencies.
|
||||
pad_needed = (pad_needed + 1) & ~1;
|
||||
}
|
||||
|
|
@ -117,14 +117,14 @@ inline int64_t ComputeOutputShape(const int64_t in_dim,
|
|||
return static_cast<int64_t>(static_cast<float>(in_dim + pad_head + pad_tail - dkernel) / stride + 1);
|
||||
}
|
||||
|
||||
template <bool ForceSymmetricAutoPadding>
|
||||
Status ComputePadAndOutputShape(const int64_t in_dim,
|
||||
const int64_t stride, const int64_t kernel, const int64_t dilation,
|
||||
AutoPadType pad_type,
|
||||
int64_t& pad_head, int64_t& pad_tail,
|
||||
int64_t& out_dim) {
|
||||
inline Status ComputePadAndOutputShape(const int64_t in_dim,
|
||||
const int64_t stride, const int64_t kernel, const int64_t dilation,
|
||||
AutoPadType pad_type,
|
||||
int64_t& pad_head, int64_t& pad_tail,
|
||||
int64_t& out_dim,
|
||||
bool force_symmetric_auto_padding = false) {
|
||||
ORT_RETURN_IF_ERROR(
|
||||
ComputePad<ForceSymmetricAutoPadding>(in_dim, stride, kernel, dilation, pad_type, pad_head, pad_tail));
|
||||
ComputePad(in_dim, stride, kernel, dilation, pad_type, pad_head, pad_tail, force_symmetric_auto_padding));
|
||||
out_dim = ComputeOutputShape(in_dim, stride, kernel, dilation, pad_head, pad_tail);
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
|
|||
|
||||
std::vector<int64_t> Y_dims({N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
Tensor* Y = context->Output(0, Y_dims);
|
||||
TensorShape output_shape = Y->Shape().Slice(2);
|
||||
|
||||
|
|
@ -188,7 +188,7 @@ Status Conv<float>::Compute(OpKernelContext* context) const {
|
|||
|
||||
std::vector<int64_t> Y_dims({N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
Tensor* Y = context->Output(0, TensorShape(Y_dims));
|
||||
TensorShape output_shape = Y->Shape().Slice(2);
|
||||
|
||||
|
|
|
|||
|
|
@ -104,34 +104,34 @@ struct ConvAttributes {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
template <bool ForceSymmetricAutoPadding = false>
|
||||
Status InferOutputShape(const TensorShape& input_shape,
|
||||
const std::vector<int64_t>& kernel_shape,
|
||||
const std::vector<int64_t>& strides_p,
|
||||
const std::vector<int64_t>& dilations_p,
|
||||
std::vector<int64_t>* pads_p,
|
||||
std::vector<int64_t>* output_shape) const {
|
||||
std::vector<int64_t>& pads_p,
|
||||
std::vector<int64_t>& output_shape,
|
||||
bool force_symmetric_auto_padding = false) const {
|
||||
size_t rank = input_shape.NumDimensions();
|
||||
for (size_t dim = 0; dim < rank; ++dim) {
|
||||
if (dim >= strides_p.size() || dim >= kernel_shape.size() ||
|
||||
dim >= dilations_p.size() || dim >= pads_p->size() ||
|
||||
rank + dim >= pads_p->size()) {
|
||||
dim >= dilations_p.size() || dim >= pads_p.size() ||
|
||||
rank + dim >= pads_p.size()) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Out of bound access to array");
|
||||
}
|
||||
int64_t dim_size = 0;
|
||||
ORT_RETURN_IF_ERROR(ComputePadAndOutputShape<ForceSymmetricAutoPadding>(
|
||||
input_shape[dim],
|
||||
strides_p[dim],
|
||||
kernel_shape[dim],
|
||||
dilations_p[dim],
|
||||
auto_pad,
|
||||
pads_p->at(dim),
|
||||
pads_p->at(input_shape.NumDimensions() + dim),
|
||||
dim_size));
|
||||
ORT_RETURN_IF_ERROR(ComputePadAndOutputShape(input_shape[dim],
|
||||
strides_p[dim],
|
||||
kernel_shape[dim],
|
||||
dilations_p[dim],
|
||||
auto_pad,
|
||||
pads_p.at(dim),
|
||||
pads_p.at(input_shape.NumDimensions() + dim),
|
||||
dim_size,
|
||||
force_symmetric_auto_padding));
|
||||
if (dim_size <= 0) {
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid input shape: " + input_shape.ToString());
|
||||
}
|
||||
output_shape->push_back(dim_size);
|
||||
output_shape.push_back(dim_size);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
|
|||
|
||||
std::vector<int64_t> Y_dims({N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
Tensor* Y = context->Output(0, TensorShape(Y_dims));
|
||||
TensorShape output_shape = Y->Shape().Slice(2);
|
||||
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ Status QLinearConv::Compute(OpKernelContext* context) const {
|
|||
|
||||
std::vector<int64_t> Y_dims({N, M});
|
||||
TensorShape input_shape = X->Shape().Slice(2);
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
|
||||
Tensor* Y = context->Output(0, TensorShape(Y_dims));
|
||||
TensorShape output_shape = Y->Shape().Slice(2);
|
||||
|
||||
|
|
|
|||
|
|
@ -89,8 +89,8 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
|
|||
|
||||
std::vector<int64_t> y_dims;
|
||||
y_dims.insert(y_dims.begin(), {N, M});
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape<true>(x_shape.Slice(2), kernel_shape,
|
||||
strides, dilations, &pads, &y_dims));
|
||||
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(x_shape.Slice(2), kernel_shape,
|
||||
strides, dilations, pads, y_dims, true));
|
||||
s_.y_dims = y_dims;
|
||||
Tensor* Y = context->Output(0, TensorShape(s_.y_dims));
|
||||
y_data = reinterpret_cast<CudaT*>(Y->template MutableData<T>());
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <core/common/logging/logging.h>
|
||||
#include <core/common/safeint.h>
|
||||
#include <core/providers/common.h>
|
||||
#include <onnx/onnx_pb.h>
|
||||
|
||||
#include "helper.h"
|
||||
|
|
@ -258,6 +259,67 @@ uint32_t AddInitializerTransposed(ModelBuilder& model_builder,
|
|||
return operand_idx;
|
||||
}
|
||||
|
||||
static vector<int32_t> ComputeConvPads(
|
||||
const Shape& input_dimen,
|
||||
const uint32_t weight_size_y, const uint32_t weight_size_x,
|
||||
const std::vector<int32_t>& onnx_pads, const std::vector<int32_t>& onnx_strides, const std::vector<int32_t>& onnx_dilations,
|
||||
AutoPadType auto_pad_type, bool nchw) {
|
||||
const int32_t input_size_y = nchw ? input_dimen[2] : input_dimen[1];
|
||||
const int32_t input_size_x = nchw ? input_dimen[3] : input_dimen[2];
|
||||
const int32_t stride_y = onnx_strides[0];
|
||||
const int32_t stride_x = onnx_strides[1];
|
||||
const int32_t dilation_y = onnx_dilations[0];
|
||||
const int32_t dilation_x = onnx_dilations[1];
|
||||
|
||||
int64_t padding_top = onnx_pads[0];
|
||||
int64_t padding_bottom = onnx_pads[2];
|
||||
int64_t padding_left = onnx_pads[1];
|
||||
int64_t padding_right = onnx_pads[3];
|
||||
|
||||
ORT_THROW_IF_ERROR(ComputePad(input_size_y,
|
||||
stride_y, weight_size_y, dilation_y,
|
||||
auto_pad_type,
|
||||
padding_top, padding_bottom));
|
||||
ORT_THROW_IF_ERROR(ComputePad(input_size_x,
|
||||
stride_x, weight_size_x, dilation_x,
|
||||
auto_pad_type,
|
||||
padding_left, padding_right));
|
||||
|
||||
return {static_cast<int32_t>(padding_top), static_cast<int32_t>(padding_left),
|
||||
static_cast<int32_t>(padding_bottom), static_cast<int32_t>(padding_right)};
|
||||
}
|
||||
|
||||
static void HandleAutoPad(const Shape& input_shape,
|
||||
const uint32_t weight_size_y,
|
||||
const uint32_t weight_size_x,
|
||||
const vector<int32_t>& onnx_strides,
|
||||
const vector<int32_t>& onnx_dilations,
|
||||
AutoPadType auto_pad_type,
|
||||
bool use_nchw,
|
||||
vector<int32_t>& onnx_pads,
|
||||
int32_t& nnapi_padding_code,
|
||||
bool& use_auto_pad) {
|
||||
if (auto_pad_type != AutoPadType::NOTSET) {
|
||||
onnx_pads = ComputeConvPads(input_shape, weight_size_y, weight_size_x,
|
||||
onnx_pads, onnx_strides, onnx_dilations,
|
||||
auto_pad_type, use_nchw);
|
||||
|
||||
if (AutoPadType::VALID == auto_pad_type || AutoPadType::SAME_UPPER == auto_pad_type) {
|
||||
use_auto_pad = true;
|
||||
nnapi_padding_code = (AutoPadType::VALID == auto_pad_type) ? ANEURALNETWORKS_PADDING_VALID
|
||||
: ANEURALNETWORKS_PADDING_SAME;
|
||||
}
|
||||
} else {
|
||||
const auto same_upper_pads = ComputeConvPads(input_shape, weight_size_y, weight_size_x,
|
||||
onnx_pads, onnx_strides, onnx_dilations,
|
||||
AutoPadType::SAME_UPPER, use_nchw);
|
||||
if (onnx_pads == same_upper_pads) {
|
||||
use_auto_pad = true;
|
||||
nnapi_padding_code = ANEURALNETWORKS_PADDING_SAME;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma endregion helpers
|
||||
|
||||
#pragma region op_base
|
||||
|
|
@ -765,11 +827,6 @@ bool PoolOpBuilder::IsOpSupportedImpl(ModelBuilder& /* model_builder */, const N
|
|||
return false;
|
||||
}
|
||||
|
||||
if (helper.Get("auto_pad", "NOTSET") != "NOTSET") {
|
||||
LOGS_DEFAULT(VERBOSE) << "auto_pad is not supported";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (helper.Get("kernel_shape", std::vector<int32_t>{1, 1}).size() != 2) {
|
||||
LOGS_DEFAULT(VERBOSE) << "Only pooling 2d is supported";
|
||||
return false;
|
||||
|
|
@ -841,33 +898,54 @@ void PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
|
|||
op_type = ANEURALNETWORKS_MAX_POOL_2D;
|
||||
|
||||
vector<int32_t> onnx_pads, onnx_strides, kernel_shape;
|
||||
bool use_auto_pad = false;
|
||||
int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID;
|
||||
const auto& input_shape = shaper[input];
|
||||
if (op == "AveragePool" || op == "MaxPool") {
|
||||
const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
|
||||
kernel_shape = helper.Get("kernel_shape", vector<int32_t>{0, 0});
|
||||
onnx_strides = helper.Get("strides", vector<int>{1, 1});
|
||||
onnx_pads = helper.Get("pads", vector<int>{0, 0, 0, 0});
|
||||
const auto weight_size_y = static_cast<uint32_t>(kernel_shape[0]);
|
||||
const auto weight_size_x = static_cast<uint32_t>(kernel_shape[1]);
|
||||
HandleAutoPad(input_shape, weight_size_y, weight_size_x,
|
||||
onnx_strides, {1, 1} /* onnx_dilations */,
|
||||
auto_pad_type, use_nchw,
|
||||
onnx_pads, nnapi_padding_code, use_auto_pad);
|
||||
} else { // (op == "GlobalAveragePool" || op == "GlobalMaxPool")
|
||||
use_auto_pad = true;
|
||||
nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID;
|
||||
onnx_strides = vector<int32_t>{1, 1};
|
||||
onnx_pads = vector<int32_t>{0, 0, 0, 0};
|
||||
if (model_builder.UseNCHW())
|
||||
kernel_shape = vector<int32_t>{static_cast<int32_t>(shaper[input][2]),
|
||||
static_cast<int32_t>(shaper[input][3])};
|
||||
else
|
||||
kernel_shape = vector<int32_t>{static_cast<int32_t>(shaper[input][1]),
|
||||
static_cast<int32_t>(shaper[input][2])};
|
||||
if (use_nchw) {
|
||||
kernel_shape = vector<int32_t>{static_cast<int32_t>(input_shape[2]),
|
||||
static_cast<int32_t>(input_shape[3])};
|
||||
} else {
|
||||
kernel_shape = vector<int32_t>{static_cast<int32_t>(input_shape[1]),
|
||||
static_cast<int32_t>(input_shape[2])};
|
||||
}
|
||||
}
|
||||
|
||||
int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]);
|
||||
std::vector<uint32_t> input_indices;
|
||||
input_indices.push_back(operand_indices.at(input));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
|
||||
|
||||
if (use_auto_pad) {
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(nnapi_padding_code));
|
||||
} else {
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
|
||||
}
|
||||
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[0]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(kernel_shape[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(kernel_shape[0]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code));
|
||||
|
||||
// TODO support API 28
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(use_nchw));
|
||||
|
||||
shaper.Pool(input,
|
||||
|
|
@ -899,10 +977,6 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
|
|||
|
||||
bool ConvOpBuilder::IsOpSupportedImpl(ModelBuilder& model_builder, const Node& node) {
|
||||
NodeAttrHelper helper(node);
|
||||
if (helper.Get("auto_pad", "NOTSET") != "NOTSET") {
|
||||
LOGS_DEFAULT(VERBOSE) << "SAME_LOWER auto_pad is not supported";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto group = helper.Get("group", 1);
|
||||
const auto weight_name = node.InputDefs()[1]->Name();
|
||||
|
|
@ -937,7 +1011,7 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
|
|||
|
||||
// onnx pads are in the order top, left, bottom, right
|
||||
// while nnapi pads is in the order left, right, top, bottom
|
||||
const auto onnx_pads = helper.Get("pads", vector<int>{0, 0, 0, 0});
|
||||
auto onnx_pads = helper.Get("pads", vector<int>{0, 0, 0, 0});
|
||||
|
||||
// onnx dilations is in the order height, width
|
||||
// while nnapi dilations are in the order width, height
|
||||
|
|
@ -968,15 +1042,11 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
|
|||
const auto& weight_tensor = initializers.at(weight);
|
||||
bool depthwise_conv2d = (weight_tensor.dims()[1] == 1);
|
||||
|
||||
std::vector<uint32_t> input_indices;
|
||||
input_indices.push_back(operand_indices.at(input));
|
||||
|
||||
// Pre-process weights
|
||||
if (conv2d) {
|
||||
input_indices.push_back(AddInitializerInNewLayout(
|
||||
model_builder, weight, L_0231));
|
||||
AddInitializerInNewLayout(model_builder, weight, L_0231);
|
||||
} else { // depthwise_conv2d
|
||||
input_indices.push_back(AddInitializerInNewLayout(
|
||||
model_builder, weight, L_1230));
|
||||
AddInitializerInNewLayout(model_builder, weight, L_1230);
|
||||
}
|
||||
|
||||
bool hasBias = (node.InputDefs().size() >= 3);
|
||||
|
|
@ -1007,23 +1077,50 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
|
|||
}
|
||||
}
|
||||
|
||||
const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
|
||||
bool use_auto_pad = false;
|
||||
int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_SAME;
|
||||
const auto& input_shape = shaper[input];
|
||||
const auto& kernel_shape = shaper[weight];
|
||||
const auto weight_size_y = kernel_shape[1];
|
||||
const auto weight_size_x = kernel_shape[2];
|
||||
HandleAutoPad(input_shape, weight_size_y, weight_size_x,
|
||||
onnx_strides, onnx_dilations,
|
||||
auto_pad_type, use_nchw,
|
||||
onnx_pads, nnapi_padding_code, use_auto_pad);
|
||||
|
||||
std::vector<uint32_t> input_indices;
|
||||
input_indices.push_back(operand_indices.at(input));
|
||||
input_indices.push_back(operand_indices.at(weight));
|
||||
input_indices.push_back(bias_idx_val);
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
|
||||
|
||||
if (use_auto_pad) {
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(nnapi_padding_code));
|
||||
} else {
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
|
||||
}
|
||||
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[0]));
|
||||
|
||||
if (!conv2d && depthwise_conv2d) {
|
||||
int32_t depthwiseMultiplier = shaper[weight][3] / group;
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(depthwiseMultiplier));
|
||||
}
|
||||
|
||||
int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]);
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code));
|
||||
|
||||
// TODO support API 28
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(use_nchw));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[0]));
|
||||
|
||||
if (onnx_dilations[1] != 1 || onnx_dilations[0] != 1) {
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[1]));
|
||||
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[0]));
|
||||
}
|
||||
|
||||
int32_t operationCode;
|
||||
if (conv2d) {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
#include "helper.h"
|
||||
#include "shaper.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace nnapi {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
|
|
@ -45,8 +48,8 @@ void Shaper::Conv(const std::string& input_name,
|
|||
const vector<int32_t>& onnx_dilations,
|
||||
bool nchw,
|
||||
const std::string& output_name) {
|
||||
Shape input_dimen = shape_map_.at(input_name);
|
||||
Shape weight_dimen = shape_map_.at(weight_name); // num_output, height, width, num_input
|
||||
const Shape& input_dimen = shape_map_.at(input_name);
|
||||
const Shape& weight_dimen = shape_map_.at(weight_name); // num_output, height, width, num_input
|
||||
|
||||
const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1];
|
||||
const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2];
|
||||
|
|
@ -88,8 +91,8 @@ void Shaper::DepthwiseConv(const std::string& input_name,
|
|||
const std::vector<int32_t>& onnx_dilations,
|
||||
bool nchw,
|
||||
const std::string& output_name) {
|
||||
Shape input_dimen = shape_map_.at(input_name);
|
||||
Shape weight_dimen = shape_map_.at(weight_name); // 1, height, width, num_output
|
||||
const Shape& input_dimen = shape_map_.at(input_name);
|
||||
const Shape& weight_dimen = shape_map_.at(weight_name); // 1, height, width, num_output
|
||||
|
||||
const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1];
|
||||
const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2];
|
||||
|
|
@ -130,11 +133,11 @@ void Shaper::Pool(const std::string& input_name,
|
|||
const std::vector<int32_t>& kernel_shape,
|
||||
bool nchw,
|
||||
const std::string& output_name) {
|
||||
auto input_dimen = shape_map_.at(input_name);
|
||||
const Shape& input_dimen = shape_map_.at(input_name);
|
||||
const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1];
|
||||
const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2];
|
||||
int32_t weight_size_y = kernel_shape[0];
|
||||
int32_t weight_size_x = kernel_shape[1];
|
||||
const auto weight_size_y = kernel_shape[0];
|
||||
const auto weight_size_x = kernel_shape[1];
|
||||
|
||||
uint32_t output_size_y, output_size_x;
|
||||
std::tie(output_size_y, output_size_x) =
|
||||
|
|
@ -167,7 +170,7 @@ void Shaper::Pool(const std::string& input_name,
|
|||
void Shaper::Reshape(const std::string& input_name,
|
||||
const std::vector<int32_t>& shape,
|
||||
const std::string& output_name) {
|
||||
auto input_dimen = shape_map_.at(input_name);
|
||||
const Shape& input_dimen = shape_map_.at(input_name);
|
||||
int64_t input_size = Product(input_dimen);
|
||||
std::vector<uint32_t> output_dimen(shape.size());
|
||||
|
||||
|
|
@ -209,7 +212,7 @@ void Shaper::Reshape(const std::string& input_name,
|
|||
void Shaper::Transpose(const std::string& input_name,
|
||||
const std::vector<int32_t>& perm,
|
||||
const std::string& output_name) {
|
||||
auto input_dimen = shape_map_.at(input_name);
|
||||
const Shape& input_dimen = shape_map_.at(input_name);
|
||||
|
||||
ORT_ENFORCE(perm.size() == input_dimen.size(), "Invalid perm is given!");
|
||||
|
||||
|
|
@ -231,8 +234,8 @@ void Shaper::Transpose(const std::string& input_name,
|
|||
void Shaper::Eltwise(const std::string& input1_name,
|
||||
const std::string& input2_name,
|
||||
const std::string& output_name) {
|
||||
auto& shape1 = shape_map_.at(input1_name);
|
||||
auto& shape2 = shape_map_.at(input2_name);
|
||||
const Shape& shape1 = shape_map_.at(input1_name);
|
||||
const Shape& shape2 = shape_map_.at(input2_name);
|
||||
|
||||
// broadcasting support
|
||||
bool shape1IsBigger = shape1.size() >= shape2.size();
|
||||
|
|
@ -283,8 +286,8 @@ void Shaper::Identity(const std::string& input_name,
|
|||
void Shaper::FC(const std::string& input1_name, const std::string& input2_name,
|
||||
const std::string& output_name) {
|
||||
// Currently we only support A*B'+C
|
||||
auto input1_dimen = shape_map_.at(input1_name);
|
||||
Shape input2_dimen = shape_map_.at(input2_name); // num_units, input_size
|
||||
const Shape& input1_dimen = shape_map_.at(input1_name);
|
||||
const Shape& input2_dimen = shape_map_.at(input2_name); // num_units, input_size
|
||||
Shape output_dimen{input1_dimen[0], input2_dimen[0]};
|
||||
shape_map_[output_name] = output_dimen;
|
||||
|
||||
|
|
@ -301,7 +304,7 @@ void Shaper::Concat(const std::vector<std::string>& input_names,
|
|||
const std::string& output_name) {
|
||||
std::vector<Shape> dimens;
|
||||
for (const auto& input_name : input_names) {
|
||||
auto& dimen = shape_map_.at(input_name);
|
||||
const Shape& dimen = shape_map_.at(input_name);
|
||||
if (!dimens.empty()) {
|
||||
for (size_t i = 0; i < dimens[0].size(); i++) {
|
||||
if ((int32_t)i == axis)
|
||||
|
|
@ -332,7 +335,7 @@ void Shaper::Concat(const std::vector<std::string>& input_names,
|
|||
void Shaper::Squeeze(const std::string& input_name,
|
||||
const std::vector<int32_t>& axes,
|
||||
const std::string& output_name) {
|
||||
std::vector<uint32_t> input_dimen = shape_map_.at(input_name);
|
||||
const Shape& input_dimen = shape_map_.at(input_name);
|
||||
int32_t input_size = input_dimen.size();
|
||||
size_t axes_size = axes.size();
|
||||
std::unordered_set<int32_t> axes_to_be_squeezed;
|
||||
|
|
@ -372,7 +375,7 @@ void Shaper::UpdateShape(const std::string& name, const Shape& new_shape) {
|
|||
ORT_ENFORCE(shaper_finalized_,
|
||||
"Cannot UpdateShape while shaper is not finalized");
|
||||
|
||||
const auto& old_shape = shape_map_.at(name);
|
||||
const Shape& old_shape = shape_map_.at(name);
|
||||
if (old_shape != new_shape) {
|
||||
if (Product(old_shape) != 0)
|
||||
ORT_THROW("The shape should be same size or old shape has size 0 (dynamic shape)");
|
||||
|
|
@ -404,3 +407,6 @@ std::string Shape2String(const Shaper::Shape& shape) {
|
|||
os << "]";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -4,6 +4,9 @@
|
|||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace nnapi {
|
||||
|
||||
class Shaper {
|
||||
public:
|
||||
using Shape = std::vector<uint32_t>;
|
||||
|
|
@ -69,3 +72,6 @@ class Shaper {
|
|||
};
|
||||
|
||||
std::string Shape2String(const Shaper::Shape& shape);
|
||||
|
||||
} // namespace nnapi
|
||||
} // namespace onnxruntime
|
||||
Loading…
Reference in a new issue