Nnapi, add auto_pad support for Conv/GlobalAveragePool/AveragePool/GlobalMaxPool/MaxPool operators (#4499)

* Split ComputePadAndOutputShape into ComputePad and ComputeOutputShape

* update NNAPI conv output shape compute to use shared ComputeOutputShape

* move use ptr to use reference for ComputePadAndOutputShape

* nnapi conv support auto_pad

* add logging operator support by target devices

* update InferOutputShape/ComputePadAndOutputShape/ComputePad to use force_symmetric_auto_padding as param instead of template

* make log op support for target devices optional

* add auto_pad support to pool operators

* ignore GetTargetDevices if using all devices

* fix some typos in padding calculation

* fix a bug of compute padding difference between conv and pool ops

* addressed CR comments, removed NNAPI device logging and move nnapi ep autopad handling into a shared function

* change helper functions to static
This commit is contained in:
gwang-msft 2020-07-15 00:21:42 -07:00 committed by GitHub
parent 34f73fa1aa
commit cf92497c16
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 419 additions and 317 deletions

View file

@ -152,7 +152,7 @@ Status NchwcConv::Compute(OpKernelContext* context) const {
std::vector<int64_t> Y_dims;
Y_dims.insert(Y_dims.begin(), {X_shape[0], W_shape[0]});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
auto* Y = context->Output(0, Y_dims);
auto* y_data = Y->template MutableData<float>();

View file

@ -60,7 +60,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this);
if (it != Conv::convLayers.end()) {
pConv = &it->second;
if(pConv->isDepthwiseCPU == true) {
if (pConv->isDepthwiseCPU == true) {
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
}
@ -103,7 +103,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
std::vector<int64_t> Y_dims;
Y_dims.insert(Y_dims.begin(), {N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
LOGS_DEFAULT(VERBOSE) << "Y " << Y->Shape().ToString().c_str() << std::endl;
@ -127,7 +127,6 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
}
if (it == Conv::convLayers.end()) {
auto mm_layer = ACLCreateMemoryManager();
ACLNEConv tconv;
@ -192,21 +191,21 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
// in the configure function for NEDepthwiseConvolutionLayer3x3, there is a separation based on the optimization
#ifdef ACL_1902
bool optimizable =
arm_compute::NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(tconv.in->info()->tensor_shape(),
aclPadStride,
tconv.in->info()->data_type(),
1 /* depth multiplier */,
tconv.in->info()->data_layout());
arm_compute::NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(tconv.in->info()->tensor_shape(),
aclPadStride,
tconv.in->info()->data_type(),
1 /* depth multiplier */,
tconv.in->info()->data_layout());
#endif
#if defined(ACL_1905) || defined(ACL_1908)
bool optimizable =
arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(),
tconv.k->info(),
aclPadStride,
1 /* depth multiplier */,
arm_compute::Size2D(aclDilation0, dilations[0]));
arm_compute::NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(tconv.in->info(),
tconv.k->info(),
aclPadStride,
1 /* depth multiplier */,
arm_compute::Size2D(aclDilation0, dilations[0]));
#endif
if(optimizable) {
if (optimizable) {
//optimized depthwise convolution
#if defined(ACL_1902) || defined(ACL_1905)
auto layer = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer3x3>();
@ -234,9 +233,9 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
ret = Conv::convLayers.insert(std::pair<OpKernel*, ACLNEConv>((OpKernel*)this, tconv));
return s;
}
#endif //DEPTHWISE_CPU
#endif //DEPTHWISE_CPU
} else {
if(tconv.k->info()->tensor_shape()[0] == 1 && tconv.k->info()->tensor_shape()[1] == 1) {
if (tconv.k->info()->tensor_shape()[0] == 1 && tconv.k->info()->tensor_shape()[1] == 1) {
//pointwise convolution
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
@ -291,7 +290,6 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
pConv->b->allocator()->free();
pConv->out->allocator()->free();
return Status::OK();
}
#else

View file

@ -26,258 +26,253 @@ thread_local std::map<OpKernel*, armnn::NetworkId> Conv<T>::convLayers;
template <typename T>
armnn::IRuntimePtr Conv<T>::run = Conv<T>::initRuntime();
armnn::Convolution2dDescriptor createConvDescriptor(std::vector<int64_t> pads, std::vector<int64_t> dilations, std::vector<int64_t> strides, bool biasEnabled){
armnn::Convolution2dDescriptor createConvDescriptor(std::vector<int64_t> pads, std::vector<int64_t> dilations, std::vector<int64_t> strides, bool biasEnabled) {
std::vector<int64_t> armnnStrides(2);
armnnStrides[0] = (strides.size() == 2) ? strides[1] : 1;
armnnStrides[1] = strides[0];
std::vector<int64_t> armnnStrides(2);
armnnStrides[0] = (strides.size() == 2) ? strides[1] : 1;
armnnStrides[1] = strides[0];
std::vector<int64_t> armnnDilations(2);
armnnDilations[0] = (dilations.size() == 2) ? dilations[1] : 1;
armnnDilations[1] = dilations[0];
std::vector<int64_t> armnnDilations(2);
armnnDilations[0] = (dilations.size() == 2) ? dilations[1] : 1;
armnnDilations[1] = dilations[0];
std::vector<int64_t> armnnPads(4);
if (pads.size() == 2) {
if (strides.size() == 1) {
armnnPads[0] = 0;
armnnPads[1] = 0;
armnnPads[2] = pads[1];
armnnPads[3] = pads[0];
} else {
armnnPads[0] = pads[1];
armnnPads[1] = pads[0];
armnnPads[2] = pads[1];
armnnPads[3] = pads[0];
}
std::vector<int64_t> armnnPads(4);
if (pads.size() == 2) {
if (strides.size() == 1) {
armnnPads[0] = 0;
armnnPads[1] = 0;
armnnPads[2] = pads[1];
armnnPads[3] = pads[0];
} else {
armnnPads[0] = pads[1];
armnnPads[1] = pads[3];
armnnPads[2] = pads[0];
armnnPads[3] = pads[2];
armnnPads[1] = pads[0];
armnnPads[2] = pads[1];
armnnPads[3] = pads[0];
}
} else {
armnnPads[0] = pads[1];
armnnPads[1] = pads[3];
armnnPads[2] = pads[0];
armnnPads[3] = pads[2];
}
armnn::Convolution2dDescriptor convolutionDescriptor;
convolutionDescriptor.m_PadLeft = armnnPads[0];
convolutionDescriptor.m_PadRight = armnnPads[1];
convolutionDescriptor.m_PadTop = armnnPads[2];
convolutionDescriptor.m_PadBottom = armnnPads[3];
convolutionDescriptor.m_StrideX = armnnStrides[0];
convolutionDescriptor.m_StrideY = armnnStrides[1];
convolutionDescriptor.m_DilationX = armnnDilations[0];
convolutionDescriptor.m_DilationY = armnnDilations[1];
convolutionDescriptor.m_BiasEnabled = biasEnabled;
convolutionDescriptor.m_DataLayout = armnn::DataLayout::NCHW;
armnn::Convolution2dDescriptor convolutionDescriptor;
convolutionDescriptor.m_PadLeft = armnnPads[0];
convolutionDescriptor.m_PadRight = armnnPads[1];
convolutionDescriptor.m_PadTop = armnnPads[2];
convolutionDescriptor.m_PadBottom = armnnPads[3];
convolutionDescriptor.m_StrideX = armnnStrides[0];
convolutionDescriptor.m_StrideY = armnnStrides[1];
convolutionDescriptor.m_DilationX = armnnDilations[0];
convolutionDescriptor.m_DilationY = armnnDilations[1];
convolutionDescriptor.m_BiasEnabled = biasEnabled;
convolutionDescriptor.m_DataLayout = armnn::DataLayout::NCHW;
return convolutionDescriptor;
return convolutionDescriptor;
}
armnn::DepthwiseConvolution2dDescriptor createDepthwiseDescriptor(armnn::Convolution2dDescriptor convolutionDescriptor){
armnn::DepthwiseConvolution2dDescriptor createDepthwiseDescriptor(armnn::Convolution2dDescriptor convolutionDescriptor) {
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor;
depthwiseDescriptor.m_PadLeft = convolutionDescriptor.m_PadLeft;
depthwiseDescriptor.m_PadRight = convolutionDescriptor.m_PadRight;
depthwiseDescriptor.m_PadTop = convolutionDescriptor.m_PadTop;
depthwiseDescriptor.m_PadBottom = convolutionDescriptor.m_PadBottom;
depthwiseDescriptor.m_StrideX = convolutionDescriptor.m_StrideX;
depthwiseDescriptor.m_StrideY = convolutionDescriptor.m_StrideY;
depthwiseDescriptor.m_DilationX = convolutionDescriptor.m_DilationX;
depthwiseDescriptor.m_DilationY = convolutionDescriptor.m_DilationY;
depthwiseDescriptor.m_BiasEnabled = convolutionDescriptor.m_BiasEnabled;
depthwiseDescriptor.m_DataLayout = convolutionDescriptor.m_DataLayout;
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor;
depthwiseDescriptor.m_PadLeft = convolutionDescriptor.m_PadLeft;
depthwiseDescriptor.m_PadRight = convolutionDescriptor.m_PadRight;
depthwiseDescriptor.m_PadTop = convolutionDescriptor.m_PadTop;
depthwiseDescriptor.m_PadBottom = convolutionDescriptor.m_PadBottom;
depthwiseDescriptor.m_StrideX = convolutionDescriptor.m_StrideX;
depthwiseDescriptor.m_StrideY = convolutionDescriptor.m_StrideY;
depthwiseDescriptor.m_DilationX = convolutionDescriptor.m_DilationX;
depthwiseDescriptor.m_DilationY = convolutionDescriptor.m_DilationY;
depthwiseDescriptor.m_BiasEnabled = convolutionDescriptor.m_BiasEnabled;
depthwiseDescriptor.m_DataLayout = convolutionDescriptor.m_DataLayout;
return depthwiseDescriptor;
return depthwiseDescriptor;
}
template <typename T>
Status Conv<T>::Compute(OpKernelContext* context) const {
size_t num_inputs = OpKernel::Node().InputDefs().size();
const Tensor* X = context->Input<Tensor>(0);
const Tensor* W = context->Input<Tensor>(1);
const Tensor* B = num_inputs == 3 ? context->Input<Tensor>(2) : nullptr;
const Tensor* W = context->Input<Tensor>(1);
const Tensor* B = num_inputs == 3 ? context->Input<Tensor>(2) : nullptr;
const int64_t N = X->Shape()[0];
const int64_t M = W->Shape()[0];
const int64_t N = X->Shape()[0];
const int64_t M = W->Shape()[0];
if (X->Shape().NumDimensions() != PREF_DIM) {
if (X->Shape().NumDimensions() != PREF_DIM) {
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
}
ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W));
std::vector<int64_t> kernel_shape;
ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape));
std::vector<int64_t> pads(conv_attrs_.pads);
if (pads.empty()) {
pads.resize(kernel_shape.size() * 2, 0);
}
std::vector<int64_t> dilations(conv_attrs_.dilations);
if (dilations.empty()) {
dilations.resize(kernel_shape.size(), 1);
}
std::vector<int64_t> strides(conv_attrs_.strides);
if (strides.empty()) {
strides.resize(kernel_shape.size(), 1);
}
std::vector<int64_t> Y_dims;
Y_dims.insert(Y_dims.begin(), {N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
bool biasEnabled = B != nullptr;
const T* x_data = X->template Data<T>();
const T* k_data = W->template Data<T>();
const T* b_data;
if (biasEnabled) {
b_data = B->template Data<T>();
}
T* y_data = Y->template MutableData<T>();
armnn::NetworkId* pNetworkId;
ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this);
if (it == Conv::convLayers.end()) {
armnn::NetworkId networkId;
armnn::INetworkPtr myNetwork = armnn::INetwork::Create();
armnn::Convolution2dDescriptor convolutionDescriptor = createConvDescriptor(pads, dilations, strides, biasEnabled);
armnn::IConnectableLayer* convolution_armnn;
armnn::TensorShape inputShape = ArmNNTensorShape(X->Shape());
armnn::TensorShape weightShape = ArmNNTensorShape(W->Shape());
if (weightShape[2] == 1 && weightShape[3] == 1) {
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
}
ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W));
if (conv_attrs_.group > 1) {
if (conv_attrs_.group == inputShape[1]) {
// depthwise convolution
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor = createDepthwiseDescriptor(convolutionDescriptor);
std::vector<int64_t> kernel_shape;
ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape));
std::vector<int64_t> pads(conv_attrs_.pads);
if (pads.empty()) {
pads.resize(kernel_shape.size() * 2, 0);
}
std::vector<int64_t> dilations(conv_attrs_.dilations);
if (dilations.empty()) {
dilations.resize(kernel_shape.size(), 1);
}
std::vector<int64_t> strides(conv_attrs_.strides);
if (strides.empty()) {
strides.resize(kernel_shape.size(), 1);
}
std::vector<int64_t> Y_dims;
Y_dims.insert(Y_dims.begin(), {N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
bool biasEnabled = B != nullptr;
const T* x_data = X->template Data<T>();
const T* k_data = W->template Data<T>();
const T* b_data;
if (biasEnabled) {
b_data = B->template Data<T>();
}
T* y_data = Y->template MutableData<T>();
armnn::NetworkId* pNetworkId;
ConvLayersIterator it = Conv::convLayers.find((OpKernel*)this);
if (it == Conv::convLayers.end()) {
armnn::NetworkId networkId;
armnn::INetworkPtr myNetwork = armnn::INetwork::Create();
armnn::Convolution2dDescriptor convolutionDescriptor = createConvDescriptor(pads, dilations, strides, biasEnabled);
armnn::IConnectableLayer *convolution_armnn;
armnn::TensorShape inputShape = ArmNNTensorShape(X->Shape());
armnn::TensorShape weightShape = ArmNNTensorShape(W->Shape());
if (weightShape[2] == 1 && weightShape[3] == 1) {
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
}
if (conv_attrs_.group > 1) {
if (conv_attrs_.group == inputShape[1]) {
// depthwise convolution
armnn::DepthwiseConvolution2dDescriptor depthwiseDescriptor = createDepthwiseDescriptor(convolutionDescriptor);
weightShape[1] = weightShape[0];
weightShape[0] = 1;
armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32);
armnn::ConstTensor weights(weightsInfo, k_data);
if (biasEnabled) {
armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32);
armnn::ConstTensor bias(biasDesc, b_data);
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
weights,
armnn::Optional<armnn::ConstTensor>(bias),
"depthwise_convolution_armnn");
} else {
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
weights,
armnn::EmptyOptional(),
"depthwise_convolution_armnn");
}
} else {
// NCHWc convolution
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
}
} else {
// normal convolution
weightShape[1] = weightShape[0];
weightShape[0] = 1;
armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32);
armnn::ConstTensor weights(weightsInfo, k_data);
if (biasEnabled) {
armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32);
armnn::ConstTensor bias(biasDesc, b_data);
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
weights,
armnn::Optional<armnn::ConstTensor>(bias),
"convolution_armnn");
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
weights,
armnn::Optional<armnn::ConstTensor>(bias),
"depthwise_convolution_armnn");
} else {
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
weights,
armnn::EmptyOptional(),
"convolution_armnn");
convolution_armnn = myNetwork->AddDepthwiseConvolution2dLayer(depthwiseDescriptor,
weights,
armnn::EmptyOptional(),
"depthwise_convolution_armnn");
}
} else {
// NCHWc convolution
Status s = onnxruntime::Conv<T>::Compute(context);
return s;
}
bool armnn_activ_enabled = false;
armnn::ActivationDescriptor desc;
desc.m_A = conv_attrs_.alpha;
if (activation_type == "Relu") {
desc.m_Function = armnn::ActivationFunction::ReLu;
armnn_activ_enabled = true;
} else if (activation_type == "LeakyRelu") {
desc.m_Function = armnn::ActivationFunction::LeakyReLu;
armnn_activ_enabled = true;
} else if (activation_type == "Tanh") {
desc.m_Function = armnn::ActivationFunction::TanH;
armnn_activ_enabled = true;
} else if (activation_type == "Sigmoid") {
desc.m_Function = armnn::ActivationFunction::Sigmoid;
armnn_activ_enabled = true;
} else if (!activation_type.empty()) {
ORT_NOT_IMPLEMENTED("Not implemented fused activation: ", activation_type);
}
armnn::IConnectableLayer* activation = myNetwork->AddActivationLayer(desc, "activation_armnn");
armnn::IConnectableLayer *InputLayer = myNetwork->AddInputLayer(0);
armnn::IConnectableLayer *OutputLayer = myNetwork->AddOutputLayer(0);
InputLayer->GetOutputSlot(0).Connect(convolution_armnn->GetInputSlot(0));
if (armnn_activ_enabled) {
convolution_armnn->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
activation->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
}
else {
convolution_armnn->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
}
//Set the tensors in the network.
armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
armnn::TensorInfo outputTensorInfo(ArmNNTensorShape(Y->Shape()), armnn::DataType::Float32);
convolution_armnn->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
if (armnn_activ_enabled) {
activation->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
}
// Optimise ArmNN network
armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*myNetwork, {armnn::Compute::CpuAcc}, Conv::run->GetDeviceSpec());
if (optNet == nullptr) {
return onnxruntime::Conv<T>::Compute(context);
}
// Load graph into runtime
Conv::run->LoadNetwork(networkId, std::move(optNet));
std::pair<ConvLayersIterator, bool> ret;
ret = Conv::convLayers.insert(std::pair<OpKernel*, armnn::NetworkId>((OpKernel*)this, networkId));
pNetworkId = &ret.first->second;
} else {
pNetworkId = &it->second;
// normal convolution
armnn::TensorInfo weightsInfo(weightShape, armnn::DataType::Float32);
armnn::ConstTensor weights(weightsInfo, k_data);
if (biasEnabled) {
armnn::TensorInfo biasDesc(ArmNNTensorShape(B->Shape()), armnn::DataType::Float32);
armnn::ConstTensor bias(biasDesc, b_data);
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
weights,
armnn::Optional<armnn::ConstTensor>(bias),
"convolution_armnn");
} else {
convolution_armnn = myNetwork->AddConvolution2dLayer(convolutionDescriptor,
weights,
armnn::EmptyOptional(),
"convolution_armnn");
}
}
armnn::InputTensors inputTensors{{0, armnn::ConstTensor(Conv::run->GetInputTensorInfo(*pNetworkId, 0),
x_data)}};
armnn::OutputTensors outputTensors{{0, armnn::Tensor(Conv::run->GetOutputTensorInfo(*pNetworkId, 0),
y_data)}};
bool armnn_activ_enabled = false;
armnn::ActivationDescriptor desc;
desc.m_A = conv_attrs_.alpha;
// Execute network
Conv::run->EnqueueWorkload(*pNetworkId, inputTensors, outputTensors);
if (activation_type == "Relu") {
desc.m_Function = armnn::ActivationFunction::ReLu;
armnn_activ_enabled = true;
} else if (activation_type == "LeakyRelu") {
desc.m_Function = armnn::ActivationFunction::LeakyReLu;
armnn_activ_enabled = true;
} else if (activation_type == "Tanh") {
desc.m_Function = armnn::ActivationFunction::TanH;
armnn_activ_enabled = true;
} else if (activation_type == "Sigmoid") {
desc.m_Function = armnn::ActivationFunction::Sigmoid;
armnn_activ_enabled = true;
} else if (!activation_type.empty()) {
ORT_NOT_IMPLEMENTED("Not implemented fused activation: ", activation_type);
}
return Status::OK();
armnn::IConnectableLayer* activation = myNetwork->AddActivationLayer(desc, "activation_armnn");
armnn::IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
armnn::IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
InputLayer->GetOutputSlot(0).Connect(convolution_armnn->GetInputSlot(0));
if (armnn_activ_enabled) {
convolution_armnn->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
activation->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
} else {
convolution_armnn->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
}
//Set the tensors in the network.
armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
armnn::TensorInfo outputTensorInfo(ArmNNTensorShape(Y->Shape()), armnn::DataType::Float32);
convolution_armnn->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
if (armnn_activ_enabled) {
activation->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
}
// Optimise ArmNN network
armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*myNetwork, {armnn::Compute::CpuAcc}, Conv::run->GetDeviceSpec());
if (optNet == nullptr) {
return onnxruntime::Conv<T>::Compute(context);
}
// Load graph into runtime
Conv::run->LoadNetwork(networkId, std::move(optNet));
std::pair<ConvLayersIterator, bool> ret;
ret = Conv::convLayers.insert(std::pair<OpKernel*, armnn::NetworkId>((OpKernel*)this, networkId));
pNetworkId = &ret.first->second;
} else {
pNetworkId = &it->second;
}
armnn::InputTensors inputTensors{{0, armnn::ConstTensor(Conv::run->GetInputTensorInfo(*pNetworkId, 0),
x_data)}};
armnn::OutputTensors outputTensors{{0, armnn::Tensor(Conv::run->GetOutputTensorInfo(*pNetworkId, 0),
y_data)}};
// Execute network
Conv::run->EnqueueWorkload(*pNetworkId, inputTensors, outputTensors);
return Status::OK();
}
ONNX_OPERATOR_VERSIONED_KERNEL_EX(
@ -296,5 +291,5 @@ ONNX_OPERATOR_KERNEL_EX(
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
Conv<float>);
} // namespace armnn
} // namespace armnn_ep
} // namespace onnxruntime

View file

@ -70,11 +70,11 @@ inline AutoPadType StringToAutoPadType(const std::string& str) {
// helper function
template <bool ForceSymmetricAutoPadding>
Status ComputePad(const int64_t in_dim,
const int64_t stride, const int64_t kernel, const int64_t dilation,
AutoPadType pad_type,
int64_t& pad_head, int64_t& pad_tail) {
inline Status ComputePad(const int64_t in_dim,
const int64_t stride, const int64_t kernel, const int64_t dilation,
AutoPadType pad_type,
int64_t& pad_head, int64_t& pad_tail,
bool force_symmetric_auto_padding = false) {
switch (pad_type) {
case AutoPadType::NOTSET:
break;
@ -91,7 +91,7 @@ Status ComputePad(const int64_t in_dim,
int64_t legacy_target_size = (in_dim + stride - 1) / stride;
int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_dim;
// make sure padding is symmetric
if (ForceSymmetricAutoPadding) {
if (force_symmetric_auto_padding) {
// Inlining math::roundUpPow2() from util/math.h to avoid bringing in the transitive dependencies.
pad_needed = (pad_needed + 1) & ~1;
}
@ -117,14 +117,14 @@ inline int64_t ComputeOutputShape(const int64_t in_dim,
return static_cast<int64_t>(static_cast<float>(in_dim + pad_head + pad_tail - dkernel) / stride + 1);
}
template <bool ForceSymmetricAutoPadding>
Status ComputePadAndOutputShape(const int64_t in_dim,
const int64_t stride, const int64_t kernel, const int64_t dilation,
AutoPadType pad_type,
int64_t& pad_head, int64_t& pad_tail,
int64_t& out_dim) {
inline Status ComputePadAndOutputShape(const int64_t in_dim,
const int64_t stride, const int64_t kernel, const int64_t dilation,
AutoPadType pad_type,
int64_t& pad_head, int64_t& pad_tail,
int64_t& out_dim,
bool force_symmetric_auto_padding = false) {
ORT_RETURN_IF_ERROR(
ComputePad<ForceSymmetricAutoPadding>(in_dim, stride, kernel, dilation, pad_type, pad_head, pad_tail));
ComputePad(in_dim, stride, kernel, dilation, pad_type, pad_head, pad_tail, force_symmetric_auto_padding));
out_dim = ComputeOutputShape(in_dim, stride, kernel, dilation, pad_head, pad_tail);
return Status::OK();
}

View file

@ -50,7 +50,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
std::vector<int64_t> Y_dims({N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
Tensor* Y = context->Output(0, Y_dims);
TensorShape output_shape = Y->Shape().Slice(2);
@ -188,7 +188,7 @@ Status Conv<float>::Compute(OpKernelContext* context) const {
std::vector<int64_t> Y_dims({N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
TensorShape output_shape = Y->Shape().Slice(2);

View file

@ -104,34 +104,34 @@ struct ConvAttributes {
return Status::OK();
}
template <bool ForceSymmetricAutoPadding = false>
Status InferOutputShape(const TensorShape& input_shape,
const std::vector<int64_t>& kernel_shape,
const std::vector<int64_t>& strides_p,
const std::vector<int64_t>& dilations_p,
std::vector<int64_t>* pads_p,
std::vector<int64_t>* output_shape) const {
std::vector<int64_t>& pads_p,
std::vector<int64_t>& output_shape,
bool force_symmetric_auto_padding = false) const {
size_t rank = input_shape.NumDimensions();
for (size_t dim = 0; dim < rank; ++dim) {
if (dim >= strides_p.size() || dim >= kernel_shape.size() ||
dim >= dilations_p.size() || dim >= pads_p->size() ||
rank + dim >= pads_p->size()) {
dim >= dilations_p.size() || dim >= pads_p.size() ||
rank + dim >= pads_p.size()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Out of bound access to array");
}
int64_t dim_size = 0;
ORT_RETURN_IF_ERROR(ComputePadAndOutputShape<ForceSymmetricAutoPadding>(
input_shape[dim],
strides_p[dim],
kernel_shape[dim],
dilations_p[dim],
auto_pad,
pads_p->at(dim),
pads_p->at(input_shape.NumDimensions() + dim),
dim_size));
ORT_RETURN_IF_ERROR(ComputePadAndOutputShape(input_shape[dim],
strides_p[dim],
kernel_shape[dim],
dilations_p[dim],
auto_pad,
pads_p.at(dim),
pads_p.at(input_shape.NumDimensions() + dim),
dim_size,
force_symmetric_auto_padding));
if (dim_size <= 0) {
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid input shape: " + input_shape.ToString());
}
output_shape->push_back(dim_size);
output_shape.push_back(dim_size);
}
return Status::OK();
}

View file

@ -71,7 +71,7 @@ Status ConvInteger::Compute(OpKernelContext* context) const {
std::vector<int64_t> Y_dims({N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
TensorShape output_shape = Y->Shape().Slice(2);

View file

@ -97,7 +97,7 @@ Status QLinearConv::Compute(OpKernelContext* context) const {
std::vector<int64_t> Y_dims({N, M});
TensorShape input_shape = X->Shape().Slice(2);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, &pads, &Y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
Tensor* Y = context->Output(0, TensorShape(Y_dims));
TensorShape output_shape = Y->Shape().Slice(2);

View file

@ -89,8 +89,8 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
std::vector<int64_t> y_dims;
y_dims.insert(y_dims.begin(), {N, M});
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape<true>(x_shape.Slice(2), kernel_shape,
strides, dilations, &pads, &y_dims));
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShape(x_shape.Slice(2), kernel_shape,
strides, dilations, pads, y_dims, true));
s_.y_dims = y_dims;
Tensor* Y = context->Output(0, TensorShape(s_.y_dims));
y_data = reinterpret_cast<CudaT*>(Y->template MutableData<T>());

View file

@ -3,6 +3,7 @@
#include <core/common/logging/logging.h>
#include <core/common/safeint.h>
#include <core/providers/common.h>
#include <onnx/onnx_pb.h>
#include "helper.h"
@ -258,6 +259,67 @@ uint32_t AddInitializerTransposed(ModelBuilder& model_builder,
return operand_idx;
}
// Resolves the four 2-D pads for a conv/pool node under the given auto_pad mode.
//
// input_dimen     : 4-D input shape; the spatial extents are read from
//                   [2],[3] when nchw is true and from [1],[2] otherwise.
// weight_size_y/x : kernel extent along the vertical / horizontal axis.
// onnx_pads       : starting pads in ONNX order {top, left, bottom, right};
//                   these seed the result and are what comes back unchanged
//                   when ComputePad leaves them alone (e.g. AutoPadType::NOTSET).
// onnx_strides    : {stride_y, stride_x}.
// onnx_dilations  : {dilation_y, dilation_x}.
//
// Returns the pads in the same ONNX order {top, left, bottom, right}.
// A failing ComputePad status is escalated through ORT_THROW_IF_ERROR.
static vector<int32_t> ComputeConvPads(
    const Shape& input_dimen,
    const uint32_t weight_size_y, const uint32_t weight_size_x,
    const std::vector<int32_t>& onnx_pads, const std::vector<int32_t>& onnx_strides, const std::vector<int32_t>& onnx_dilations,
    AutoPadType auto_pad_type, bool nchw) {
  // The height axis is 2 for NCHW and 1 for NHWC; width is the axis right after it.
  const int height_axis = nchw ? 2 : 1;
  const int32_t in_height = input_dimen[height_axis];
  const int32_t in_width = input_dimen[height_axis + 1];

  // ComputePad works on int64_t in/out references, so widen the seed pads first.
  int64_t pad_top = onnx_pads[0];
  int64_t pad_left = onnx_pads[1];
  int64_t pad_bottom = onnx_pads[2];
  int64_t pad_right = onnx_pads[3];

  const int32_t step_y = onnx_strides[0];
  const int32_t step_x = onnx_strides[1];
  const int32_t dil_y = onnx_dilations[0];
  const int32_t dil_x = onnx_dilations[1];

  // Vertical axis first, then horizontal; each call may rewrite its head/tail pair.
  ORT_THROW_IF_ERROR(ComputePad(in_height, step_y, weight_size_y, dil_y,
                                auto_pad_type, pad_top, pad_bottom));
  ORT_THROW_IF_ERROR(ComputePad(in_width, step_x, weight_size_x, dil_x,
                                auto_pad_type, pad_left, pad_right));

  vector<int32_t> resolved_pads{static_cast<int32_t>(pad_top),
                                static_cast<int32_t>(pad_left),
                                static_cast<int32_t>(pad_bottom),
                                static_cast<int32_t>(pad_right)};
  return resolved_pads;
}
static void HandleAutoPad(const Shape& input_shape,
const uint32_t weight_size_y,
const uint32_t weight_size_x,
const vector<int32_t>& onnx_strides,
const vector<int32_t>& onnx_dilations,
AutoPadType auto_pad_type,
bool use_nchw,
vector<int32_t>& onnx_pads,
int32_t& nnapi_padding_code,
bool& use_auto_pad) {
if (auto_pad_type != AutoPadType::NOTSET) {
onnx_pads = ComputeConvPads(input_shape, weight_size_y, weight_size_x,
onnx_pads, onnx_strides, onnx_dilations,
auto_pad_type, use_nchw);
if (AutoPadType::VALID == auto_pad_type || AutoPadType::SAME_UPPER == auto_pad_type) {
use_auto_pad = true;
nnapi_padding_code = (AutoPadType::VALID == auto_pad_type) ? ANEURALNETWORKS_PADDING_VALID
: ANEURALNETWORKS_PADDING_SAME;
}
} else {
const auto same_upper_pads = ComputeConvPads(input_shape, weight_size_y, weight_size_x,
onnx_pads, onnx_strides, onnx_dilations,
AutoPadType::SAME_UPPER, use_nchw);
if (onnx_pads == same_upper_pads) {
use_auto_pad = true;
nnapi_padding_code = ANEURALNETWORKS_PADDING_SAME;
}
}
}
#pragma endregion helpers
#pragma region op_base
@ -765,11 +827,6 @@ bool PoolOpBuilder::IsOpSupportedImpl(ModelBuilder& /* model_builder */, const N
return false;
}
if (helper.Get("auto_pad", "NOTSET") != "NOTSET") {
LOGS_DEFAULT(VERBOSE) << "auto_pad is not supported";
return false;
}
if (helper.Get("kernel_shape", std::vector<int32_t>{1, 1}).size() != 2) {
LOGS_DEFAULT(VERBOSE) << "Only pooling 2d is supported";
return false;
@ -841,33 +898,54 @@ void PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
op_type = ANEURALNETWORKS_MAX_POOL_2D;
// Pooling parameters; each is filled in by one of the two branches below.
vector<int32_t> onnx_pads, onnx_strides, kernel_shape;
// When use_auto_pad ends up true, a single NNAPI padding code operand is added further down
// instead of four explicit pad operands.
bool use_auto_pad = false;
int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID;
const auto& input_shape = shaper[input];
if (op == "AveragePool" || op == "MaxPool") {
// Read the pooling attributes from the node.
// NOTE(review): the second argument of helper.Get is presumably the value used when the
// attribute is absent -- confirm against NodeAttrHelper.
const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
kernel_shape = helper.Get("kernel_shape", vector<int32_t>{0, 0});
onnx_strides = helper.Get("strides", vector<int>{1, 1});
onnx_pads = helper.Get("pads", vector<int>{0, 0, 0, 0});
const auto weight_size_y = static_cast<uint32_t>(kernel_shape[0]);
const auto weight_size_x = static_cast<uint32_t>(kernel_shape[1]);
// onnx_pads, nnapi_padding_code and use_auto_pad are passed as the trailing arguments so that
// HandleAutoPad can update them; pooling passes {1, 1} as the dilations.
HandleAutoPad(input_shape, weight_size_y, weight_size_x,
onnx_strides, {1, 1} /* onnx_dilations */,
auto_pad_type, use_nchw,
onnx_pads, nnapi_padding_code, use_auto_pad);
} else { // (op == "GlobalAveragePool" || op == "GlobalMaxPool")
// Global pooling: VALID padding code, unit strides, zero pads, and a kernel equal to the
// input's dims [2],[3] when NCHW is used or dims [1],[2] otherwise.
use_auto_pad = true;
nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID;
onnx_strides = vector<int32_t>{1, 1};
onnx_pads = vector<int32_t>{0, 0, 0, 0};
// NOTE(review): kernel_shape is assigned twice in a row below, once via
// model_builder.UseNCHW()/shaper[input] and once via use_nchw/input_shape. This looks like the
// pre-change and post-change versions of the same statement -- confirm only one should remain.
if (model_builder.UseNCHW())
kernel_shape = vector<int32_t>{static_cast<int32_t>(shaper[input][2]),
static_cast<int32_t>(shaper[input][3])};
else
kernel_shape = vector<int32_t>{static_cast<int32_t>(shaper[input][1]),
static_cast<int32_t>(shaper[input][2])};
if (use_nchw) {
kernel_shape = vector<int32_t>{static_cast<int32_t>(input_shape[2]),
static_cast<int32_t>(input_shape[3])};
} else {
kernel_shape = vector<int32_t>{static_cast<int32_t>(input_shape[1]),
static_cast<int32_t>(input_shape[2])};
}
}
int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]);
// Operand indices for the pooling operation, collected in push order, starting with the input.
std::vector<uint32_t> input_indices;
input_indices.push_back(operand_indices.at(input));
// NOTE(review): these four unconditional pad pushes duplicate the ones in the else-branch just
// below; as written they would be added even when use_auto_pad is true. They look like the
// pre-change lines of the same hunk -- confirm they should be dropped.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
if (use_auto_pad) {
// Implicit padding: one padding-code operand.
input_indices.push_back(model_builder.AddOperandFromScalar(nnapi_padding_code));
} else {
// Explicit padding: four operands taken from onnx_pads in the order [1], [3], [0], [2].
// NOTE(review): presumably maps ONNX (top, left, bottom, right) to NNAPI
// (left, right, top, bottom) -- confirm.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
}
// Strides and kernel size are pushed with index 1 before index 0.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[0]));
input_indices.push_back(model_builder.AddOperandFromScalar(kernel_shape[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(kernel_shape[0]));
input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code));
// TODO support API 28
input_indices.push_back(model_builder.AddOperandFromScalar(use_nchw));
shaper.Pool(input,
@ -899,10 +977,6 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
bool ConvOpBuilder::IsOpSupportedImpl(ModelBuilder& model_builder, const Node& node) {
NodeAttrHelper helper(node);
if (helper.Get("auto_pad", "NOTSET") != "NOTSET") {
LOGS_DEFAULT(VERBOSE) << "SAME_LOWER auto_pad is not supported";
return false;
}
const auto group = helper.Get("group", 1);
const auto weight_name = node.InputDefs()[1]->Name();
@ -937,7 +1011,7 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
// onnx pads are in the order top, left, bottom, right
// while nnapi pads are in the order left, right, top, bottom
const auto onnx_pads = helper.Get("pads", vector<int>{0, 0, 0, 0});
auto onnx_pads = helper.Get("pads", vector<int>{0, 0, 0, 0});
// onnx dilations are in the order height, width
// while nnapi dilations are in the order width, height
@ -968,15 +1042,11 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
const auto& weight_tensor = initializers.at(weight);
bool depthwise_conv2d = (weight_tensor.dims()[1] == 1);
std::vector<uint32_t> input_indices;
input_indices.push_back(operand_indices.at(input));
// Pre-process weights
if (conv2d) {
input_indices.push_back(AddInitializerInNewLayout(
model_builder, weight, L_0231));
AddInitializerInNewLayout(model_builder, weight, L_0231);
} else { // depthwise_conv2d
input_indices.push_back(AddInitializerInNewLayout(
model_builder, weight, L_1230));
AddInitializerInNewLayout(model_builder, weight, L_1230);
}
bool hasBias = (node.InputDefs().size() >= 3);
@ -1007,23 +1077,50 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
}
}
// Decide whether the convolution is emitted with explicit pads or with a single NNAPI
// padding code; HandleAutoPad receives onnx_pads, nnapi_padding_code and use_auto_pad as its
// trailing arguments so it can update them.
const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
bool use_auto_pad = false;
int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_SAME;
const auto& input_shape = shaper[input];
const auto& kernel_shape = shaper[weight];
// Indices 1 and 2 of the weight shape are used as the kernel height and width.
// NOTE(review): assumes the weight has already been re-laid-out so that dims 1 and 2 are
// height and width -- confirm against the weight pre-processing earlier in this function.
const auto weight_size_y = kernel_shape[1];
const auto weight_size_x = kernel_shape[2];
HandleAutoPad(input_shape, weight_size_y, weight_size_x,
onnx_strides, onnx_dilations,
auto_pad_type, use_nchw,
onnx_pads, nnapi_padding_code, use_auto_pad);
// Operand indices for the convolution, collected in push order: input, weight, bias first.
std::vector<uint32_t> input_indices;
input_indices.push_back(operand_indices.at(input));
input_indices.push_back(operand_indices.at(weight));
input_indices.push_back(bias_idx_val);
// NOTE(review): these four unconditional pad pushes duplicate the ones in the else-branch just
// below; as written they would be added even when use_auto_pad is true. They look like the
// pre-change lines of the same hunk -- confirm they should be dropped.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
if (use_auto_pad) {
// Implicit padding: one padding-code operand.
input_indices.push_back(model_builder.AddOperandFromScalar(nnapi_padding_code));
} else {
// Explicit padding: four operands taken from onnx_pads in the order [1], [3], [0], [2].
// NOTE(review): presumably maps ONNX (top, left, bottom, right) to NNAPI
// (left, right, top, bottom) -- confirm.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[3]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[0]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_pads[2]));
}
// Strides are pushed with index 1 before index 0.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_strides[0]));
if (!conv2d && depthwise_conv2d) {
// Depthwise case only: an extra operand computed as weight dim 3 divided by the group count.
int32_t depthwiseMultiplier = shaper[weight][3] / group;
input_indices.push_back(model_builder.AddOperandFromScalar(depthwiseMultiplier));
}
int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]);
input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code));
// TODO support API 28
input_indices.push_back(model_builder.AddOperandFromScalar(use_nchw));
// NOTE(review): the two unconditional dilation pushes below duplicate the guarded ones that
// follow; they look like the pre-change lines of the same hunk -- confirm they should be dropped.
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[0]));
// Dilation operands are added only when at least one dilation differs from 1.
if (onnx_dilations[1] != 1 || onnx_dilations[0] != 1) {
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[1]));
input_indices.push_back(model_builder.AddOperandFromScalar(onnx_dilations[0]));
}
int32_t operationCode;
if (conv2d) {

View file

@ -4,6 +4,9 @@
#include "helper.h"
#include "shaper.h"
namespace onnxruntime {
namespace nnapi {
using std::string;
using std::vector;
@ -45,8 +48,8 @@ void Shaper::Conv(const std::string& input_name,
const vector<int32_t>& onnx_dilations,
bool nchw,
const std::string& output_name) {
Shape input_dimen = shape_map_.at(input_name);
Shape weight_dimen = shape_map_.at(weight_name); // num_output, height, width, num_input
const Shape& input_dimen = shape_map_.at(input_name);
const Shape& weight_dimen = shape_map_.at(weight_name); // num_output, height, width, num_input
const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1];
const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2];
@ -88,8 +91,8 @@ void Shaper::DepthwiseConv(const std::string& input_name,
const std::vector<int32_t>& onnx_dilations,
bool nchw,
const std::string& output_name) {
Shape input_dimen = shape_map_.at(input_name);
Shape weight_dimen = shape_map_.at(weight_name); // 1, height, width, num_output
const Shape& input_dimen = shape_map_.at(input_name);
const Shape& weight_dimen = shape_map_.at(weight_name); // 1, height, width, num_output
const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1];
const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2];
@ -130,11 +133,11 @@ void Shaper::Pool(const std::string& input_name,
const std::vector<int32_t>& kernel_shape,
bool nchw,
const std::string& output_name) {
auto input_dimen = shape_map_.at(input_name);
const Shape& input_dimen = shape_map_.at(input_name);
const auto input_size_y = nchw ? input_dimen[2] : input_dimen[1];
const auto input_size_x = nchw ? input_dimen[3] : input_dimen[2];
int32_t weight_size_y = kernel_shape[0];
int32_t weight_size_x = kernel_shape[1];
const auto weight_size_y = kernel_shape[0];
const auto weight_size_x = kernel_shape[1];
uint32_t output_size_y, output_size_x;
std::tie(output_size_y, output_size_x) =
@ -167,7 +170,7 @@ void Shaper::Pool(const std::string& input_name,
void Shaper::Reshape(const std::string& input_name,
const std::vector<int32_t>& shape,
const std::string& output_name) {
auto input_dimen = shape_map_.at(input_name);
const Shape& input_dimen = shape_map_.at(input_name);
int64_t input_size = Product(input_dimen);
std::vector<uint32_t> output_dimen(shape.size());
@ -209,7 +212,7 @@ void Shaper::Reshape(const std::string& input_name,
void Shaper::Transpose(const std::string& input_name,
const std::vector<int32_t>& perm,
const std::string& output_name) {
auto input_dimen = shape_map_.at(input_name);
const Shape& input_dimen = shape_map_.at(input_name);
ORT_ENFORCE(perm.size() == input_dimen.size(), "Invalid perm is given!");
@ -231,8 +234,8 @@ void Shaper::Transpose(const std::string& input_name,
void Shaper::Eltwise(const std::string& input1_name,
const std::string& input2_name,
const std::string& output_name) {
auto& shape1 = shape_map_.at(input1_name);
auto& shape2 = shape_map_.at(input2_name);
const Shape& shape1 = shape_map_.at(input1_name);
const Shape& shape2 = shape_map_.at(input2_name);
// broadcasting support
bool shape1IsBigger = shape1.size() >= shape2.size();
@ -283,8 +286,8 @@ void Shaper::Identity(const std::string& input_name,
void Shaper::FC(const std::string& input1_name, const std::string& input2_name,
const std::string& output_name) {
// Currently we only support A*B'+C
auto input1_dimen = shape_map_.at(input1_name);
Shape input2_dimen = shape_map_.at(input2_name); // num_units, input_size
const Shape& input1_dimen = shape_map_.at(input1_name);
const Shape& input2_dimen = shape_map_.at(input2_name); // num_units, input_size
Shape output_dimen{input1_dimen[0], input2_dimen[0]};
shape_map_[output_name] = output_dimen;
@ -301,7 +304,7 @@ void Shaper::Concat(const std::vector<std::string>& input_names,
const std::string& output_name) {
std::vector<Shape> dimens;
for (const auto& input_name : input_names) {
auto& dimen = shape_map_.at(input_name);
const Shape& dimen = shape_map_.at(input_name);
if (!dimens.empty()) {
for (size_t i = 0; i < dimens[0].size(); i++) {
if ((int32_t)i == axis)
@ -332,7 +335,7 @@ void Shaper::Concat(const std::vector<std::string>& input_names,
void Shaper::Squeeze(const std::string& input_name,
const std::vector<int32_t>& axes,
const std::string& output_name) {
std::vector<uint32_t> input_dimen = shape_map_.at(input_name);
const Shape& input_dimen = shape_map_.at(input_name);
int32_t input_size = input_dimen.size();
size_t axes_size = axes.size();
std::unordered_set<int32_t> axes_to_be_squeezed;
@ -372,7 +375,7 @@ void Shaper::UpdateShape(const std::string& name, const Shape& new_shape) {
ORT_ENFORCE(shaper_finalized_,
"Cannot UpdateShape while shaper is not finalized");
const auto& old_shape = shape_map_.at(name);
const Shape& old_shape = shape_map_.at(name);
if (old_shape != new_shape) {
if (Product(old_shape) != 0)
ORT_THROW("The shape should be same size or old shape has size 0 (dynamic shape)");
@ -404,3 +407,6 @@ std::string Shape2String(const Shaper::Shape& shape) {
os << "]";
return os.str();
}
} // namespace nnapi
} // namespace onnxruntime

View file

@ -4,6 +4,9 @@
#include <unordered_map>
#include <vector>
namespace onnxruntime {
namespace nnapi {
class Shaper {
public:
using Shape = std::vector<uint32_t>;
@ -69,3 +72,6 @@ class Shaper {
};
std::string Shape2String(const Shaper::Shape& shape);
} // namespace nnapi
} // namespace onnxruntime