[NNAPI] Refactor Resize as layout insensitive (#13412)

### Description
<!-- Describe your changes. -->



### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
This commit is contained in:
Cheng 2022-10-25 16:50:05 +08:00 committed by GitHub
parent 93f7a97a6d
commit ea1bdb162f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 22 deletions

View file

@ -828,14 +828,18 @@ onnxruntime::Node& NodeFromApiNode(onnx_layout_transformation::api::NodeRef& nod
namespace layout_transformer {
const std::unordered_set<std::string_view>& GetORTLayoutSensitiveOps() {
static std::unordered_set<std::string_view> ort_layout_senstive_ops = []() {
static std::unordered_set<std::string_view> ort_layout_sensitive_ops = []() {
const auto& layout_sensitive_ops = onnx_layout_transformation::GetLayoutSensitiveOps();
#if !defined(USE_CUDA) && !defined(USE_ROCM)
std::unordered_set<std::string_view> ort_specific_ops = {"FusedConv", "QLinearAveragePool", "QLinearGlobalAveragePool"};
#else
std::unordered_set<std::string_view> ort_specific_ops = {"Resize", "FusedConv", "QLinearAveragePool", "QLinearGlobalAveragePool"};
#endif
ort_specific_ops.insert(layout_sensitive_ops.cbegin(), layout_sensitive_ops.cend());
return ort_specific_ops;
}();
return ort_layout_senstive_ops;
return ort_layout_sensitive_ops;
}
Status TransformLayoutForEP(Graph& graph, bool& modified, const IExecutionProvider& execution_provider) {

View file

@ -81,8 +81,11 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
const auto& output = node_unit.Outputs()[0].node_arg.Name();
auto input = inputs[0].node_arg.Name();
bool use_nchw = model_builder.UseNCHW();
ORT_RETURN_IF_ERROR(IsOpInRequiredLayout(use_nchw, node_unit));
const auto& output_shape = shaper[output];
const auto& input_shape = shaper[input];
const bool input_is_nchw = output_shape[1] == input_shape[1]; // not Channel last
// Check if the quantization scale and ZP is correct
if (IsQuantizedOp(node_unit)) {
@ -104,10 +107,9 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
// if the node domain is NHWC it means all the node inputs are converted to NHWC format by the layout transformer.
// pick the index for height and width based on the format.
int h_idx = use_nchw ? 2 : 1;
int w_idx = use_nchw ? 3 : 2;
int h_idx = input_is_nchw ? 2 : 1;
int w_idx = input_is_nchw ? 3 : 2;
const auto& output_shape = shaper[output];
int32_t output_h = output_shape[h_idx];
int32_t output_w = output_shape[w_idx];
@ -117,8 +119,8 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
ADD_SCALAR_OPERAND(model_builder, input_indices, output_h);
if (android_feature_level > ANEURALNETWORKS_FEATURE_LEVEL_2) {
// using nchw is only available on API level 29
ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw);
// using nchw is only available on API level 29+
ADD_SCALAR_OPERAND(model_builder, input_indices, input_is_nchw);
}
// Currently we only support align_corners and half_pixel on bilinear resize
@ -224,14 +226,16 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
LOGS_DEFAULT(VERBOSE) << "Input sizes of Resize must be known";
return false;
}
bool input_is_nchw = false;
// There is no good way to determine the layout when the scale is 1.0F
// We want to check if the scales or sizes are not trying to resize on N/C channels here
if (inputs.size() == 3) { // we are using scales
const auto& scales_tensor = *initializers.at(inputs[2].node_arg.Name());
Initializer unpacked_tensor(scales_tensor);
Initializer const unpacked_tensor(scales_tensor);
auto scales_data = unpacked_tensor.DataAsSpan<float>();
float scale_n = scales_data[0];
float scale_c = IsNodeLayoutNHWC(node_unit) ? scales_data[3] : scales_data[1];
input_is_nchw = scales_data[1] == 1.0F;
float const scale_n = scales_data[0];
float const scale_c = input_is_nchw ? scales_data[1] : scales_data[3];
if (scale_n != 1.0f || scale_c != 1.0f) {
LOGS_DEFAULT(VERBOSE) << "Scales of N/C channel should be 1"
<< "Resize of N/C channels are not supported"
@ -243,8 +247,10 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
const auto& sizes_name = inputs[3].node_arg.Name();
const auto& sizes_tensor = *initializers.at(sizes_name);
Initializer unpacked_tensor(sizes_tensor);
int channel_idx = IsNodeLayoutNHWC(node_unit) ? 3 : 1;
auto sizes_data = unpacked_tensor.DataAsSpan<int64_t>();
input_is_nchw = sizes_data[1] == input_shape[1];
int channel_idx = input_is_nchw ? 1 : 3;
uint32_t size_n = SafeInt<uint32_t>(sizes_data[0]);
uint32_t size_c = SafeInt<uint32_t>(sizes_data[channel_idx]);
if (size_n != input_shape[0] || size_c != input_shape[channel_idx]) {
@ -255,6 +261,11 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
return false;
}
}
if (input_is_nchw && params.android_feature_level <= ANEURALNETWORKS_FEATURE_LEVEL_2) {
LOGS_DEFAULT(VERBOSE) << "android_feature_level below 29 does not support nchw Resize.";
return false;
}
}
return true;

View file

@ -321,29 +321,49 @@ TEST(NnapiExecutionProviderTest, TestQDQConv) {
{ExpectedEPNodeAssignment::All});
}
TEST(NnapiExecutionProviderTest, TestQDQResize) {
TEST(NnapiExecutionProviderTest, TestQDQResizeNCHW) {
// NNAPI EP does not support the default setting of Resize Op
// Use bi-linear and asymmetric for NNAPI EP only
// Setting verify_entire_graph_use_ep for this test as false. This is because layout transformation adds
// Transpose (NCHW -> NHWC) nodes. Post-transformation, the graph looks like this: Transpose -> DQ -> Resize -> Q -> Transpose
// NNAPI does not pick the first Transpose as its input is graph/partition input
// See https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc#L305
// onnxruntime::nnapi::IsInternalQuantizationSupported
auto Mode = ExpectedEPNodeAssignment::None;
#if defined(__ANDROID__)
const auto* nnapi = NnApiImplementation();
if (nnapi->nnapi_runtime_feature_level >= ANEURALNETWORKS_FEATURE_LEVEL_3) {
Mode = ExpectedEPNodeAssignment::All;
}
#endif
RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
{1, 3, 32, 32} /* sizes_data */,
"linear" /* mode */,
"asymmetric" /* coordinate_transformation_mode */),
"nnapi_qdq_test_graph_resize",
{ExpectedEPNodeAssignment::Some});
{Mode});
}
TEST(NnapiExecutionProviderTest, TestQDQResize_UnsupportedDefaultSetting) {
TEST(NnapiExecutionProviderTest, TestQDQResizeNHWC) {
// NNAPI EP does not support the default setting of Resize Op
// Use bi-linear and asymmetric for NNAPI EP only
// The input here is {1, 64, 64, 3} and the requested sizes are {1, 32, 32, 3}: only dims 1 and 2 change,
// while dims 0 and 3 are kept as-is (channel-last layout, per the test name).
// The test expects ExpectedEPNodeAssignment::All, unconditionally (no feature-level check in this test).
RunQDQModelTest(BuildQDQResizeTestCase({1, 64, 64, 3} /* input_shape */,
{1, 32, 32, 3} /* sizes_data */,
"linear" /* mode */,
"asymmetric" /* coordinate_transformation_mode */),
"nnapi_qdq_test_graph_resize",
{ExpectedEPNodeAssignment::All});
}
TEST(NnapiExecutionProviderTest, TestQDQResize_UnsupportedDefaultSettingNCHW) {
// Only input_shape and sizes_data are passed; mode and coordinate_transformation_mode are omitted, so
// whatever defaults BuildQDQResizeTestCase declares are used (defaults not visible here - TODO confirm).
// Input {1, 3, 64, 64} -> sizes {1, 3, 32, 32}: dims 2 and 3 change, dims 0 and 1 are unchanged.
// The test expects ExpectedEPNodeAssignment::None for this configuration.
RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
{1, 3, 32, 32} /* sizes_data */),
"nnapi_qdq_test_graph_resize_unsupported",
{ExpectedEPNodeAssignment::None});
}
TEST(NnapiExecutionProviderTest, TestQDQResize_UnsupportedDefaultSettingNHWC) {
// Only input_shape and sizes_data are passed; mode and coordinate_transformation_mode are omitted, so
// whatever defaults BuildQDQResizeTestCase declares are used (defaults not visible here - TODO confirm).
// Input {1, 64, 64, 3} -> sizes {1, 32, 32, 3}: dims 1 and 2 change, dims 0 and 3 are unchanged.
// The test expects ExpectedEPNodeAssignment::None for this configuration.
RunQDQModelTest(BuildQDQResizeTestCase({1, 64, 64, 3} /* input_shape */,
{1, 32, 32, 3} /* sizes_data */),
"nnapi_qdq_test_graph_resize_unsupported",
{ExpectedEPNodeAssignment::None});
}
TEST(NnapiExecutionProviderTest, TestQDQAveragePool) {
// NNAPI use different rounding, which may cause ~1% difference in the result
RunQDQModelTest(BuildQDQAveragePoolTestCase<uint8_t /* InputType */,