diff --git a/onnxruntime/core/optimizer/transpose_optimizer/optimizer_api_impl.cc b/onnxruntime/core/optimizer/transpose_optimizer/optimizer_api_impl.cc index 1fcfa33010..31f6f89cb0 100644 --- a/onnxruntime/core/optimizer/transpose_optimizer/optimizer_api_impl.cc +++ b/onnxruntime/core/optimizer/transpose_optimizer/optimizer_api_impl.cc @@ -828,14 +828,18 @@ onnxruntime::Node& NodeFromApiNode(onnx_layout_transformation::api::NodeRef& nod namespace layout_transformer { const std::unordered_set& GetORTLayoutSensitiveOps() { - static std::unordered_set ort_layout_senstive_ops = []() { + static std::unordered_set ort_layout_sensitive_ops = []() { const auto& layout_sensitive_ops = onnx_layout_transformation::GetLayoutSensitiveOps(); +#if !defined(USE_CUDA) && !defined(USE_ROCM) + std::unordered_set ort_specific_ops = {"FusedConv", "QLinearAveragePool", "QLinearGlobalAveragePool"}; +#else std::unordered_set ort_specific_ops = {"Resize", "FusedConv", "QLinearAveragePool", "QLinearGlobalAveragePool"}; +#endif ort_specific_ops.insert(layout_sensitive_ops.cbegin(), layout_sensitive_ops.cend()); return ort_specific_ops; }(); - return ort_layout_senstive_ops; + return ort_layout_sensitive_ops; } Status TransformLayoutForEP(Graph& graph, bool& modified, const IExecutionProvider& execution_provider) { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc index ce553b671a..0190080dbe 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc @@ -81,8 +81,11 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const const auto& output = node_unit.Outputs()[0].node_arg.Name(); auto input = inputs[0].node_arg.Name(); - bool use_nchw = model_builder.UseNCHW(); - ORT_RETURN_IF_ERROR(IsOpInRequiredLayout(use_nchw, 
node_unit)); + + const auto& output_shape = shaper[output]; + const auto& input_shape = shaper[input]; + + const bool input_is_nchw = output_shape[1] == input_shape[1]; // NCHW, i.e. not channel-last // Check if the quantization scale and ZP is correct if (IsQuantizedOp(node_unit)) { @@ -104,10 +107,9 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const // if the node domain is NHWC it means all the node inputs are converted to NHWC format by the layout transformer. // pick the index for height and width based on the format. - int h_idx = use_nchw ? 2 : 1; - int w_idx = use_nchw ? 3 : 2; + int h_idx = input_is_nchw ? 2 : 1; + int w_idx = input_is_nchw ? 3 : 2; - const auto& output_shape = shaper[output]; int32_t output_h = output_shape[h_idx]; int32_t output_w = output_shape[w_idx]; @@ -117,8 +119,8 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const ADD_SCALAR_OPERAND(model_builder, input_indices, output_h); if (android_feature_level > ANEURALNETWORKS_FEATURE_LEVEL_2) { - // using nchw is only available on API level 29 - ADD_SCALAR_OPERAND(model_builder, input_indices, use_nchw); + // using nchw is only available on API level 29+ + ADD_SCALAR_OPERAND(model_builder, input_indices, input_is_nchw); } // Currently we only support align_corners and half_pixel on bilinear resize @@ -224,14 +226,16 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers LOGS_DEFAULT(VERBOSE) << "Input sizes of Resize must be known"; return false; } - + bool input_is_nchw = false; + // There is no good way to determine the layout when the scale is 1.0F // We want to check if the scales or sizes are not trying to resize on N/C channels here if (inputs.size() == 3) { // we are using scales const auto& scales_tensor = *initializers.at(inputs[2].node_arg.Name()); - Initializer unpacked_tensor(scales_tensor); + Initializer const unpacked_tensor(scales_tensor); auto scales_data = unpacked_tensor.DataAsSpan(); - float scale_n 
= scales_data[0]; - float scale_c = IsNodeLayoutNHWC(node_unit) ? scales_data[3] : scales_data[1]; + input_is_nchw = scales_data[1] == 1.0F; + float const scale_n = scales_data[0]; + float const scale_c = input_is_nchw ? scales_data[1] : scales_data[3]; if (scale_n != 1.0f || scale_c != 1.0f) { LOGS_DEFAULT(VERBOSE) << "Scales of N/C channel should be 1" << "Resize of N/C channels are not supported" @@ -243,8 +247,10 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers const auto& sizes_name = inputs[3].node_arg.Name(); const auto& sizes_tensor = *initializers.at(sizes_name); Initializer unpacked_tensor(sizes_tensor); - int channel_idx = IsNodeLayoutNHWC(node_unit) ? 3 : 1; auto sizes_data = unpacked_tensor.DataAsSpan(); + + input_is_nchw = sizes_data[1] == input_shape[1]; + int channel_idx = input_is_nchw ? 1 : 3; uint32_t size_n = SafeInt(sizes_data[0]); uint32_t size_c = SafeInt(sizes_data[channel_idx]); if (size_n != input_shape[0] || size_c != input_shape[channel_idx]) { @@ -255,6 +261,11 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers return false; } } + + if (input_is_nchw && params.android_feature_level <= ANEURALNETWORKS_FEATURE_LEVEL_2) { + LOGS_DEFAULT(VERBOSE) << "android_feature_level below 29 does not support nchw Resize."; + return false; + } } return true; diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc index 1c52726de8..dbf71d00b4 100644 --- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc +++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc @@ -321,29 +321,49 @@ TEST(NnapiExecutionProviderTest, TestQDQConv) { {ExpectedEPNodeAssignment::All}); } -TEST(NnapiExecutionProviderTest, TestQDQResize) { +TEST(NnapiExecutionProviderTest, TestQDQResizeNCHW) { // NNAPI EP does not support the default setting of Resize Op // Use bi-linear and asymmetric for NNAPI EP only - // Setting verify_entire_graph_use_ep 
for this test as false. This is because layout transformation adds - // Transpose (NCHW -> NHWC) nodes. Post tranformation graph looks like this Transpose -> DQ -> Resize -> Q -> Transpose - // NNAPI does not pick the first Transpose as its input is graph/partition input - // See https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc#L305 - // onnxruntime::nnapi::IsInternalQuantizationSupported + auto Mode = ExpectedEPNodeAssignment::None; +#if defined(__ANDROID__) + const auto* nnapi = NnApiImplementation(); + if (nnapi->nnapi_runtime_feature_level >= ANEURALNETWORKS_FEATURE_LEVEL_3) { + Mode = ExpectedEPNodeAssignment::All; + } +#endif RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */, {1, 3, 32, 32} /* sizes_data */, "linear" /* mode */, "asymmetric" /* coordinate_transformation_mode */), "nnapi_qdq_test_graph_resize", - {ExpectedEPNodeAssignment::Some}); + {Mode}); } -TEST(NnapiExecutionProviderTest, TestQDQResize_UnsupportedDefaultSetting) { +TEST(NnapiExecutionProviderTest, TestQDQResizeNHWC) { + // NNAPI EP does not support the default setting of Resize Op + // Use bi-linear and asymmetric for NNAPI EP only + RunQDQModelTest(BuildQDQResizeTestCase({1, 64, 64, 3} /* input_shape */, + {1, 32, 32, 3} /* sizes_data */, + "linear" /* mode */, + "asymmetric" /* coordinate_transformation_mode */), + "nnapi_qdq_test_graph_resize", + {ExpectedEPNodeAssignment::All}); +} + +TEST(NnapiExecutionProviderTest, TestQDQResize_UnsupportedDefaultSettingNCHW) { RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */, {1, 3, 32, 32} /* sizes_data */), "nnapi_qdq_test_graph_resize_unsupported", {ExpectedEPNodeAssignment::None}); } +TEST(NnapiExecutionProviderTest, TestQDQResize_UnsupportedDefaultSettingNHWC) { + RunQDQModelTest(BuildQDQResizeTestCase({1, 64, 64, 3} /* input_shape */, + {1, 32, 32, 3} /* sizes_data */), + "nnapi_qdq_test_graph_resize_unsupported", + 
{ExpectedEPNodeAssignment::None}); +} + TEST(NnapiExecutionProviderTest, TestQDQAveragePool) { // NNAPI use different rounding, which may cause ~1% difference in the result RunQDQModelTest(BuildQDQAveragePoolTestCase