From bd08f11a5895194e05657cedc5b4e0b1e594a034 Mon Sep 17 00:00:00 2001 From: Yi-Hong Lyu Date: Thu, 24 Feb 2022 06:27:11 +0800 Subject: [PATCH] Upsample support NHWC (#10554) Implement bilinear interpolation for Upsample (Resize) on 4-D input whose outermost and innermost scales are 1 (the innermost dimension is usually the channel of NHWC). In addition, revert HandleResize to its original implementation for the TransposeOptimizerTests.TestResize* tests. --- .../transpose_optimizer.cc | 66 ++- .../core/providers/cpu/tensor/upsample.cc | 100 +++-- .../optimizer/transpose_optimizer_test.cc | 408 +++++++++--------- .../providers/cpu/tensor/upsample_op_test.cc | 39 +- 4 files changed, 340 insertions(+), 273 deletions(-) diff --git a/onnxruntime/core/optimizer/transpose_optimizer/transpose_optimizer.cc b/onnxruntime/core/optimizer/transpose_optimizer/transpose_optimizer.cc index 6acec0ef17..06b834e0cf 100644 --- a/onnxruntime/core/optimizer/transpose_optimizer/transpose_optimizer.cc +++ b/onnxruntime/core/optimizer/transpose_optimizer/transpose_optimizer.cc @@ -967,41 +967,35 @@ static void PermuteInput(api::GraphRef& graph, api::NodeRef& node, size_t i, con node.SetInput(i, gather_output); } -// static bool HandleResize(HandlerArgs& args) { -// auto inputs = args.node.Inputs(); -// int64_t rank_int = gsl::narrow_cast(args.perm.size()); -// -// auto p = ChannelFirstToLastPerm(rank_int); -// auto& perm = p == args.perm ? args.perm : args.perm_inv; -// auto& perm_inv = p == args.perm ? 
args.perm_inv : args.perm; -// -// if (args.ctx.opset < 11) { -// PermuteInput(args.ctx.graph, args.node, 1, perm); -// } else { -// if (inputs[1] != "") { -// std::vector double_perm_inv = perm; -// double_perm_inv.reserve(2 * args.perm.size()); -// for (int64_t p1 : perm) { -// double_perm_inv.push_back(p1 + rank_int); -// } -// PermuteInput(args.ctx.graph, args.node, 1, double_perm_inv); -// } -// for (size_t i = 2; i < inputs.size(); ++i) { -// if (inputs[i] != "") { -// PermuteInput(args.ctx.graph, args.node, i, perm); -// } -// } -// } -// -// TransposeFirstInput(args.ctx, args.node, perm); -// TransposeOutputs(args.ctx, args.node, perm_inv); -// -// SwapNodeOpTypeAndDomain(args.ctx.graph, args.node, args.node.OpType(), "com.microsoft.nhwc"); -// -// return true; -// } +static bool HandleResize(HandlerArgs& args) { + auto inputs = args.node.Inputs(); + int64_t rank_int = gsl::narrow_cast(args.perm.size()); -// constexpr HandlerInfo resize_handler = {&FirstInput, &HandleResize}; + if (args.ctx.opset < 11) { + PermuteInput(args.ctx.graph, args.node, 1, args.perm_inv); + } else { + if (inputs[1] != "") { + std::vector double_perm_inv = args.perm_inv; + double_perm_inv.reserve(2 * args.perm_inv.size()); + for (int64_t p : args.perm_inv) { + double_perm_inv.push_back(p + rank_int); + } + PermuteInput(args.ctx.graph, args.node, 1, double_perm_inv); + } + for (size_t i = 2; i < inputs.size(); ++i) { + if (inputs[i] != "") { + PermuteInput(args.ctx.graph, args.node, i, args.perm_inv); + } + } + } + + TransposeFirstInput(args.ctx, args.node, args.perm_inv); + TransposeOutputs(args.ctx, args.node, args.perm); + + return true; +} + +constexpr HandlerInfo resize_handler = {&FirstInput, &HandleResize}; static bool HandlePad(HandlerArgs& args) { size_t rank = args.perm.size(); @@ -1640,9 +1634,7 @@ static const std::unordered_map handler_ma {"Split", split_handler}, {"Shape", shape_handler}, {"Pad", pad_handler}, - // Todo: renable resize handler after adding NHWC support 
in upsample op on cpu - // https://github.com/microsoft/onnxruntime/issues/9857 - // {"Resize", resize_handler}, + {"Resize", resize_handler}, {"ReduceSum", reduce_sum_handler}, {"ReduceLogSum", reduce_op_handler}, diff --git a/onnxruntime/core/providers/cpu/tensor/upsample.cc b/onnxruntime/core/providers/cpu/tensor/upsample.cc index 61e4d28cf0..dd292b6e83 100644 --- a/onnxruntime/core/providers/cpu/tensor/upsample.cc +++ b/onnxruntime/core/providers/cpu/tensor/upsample.cc @@ -420,13 +420,15 @@ struct BilinearParams { // that amounts to 'Bilinear' Upsampling/Resizing in the sense that it assumes // the scale values for the outermost 2 dimensions are 1. // This is the common use-case where the 4-D input (batched multi-channel images) -// is usually of shape [N, C, H, W] and the scales are [1.0, 1.0, height_scale, width_scale] -static BilinearParams SetupUpsampleBilinear(int64_t input_height, - int64_t input_width, - int64_t output_height, - int64_t output_width, - float height_scale, - float width_scale, +// is usually of shapes: +// - [N, C, H, W] and the scales are [1.0, 1.0, height_scale, width_scale] +// - [N, H, W, C] and the scales are [1.0, height_scale, width_scale, 1.0] +static BilinearParams SetupUpsampleBilinear(const int64_t input_height, + const int64_t input_width, + const int64_t output_height, + const int64_t output_width, + const float height_scale, + const float width_scale, const std::vector& roi, AllocatorPtr& alloc, const GetOriginalCoordinateFunc& get_original_coordinate) { @@ -523,26 +525,25 @@ static BilinearParams SetupUpsampleBilinear(int64_t input_height, } template -void UpsampleBilinear(int64_t batch_size, - int64_t num_channels, - int64_t input_height, - int64_t input_width, - int64_t output_height, - int64_t output_width, - float height_scale, - float width_scale, +void UpsampleBilinear(const int64_t batch_size, + const int64_t num_channels, + const int64_t input_height, + const int64_t input_width, + const int64_t output_height, + 
const int64_t output_width, + const float height_scale, + const float width_scale, const std::vector& roi, - bool use_extrapolation, - float extrapolation_value, - const T* XdataBase, - T* YdataBase, + const bool use_extrapolation, + const float extrapolation_value, + const T* const XdataBase, + T* const YdataBase, AllocatorPtr& alloc, const GetOriginalCoordinateFunc& get_original_coordinate, concurrency::ThreadPool* tp) { BilinearParams p = SetupUpsampleBilinear(input_height, input_width, output_height, output_width, height_scale, width_scale, roi, alloc, get_original_coordinate); - for (int64_t n = 0; n < batch_size; ++n) { concurrency::ThreadPool::TrySimpleParallelFor( tp, num_channels, @@ -1065,22 +1066,65 @@ Status Upsample::BaseCompute(OpKernelContext* context, case UpsampleMode::LINEAR: { // Supports 'bilinear' and 'trilinear' sampling only - //'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1 + //'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1 or + // 4-D input with outermost and innermost scales as 1 if (dims.size() == 2 || dims.size() == 4) { bool is_2D = dims.size() == 2; - const int64_t batch_size = is_2D ? 1 : dims[0]; - const int64_t num_channels = is_2D ? 1 : dims[1]; - const int64_t input_height = is_2D ? dims[0] : dims[2]; - const int64_t input_width = is_2D ? dims[1] : dims[3]; + int64_t batch_size; + int64_t num_channels; + int64_t input_height; + int64_t input_width; - const int64_t output_height = is_2D ? output_dims[0] : output_dims[2]; - const int64_t output_width = is_2D ? 
output_dims[1] : output_dims[3]; + int64_t output_height; + int64_t output_width; + + float height_scale; + float width_scale; + + if (is_2D) { + batch_size = 1; + num_channels = 1; + input_height = dims[0]; + input_width = dims[1]; + + output_height = output_dims[0]; + output_width = output_dims[1]; + + height_scale = scales[0]; + width_scale = scales[1]; + } else { + if (scales[1] == 1.0f) { + batch_size = dims[0]; + num_channels = dims[1]; + input_height = dims[2]; + input_width = dims[3]; + + output_height = output_dims[2]; + output_width = output_dims[3]; + + height_scale = scales[2]; + width_scale = scales[3]; + } else { + ORT_ENFORCE(scales[3] == 1.0f, "4-D input with innermost scale (usually channel of NHWC) as 1."); + + batch_size = dims[0]; + num_channels = dims[3]; + input_height = dims[1]; + input_width = dims[2]; + + output_height = output_dims[1]; + output_width = output_dims[2]; + + height_scale = scales[1]; + width_scale = scales[2]; + } + } AllocatorPtr alloc; ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc)); UpsampleBilinear(batch_size, num_channels, input_height, input_width, output_height, output_width, - is_2D ? scales[0] : scales[2], is_2D ? scales[1] : scales[3], roi, + height_scale, width_scale, roi, use_extrapolation_, extrapolation_value_, X->Data(), Y->MutableData(), alloc, get_original_coordinate_, output_height * output_width > 64 ? 
context->GetOperatorThreadPool() : nullptr); diff --git a/onnxruntime/test/optimizer/transpose_optimizer_test.cc b/onnxruntime/test/optimizer/transpose_optimizer_test.cc index 855baaf575..3e756c1b7d 100644 --- a/onnxruntime/test/optimizer/transpose_optimizer_test.cc +++ b/onnxruntime/test/optimizer/transpose_optimizer_test.cc @@ -291,212 +291,209 @@ TEST(TransposeOptimizerTests, TestPadNonconst) { /*opset_version*/ 11); } -// Todo: renable tests on resize transformer after adding NHWC support in upsample op on cpu -// https://github.com/microsoft/onnxruntime/issues/9857 +TEST(TransposeOptimizerTests, TestResize) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); + auto* const_1 = builder.MakeInitializer({4}, {0.3f, 2.5f, 1.0f, 0.7f}); + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* resize_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); -// TEST(TransposeOptimizerTests, TestResize) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); -// auto* const_1 = builder.MakeInitializer({4}, {0.3f, 2.5f, 1.0f, 0.7f}); -// auto* transpose_1_out_0 = builder.MakeIntermediate(); -// auto* resize_1_out_0 = builder.MakeIntermediate(); -// auto* transpose_2_out_0 = builder.MakeOutput(); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// builder.AddNode("Resize", {transpose_1_out_0, const_1}, {resize_1_out_0}); -// auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = EstimateTransposeCost(session.GetGraph()); -// 
EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1, -// /*opset_version*/ 10); -// } -// -// TEST(TransposeOptimizerTests, TestResizeOpset11) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); -// auto* const_1 = builder.MakeInitializer({8}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); -// auto* const_2 = builder.MakeInitializer({4}, {0.3f, 2.5f, 1.0f, 0.7f}); -// auto* transpose_1_out_0 = builder.MakeIntermediate(); -// auto* resize_1_out_0 = builder.MakeIntermediate(); -// auto* transpose_2_out_0 = builder.MakeOutput(); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// builder.AddNode("Resize", {transpose_1_out_0, const_1, const_2}, {resize_1_out_0}); -// auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = EstimateTransposeCost(session.GetGraph()); -// EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1, -// /*opset_version*/ 11); -// } -// -// TEST(TransposeOptimizerTests, TestResizeOpset15) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); -// auto* const_1 = builder.MakeInitializer({4}, {0.3f, 2.5f, 1.0f, 0.7f}); -// auto* transpose_1_out_0 = builder.MakeIntermediate(); -// auto* resize_1_out_0 = builder.MakeIntermediate(); -// auto* transpose_2_out_0 = builder.MakeOutput(); -// auto empty_arg = NodeArg("", 
nullptr); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// builder.AddNode("Resize", {transpose_1_out_0, &empty_arg, const_1}, {resize_1_out_0}); -// auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = EstimateTransposeCost(session.GetGraph()); -// EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1, -// /*opset_version*/ 15); -// } -// -// TEST(TransposeOptimizerTests, TestResizeSizeRoi) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); -// auto* const_1 = builder.MakeInitializer({8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f}); -// auto* const_2 = builder.MakeInitializer({4}, {10, 9, 8, 7}); -// auto* transpose_1_out_0 = builder.MakeIntermediate(); -// auto* resize_1_out_0 = builder.MakeIntermediate(); -// auto* transpose_2_out_0 = builder.MakeOutput(); -// auto empty_arg = NodeArg("", nullptr); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// auto& resize_1 = builder.AddNode("Resize", {transpose_1_out_0, const_1, &empty_arg, const_2}, {resize_1_out_0}); -// resize_1.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize"); -// auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = 
EstimateTransposeCost(session.GetGraph()); -// EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1, -// /*opset_version*/ 15); -// } -// -// TEST(TransposeOptimizerTests, TestResizeRoiScalesZeroRank0) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input = builder.MakeInput({1, 512, 512, 3}, -// std::numeric_limits::min(), -// std::numeric_limits::max()); -// auto* resize_in_roi = builder.MakeInitializer({0}, {}); -// auto* resize_in_scales = builder.MakeInitializer({0}, {}); -// auto* resize_in_sizes = builder.MakeInitializer({4}, {1, 256, 32, 32}); -// -// auto* transpose1_out_transposed = builder.MakeIntermediate(); -// auto* resize_out_Y = builder.MakeIntermediate(); -// auto* output = builder.MakeOutput(); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input}, {transpose1_out_transposed}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// builder.AddNode("Resize", -// {transpose1_out_transposed, resize_in_roi, resize_in_scales, resize_in_sizes}, -// {resize_out_Y}); -// auto& transpose_2 = builder.AddNode("Transpose", {resize_out_Y}, {output}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = EstimateTransposeCost(session.GetGraph()); -// EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1); -// } -// -// TEST(TransposeOptimizerTests, TestResizeNonconst) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); -// auto* input1_arg = MakeInput(builder, {{8}}, {8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f}); -// auto* input2_arg = MakeInput(builder, 
{{4}}, {4}, {0.3f, 2.5f, 1.0f, 0.7f}); -// auto* transpose_1_out_0 = builder.MakeIntermediate(); -// auto* resize_1_out_0 = builder.MakeIntermediate(); -// auto* transpose_2_out_0 = builder.MakeOutput(); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// auto& resize_1 = builder.AddNode("Resize", {transpose_1_out_0, input1_arg, input2_arg}, {resize_1_out_0}); -// resize_1.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize"); -// auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = EstimateTransposeCost(session.GetGraph()); -// EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1, -// /*opset_version*/ 11); -// } -// -// TEST(TransposeOptimizerTests, TestResizeNonconstOpset13) { -// auto build_test_case_1 = [&](ModelTestBuilder& builder) { -// auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); -// auto* input1_arg = MakeInput(builder, {{8}}, {8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f}); -// auto* input2_arg = MakeInput(builder, {{4}}, {4}, {0.3f, 2.5f, 1.0f, 0.7f}); -// auto* transpose_1_out_0 = builder.MakeIntermediate(); -// auto* resize_1_out_0 = builder.MakeIntermediate(); -// auto* transpose_2_out_0 = builder.MakeOutput(); -// -// auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); -// transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); -// auto& resize_1 = builder.AddNode("Resize", {transpose_1_out_0, input1_arg, input2_arg}, {resize_1_out_0}); -// resize_1.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize"); -// auto& 
transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); -// transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); -// }; -// -// auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { -// int transpose_cost = EstimateTransposeCost(session.GetGraph()); -// EXPECT_EQ(transpose_cost, 0); -// }; -// -// TransformerTester(build_test_case_1, -// check_optimized_graph_1, -// TransformerLevel::Default, -// TransformerLevel::Level1, -// /*opset_version*/ 13); -// } + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + builder.AddNode("Resize", {transpose_1_out_0, const_1}, {resize_1_out_0}); + auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ 10); +} + +TEST(TransposeOptimizerTests, TestResizeOpset11) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); + auto* const_1 = builder.MakeInitializer({8}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + auto* const_2 = builder.MakeInitializer({4}, {0.3f, 2.5f, 1.0f, 0.7f}); + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* resize_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + builder.AddNode("Resize", {transpose_1_out_0, const_1, const_2}, {resize_1_out_0}); + auto& 
transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ 11); +} + +TEST(TransposeOptimizerTests, TestResizeOpset15) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); + auto* const_1 = builder.MakeInitializer({4}, {0.3f, 2.5f, 1.0f, 0.7f}); + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* resize_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + auto empty_arg = NodeArg("", nullptr); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + builder.AddNode("Resize", {transpose_1_out_0, &empty_arg, const_1}, {resize_1_out_0}); + auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ 15); +} + +TEST(TransposeOptimizerTests, TestResizeSizeRoi) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); + auto* const_1 = builder.MakeInitializer({8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f}); + auto* const_2 = 
builder.MakeInitializer({4}, {10, 9, 8, 7}); + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* resize_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + auto empty_arg = NodeArg("", nullptr); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + auto& resize_1 = builder.AddNode("Resize", {transpose_1_out_0, const_1, &empty_arg, const_2}, {resize_1_out_0}); + resize_1.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize"); + auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ 15); +} + +TEST(TransposeOptimizerTests, TestResizeRoiScalesZeroRank0) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input = builder.MakeInput({1, 512, 512, 3}, + std::numeric_limits::min(), + std::numeric_limits::max()); + auto* resize_in_roi = builder.MakeInitializer({0}, {}); + auto* resize_in_scales = builder.MakeInitializer({0}, {}); + auto* resize_in_sizes = builder.MakeInitializer({4}, {1, 256, 32, 32}); + + auto* transpose1_out_transposed = builder.MakeIntermediate(); + auto* resize_out_Y = builder.MakeIntermediate(); + auto* output = builder.MakeOutput(); + + auto& transpose_1 = builder.AddNode("Transpose", {input}, {transpose1_out_transposed}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + builder.AddNode("Resize", + {transpose1_out_transposed, resize_in_roi, resize_in_scales, resize_in_sizes}, + {resize_out_Y}); + auto& transpose_2 = builder.AddNode("Transpose", 
{resize_out_Y}, {output}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1); +} + +TEST(TransposeOptimizerTests, TestResizeNonconst) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); + auto* input1_arg = MakeInput(builder, {{8}}, {8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f}); + auto* input2_arg = MakeInput(builder, {{4}}, {4}, {0.3f, 2.5f, 1.0f, 0.7f}); + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* resize_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + auto& resize_1 = builder.AddNode("Resize", {transpose_1_out_0, input1_arg, input2_arg}, {resize_1_out_0}); + resize_1.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize"); + auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ 11); +} + +TEST(TransposeOptimizerTests, TestResizeNonconstOpset13) { + auto build_test_case_1 = [&](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0); + auto* input1_arg = 
MakeInput(builder, {{8}}, {8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f}); + auto* input2_arg = MakeInput(builder, {{4}}, {4}, {0.3f, 2.5f, 1.0f, 0.7f}); + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* resize_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + auto& resize_1 = builder.AddNode("Resize", {transpose_1_out_0, input1_arg, input2_arg}, {resize_1_out_0}); + resize_1.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize"); + auto& transpose_2 = builder.AddNode("Transpose", {resize_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); + }; + + TransformerTester(build_test_case_1, + check_optimized_graph_1, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ 13); +} TEST(TransposeOptimizerTests, TestAdd) { auto build_test_case_1 = [&](ModelTestBuilder& builder) { @@ -3882,6 +3879,5 @@ TEST(TransposeOptimizerTests, RegressionTest_GitHubIssue10305) { ASSERT_STATUS_OK(session_object.Load(model_uri)); ASSERT_STATUS_OK(session_object.Initialize()); // optimizers run during initialization } - } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc b/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc index 9174b0fc15..ede3120efb 100644 --- a/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/upsample_op_test.cc @@ -292,7 +292,42 @@ TEST(UpsampleOpTest, UpsampleOp4DBilinearTest) { 7.0f, 7.5f, 8.0f, 8.5f, 9.0f, 9.0f, 9.0f, 9.0f}; test.AddOutput("Y", {N, C, (int64_t)(H * scales[2]), 
(int64_t)(W * scales[3])}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch +} + +TEST(UpsampleOpTest, UpsampleOp4DNhwcBilinearTest) { + OpTester test("Upsample"); + + std::vector scales{1.0f, 2.0f, 4.0f, 1.0f}; + test.AddAttribute("mode", "linear"); + test.AddAttribute("scales", scales); + + constexpr int64_t N = 2, H = 2, W = 3, C = 1; + std::vector X = {1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f}; + + test.AddInput("X", {N, H, W, C}, X); + + std::vector Y = { + 1.0f, 1.25f, 1.5f, 1.75f, 2.0f, 2.25f, 2.5f, 2.75f, 3.0f, 3.0f, 3.0f, 3.0f, + 2.5f, 2.75f, 3.0f, 3.25f, 3.5f, 3.75f, 4.0f, 4.25f, 4.5f, 4.5f, 4.5f, 4.5f, + 4.0f, 4.25f, 4.5f, 4.75f, 5.0f, 5.25f, 5.5f, 5.75f, 6.0f, 6.0f, 6.0f, 6.0f, + 4.0f, 4.25f, 4.5f, 4.75f, 5.0f, 5.25f, 5.5f, 5.75f, 6.0f, 6.0f, 6.0f, 6.0f, + + 7.0f, 7.25f, 7.5f, 7.75f, 8.0f, 8.25f, 8.5f, 8.75f, 9.0f, 9.0f, 9.0f, 9.0f, + 8.5f, 8.75f, 9.0f, 9.25f, 9.5f, 9.75f, 10.0f, 10.25f, 10.5f, 10.5f, 10.5f, 10.5f, + 10.0f, 10.25f, 10.5f, 10.75f, 11.0f, 11.25f, 11.5f, 11.75f, 12.0f, 12.0f, 12.0f, 12.0f, + 10.0f, 10.25f, 10.5f, 10.75f, 11.0f, 11.25f, 11.5f, 11.75f, 12.0f, 12.0f, 12.0f, 12.0f}; + + test.AddOutput("Y", {N, (int64_t)(H * scales[1]), (int64_t)(W * scales[2]), C}, Y); + //CUDA: result mismatch due to not implementing NHWC support + //TensorRT: results mismatch + //ROCm: results mismatch + test.Run(OpTester::ExpectResult::kExpectSuccess, "", + {kCudaExecutionProvider, kTensorrtExecutionProvider, kRocmExecutionProvider}); } TEST(UpsampleOpTest, UpsampleOp2DBilinearTest) { @@ -315,7 +350,7 @@ TEST(UpsampleOpTest, UpsampleOp2DBilinearTest) { 3.0f, 3.5f, 4.0f, 4.5f, 5.0f, 5.0f, 5.0f, 5.0f}; test.AddOutput("Y", {(int64_t)(H * scales[0]), (int64_t)(W * scales[1])}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", 
{kTensorrtExecutionProvider}); //TensorRT: results mismatch + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch } TEST(UpsampleOpTest, UpsampleOp4DBilinearTest_ScalesNoOp) {