Upsample support NHWC (#10554)

Implement bilinear interpolation for Upsample (Resize) on 4-D inputs whose
outermost and innermost scales are both 1 (the innermost dimension is usually
the channel of NHWC).

In addition, revert HandleResize to its original implementation for the
TransposeOptimizerTests.TestResize* tests.
This commit is contained in:
Yi-Hong Lyu 2022-02-24 06:27:11 +08:00 committed by GitHub
parent e0d1d6906a
commit bd08f11a58
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 340 additions and 273 deletions

View file

@ -967,41 +967,35 @@ static void PermuteInput(api::GraphRef& graph, api::NodeRef& node, size_t i, con
node.SetInput(i, gather_output);
}
// static bool HandleResize(HandlerArgs& args) {
// auto inputs = args.node.Inputs();
// int64_t rank_int = gsl::narrow_cast<int64_t>(args.perm.size());
//
// auto p = ChannelFirstToLastPerm(rank_int);
// auto& perm = p == args.perm ? args.perm : args.perm_inv;
// auto& perm_inv = p == args.perm ? args.perm_inv : args.perm;
//
// if (args.ctx.opset < 11) {
// PermuteInput(args.ctx.graph, args.node, 1, perm);
// } else {
// if (inputs[1] != "") {
// std::vector<int64_t> double_perm_inv = perm;
// double_perm_inv.reserve(2 * args.perm.size());
// for (int64_t p1 : perm) {
// double_perm_inv.push_back(p1 + rank_int);
// }
// PermuteInput(args.ctx.graph, args.node, 1, double_perm_inv);
// }
// for (size_t i = 2; i < inputs.size(); ++i) {
// if (inputs[i] != "") {
// PermuteInput(args.ctx.graph, args.node, i, perm);
// }
// }
// }
//
// TransposeFirstInput(args.ctx, args.node, perm);
// TransposeOutputs(args.ctx, args.node, perm_inv);
//
// SwapNodeOpTypeAndDomain(args.ctx.graph, args.node, args.node.OpType(), "com.microsoft.nhwc");
//
// return true;
// }
// Pushes a transpose through a Resize node.
//
// For opset < 11, input 1 is permuted by args.perm_inv. For opset >= 11, a non-empty
// input 1 is permuted by a doubled permutation (perm_inv followed by perm_inv shifted by
// the rank), and every non-empty input from index 2 onward is permuted by perm_inv.
// Finally the first input is transposed by perm_inv and the outputs by perm.
// Always returns true.
static bool HandleResize(HandlerArgs& args) {
  const auto node_inputs = args.node.Inputs();
  const int64_t rank = gsl::narrow_cast<int64_t>(args.perm.size());

  if (args.ctx.opset >= 11) {
    if (node_inputs[1] != "") {
      // Input 1 holds 2 * rank entries (presumably roi as [starts..., ends...] - confirm
      // against the Resize spec), so the permutation is applied to each half.
      std::vector<int64_t> doubled_perm = args.perm_inv;
      doubled_perm.reserve(2 * args.perm_inv.size());
      for (int64_t axis : args.perm_inv) {
        doubled_perm.push_back(axis + rank);
      }
      PermuteInput(args.ctx.graph, args.node, 1, doubled_perm);
    }

    // Remaining optional inputs: skip the ones that were omitted (empty name).
    for (size_t idx = 2; idx < node_inputs.size(); ++idx) {
      if (node_inputs[idx] != "") {
        PermuteInput(args.ctx.graph, args.node, idx, args.perm_inv);
      }
    }
  } else {
    PermuteInput(args.ctx.graph, args.node, 1, args.perm_inv);
  }

  TransposeFirstInput(args.ctx, args.node, args.perm_inv);
  TransposeOutputs(args.ctx, args.node, args.perm);

  return true;
}
// Handler registration for Resize, built from FirstInput and HandleResize.
// Referenced by the "Resize" entry of handler_map.
constexpr HandlerInfo resize_handler = {&FirstInput, &HandleResize};
static bool HandlePad(HandlerArgs& args) {
size_t rank = args.perm.size();
@ -1640,9 +1634,7 @@ static const std::unordered_map<std::string_view, const HandlerInfo&> handler_ma
{"Split", split_handler},
{"Shape", shape_handler},
{"Pad", pad_handler},
// TODO: re-enable resize handler after adding NHWC support in upsample op on cpu
// https://github.com/microsoft/onnxruntime/issues/9857
// {"Resize", resize_handler},
{"Resize", resize_handler},
{"ReduceSum", reduce_sum_handler},
{"ReduceLogSum", reduce_op_handler},

View file

@ -420,13 +420,15 @@ struct BilinearParams {
// that amounts to 'Bilinear' Upsampling/Resizing in the sense that it assumes
// the scale values for the outermost 2 dimensions are 1.
// This is the common use-case where the 4-D input (batched multi-channel images)
// is usually of shape [N, C, H, W] and the scales are [1.0, 1.0, height_scale, width_scale]
static BilinearParams SetupUpsampleBilinear(int64_t input_height,
int64_t input_width,
int64_t output_height,
int64_t output_width,
float height_scale,
float width_scale,
// is usually of shapes:
// - [N, C, H, W] and the scales are [1.0, 1.0, height_scale, width_scale]
// - [N, H, W, C] and the scales are [1.0, height_scale, width_scale, 1.0]
static BilinearParams SetupUpsampleBilinear(const int64_t input_height,
const int64_t input_width,
const int64_t output_height,
const int64_t output_width,
const float height_scale,
const float width_scale,
const std::vector<float>& roi,
AllocatorPtr& alloc,
const GetOriginalCoordinateFunc& get_original_coordinate) {
@ -523,26 +525,25 @@ static BilinearParams SetupUpsampleBilinear(int64_t input_height,
}
template <typename T>
void UpsampleBilinear(int64_t batch_size,
int64_t num_channels,
int64_t input_height,
int64_t input_width,
int64_t output_height,
int64_t output_width,
float height_scale,
float width_scale,
void UpsampleBilinear(const int64_t batch_size,
const int64_t num_channels,
const int64_t input_height,
const int64_t input_width,
const int64_t output_height,
const int64_t output_width,
const float height_scale,
const float width_scale,
const std::vector<float>& roi,
bool use_extrapolation,
float extrapolation_value,
const T* XdataBase,
T* YdataBase,
const bool use_extrapolation,
const float extrapolation_value,
const T* const XdataBase,
T* const YdataBase,
AllocatorPtr& alloc,
const GetOriginalCoordinateFunc& get_original_coordinate,
concurrency::ThreadPool* tp) {
BilinearParams p = SetupUpsampleBilinear(input_height, input_width, output_height, output_width,
height_scale, width_scale, roi,
alloc, get_original_coordinate);
for (int64_t n = 0; n < batch_size; ++n) {
concurrency::ThreadPool::TrySimpleParallelFor(
tp, num_channels,
@ -1065,22 +1066,65 @@ Status Upsample<T>::BaseCompute(OpKernelContext* context,
case UpsampleMode::LINEAR: {
// Supports 'bilinear' and 'trilinear' sampling only
//'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1
//'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1 or
// 4-D input with outermost and innermost scales as 1
if (dims.size() == 2 || dims.size() == 4) {
bool is_2D = dims.size() == 2;
const int64_t batch_size = is_2D ? 1 : dims[0];
const int64_t num_channels = is_2D ? 1 : dims[1];
const int64_t input_height = is_2D ? dims[0] : dims[2];
const int64_t input_width = is_2D ? dims[1] : dims[3];
int64_t batch_size;
int64_t num_channels;
int64_t input_height;
int64_t input_width;
const int64_t output_height = is_2D ? output_dims[0] : output_dims[2];
const int64_t output_width = is_2D ? output_dims[1] : output_dims[3];
int64_t output_height;
int64_t output_width;
float height_scale;
float width_scale;
if (is_2D) {
batch_size = 1;
num_channels = 1;
input_height = dims[0];
input_width = dims[1];
output_height = output_dims[0];
output_width = output_dims[1];
height_scale = scales[0];
width_scale = scales[1];
} else {
if (scales[1] == 1.0f) {
batch_size = dims[0];
num_channels = dims[1];
input_height = dims[2];
input_width = dims[3];
output_height = output_dims[2];
output_width = output_dims[3];
height_scale = scales[2];
width_scale = scales[3];
} else {
ORT_ENFORCE(scales[3] == 1.0f, "4-D input with innermost scale (usually channel of NHWC) as 1.");
batch_size = dims[0];
num_channels = dims[3];
input_height = dims[1];
input_width = dims[2];
output_height = output_dims[1];
output_width = output_dims[2];
height_scale = scales[1];
width_scale = scales[2];
}
}
AllocatorPtr alloc;
ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
UpsampleBilinear(batch_size, num_channels, input_height, input_width, output_height, output_width,
is_2D ? scales[0] : scales[2], is_2D ? scales[1] : scales[3], roi,
height_scale, width_scale, roi,
use_extrapolation_, extrapolation_value_, X->Data<T>(),
Y->MutableData<T>(), alloc, get_original_coordinate_,
output_height * output_width > 64 ? context->GetOperatorThreadPool() : nullptr);

View file

@ -291,212 +291,209 @@ TEST(TransposeOptimizerTests, TestPadNonconst) {
/*opset_version*/ 11);
}
// TODO: re-enable tests on resize transformer after adding NHWC support in upsample op on cpu
// https://github.com/microsoft/onnxruntime/issues/9857
// Opset 10 Resize (inputs: data, scales initializer) placed between a Transpose with
// perm {0, 3, 1, 2} and a Transpose with perm {0, 2, 3, 1}. The check expects the
// estimated transpose cost of the resulting graph to be 0.
TEST(TransposeOptimizerTests, TestResize) {
  constexpr int kOpsetVersion = 10;

  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
    auto* scales = builder.MakeInitializer<float>({4}, {0.3f, 2.5f, 1.0f, 0.7f});
    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    builder.AddNode("Resize", {pre_transpose_out, scales}, {resize_out});

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1,
                    kOpsetVersion);
}
// Opset 11 Resize with an all-zero 8-element initializer as input 1 and a 4-element
// scales initializer as input 2, placed between a Transpose with perm {0, 3, 1, 2} and
// a Transpose with perm {0, 2, 3, 1}. The check expects an estimated transpose cost of 0.
TEST(TransposeOptimizerTests, TestResizeOpset11) {
  constexpr int kOpsetVersion = 11;

  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
    auto* roi = builder.MakeInitializer<float>({8}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
    auto* scales = builder.MakeInitializer<float>({4}, {0.3f, 2.5f, 1.0f, 0.7f});
    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    builder.AddNode("Resize", {pre_transpose_out, roi, scales}, {resize_out});

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1,
                    kOpsetVersion);
}
// Opset 15 Resize whose input 1 is an empty-named NodeArg (omitted optional input) and
// whose input 2 is a 4-element scales initializer, placed between a Transpose with perm
// {0, 3, 1, 2} and a Transpose with perm {0, 2, 3, 1}. The check expects an estimated
// transpose cost of 0.
TEST(TransposeOptimizerTests, TestResizeOpset15) {
  constexpr int kOpsetVersion = 15;

  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
    auto* scales = builder.MakeInitializer<float>({4}, {0.3f, 2.5f, 1.0f, 0.7f});
    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();
    // Empty name marks the input slot as not provided.
    auto omitted_input = NodeArg("", nullptr);

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    builder.AddNode("Resize", {pre_transpose_out, &omitted_input, scales}, {resize_out});

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1,
                    kOpsetVersion);
}
// Opset 15 Resize in "tf_crop_and_resize" coordinate mode with an 8-element float
// initializer as input 1, an empty-named NodeArg as input 2, and a 4-element int64
// initializer as input 3, placed between a Transpose with perm {0, 3, 1, 2} and a
// Transpose with perm {0, 2, 3, 1}. The check expects an estimated transpose cost of 0.
TEST(TransposeOptimizerTests, TestResizeSizeRoi) {
  constexpr int kOpsetVersion = 15;

  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
    auto* roi = builder.MakeInitializer<float>({8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f});
    auto* sizes = builder.MakeInitializer<int64_t>({4}, {10, 9, 8, 7});
    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();
    // Empty name marks the input slot as not provided.
    auto omitted_input = NodeArg("", nullptr);

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    auto& resize = builder.AddNode("Resize", {pre_transpose_out, roi, &omitted_input, sizes}, {resize_out});
    resize.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize");

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1,
                    kOpsetVersion);
}
// Resize on a uint8 {1, 512, 512, 3} input where the roi and scales inputs are
// zero-element initializers and only sizes ({1, 256, 32, 32}) carries data. The Resize
// sits between a Transpose with perm {0, 3, 1, 2} and a Transpose with perm {0, 2, 3, 1}.
// The check expects an estimated transpose cost of 0. No opset is passed, so
// TransformerTester's default applies.
TEST(TransposeOptimizerTests, TestResizeRoiScalesZeroRank0) {
  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = builder.MakeInput<uint8_t>({1, 512, 512, 3},
                                            std::numeric_limits<uint8_t>::min(),
                                            std::numeric_limits<uint8_t>::max());
    auto* roi = builder.MakeInitializer<float>({0}, {});
    auto* scales = builder.MakeInitializer<float>({0}, {});
    auto* sizes = builder.MakeInitializer<int64_t>({4}, {1, 256, 32, 32});

    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    builder.AddNode("Resize",
                    {pre_transpose_out, roi, scales, sizes},
                    {resize_out});

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1);
}
// Opset 11 Resize in "tf_crop_and_resize" coordinate mode whose inputs 1 and 2 are
// graph inputs (created with MakeInput) rather than initializers. The Resize sits between
// a Transpose with perm {0, 3, 1, 2} and a Transpose with perm {0, 2, 3, 1}. The check
// expects an estimated transpose cost of 0.
TEST(TransposeOptimizerTests, TestResizeNonconst) {
  constexpr int kOpsetVersion = 11;

  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
    auto* roi = MakeInput<float>(builder, {{8}}, {8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f});
    auto* scales = MakeInput<float>(builder, {{4}}, {4}, {0.3f, 2.5f, 1.0f, 0.7f});
    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    auto& resize = builder.AddNode("Resize", {pre_transpose_out, roi, scales}, {resize_out});
    resize.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize");

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1,
                    kOpsetVersion);
}
// Opset 13 variant of the non-constant Resize case: inputs 1 and 2 of the Resize
// ("tf_crop_and_resize" coordinate mode) are graph inputs created with MakeInput. The
// Resize sits between a Transpose with perm {0, 3, 1, 2} and a Transpose with perm
// {0, 2, 3, 1}. The check expects an estimated transpose cost of 0.
TEST(TransposeOptimizerTests, TestResizeNonconstOpset13) {
  constexpr int kOpsetVersion = 13;

  auto build_graph = [&](ModelTestBuilder& builder) {
    auto* data = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
    auto* roi = MakeInput<float>(builder, {{8}}, {8}, {0.1f, 0.2f, 0.3f, 0.4f, 0.9f, 0.8f, 0.7f, 0.6f});
    auto* scales = MakeInput<float>(builder, {{4}}, {4}, {0.3f, 2.5f, 1.0f, 0.7f});
    auto* pre_transpose_out = builder.MakeIntermediate();
    auto* resize_out = builder.MakeIntermediate();
    auto* graph_out = builder.MakeOutput();

    auto& pre_transpose = builder.AddNode("Transpose", {data}, {pre_transpose_out});
    pre_transpose.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});

    auto& resize = builder.AddNode("Resize", {pre_transpose_out, roi, scales}, {resize_out});
    resize.AddAttribute("coordinate_transformation_mode", "tf_crop_and_resize");

    auto& post_transpose = builder.AddNode("Transpose", {resize_out}, {graph_out});
    post_transpose.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
  };

  auto verify_graph = [&](InferenceSessionWrapper& session) {
    const int cost = EstimateTransposeCost(session.GetGraph());
    EXPECT_EQ(cost, 0);
  };

  TransformerTester(build_graph,
                    verify_graph,
                    TransformerLevel::Default,
                    TransformerLevel::Level1,
                    kOpsetVersion);
}
TEST(TransposeOptimizerTests, TestAdd) {
auto build_test_case_1 = [&](ModelTestBuilder& builder) {
@ -3882,6 +3879,5 @@ TEST(TransposeOptimizerTests, RegressionTest_GitHubIssue10305) {
ASSERT_STATUS_OK(session_object.Load(model_uri));
ASSERT_STATUS_OK(session_object.Initialize()); // optimizers run during initialization
}
} // namespace test
} // namespace onnxruntime

View file

@ -292,7 +292,42 @@ TEST(UpsampleOpTest, UpsampleOp4DBilinearTest) {
7.0f, 7.5f, 8.0f, 8.5f, 9.0f, 9.0f, 9.0f, 9.0f};
test.AddOutput<float>("Y", {N, C, (int64_t)(H * scales[2]), (int64_t)(W * scales[3])}, Y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch
}
// "linear" mode Upsample on a 4-D input of shape {N, H, W, C} with scales
// {1, 2, 4, 1}: the outermost and innermost dimensions are left unscaled.
TEST(UpsampleOpTest, UpsampleOp4DNhwcBilinearTest) {
  OpTester test("Upsample");

  std::vector<float> scales{1.0f, 2.0f, 4.0f, 1.0f};
  test.AddAttribute("mode", "linear");
  test.AddAttribute("scales", scales);

  constexpr int64_t N = 2;
  constexpr int64_t H = 2;
  constexpr int64_t W = 3;
  constexpr int64_t C = 1;

  std::vector<float> X = {1.0f, 2.0f, 3.0f,
                          4.0f, 5.0f, 6.0f,

                          7.0f, 8.0f, 9.0f,
                          10.0f, 11.0f, 12.0f};
  test.AddInput<float>("X", {N, H, W, C}, X);

  // Expected output: 4 rows of 12 values per batch entry.
  std::vector<float> Y = {
      1.0f, 1.25f, 1.5f, 1.75f, 2.0f, 2.25f, 2.5f, 2.75f, 3.0f, 3.0f, 3.0f, 3.0f,
      2.5f, 2.75f, 3.0f, 3.25f, 3.5f, 3.75f, 4.0f, 4.25f, 4.5f, 4.5f, 4.5f, 4.5f,
      4.0f, 4.25f, 4.5f, 4.75f, 5.0f, 5.25f, 5.5f, 5.75f, 6.0f, 6.0f, 6.0f, 6.0f,
      4.0f, 4.25f, 4.5f, 4.75f, 5.0f, 5.25f, 5.5f, 5.75f, 6.0f, 6.0f, 6.0f, 6.0f,

      7.0f, 7.25f, 7.5f, 7.75f, 8.0f, 8.25f, 8.5f, 8.75f, 9.0f, 9.0f, 9.0f, 9.0f,
      8.5f, 8.75f, 9.0f, 9.25f, 9.5f, 9.75f, 10.0f, 10.25f, 10.5f, 10.5f, 10.5f, 10.5f,
      10.0f, 10.25f, 10.5f, 10.75f, 11.0f, 11.25f, 11.5f, 11.75f, 12.0f, 12.0f, 12.0f, 12.0f,
      10.0f, 10.25f, 10.5f, 10.75f, 11.0f, 11.25f, 11.5f, 11.75f, 12.0f, 12.0f, 12.0f, 12.0f};

  // Output spatial extents are the input extents multiplied by their scales, truncated.
  const int64_t output_height = static_cast<int64_t>(H * scales[1]);
  const int64_t output_width = static_cast<int64_t>(W * scales[2]);
  test.AddOutput<float>("Y", {N, output_height, output_width, C}, Y);

  // Execution providers listed in the Run call below, and why:
  //   CUDA: result mismatch due to not implementing NHWC support
  //   TensorRT: results mismatch
  //   ROCm: results mismatch
  test.Run(OpTester::ExpectResult::kExpectSuccess, "",
           {kCudaExecutionProvider, kTensorrtExecutionProvider, kRocmExecutionProvider});
}
TEST(UpsampleOpTest, UpsampleOp2DBilinearTest) {
@ -315,7 +350,7 @@ TEST(UpsampleOpTest, UpsampleOp2DBilinearTest) {
3.0f, 3.5f, 4.0f, 4.5f, 5.0f, 5.0f, 5.0f, 5.0f};
test.AddOutput<float>("Y", {(int64_t)(H * scales[0]), (int64_t)(W * scales[1])}, Y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: results mismatch
}
TEST(UpsampleOpTest, UpsampleOp4DBilinearTest_ScalesNoOp) {