From 383b1e207c772f6afd0344e4a7dd73e68f63a5be Mon Sep 17 00:00:00 2001 From: Hariharan Seshadri Date: Wed, 30 Sep 2020 15:33:33 -0700 Subject: [PATCH] Fix bug in the Resize operator kernels (#5303) --- .../core/providers/cpu/tensor/upsample.cc | 47 ++++++++--- .../core/providers/cuda/tensor/resize_impl.cu | 79 +++++++++++++------ .../providers/cpu/tensor/resize_op_test.cc | 33 +++++++- 3 files changed, 117 insertions(+), 42 deletions(-) diff --git a/onnxruntime/core/providers/cpu/tensor/upsample.cc b/onnxruntime/core/providers/cpu/tensor/upsample.cc index 4e9871af43..5f443e87ae 100644 --- a/onnxruntime/core/providers/cpu/tensor/upsample.cc +++ b/onnxruntime/core/providers/cpu/tensor/upsample.cc @@ -114,6 +114,17 @@ Status UpsampleNearest(const T* input, auto CalculateInputMapping = [n_dim, &input_shape, &output_shape, &input_dim_factor, &scales, &roi, extrapolation_enabled, &get_original_coordinate, &get_nearest_pixel]( std::vector& input_mapping, const int64_t axis) { + // When scale is 1.0, there is a one-to-one mapping between the dimension + // in the input and the output and there is no need to apply the co-ordinate + // transformation which should only be done when there is "resizing" required + if (scales[axis] == 1.0f) { + for (int64_t dim = 0; dim < output_shape[axis]; dim++) { + input_mapping[dim] = dim * input_dim_factor[axis]; + } + return; + } + + // scale != 1.0 const int64_t input_size = input_dim_factor[0] * input_shape[0]; for (int64_t dim = 0; dim < output_shape[axis]; dim++) { float original_dim = get_original_coordinate(static_cast(dim), scales[axis], static_cast(output_shape[axis]), @@ -211,7 +222,7 @@ Status UpsampleNearest(const T* input, break; } output_dim_counter[dim_idx] = 0; - input_idx += input_mappings[dim_idx][0 /* output_dim_counter[dim_idx] */ ]; + input_idx += input_mappings[dim_idx][0 /* output_dim_counter[dim_idx] */]; } } @@ -331,9 +342,11 @@ void UpsampleBilinear(int64_t batch_size, auto roi_y_start = roi.size() / 2 - 2; auto 
roi_y_end = roi.size() - 2; for (int64_t y = 0; y < output_height; ++y) { - float in_y = get_original_coordinate(static_cast(y), height_scale, - static_cast(output_height), static_cast(input_height), - roi[roi_y_start], roi[roi_y_end]); + float in_y = height_scale == 1 ? static_cast(y) + : get_original_coordinate(static_cast(y), height_scale, + static_cast(output_height), + static_cast(input_height), + roi[roi_y_start], roi[roi_y_end]); y_original.emplace_back(in_y); in_y = std::max(0.0f, std::min(in_y, static_cast(input_height - 1))); @@ -354,9 +367,12 @@ void UpsampleBilinear(int64_t batch_size, auto roi_x_start = roi.size() / 2 - 1; auto roi_x_end = roi.size() - 1; for (int64_t x = 0; x < output_width; ++x) { - float in_x = get_original_coordinate(static_cast(x), width_scale, - static_cast(output_width), static_cast(input_width), - roi[roi_x_start], roi[roi_x_end]); + float in_x = width_scale == 1 ? static_cast(x) + : get_original_coordinate(static_cast(x), + width_scale, + static_cast(output_width), + static_cast(input_width), + roi[roi_x_start], roi[roi_x_end]); x_original.emplace_back(in_x); in_x = std::max(0.0f, std::min(in_x, static_cast(input_width - 1))); @@ -485,9 +501,11 @@ void ResizeBiCubic( // generate coefficients in y direction for (int64_t y = 0; y < output_height; ++y) { - float in_y = get_original_coordinate(static_cast(y), height_scale, - static_cast(output_height), static_cast(input_height), - roi[roi_y_start], roi[roi_y_end]); + float in_y = height_scale == 1 ? 
static_cast(y) + : get_original_coordinate(static_cast(y), height_scale, + static_cast(output_height), + static_cast(input_height), + roi[roi_y_start], roi[roi_y_end]); y_original.emplace_back(in_y); auto s = y_original[y] - std::floor(y_original[y]); if (cubic_coeffs.find(s) == cubic_coeffs.end()) { @@ -498,9 +516,12 @@ void ResizeBiCubic( // generate coefficients in x direction for (int64_t x = 0; x < output_width; ++x) { - float in_x = get_original_coordinate(static_cast(x), width_scale, - static_cast(output_width), static_cast(input_width), - roi[roi_x_start], roi[roi_x_end]); + float in_x = width_scale == 1 ? static_cast(x) + : get_original_coordinate(static_cast(x), + width_scale, + static_cast(output_width), + static_cast(input_width), + roi[roi_x_start], roi[roi_x_end]); x_original.emplace_back(in_x); auto s = x_original[x] - std::floor(x_original[x]); if (cubic_coeffs.find(s) == cubic_coeffs.end()) { diff --git a/onnxruntime/core/providers/cuda/tensor/resize_impl.cu b/onnxruntime/core/providers/cuda/tensor/resize_impl.cu index 5bb720aebf..c3521d1644 100644 --- a/onnxruntime/core/providers/cuda/tensor/resize_impl.cu +++ b/onnxruntime/core/providers/cuda/tensor/resize_impl.cu @@ -149,21 +149,37 @@ __global__ void _ResizeNearestMappingKernel2D( CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, output_height + output_width); if (id >= 0 && id < output_height) { // for Height int dim = id; - float orig_coord = transform_coordinate(static_cast(dim), scales_height, static_cast(output_height), - static_cast(input_height), roi_start_height, roi_end_height); - dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast(input_height - 1))); - dim = calc_nearest_pixel(orig_coord, scales_height < 1); - if (dim >= input_height) dim = input_height - 1; - if (dim < 0) dim = 0; + + // only apply co-ordinate transformation if scale != 1.0 + if (scales_height == 1.0f) { + dims_mapping[id].extrapolate_ = 0; + } else { + float orig_coord 
= transform_coordinate(static_cast(dim), scales_height, static_cast(output_height), + static_cast(input_height), roi_start_height, roi_end_height); + dims_mapping[id].extrapolate_ = static_cast( + extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast(input_height - 1))); + dim = calc_nearest_pixel(orig_coord, scales_height < 1); + if (dim >= input_height) dim = input_height - 1; + if (dim < 0) dim = 0; + } + dims_mapping[id].origin_ = dim; } else { int dim = id - output_height; - float orig_coord = transform_coordinate(static_cast(dim), scales_width, static_cast(output_width), - static_cast(input_width), roi_start_width, roi_end_width); - dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast(input_width - 1))); - dim = calc_nearest_pixel(orig_coord, scales_width < 1); - if (dim >= input_width) dim = input_width - 1; - if (dim < 0) dim = 0; + + // only apply co-ordinate transformation if scale != 1.0 + if (scales_width == 1.0f) { + dims_mapping[id].extrapolate_ = 0; + } else { + float orig_coord = transform_coordinate(static_cast(dim), scales_width, static_cast(output_width), + static_cast(input_width), roi_start_width, roi_end_width); + dims_mapping[id].extrapolate_ = static_cast( + extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast(input_width - 1))); + dim = calc_nearest_pixel(orig_coord, scales_width < 1); + if (dim >= input_width) dim = input_width - 1; + if (dim < 0) dim = 0; + } + dims_mapping[id].origin_ = dim; return; } @@ -190,12 +206,19 @@ __global__ void _ResizeNearestMappingKernel( } if (id >= dim_sum && id < dim_sum + output_shape[axis]) { int dim = id - dim_sum; - float orig_coord = transform_coordinate(static_cast(dim), scales[axis], static_cast(output_shape[axis]), - static_cast(input_shape[axis]), roi[axis], roi[axis + rank]); - dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast(input_shape[axis] - 
1))); - dim = calc_nearest_pixel(orig_coord, scales[axis] < 1); - if (dim >= input_shape[axis]) dim = input_shape[axis] - 1; - if (dim < 0) dim = 0; + + // only apply co-ordinate transformation if scale != 1.0 + if (scales[axis] == 1.0f) { + dims_mapping[id].extrapolate_ = 0; + } else { + float orig_coord = transform_coordinate(static_cast(dim), scales[axis], static_cast(output_shape[axis]), + static_cast(input_shape[axis]), roi[axis], roi[axis + rank]); + dims_mapping[id].extrapolate_ = static_cast(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast(input_shape[axis] - 1))); + dim = calc_nearest_pixel(orig_coord, scales[axis] < 1); + if (dim >= input_shape[axis]) dim = input_shape[axis] - 1; + if (dim < 0) dim = 0; + } + dims_mapping[id].origin_ = dim; return; } @@ -271,18 +294,20 @@ __global__ void _ResizeBilinearCoordinateMapping( BilinearMappingInfo* dims_mapping) { CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, SumHW); if (id < output_height) { // y = id - float input_y = transform_coordinate(static_cast(id), scale_height, - static_cast(output_height), static_cast(input_height), - roi_height_start, roi_height_end); + float input_y = scale_height == 1 ? static_cast(id) : + transform_coordinate(static_cast(id), scale_height, + static_cast(output_height), static_cast(input_height), + roi_height_start, roi_height_end); dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_y < 0 || input_y > static_cast(input_height - 1))); input_y = max(0.0f, min(input_y, static_cast(input_height - 1))); int y_int = static_cast(input_y); dims_mapping[id].origin_ = y_int; dims_mapping[id].weight_ = (y_int >= input_height - 1) ? 0.5f : input_y - y_int; } else { //x = id - output_height - float input_x = transform_coordinate(static_cast(id - output_height), scale_width, - static_cast(output_width), static_cast(input_width), - roi_width_start, roi_width_end); + float input_x = scale_width == 1 ? 
static_cast(id - output_height) : + transform_coordinate(static_cast(id - output_height), scale_width, + static_cast(output_width), static_cast(input_width), + roi_width_start, roi_width_end); dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_x < 0 || input_x > static_cast(input_width - 1))); input_x = max(0.0f, min(input_x, static_cast(input_width - 1))); int x_int = static_cast(input_x); @@ -370,9 +395,11 @@ __global__ void _ResizeCubicCoordinateMapping( bool is_y_axis = (id < output_height); int max_input_coord = static_cast(is_y_axis ? input_height : input_width); - float input_coordinat = transform_coordinate( + float scale = is_y_axis ? scale_height : scale_width; + float input_coordinat = scale == 1 ? (is_y_axis ? id : id - output_height) : + transform_coordinate( static_cast(is_y_axis ? id : id - output_height), - (is_y_axis ? scale_height : scale_width), + scale, static_cast(is_y_axis ? output_height : output_width), static_cast(max_input_coord), (is_y_axis ? 
roi_height_start : roi_width_start), diff --git a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc index 72f7b03074..33a5d7f302 100644 --- a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc @@ -135,7 +135,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_2DBilinear_pytorch_half_pixel) std::vector Y = {1.6666666f, 7.0f, 12.333333f}; test.AddOutput("Y", {sizes[0], sizes[1]}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch } TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_4DBilinear_asymmetric) { @@ -438,7 +438,6 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample5dTest_WithSizes_CeilMode) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider}); } - TEST(ResizeOpTest, ResizeOpNearestUpSample_Floor_Align_Corners) { OpTester test("Resize", 11); @@ -473,6 +472,34 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Floor_Align_Corners) { test.Run(); } +TEST(ResizeOpTest, ResizeOpNearest_OneToOneMappingBetweenInputAndOutputDataDims) { + OpTester test("Resize", 11); + + std::vector roi{}; + // There is one-to-one mapping in the outermost dim. + // This test is to ensure that the co-ordinate transformation is not applied to the + // outermost dim as there is no "resizing". 
+ // If it were applied using the provided attributes, it would result in a result mismatch + std::vector scales{1.0f, 0.5f}; + + test.AddAttribute("mode", "nearest"); + test.AddAttribute("coordinate_transformation_mode", "tf_half_pixel_for_nn"); + test.AddAttribute("nearest_mode", "ceil"); + + const int64_t C = 2, D = 3; + std::vector X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + + test.AddInput("X", {C, D}, X); + test.AddInput("roi", {0}, roi); + test.AddInput("scales", {2}, scales); + + // would produce {5.0f, 5.0f} if co-ordinate transformation was applied + // to the outermost dim + std::vector Y = {2.0f, 5.0f}; + test.AddOutput("Y", {2, 1}, Y); + test.Run(); +} + // custom OpTester to make 'scales' or 'sizes' a constant initializer if needed class ResizeOpTester : public OpTester { public: @@ -573,7 +600,7 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Nearest2xOptimization_Sizes) { 3.0f, 3.0f, 4.0f, 4.0f}; test.AddOutput("Y", {N, C, sizes[2], sizes[3]}, Y); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch }; run_test(false);