Fix bug in the Resize operator kernels (#5303)

2026-07-13 18:08:13 +00:00 · 2020-09-30 15:33:33 -07:00 · 2020-09-30 15:33:33 -07:00 · 383b1e207c
commit 383b1e207c
parent 3f00b8db8f
3 changed files with 117 additions and 42 deletions
--- a/onnxruntime/core/providers/cpu/tensor/upsample.cc
+++ b/onnxruntime/core/providers/cpu/tensor/upsample.cc
@ -114,6 +114,17 @@ Status UpsampleNearest(const T* input,
  auto CalculateInputMapping =
      [n_dim, &input_shape, &output_shape, &input_dim_factor, &scales, &roi, extrapolation_enabled, &get_original_coordinate, &get_nearest_pixel](
          std::vector<int64_t>& input_mapping, const int64_t axis) {
+        // When scale is 1.0, there is a one-to-one mapping between the dimension
+        // in the input and the output and there is no need to apply the co-ordinate
+        // transformation which should only be done when there is "resizing" required
+        if (scales[axis] == 1.0f) {
+          for (int64_t dim = 0; dim < output_shape[axis]; dim++) {
+            input_mapping[dim] = dim * input_dim_factor[axis];
+          }
+          return;
+        }
+
+        // scale != 1.0
        const int64_t input_size = input_dim_factor[0] * input_shape[0];
        for (int64_t dim = 0; dim < output_shape[axis]; dim++) {
          float original_dim = get_original_coordinate(static_cast<float>(dim), scales[axis], static_cast<float>(output_shape[axis]),
@ -211,7 +222,7 @@ Status UpsampleNearest(const T* input,
        break;
      }
      output_dim_counter[dim_idx] = 0;
-      input_idx += input_mappings[dim_idx][0 /* output_dim_counter[dim_idx] */ ];
+      input_idx += input_mappings[dim_idx][0 /* output_dim_counter[dim_idx] */];
    }
  }

@ -331,9 +342,11 @@ void UpsampleBilinear(int64_t batch_size,
  auto roi_y_start = roi.size() / 2 - 2;
  auto roi_y_end = roi.size() - 2;
  for (int64_t y = 0; y < output_height; ++y) {
-    float in_y = get_original_coordinate(static_cast<float>(y), height_scale,
-                                         static_cast<float>(output_height), static_cast<float>(input_height),
-                                         roi[roi_y_start], roi[roi_y_end]);
+    float in_y = height_scale == 1 ? static_cast<float>(y)
+                                   : get_original_coordinate(static_cast<float>(y), height_scale,
+                                                             static_cast<float>(output_height),
+                                                             static_cast<float>(input_height),
+                                                             roi[roi_y_start], roi[roi_y_end]);
    y_original.emplace_back(in_y);
    in_y = std::max(0.0f, std::min(in_y, static_cast<float>(input_height - 1)));

@ -354,9 +367,12 @@ void UpsampleBilinear(int64_t batch_size,
  auto roi_x_start = roi.size() / 2 - 1;
  auto roi_x_end = roi.size() - 1;
  for (int64_t x = 0; x < output_width; ++x) {
-    float in_x = get_original_coordinate(static_cast<float>(x), width_scale,
-                                         static_cast<float>(output_width), static_cast<float>(input_width),
-                                         roi[roi_x_start], roi[roi_x_end]);
+    float in_x = width_scale == 1 ? static_cast<float>(x)
+                                  : get_original_coordinate(static_cast<float>(x),
+                                                            width_scale,
+                                                            static_cast<float>(output_width),
+                                                            static_cast<float>(input_width),
+                                                            roi[roi_x_start], roi[roi_x_end]);
    x_original.emplace_back(in_x);
    in_x = std::max(0.0f, std::min(in_x, static_cast<float>(input_width - 1)));

@ -485,9 +501,11 @@ void ResizeBiCubic(

  // generate coefficients in y direction
  for (int64_t y = 0; y < output_height; ++y) {
-    float in_y = get_original_coordinate(static_cast<float>(y), height_scale,
-                                         static_cast<float>(output_height), static_cast<float>(input_height),
-                                         roi[roi_y_start], roi[roi_y_end]);
+    float in_y = height_scale == 1 ? static_cast<float>(y)
+                                   : get_original_coordinate(static_cast<float>(y), height_scale,
+                                                             static_cast<float>(output_height),
+                                                             static_cast<float>(input_height),
+                                                             roi[roi_y_start], roi[roi_y_end]);
    y_original.emplace_back(in_y);
    auto s = y_original[y] - std::floor(y_original[y]);
    if (cubic_coeffs.find(s) == cubic_coeffs.end()) {
@ -498,9 +516,12 @@ void ResizeBiCubic(

  // generate coefficients in x direction
  for (int64_t x = 0; x < output_width; ++x) {
-    float in_x = get_original_coordinate(static_cast<float>(x), width_scale,
-                                         static_cast<float>(output_width), static_cast<float>(input_width),
-                                         roi[roi_x_start], roi[roi_x_end]);
+    float in_x = width_scale == 1 ? static_cast<float>(x)
+                                  : get_original_coordinate(static_cast<float>(x),
+                                                            width_scale,
+                                                            static_cast<float>(output_width),
+                                                            static_cast<float>(input_width),
+                                                            roi[roi_x_start], roi[roi_x_end]);
    x_original.emplace_back(in_x);
    auto s = x_original[x] - std::floor(x_original[x]);
    if (cubic_coeffs.find(s) == cubic_coeffs.end()) {
--- a/onnxruntime/core/providers/cuda/tensor/resize_impl.cu
+++ b/onnxruntime/core/providers/cuda/tensor/resize_impl.cu
@ -149,21 +149,37 @@ __global__ void _ResizeNearestMappingKernel2D(
  CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, output_height + output_width);
  if (id >= 0 && id < output_height) {  // for Height
    int dim = id;
-    float orig_coord = transform_coordinate(static_cast<float>(dim), scales_height, static_cast<float>(output_height),
-                                            static_cast<float>(input_height), roi_start_height, roi_end_height);
-    dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast<float>(input_height - 1)));
-    dim = calc_nearest_pixel(orig_coord, scales_height < 1);
-    if (dim >= input_height) dim = input_height - 1;
-    if (dim < 0) dim = 0;
+
+    // only apply co-ordinate transformation if scale != 1.0
+    if (scales_height == 1.0f) {
+        dims_mapping[id].extrapolate_ = 0;
+    } else {
+      float orig_coord = transform_coordinate(static_cast<float>(dim), scales_height, static_cast<float>(output_height),
+                                              static_cast<float>(input_height), roi_start_height, roi_end_height);
+      dims_mapping[id].extrapolate_ = static_cast<int>(
+          extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast<float>(input_height - 1)));
+      dim = calc_nearest_pixel(orig_coord, scales_height < 1);
+      if (dim >= input_height) dim = input_height - 1;
+      if (dim < 0) dim = 0;    
+    }
+
    dims_mapping[id].origin_ = dim;
  } else {
    int dim = id - output_height;
-    float orig_coord = transform_coordinate(static_cast<float>(dim), scales_width, static_cast<float>(output_width),
-                                            static_cast<float>(input_width), roi_start_width, roi_end_width);
-    dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast<float>(input_width - 1)));
-    dim = calc_nearest_pixel(orig_coord, scales_width < 1);
-    if (dim >= input_width) dim = input_width - 1;
-    if (dim < 0) dim = 0;
+
+    // only apply co-ordinate transformation if scale != 1.0
+    if (scales_width == 1.0f) {
+      dims_mapping[id].extrapolate_ = 0;
+    } else {
+      float orig_coord = transform_coordinate(static_cast<float>(dim), scales_width, static_cast<float>(output_width),
+                                              static_cast<float>(input_width), roi_start_width, roi_end_width);
+      dims_mapping[id].extrapolate_ = static_cast<int>(
+          extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast<float>(input_width - 1)));
+      dim = calc_nearest_pixel(orig_coord, scales_width < 1);
+      if (dim >= input_width) dim = input_width - 1;
+      if (dim < 0) dim = 0; 
+    }
+
    dims_mapping[id].origin_ = dim;
    return;
  }
@ -190,12 +206,19 @@ __global__ void _ResizeNearestMappingKernel(
    }
    if (id >= dim_sum && id < dim_sum + output_shape[axis]) {
      int dim = id - dim_sum;
-      float orig_coord = transform_coordinate(static_cast<float>(dim), scales[axis], static_cast<float>(output_shape[axis]),
-                                              static_cast<float>(input_shape[axis]), roi[axis], roi[axis + rank]);
-      dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast<float>(input_shape[axis] - 1)));
-      dim = calc_nearest_pixel(orig_coord, scales[axis] < 1);
-      if (dim >= input_shape[axis]) dim = input_shape[axis] - 1;
-      if (dim < 0) dim = 0;
+
+      // only apply co-ordinate transformation if scale != 1.0
+      if (scales[axis] == 1.0f) {
+        dims_mapping[id].extrapolate_ = 0;
+      } else {
+        float orig_coord = transform_coordinate(static_cast<float>(dim), scales[axis], static_cast<float>(output_shape[axis]),
+                                                static_cast<float>(input_shape[axis]), roi[axis], roi[axis + rank]);
+        dims_mapping[id].extrapolate_ = static_cast<int>(extrapolation_enabled && (orig_coord < 0.f || orig_coord > static_cast<float>(input_shape[axis] - 1)));
+        dim = calc_nearest_pixel(orig_coord, scales[axis] < 1);
+        if (dim >= input_shape[axis]) dim = input_shape[axis] - 1;
+        if (dim < 0) dim = 0;      
+      }
+
      dims_mapping[id].origin_ = dim;
      return;
    }
@ -271,18 +294,20 @@ __global__ void _ResizeBilinearCoordinateMapping(
    BilinearMappingInfo* dims_mapping) {
  CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, SumHW);
  if (id < output_height) {  //  y = id
-    float input_y = transform_coordinate(static_cast<float>(id), scale_height,
-                                         static_cast<float>(output_height), static_cast<float>(input_height),
-                                         roi_height_start, roi_height_end);
+    float input_y = scale_height == 1 ? static_cast<float>(id) :
+                                        transform_coordinate(static_cast<float>(id), scale_height,
+                                        static_cast<float>(output_height), static_cast<float>(input_height),
+                                        roi_height_start, roi_height_end);
    dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_y < 0 || input_y > static_cast<float>(input_height - 1)));
    input_y = max(0.0f, min(input_y, static_cast<float>(input_height - 1)));
    int y_int = static_cast<int>(input_y);
    dims_mapping[id].origin_ = y_int;
    dims_mapping[id].weight_ = (y_int >= input_height - 1) ? 0.5f : input_y - y_int;
  } else {  //x = id - output_height
-    float input_x = transform_coordinate(static_cast<float>(id - output_height), scale_width,
-                                         static_cast<float>(output_width), static_cast<float>(input_width),
-                                         roi_width_start, roi_width_end);
+    float input_x = scale_width == 1 ? static_cast<float>(id - output_height) :
+                                       transform_coordinate(static_cast<float>(id - output_height), scale_width,
+                                       static_cast<float>(output_width), static_cast<float>(input_width),
+                                       roi_width_start, roi_width_end);
    dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_x < 0 || input_x > static_cast<float>(input_width - 1)));
    input_x = max(0.0f, min(input_x, static_cast<float>(input_width - 1)));
    int x_int = static_cast<int>(input_x);
@ -370,9 +395,11 @@ __global__ void _ResizeCubicCoordinateMapping(
  bool is_y_axis = (id < output_height);
  int max_input_coord = static_cast<int>(is_y_axis ? input_height : input_width);

-  float input_coordinat = transform_coordinate(
+  float scale = is_y_axis ? scale_height : scale_width;
+  float input_coordinat = scale == 1 ? (is_y_axis ? id : id - output_height) :
+      transform_coordinate(
      static_cast<float>(is_y_axis ? id : id - output_height),
-      (is_y_axis ? scale_height : scale_width),
+      scale,
      static_cast<float>(is_y_axis ? output_height : output_width),
      static_cast<float>(max_input_coord),
      (is_y_axis ? roi_height_start : roi_width_start),
--- a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
@ -135,7 +135,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_2DBilinear_pytorch_half_pixel)
  std::vector<float> Y = {1.6666666f, 7.0f, 12.333333f};

  test.AddOutput<float>("Y", {sizes[0], sizes[1]}, Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: results mismatch
 }

 TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_4DBilinear_asymmetric) {
@ -438,7 +438,6 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample5dTest_WithSizes_CeilMode) {
  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider});
 }

-
 TEST(ResizeOpTest, ResizeOpNearestUpSample_Floor_Align_Corners) {
  OpTester test("Resize", 11);

@ -473,6 +472,34 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Floor_Align_Corners) {
  test.Run();
 }

+TEST(ResizeOpTest, ResizeOpNearest_OneToOneMappingBetweenInputAndOutputDataDims) {
+  OpTester test("Resize", 11);
+
+  std::vector<float> roi{};
+  // The scale is 1.0 on the outermost dim, so input and output map one-to-one there;
+  // only the inner dim (scale 0.5) is actually resized. This test ensures that the
+  // co-ordinate transformation is not applied to the outermost dim: if it were applied
+  // with the attributes set below, the output would not match the expected values.
+  std::vector<float> scales{1.0f, 0.5f};
+
+  test.AddAttribute("mode", "nearest");
+  test.AddAttribute("coordinate_transformation_mode", "tf_half_pixel_for_nn");
+  test.AddAttribute("nearest_mode", "ceil");
+
+  const int64_t C = 2, D = 3;
+  std::vector<float> X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+
+  test.AddInput<float>("X", {C, D}, X);
+  test.AddInput<float>("roi", {0}, roi);
+  test.AddInput<float>("scales", {2}, scales);
+
+  // One value is expected per index of the outermost dim. The output would instead be
+  // {5.0f, 5.0f} if the co-ordinate transformation were also applied to the outermost dim.
+  std::vector<float> Y = {2.0f, 5.0f};
+  test.AddOutput<float>("Y", {2, 1}, Y);
+  test.Run();
+}
+
 // custom OpTester to make 'scales' or 'sizes' a constant initializer if needed
 class ResizeOpTester : public OpTester {
 public:
@ -573,7 +600,7 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Nearest2xOptimization_Sizes) {
                            3.0f, 3.0f, 4.0f, 4.0f};

    test.AddOutput<float>("Y", {N, C, sizes[2], sizes[3]}, Y);
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: results mismatch
  };

  run_test(false);