enable exclude outside for resize op (#2203)

* enable exclude outside for resize mode * fix centos error * updates per review, plus more data types for resize * fix typo in error message * reset wrong fix
2026-07-16 18:31:27 +00:00 · 2019-10-21 11:18:33 -07:00 · 2019-10-21 11:18:33 -07:00 · dc5efbf5ce
commit dc5efbf5ce
parent 3cda9f717b
5 changed files with 66 additions and 11 deletions
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@ -422,6 +422,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, int32_t_float_float, OneHot);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, int64_t_float_float, OneHot);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, float, Resize);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, int32_t, Resize);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, Resize);

 Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
  static const BuildKernelCreateInfoFn function_table[] = {
@ -1080,7 +1082,12 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
                                                                  int32_t_float_float, OneHot)>,
 	    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11,
                                                                  int64_t_float_float, OneHot)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, float, Resize)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 
+                                                                  float, Resize)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 
+                                                                  int32_t, Resize)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 
+                                                                  uint8_t, Resize)>,
  };

  for (auto& function_table_entry : function_table) {
--- a/onnxruntime/core/providers/cpu/tensor/resize.cc
+++ b/onnxruntime/core/providers/cpu/tensor/resize.cc
@ -36,4 +36,18 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL(
    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
    Resize<float>);

+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    Resize,
+    11,
+    int32_t,
+    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<int32_t>()),
+    Resize<int32_t>);
+
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    Resize,
+    11,
+    uint8_t,
+    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<uint8_t>()),
+    Resize<uint8_t>);
+
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/cpu/tensor/upsample.cc
+++ b/onnxruntime/core/providers/cpu/tensor/upsample.cc
@ -420,6 +420,7 @@ float CubicInterpolation1D(const T* Xdata,
                           int64_t input_height,
                           int64_t input_width,
                           std::array<float, CubicModeGridLength>& coeff_array,
+                           float coeff_sum,
                           std::unordered_map<int64_t, float>& cache) {
  // When calculating cubic interpolation we move the 4*4 grid across the original data and therefore there is
  // opportunity to cache the results for previously seen combinations.
@ -434,7 +435,7 @@ float CubicInterpolation1D(const T* Xdata,
  float result = 0;
  for (int i = 0, j = -1; i < static_cast<int>(CubicModeGridLength); i++, j++) {
    auto orig_data = GetDataForCoordinate(Xdata, x + j, y, input_height, input_width);
-    result += static_cast<float>(coeff_array[i] * orig_data);
+    result += coeff_array[i]/coeff_sum * orig_data;
  }
  cache[grid_start_pos] = result;

@ -452,6 +453,7 @@ void ResizeBiCubic(
    float cubic_coeff_a,
    bool use_extrapolation,
    float extrapolation_value,
+    bool exclude_outside,
    const std::vector<float>& roi,
    const T* Xdata,
    T* Ydata,
@ -494,6 +496,12 @@ void ResizeBiCubic(
    }
  }

+  // set up temp arrays to hold coefficients when exclude_outside is set to true
+  std::array<float, CubicModeGridLength> y_coeff_holder;
+  std::array<float, CubicModeGridLength> x_coeff_holder;
+  float y_coeff_sum = 1;
+  float x_coeff_sum = 1;
+
  for (int64_t n = 0; n < batch_size; n++) {
    for (int64_t c = 0; c < num_channels; c++) {
      for (int64_t y = 0; y < output_height; ++y) {
@ -509,7 +517,19 @@ void ResizeBiCubic(
        }

        auto y_int = static_cast<int64_t>(std::floor(in_y));
-        auto& coeff_y = cubic_coeffs[in_y - y_int];
+        auto& coeff_y = exclude_outside ? y_coeff_holder : cubic_coeffs[in_y - y_int];
+        y_coeff_sum = 1;
+
+        if (exclude_outside) {
+          // When true, the weight of sampling locations outside the grid will be set to 0
+          // and the weight will be renormalized so that their sum is 1.0
+          y_coeff_sum = 0;
+          auto& orig_y_coeffs = cubic_coeffs[in_y - y_int];
+          for (int64_t i = 0, y_val = y_int - 1; y_val <= y_int + 2; y_val++, i++) {
+            y_coeff_holder[i] = (y_val < 0 || y_val >= static_cast<float>(input_height)) ? 0.0f : orig_y_coeffs[i];
+            y_coeff_sum += y_coeff_holder[i];
+          }
+        }

        for (int64_t x = 0; x < output_width; ++x) {
          auto in_x = x_original[x];
@ -523,7 +543,19 @@ void ResizeBiCubic(

          auto x_int = static_cast<int64_t>(std::floor(in_x));
          auto s_x = static_cast<float>(in_x - x_int);
-          auto& coeff_x = cubic_coeffs[s_x];
+          auto& coeff_x = exclude_outside ? x_coeff_holder : cubic_coeffs[s_x];
+          x_coeff_sum = 1;
+
+          if (exclude_outside) {
+            // When true, the weight of sampling locations outside the grid will be set to 0
+            // and the weight will be renormalized so that their sum is 1.0
+            x_coeff_sum = 0;
+            auto& orig_x_coeff = cubic_coeffs[s_x];
+            for (int64_t i = 0, x_val = x_int - 1; x_val <= x_int + 2; x_val++, i++) {
+              x_coeff_holder[i] = (x_val < 0 || x_val >= static_cast<float>(input_width)) ? 0.0f : orig_x_coeff[i];
+              x_coeff_sum += x_coeff_holder[i];
+            }
+          }

          // Compute cubic interpolation in x dimension using the x coefficients.
          // From the result of cubic interpolation in x dim, compute cubic interpolation in y dimension
@ -531,9 +563,9 @@ void ResizeBiCubic(
          float result = 0;
          for (int64_t y_val = y_int - 1, i = 0; y_val <= y_int + 2; y_val++, i++) {
            auto x_interpolation_result = CubicInterpolation1D(Xdata, x_int, y_val,
-                                                               input_height, input_width, coeff_x,
+                                                               input_height, input_width, coeff_x, x_coeff_sum,
                                                               interpolation_result_cache);
-            result += x_interpolation_result * coeff_y[i];
+            result += x_interpolation_result * coeff_y[i]/y_coeff_sum;
          }

          Ydata[y * output_width + x] = static_cast<T>(result);
@ -615,7 +647,7 @@ Status Upsample<T>::BaseCompute(OpKernelContext* context, const std::vector<floa

      ResizeBiCubic(batch_size, num_channels, input_height, input_width,
                    is_2D ? scales[0] : scales[2], is_2D ? scales[1] : scales[3], cubic_coeff_a_, use_extrapolation_,
-                    extrapolation_value_, roi, X->template Data<float>(), Y->template MutableData<float>(),
+                    extrapolation_value_, exclude_outside_, roi, X->template Data<float>(), Y->template MutableData<float>(),
                    get_original_coordinate_);
      return Status::OK();
    }
--- a/onnxruntime/core/providers/cpu/tensor/upsample.h
+++ b/onnxruntime/core/providers/cpu/tensor/upsample.h
@ -59,6 +59,10 @@ class UpsampleBase {
    cubic_coeff_a_ = info.GetAttrOrDefault<float>("cubic_coeff_a", -0.75f);
    exclude_outside_ = info.GetAttrOrDefault<int64_t>("exclude_outside", 0) == 0 ? false : true;

+    if (exclude_outside_ == 1 && mode_ != CUBIC) {
+      ORT_THROW("exclude_outside can be set to 1 only when mode is CUBIC. Current mode is set to " + mode );
+    }
+
    // after version 11 update, this optimization is no longer applicable for all the available modes...
    // TODO : needs more testing to enable this for version 11
    use_nearest2x_optimization = start > 10 ? false : true;
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@ -429,10 +429,8 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
      {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
      {"det_2d", "not implemented yet"},
      {"det_nd", "not implemented yet"},
-      {"resize_downsample_scales_cubic_A_n0p5_exclude_outside", "not implemented yet"},
-      {"resize_downsample_scales_cubic_align_corners", "not implemented yet"},
-      {"resize_downsample_scales_linear_align_corners", "not implemented yet"},
-      {"resize_upsample_scales_cubic_A_n0p5_exclude_outside", "not implemented yet"},
+      {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
+      {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
      {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
      {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
      {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},