diff --git a/onnxruntime/core/providers/cpu/nn/conv_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_attributes.h index b31030acc5..0c20916371 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/conv_attributes.h @@ -190,7 +190,8 @@ struct ConvAttributes { bool& post_slicing_needed, TensorShapeVector& slice_starts, TensorShapeVector& slice_ends, - TensorShapeVector& slice_axes) const { + TensorShapeVector& slice_axes, + bool channels_last = false) const { size_t rank = input_shape.NumDimensions(); // Make sure all "metadata" containers have the right number of elements if (rank > strides_p.size()) @@ -256,7 +257,11 @@ struct ConvAttributes { } post_slicing_needed = true; - slice_axes.push_back(static_cast(dim) + 2); + if (channels_last) { + slice_axes.push_back(static_cast(dim) + 1); + } else { + slice_axes.push_back(static_cast(dim) + 2); + } slice_starts.push_back(excess_output_head); slice_ends.push_back(excess_output_head + output_dim_size); // we may modify this below output_shape_with_revised_pads.push_back(excess_output_head + output_dim_size); // we may modify this below @@ -286,7 +291,11 @@ struct ConvAttributes { // Head has not been over-padded. Only tail pads need to be modified. post_slicing_needed = true; - slice_axes.push_back(static_cast(dim) + 2); + if (channels_last) { + slice_axes.push_back(static_cast(dim) + 1); + } else { + slice_axes.push_back(static_cast(dim) + 2); + } slice_starts.push_back(0); slice_ends.push_back(output_dim_size - revised_dim_size); } diff --git a/onnxruntime/core/providers/cuda/nn/conv.cc b/onnxruntime/core/providers/cuda/nn/conv.cc index bb2755f54a..81db3c4186 100644 --- a/onnxruntime/core/providers/cuda/nn/conv.cc +++ b/onnxruntime/core/providers/cuda/nn/conv.cc @@ -88,7 +88,7 @@ Status SliceOutUnwantedOutputSection(cudaStream_t stream, template Status Conv::UpdateState(OpKernelContext* context, bool bias_expected) const { - //set X + // set X const Tensor* X = context->Input(0); const TensorShape& x_shape = X->Shape(); const auto x_dims = x_shape.AsShapeVector(); @@ -180,7 +180,8 @@ Status Conv::UpdateState(OpKernelContext* context, bool bias_expected) TensorShapeVector y_dims_with_adjusted_pads(y_dims); ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShapeWithAdjustedPads(spatial_shape, kernel_shape, strides, dilations, pads, y_dims, y_dims_with_adjusted_pads, - post_slicing_required, slice_starts, slice_ends, slice_axes)); + post_slicing_required, slice_starts, slice_ends, slice_axes, + channels_last)); if (channels_last) { y_dims.push_back(M); y_dims_with_adjusted_pads.push_back(M); @@ -287,7 +288,7 @@ Status Conv::UpdateState(OpKernelContext* context, bool bias_expected) TensorShapeVector b_dims(2 + kernel_shape.size(), 1); b_dims[1] = b_shape[0]; ORT_RETURN_IF_ERROR(s_.b_tensor.Set(b_dims, CudnnTensor::GetDataType())); - //s_.b_data = reinterpret_cast(B->Data()); + // s_.b_data = reinterpret_cast(B->Data()); } else if (bias_expected) { TensorShapeVector b_dims(2 + kernel_shape.size(), 1); b_dims[1] = w_dims[0]; diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc index 2468e3f048..6846813c7c 100644 --- a/onnxruntime/core/providers/rocm/nn/conv.cc +++ b/onnxruntime/core/providers/rocm/nn/conv.cc @@ -1,5 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. + #include "core/providers/rocm/nn/conv.h" #include "core/common/span_utils.h" #include "core/providers/rocm/nn/conv_impl.h" @@ -180,7 +181,8 @@ Status Conv::UpdateState(OpKernelContext* context, bool bias_expected) TensorShapeVector y_dims_with_adjusted_pads(y_dims); ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShapeWithAdjustedPads(spatial_shape, kernel_shape, strides, dilations, pads, y_dims, y_dims_with_adjusted_pads, - post_slicing_required, slice_starts, slice_ends, slice_axes)); + post_slicing_required, slice_starts, slice_ends, slice_axes, + channels_last)); if (channels_last) { y_dims.push_back(M); @@ -346,19 +348,11 @@ Status Conv::ComputeInternal(OpKernelContext* context) const { s_.y_data, workspace.get(), s_.workspace_bytes)); - if (nullptr != s_.b_data) { - constexpr bool channels_last = NHWC; - if (channels_last) { - const Tensor* B = context->Input(2); - const auto& b_shape = B->Shape(); - ConvBiasImpl(Stream(context), reinterpret_cast(s_.Y->MutableData()), - reinterpret_cast(B->Data()), - reinterpret_cast(s_.Y->MutableData()), b_shape[0], s_.Y->Shape().Size()); - } else { - MIOPEN_RETURN_IF_ERROR(miopenConvolutionForwardBias(miopen_handle, &alpha, s_.b_tensor, s_.b_data, - &beta, s_.y_tensor, s_.y_data)); - } + constexpr bool channels_last = NHWC; + if (nullptr != s_.b_data && !channels_last) { + MIOPEN_RETURN_IF_ERROR(miopenConvolutionForwardBias(miopen_handle, &alpha, s_.b_tensor, s_.b_data, + &beta, s_.y_tensor, s_.y_data)); } // To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions // This may have lead to extra results that are unnecessary and hence we slice that off here @@ -367,6 +361,14 @@ Status Conv::ComputeInternal(OpKernelContext* context) const { s_.Y->MutableDataRaw(), s_.y_dims.GetDims(), s_.slice_starts, s_.slice_ends, s_.slice_axes, s_.element_size)); } + if (nullptr != s_.b_data && channels_last) { + const Tensor* B = context->Input(2); + const auto& b_shape = B->Shape(); + + ConvBiasImpl(Stream(context), reinterpret_cast(s_.Y->MutableDataRaw()), + reinterpret_cast(B->Data()), + reinterpret_cast(s_.Y->MutableDataRaw()), b_shape[0], s_.Y->Shape().Size()); + } return Status::OK(); } diff --git a/onnxruntime/test/contrib_ops/nhwc_conv_op_test.cc b/onnxruntime/test/contrib_ops/nhwc_conv_op_test.cc index 3d00885dd0..a79677357c 100644 --- a/onnxruntime/test/contrib_ops/nhwc_conv_op_test.cc +++ b/onnxruntime/test/contrib_ops/nhwc_conv_op_test.cc @@ -229,5 +229,51 @@ TEST(NhwcConvTest, Conv2D_AutoPad2) { RunNhwcConv(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); } +TEST(NhwcConvTest, Conv2D_asymmetric_padding1) { + NhwcConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{3, 3}, // kernel_shape + vector{1, 1, 0, 0}, // pads + vector{1, 1}, // strides + {} // excluded EPs + }; + + vector X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + vector X_shape = {1, 3, 3, 1}; + vector W = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}; + vector W_shape = {1, 3, 3, 1}; + vector B = {1.f}; + vector B_shape = {1}; + vector Y_shape = {1, 2, 2, 1}; + auto expected_vals = {13.f, 22.f, 28.f, 46.f}; + + RunNhwcConv(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); +} + +TEST(NhwcConvTest, Conv2D_asymmetric_padding2) { + NhwcConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{3, 3}, // kernel_shape + vector{0, 0, 1, 1}, // pads + vector{1, 1}, // strides + {} // excluded EPs + }; + + vector X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + vector X_shape = {1, 3, 3, 1}; + vector W = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}; + vector W_shape = {1, 3, 3, 1}; + vector B = {1.f}; + vector B_shape = {1}; + vector Y_shape = {1, 2, 2, 1}; + auto expected_vals = {46.f, 34.f, 40.f, 29.f}; + + RunNhwcConv(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); +} + } // namespace test } // namespace onnxruntime