Fix Nhwcconv with asymmetric padding (#15050)

1. Fix Nhwcconv with asymmetric padding. The slice axies are (1,2) with
NHWC layout.
2. For ROCm EP, Move Addbias after SliceOutUnwantedOutputSection,
because before that, the actual output of Conv is s_.y_data.
This commit is contained in:
PeixuanZuo 2023-03-17 08:38:25 +08:00 committed by GitHub
parent 6a6513f9c0
commit 55174bb2e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 77 additions and 19 deletions

View file

@ -190,7 +190,8 @@ struct ConvAttributes {
bool& post_slicing_needed,
TensorShapeVector& slice_starts,
TensorShapeVector& slice_ends,
TensorShapeVector& slice_axes) const {
TensorShapeVector& slice_axes,
bool channels_last = false) const {
size_t rank = input_shape.NumDimensions();
// Make sure all "metadata" containers have the right number of elements
if (rank > strides_p.size())
@ -256,7 +257,11 @@ struct ConvAttributes {
}
post_slicing_needed = true;
slice_axes.push_back(static_cast<int64_t>(dim) + 2);
if (channels_last) {
slice_axes.push_back(static_cast<int64_t>(dim) + 1);
} else {
slice_axes.push_back(static_cast<int64_t>(dim) + 2);
}
slice_starts.push_back(excess_output_head);
slice_ends.push_back(excess_output_head + output_dim_size); // we may modify this below
output_shape_with_revised_pads.push_back(excess_output_head + output_dim_size); // we may modify this below
@ -286,7 +291,11 @@ struct ConvAttributes {
// Head has not been over-padded. Only tail pads need to be modified.
post_slicing_needed = true;
slice_axes.push_back(static_cast<int64_t>(dim) + 2);
if (channels_last) {
slice_axes.push_back(static_cast<int64_t>(dim) + 1);
} else {
slice_axes.push_back(static_cast<int64_t>(dim) + 2);
}
slice_starts.push_back(0);
slice_ends.push_back(output_dim_size - revised_dim_size);
}

View file

@ -88,7 +88,7 @@ Status SliceOutUnwantedOutputSection(cudaStream_t stream,
template <typename T, bool NHWC>
Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected) const {
//set X
// set X
const Tensor* X = context->Input<Tensor>(0);
const TensorShape& x_shape = X->Shape();
const auto x_dims = x_shape.AsShapeVector();
@ -180,7 +180,8 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
TensorShapeVector y_dims_with_adjusted_pads(y_dims);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShapeWithAdjustedPads(spatial_shape, kernel_shape,
strides, dilations, pads, y_dims, y_dims_with_adjusted_pads,
post_slicing_required, slice_starts, slice_ends, slice_axes));
post_slicing_required, slice_starts, slice_ends, slice_axes,
channels_last));
if (channels_last) {
y_dims.push_back(M);
y_dims_with_adjusted_pads.push_back(M);
@ -287,7 +288,7 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
TensorShapeVector b_dims(2 + kernel_shape.size(), 1);
b_dims[1] = b_shape[0];
ORT_RETURN_IF_ERROR(s_.b_tensor.Set(b_dims, CudnnTensor::GetDataType<CudaT>()));
//s_.b_data = reinterpret_cast<const CudaT*>(B->Data<T>());
// s_.b_data = reinterpret_cast<const CudaT*>(B->Data<T>());
} else if (bias_expected) {
TensorShapeVector b_dims(2 + kernel_shape.size(), 1);
b_dims[1] = w_dims[0];

View file

@ -1,5 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/providers/rocm/nn/conv.h"
#include "core/common/span_utils.h"
#include "core/providers/rocm/nn/conv_impl.h"
@ -180,7 +181,8 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
TensorShapeVector y_dims_with_adjusted_pads(y_dims);
ORT_RETURN_IF_ERROR(conv_attrs_.InferOutputShapeWithAdjustedPads(spatial_shape, kernel_shape,
strides, dilations, pads, y_dims, y_dims_with_adjusted_pads,
post_slicing_required, slice_starts, slice_ends, slice_axes));
post_slicing_required, slice_starts, slice_ends, slice_axes,
channels_last));
if (channels_last) {
y_dims.push_back(M);
@ -346,19 +348,11 @@ Status Conv<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
s_.y_data,
workspace.get(),
s_.workspace_bytes));
if (nullptr != s_.b_data) {
constexpr bool channels_last = NHWC;
if (channels_last) {
const Tensor* B = context->Input<Tensor>(2);
const auto& b_shape = B->Shape();
ConvBiasImpl(Stream(context), reinterpret_cast<HipT*>(s_.Y->MutableData<T>()),
reinterpret_cast<const HipT*>(B->Data<T>()),
reinterpret_cast<HipT*>(s_.Y->MutableData<T>()), b_shape[0], s_.Y->Shape().Size());
} else {
MIOPEN_RETURN_IF_ERROR(miopenConvolutionForwardBias(miopen_handle, &alpha, s_.b_tensor, s_.b_data,
&beta, s_.y_tensor, s_.y_data));
}
constexpr bool channels_last = NHWC;
if (nullptr != s_.b_data && !channels_last) {
MIOPEN_RETURN_IF_ERROR(miopenConvolutionForwardBias(miopen_handle, &alpha, s_.b_tensor, s_.b_data,
&beta, s_.y_tensor, s_.y_data));
}
// To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions
// This may have lead to extra results that are unnecessary and hence we slice that off here
@ -367,6 +361,14 @@ Status Conv<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
s_.Y->MutableDataRaw(), s_.y_dims.GetDims(), s_.slice_starts,
s_.slice_ends, s_.slice_axes, s_.element_size));
}
if (nullptr != s_.b_data && channels_last) {
const Tensor* B = context->Input<Tensor>(2);
const auto& b_shape = B->Shape();
ConvBiasImpl(Stream(context), reinterpret_cast<const HipT*>(s_.Y->MutableDataRaw()),
reinterpret_cast<const HipT*>(B->Data<T>()),
reinterpret_cast<HipT*>(s_.Y->MutableDataRaw()), b_shape[0], s_.Y->Shape().Size());
}
return Status::OK();
}

View file

@ -229,5 +229,51 @@ TEST(NhwcConvTest, Conv2D_AutoPad2) {
RunNhwcConv(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape);
}
TEST(NhwcConvTest, Conv2D_asymmetric_padding1) {
NhwcConvOpAndTestAttributes attrs = {
"", // auto_pad
vector<int64_t>{1, 1}, // dilations
1, // group
vector<int64_t>{3, 3}, // kernel_shape
vector<int64_t>{1, 1, 0, 0}, // pads
vector<int64_t>{1, 1}, // strides
{} // excluded EPs
};
vector<float> X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
vector<int64_t> X_shape = {1, 3, 3, 1};
vector<float> W = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
vector<int64_t> W_shape = {1, 3, 3, 1};
vector<float> B = {1.f};
vector<int64_t> B_shape = {1};
vector<int64_t> Y_shape = {1, 2, 2, 1};
auto expected_vals = {13.f, 22.f, 28.f, 46.f};
RunNhwcConv(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
}
TEST(NhwcConvTest, Conv2D_asymmetric_padding2) {
NhwcConvOpAndTestAttributes attrs = {
"", // auto_pad
vector<int64_t>{1, 1}, // dilations
1, // group
vector<int64_t>{3, 3}, // kernel_shape
vector<int64_t>{0, 0, 1, 1}, // pads
vector<int64_t>{1, 1}, // strides
{} // excluded EPs
};
vector<float> X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
vector<int64_t> X_shape = {1, 3, 3, 1};
vector<float> W = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
vector<int64_t> W_shape = {1, 3, 3, 1};
vector<float> B = {1.f};
vector<int64_t> B_shape = {1};
vector<int64_t> Y_shape = {1, 2, 2, 1};
auto expected_vals = {46.f, 34.f, 40.f, 29.f};
RunNhwcConv(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
}
} // namespace test
} // namespace onnxruntime