[DML EP] Add NhwcConv (#15194)

This commit is contained in:
Patrice Vignola 2023-04-10 23:16:09 -07:00 committed by GitHub
parent 9acbfc6a29
commit c5b6ee1a99
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 129 additions and 58 deletions

View file

@ -1181,6 +1181,7 @@ Do not modify directly.*
|FusedMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
|Gelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
|GroupNorm|*in* X:**T**<br> *in* gamma:**M**<br> *in* beta:**M**<br> *out* Y:**T**|1+|**M** = tensor(float), tensor(float16)<br/> **T** = tensor(float), tensor(float16)|
|NhwcConv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
|QLinearAdd|*in* A:**T**<br> *in* A_scale:**tensor(float)**<br> *in* A_zero_point:**T**<br> *in* B:**T**<br> *in* B_scale:**tensor(float)**<br> *in* B_zero_point:**T**<br> *in* C_scale:**tensor(float)**<br> *in* C_zero_point:**T**<br> *out* C:**T**|1+|**T** = tensor(int8), tensor(uint8)|
|QLinearSigmoid|*in* X:**T**<br> *in* X_scale:**tensor(float)**<br> *in* X_zero_point:**T**<br> *in* Y_scale:**tensor(float)**<br> *in* Y_zero_point:**T**<br> *out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)|
|QuantizeLinear|*in* x:**T1**<br> *in* y_scale:**T1**<br> *in* y_zero_point:**T2**<br> *out* y:**T2**|1+|**T1** = tensor(float)<br/> **T2** = tensor(uint8)|

View file

@ -9,14 +9,14 @@ namespace Dml
class DmlOperatorConvInteger : public DmlOperator, public ConvolutionHelperBase
{
private:
enum InputTensors
{
IN_X,
IN_X_ZERO_POINT,
IN_F,
IN_F_ZERO_POINT,
enum InputTensors
{
IN_X,
IN_X_ZERO_POINT,
IN_F,
IN_F_ZERO_POINT,
};
public:
using Self = DmlOperatorConvInteger;
@ -24,15 +24,15 @@ public:
const MLOperatorKernelCreationContext& kernelInfo
)
: DmlOperator(kernelInfo),
ConvolutionHelperBase(kernelInfo, kernelInfo.GetTensorShapeDescription(), false, false, 0, 1)
ConvolutionHelperBase(kernelInfo, kernelInfo.GetTensorShapeDescription(), false, false, false, 0, 1)
{
std::vector<std::optional<uint32_t>> kernelInputIndices = {0, 2, 1, 3};
std::vector<std::optional<uint32_t>> kernelOutputIndices = {0};
DmlOperator::Initialize(kernelInfo, kernelInputIndices);
// DirectML is limited to handle only 2D. So for 1D tensors, massage the tensor descriptions. By default, the
// TensorDesc simply right aligns all the values up to 4D (padding the leading dimensions with 1's),
// DirectML is limited to handle only 2D. So for 1D tensors, massage the tensor descriptions. By default, the
// TensorDesc simply right aligns all the values up to 4D (padding the leading dimensions with 1's),
// but 1D tensors actually need to insert the 1 between C and W. e.g. [2,3,4] becomes [2,3,1,4]
m_inputTensorDescs[IN_X] = CreateTensorDescFromInput(kernelInfo, 0/*Onnx Index*/, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
m_inputTensorDescs[IN_F] = CreateTensorDescFromInput(kernelInfo, 1/*Onnx Index*/, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
@ -42,9 +42,9 @@ public:
// Resize the Filter ZeroPoint to be the same dimension as the input tensor.
// The 1D tensor needs to be moved to the C channel.
m_inputTensorDescs[IN_F_ZERO_POINT] = CreateTensorDescFromInput(
kernelInfo,
3/*Onnx Index*/,
TensorAxis::DoNotCoerce,
kernelInfo,
3/*Onnx Index*/,
TensorAxis::DoNotCoerce,
TensorAxis::C,
TensorAxis::LeftAligned,
std::nullopt,

View file

@ -15,10 +15,11 @@ public:
const MLOperatorKernelCreationContext& kernelInfo,
DML_CONVOLUTION_MODE mode,
DML_CONVOLUTION_DIRECTION direction,
bool hasDynamicPads
bool hasDynamicPads,
bool isNhwc
)
: DmlOperator(kernelInfo),
ConvolutionHelperBase(kernelInfo, kernelInfo.GetTensorShapeDescription(), direction == DML_CONVOLUTION_DIRECTION_BACKWARD, hasDynamicPads, 0, 1)
ConvolutionHelperBase(kernelInfo, kernelInfo.GetTensorShapeDescription(), direction == DML_CONVOLUTION_DIRECTION_BACKWARD, hasDynamicPads, isNhwc, 0, 1)
{
uint32_t biasIndex = hasDynamicPads ? 3 : 2;
bool hasBiasInput = kernelInfo.GetInputCount() > biasIndex;
@ -33,6 +34,43 @@ public:
// e.g. [2,3,4] becomes [2,3,1,4]
m_inputTensorDescs[0] = CreateTensorDescFromInput(kernelInfo, 0, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
m_inputTensorDescs[1] = CreateTensorDescFromInput(kernelInfo, 1, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
m_outputTensorDescs[0] = CreateTensorDescFromOutput(kernelInfo, 0, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
// When isNhwc is set, the input, filter and output sizes are read in channels-last order
// (index 0 = batch/feature maps, 1 = height, 2 = width, 3 = channels). No data is moved here:
// each descriptor is replaced by one whose sizes are listed in N,C,H,W order and whose explicit
// strides (also in N,C,H,W order) give the channel axis a stride of 1, i.e. they keep addressing
// a buffer in which channels are innermost.
if (isNhwc)
{
// ---- Input X, read as [batch, height, width, channels] ----
// Restrict to 4D like other implementations
ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[0].GetDimensionCount() == 4);
// The individual sizes are copied into locals before the descriptor is overwritten below.
const auto inputSizes = m_inputTensorDescs[0].GetSizes();
const uint32_t inputBatch = inputSizes[0];
const uint32_t inputHeight = inputSizes[1];
const uint32_t inputWidth = inputSizes[2];
const uint32_t inputChannels = inputSizes[3];
const std::array<uint32_t, 4> nchwInputSizes = {inputBatch, inputChannels, inputHeight, inputWidth};
// Strides per axis (N, C, H, W): batch = H*W*C, channel = 1, height = W*C, width = C.
const std::array<uint32_t, 4> nchwInputStrides = {inputHeight * inputWidth * inputChannels, 1, inputWidth * inputChannels, inputChannels};
m_inputTensorDescs[0] = TensorDesc(m_inputTensorDescs[0].GetDmlDataType(), nchwInputSizes, nchwInputStrides);
// ---- Filter W, read as [featureMaps, kernelHeight, kernelWidth, channelsPerGroup] ----
// Restrict to 4D like other implementations
ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[1].GetDimensionCount() == 4);
const auto weightSizes = m_inputTensorDescs[1].GetSizes();
const uint32_t featureMaps = weightSizes[0];
const uint32_t kernelHeight = weightSizes[1];
const uint32_t kernelWidth = weightSizes[2];
const uint32_t channelsPerGroup = weightSizes[3];
const std::array<uint32_t, 4> nchwKernelSizes = {featureMaps, channelsPerGroup, kernelHeight, kernelWidth};
// Same stride pattern as the input: the channels-per-group axis gets stride 1.
const std::array<uint32_t, 4> nchwKernelStrides = {kernelHeight * kernelWidth * channelsPerGroup, 1, kernelWidth * channelsPerGroup, channelsPerGroup};
m_inputTensorDescs[1] = TensorDesc(m_inputTensorDescs[1].GetDmlDataType(), nchwKernelSizes, nchwKernelStrides);
// ---- Output Y, read as [batch, height, width, channels] ----
// Restrict to 4D like other implementations
ML_CHECK_VALID_ARGUMENT(m_outputTensorDescs[0].GetDimensionCount() == 4);
const auto outputSizes = m_outputTensorDescs[0].GetSizes();
const uint32_t outputBatch = outputSizes[0];
const uint32_t outputHeight = outputSizes[1];
const uint32_t outputWidth = outputSizes[2];
const uint32_t outputChannels = outputSizes[3];
const std::array<uint32_t, 4> nchwOutputSizes = {outputBatch, outputChannels, outputHeight, outputWidth};
// Same stride pattern again, so the result is addressed with channels innermost.
const std::array<uint32_t, 4> nchwOutputStrides = {outputHeight * outputWidth * outputChannels, 1, outputWidth * outputChannels, outputChannels};
m_outputTensorDescs[0] = TensorDesc(m_outputTensorDescs[0].GetDmlDataType(), nchwOutputSizes, nchwOutputStrides);
}
// Bias is optional so only adjust it if it exists.
if (hasBiasInput)
@ -47,9 +85,9 @@ public:
// Resize the bias to be the same dimension as the input tensor.
// The 1D tensor needs to be moved to the C channel.
m_inputTensorDescs[biasIndex] = CreateTensorDescFromInput(
kernelInfo,
biasIndex,
TensorAxis::DoNotCoerce,
kernelInfo,
biasIndex,
TensorAxis::DoNotCoerce,
TensorAxis::C,
TensorAxis::LeftAligned,
std::nullopt,
@ -57,8 +95,6 @@ public:
);
}
m_outputTensorDescs[0] = CreateTensorDescFromOutput(kernelInfo, 0, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
std::optional<ActivationOperatorDesc> fusedActivation = FusionHelpers::TryGetFusedActivationDesc(kernelInfo);
DML_OPERATOR_DESC fusedActivationDmlDesc = fusedActivation ? fusedActivation->GetDmlDesc() : DML_OPERATOR_DESC();
std::vector<DML_TENSOR_DESC> inputDescs = GetDmlInputDescs();
@ -95,20 +131,21 @@ public:
};
// A specific type of operation for registration.
template <DML_CONVOLUTION_MODE Mode, DML_CONVOLUTION_DIRECTION Direction, bool hasDynamicPads = false>
template <DML_CONVOLUTION_MODE Mode, DML_CONVOLUTION_DIRECTION Direction, bool hasDynamicPads = false, bool isNhwc = false>
class DmlOperatorConvolutionTemplate : public DmlOperatorConvolution
{
public:
DmlOperatorConvolutionTemplate(const MLOperatorKernelCreationContext& kernelInfo)
: DmlOperatorConvolution(kernelInfo, Mode, Direction, hasDynamicPads)
: DmlOperatorConvolution(kernelInfo, Mode, Direction, hasDynamicPads, isNhwc)
{
}
};
DML_OP_DEFINE_CREATION_FUNCTION(Conv, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_FORWARD>);
DML_OP_DEFINE_CREATION_FUNCTION(NhwcConv, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_FORWARD, false, true>);
DML_OP_DEFINE_CREATION_FUNCTION(ConvTranspose, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_BACKWARD>);
DML_OP_DEFINE_CREATION_FUNCTION(DmlFusedConv, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_FORWARD>);
DML_OP_DEFINE_CREATION_FUNCTION(DmlFusedConvTranspose, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_BACKWARD>);
DML_OP_DEFINE_CREATION_FUNCTION(DmlFusedConv, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_FORWARD>);
DML_OP_DEFINE_CREATION_FUNCTION(DmlFusedConvTranspose, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_BACKWARD>);
DML_OP_DEFINE_CREATION_FUNCTION(ConvTransposeWithDynamicPads, DmlOperatorConvolutionTemplate<DML_CONVOLUTION_MODE_CROSS_CORRELATION, DML_CONVOLUTION_DIRECTION_BACKWARD, true>);
} // namespace Dml

View file

@ -9,19 +9,19 @@ namespace Dml
class DmlOperatorQLinearConv : public DmlOperator, public ConvolutionHelperBase
{
private:
enum InputTensors
{
IN_X,
enum InputTensors
{
IN_X,
IN_X_SCALE,
IN_X_ZERO_POINT,
IN_F,
IN_X_ZERO_POINT,
IN_F,
IN_F_SCALE,
IN_F_ZERO_POINT,
IN_F_ZERO_POINT,
IN_BIAS,
IN_Y_SCALE,
IN_Y_ZERO_POINT
};
public:
using Self = DmlOperatorQLinearConv;
@ -29,15 +29,15 @@ public:
const MLOperatorKernelCreationContext& kernelInfo
)
: DmlOperator(kernelInfo),
ConvolutionHelperBase(kernelInfo, kernelInfo.GetTensorShapeDescription(), false, false, 0, 3)
ConvolutionHelperBase(kernelInfo, kernelInfo.GetTensorShapeDescription(), false, false, false, 0, 3)
{
std::vector<std::optional<uint32_t>> kernelInputIndices = {0, 1, 2, 3, 4, 5, 8, 6, 7};
std::vector<std::optional<uint32_t>> kernelOutputIndices = {0};
DmlOperator::Initialize(kernelInfo, kernelInputIndices);
// DirectML is limited to handle only 2D. So for 1D tensors, massage the tensor descriptions. By default, the
// TensorDesc simply right aligns all the values up to 4D (padding the leading dimensions with 1's),
// DirectML is limited to handle only 2D. So for 1D tensors, massage the tensor descriptions. By default, the
// TensorDesc simply right aligns all the values up to 4D (padding the leading dimensions with 1's),
// but 1D tensors actually need to insert the 1 between C and W. e.g. [2,3,4] becomes [2,3,1,4]
m_inputTensorDescs[IN_X] = CreateTensorDescFromInput(kernelInfo, 0/*Onnx Index*/, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
m_inputTensorDescs[IN_F] = CreateTensorDescFromInput(kernelInfo, 3/*Onnx Index*/, TensorAxis::DoNotCoerce, TensorAxis::NoPlacementAdjustment, NonspatialDimensionCount, std::nullopt);
@ -56,9 +56,9 @@ public:
// Resize the bias to be the same dimension as the input tensor.
// The 1D tensor needs to be moved to the C channel.
m_inputTensorDescs[IN_BIAS] = CreateTensorDescFromInput(
kernelInfo,
8/*Onnx Index*/,
TensorAxis::DoNotCoerce,
kernelInfo,
8/*Onnx Index*/,
TensorAxis::DoNotCoerce,
TensorAxis::C,
TensorAxis::LeftAligned,
std::nullopt,
@ -69,9 +69,9 @@ public:
// Resize the Filter ZeroPoint to be the same dimension as the input tensor.
// The 1D tensor needs to be moved to the C channel.
m_inputTensorDescs[IN_F_ZERO_POINT] = CreateTensorDescFromInput(
kernelInfo,
5/*Onnx Index*/,
TensorAxis::DoNotCoerce,
kernelInfo,
5/*Onnx Index*/,
TensorAxis::DoNotCoerce,
TensorAxis::C,
TensorAxis::LeftAligned,
std::nullopt,
@ -80,9 +80,9 @@ public:
// Resize the Filter Scale to be the same dimension as the input tensor.
// The 1D tensor needs to be moved to the C channel.
m_inputTensorDescs[IN_F_SCALE] = CreateTensorDescFromInput(
kernelInfo,
4/*Onnx Index*/,
TensorAxis::DoNotCoerce,
kernelInfo,
4/*Onnx Index*/,
TensorAxis::DoNotCoerce,
TensorAxis::C,
TensorAxis::LeftAligned,
std::nullopt,

View file

@ -194,6 +194,7 @@ struct OperatorRegistrationInformation
DML_OP_EXTERN_CREATION_FUNCTION(Copy);
DML_OP_EXTERN_CREATION_FUNCTION(FC);
DML_OP_EXTERN_CREATION_FUNCTION(Conv);
DML_OP_EXTERN_CREATION_FUNCTION(NhwcConv);
DML_OP_EXTERN_CREATION_FUNCTION(ConvTranspose);
DML_OP_EXTERN_CREATION_FUNCTION(ConvTransposeWithDynamicPads);
DML_OP_EXTERN_CREATION_FUNCTION(AveragePool);
@ -528,6 +529,7 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation
// Deep Learning Standard Layers
{REG_INFO( 7, Conv, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)},
{REG_INFO( 11, Conv, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)},
{REG_INFO_MS( 1, NhwcConv, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)},
{REG_INFO( 7, ConvTranspose, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)},
{REG_INFO( 11, ConvTranspose, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)},
{REG_INFO( 7, AveragePool, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)},

View file

@ -292,11 +292,12 @@ namespace OperatorHelper
// are ordered such that they are at the end (e.g. NCHW or NCDHW).
std::vector<DimensionType> InitializeKernelOutputDimensions(
gsl::span<const DimensionType> inputDimensions,
const KernelArgs& args
const KernelArgs& args,
bool isNhwc
)
{
ML_CHECK_VALID_ARGUMENT(gsl::narrow_cast<uint32_t>(inputDimensions.size()) >= args.spatialDimensionCount);
int dimOffset = gsl::narrow_cast<int>(inputDimensions.size()) - args.spatialDimensionCount;
int dimOffset = isNhwc ? 1 : gsl::narrow_cast<int>(inputDimensions.size()) - args.spatialDimensionCount;
std::vector<DimensionType> outputDimensions(inputDimensions.begin(), inputDimensions.end());
@ -478,7 +479,8 @@ namespace OperatorHelper
void ResolveAutoPadding(
KernelArgs& args,
gsl::span<const DimensionType> inputDimensions
gsl::span<const DimensionType> inputDimensions,
bool isNhwc
)
{
if (!args.autoPad)
@ -490,7 +492,9 @@ namespace OperatorHelper
uint32_t spatialDimensionCount = gsl::narrow_cast<uint32_t>(inputDimensions.size()) - NonspatialDimensionCount;
ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); // Support up to 3D convolution (in 5D tensor).
const int dimOffset = gsl::narrow_cast<int>(inputDimensions.size()) - spatialDimensionCount;
ML_CHECK_VALID_ARGUMENT(!isNhwc || inputDimensions.size() == 4);
const int dimOffset = isNhwc ? 1 : gsl::narrow_cast<int>(inputDimensions.size()) - spatialDimensionCount;
for (size_t dim = 0; dim < spatialDimensionCount; ++dim)
{
@ -763,8 +767,16 @@ namespace OperatorHelper
ResolvingPadding(inputDimensions);
m_outputShapes.resize(1);
m_outputShapes[0] = InitializeKernelOutputDimensions(inputDimensions, m_kernel);
m_outputShapes[0].GetShape()[C] = filterDims[K];
m_outputShapes[0] = InitializeKernelOutputDimensions(inputDimensions, m_kernel, m_isNhwc);
// The output's channel count is taken from the filter's K dimension (filterDims[K]).
// Which index of the output shape receives it depends on the layout flag:
// NhwcInputDims::C (the last enumerator of { N, H, W, C }) when m_isNhwc is set,
// otherwise the unscoped InputDims C enumerator (second of { N, C, H, W }).
if (m_isNhwc)
{
// NhwcInputDims is a scoped enum, hence the explicit cast to an index.
m_outputShapes[0].GetShape()[static_cast<uint32_t>(NhwcInputDims::C)] = filterDims[K];
}
else
{
m_outputShapes[0].GetShape()[C] = filterDims[K];
}
}
void ConvolutionHelperBase::InitializeKernelAndShapesTransposed(
@ -868,7 +880,7 @@ namespace OperatorHelper
void ConvolutionHelperBase::ResolvingPadding(gsl::span<const DimensionType> inputDimensions)
{
ResolveAutoPadding(m_kernel, inputDimensions);
ResolveAutoPadding(m_kernel, inputDimensions, m_isNhwc);
}
std::vector<EdgeShapes> GemmHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const

View file

@ -204,7 +204,8 @@ struct KernelArgs
std::vector<DimensionType> InitializeKernelOutputDimensions(
gsl::span<const DimensionType> inputDimensions,
const KernelArgs& args);
const KernelArgs& args,
bool isNhwc = false);
std::vector<DimensionType> InitializeKernelOutputDimsTranspose(
gsl::span<const DimensionType> inputDimensions,
@ -219,7 +220,8 @@ KernelArgs InitializeKernel(
void ResolveAutoPadding(
KernelArgs& args,
gsl::span<const DimensionType> inputDimensions);
gsl::span<const DimensionType> inputDimensions,
bool isNhwc = false);
void MatMulShapeMapping(
std::vector<DimensionType>& inputShape0,
@ -450,13 +452,15 @@ class ConvolutionHelperBase
public:
enum FilterDims { K };
enum InputDims { N, C, H, W };
enum class NhwcInputDims { N, H, W, C };
public:
// Info_t is used to obtain attributes which will be used for calculating the output shape later.
template<typename Info_t, typename Shape_t>
ConvolutionHelperBase(const Info_t& info, const Shape_t& shape, bool transpose, bool hasDynamicPads, uint32_t inputTensorIndex, uint32_t filterTensorIndex) :
ConvolutionHelperBase(const Info_t& info, const Shape_t& shape, bool transpose, bool hasDynamicPads, bool isNhwc, uint32_t inputTensorIndex, uint32_t filterTensorIndex) :
m_inputTensorIndex(inputTensorIndex),
m_filterTensorIndex(filterTensorIndex),
m_isNhwc(isNhwc),
m_kernel(InitializeKernel(info, shape.GetInputTensorDimensionCount(inputTensorIndex), shape.GetInputTensorShape(filterTensorIndex)))
{
m_groupCount = info.template GetOptionalAttribute<uint32_t>(AttrName::Group, 1);
@ -487,6 +491,7 @@ protected:
uint32_t m_groupCount;
uint32_t m_inputTensorIndex;
uint32_t m_filterTensorIndex;
bool m_isNhwc;
KernelArgs m_kernel;
std::vector<EdgeShapes> m_outputShapes;
};
@ -495,28 +500,35 @@ class ConvHelper : public ConvolutionHelperBase
{
public:
template<typename Info_t, typename Shape_t>
ConvHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, false, false, 0, 1) {}
ConvHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, false, false, false, 0, 1) {}
};
// Convolution helper for the channels-last (NHWC) variant: a thin wrapper that forwards to
// ConvolutionHelperBase with the isNhwc flag set, no transpose, no dynamic pads, the data
// tensor at input index 0 and the filter tensor at input index 1.
class NhwcConvHelper : public ConvolutionHelperBase
{
public:
    template<typename Info_t, typename Shape_t>
    NhwcConvHelper(const Info_t& info, const Shape_t& shape)
        : ConvolutionHelperBase(
              info,
              shape,
              /*transpose*/ false,
              /*hasDynamicPads*/ false,
              /*isNhwc*/ true,
              /*inputTensorIndex*/ 0,
              /*filterTensorIndex*/ 1)
    {
    }
};
class ConvTransposeHelper : public ConvolutionHelperBase
{
public:
template<typename Info_t, typename Shape_t>
ConvTransposeHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, true, false, 0, 1) {}
ConvTransposeHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, true, false, false, 0, 1) {}
};
class ConvTransposeWithDynamicPadsHelper : public ConvolutionHelperBase
{
public:
template<typename Info_t, typename Shape_t>
ConvTransposeWithDynamicPadsHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, true, true, 0, 1) {}
ConvTransposeWithDynamicPadsHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, true, true, false, 0, 1) {}
};
class QLinearConvHelper : public ConvolutionHelperBase
{
public:
template<typename Info_t, typename Shape_t>
QLinearConvHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, false, false, 0, 3) {}
QLinearConvHelper(const Info_t& info, const Shape_t& shape) : ConvolutionHelperBase(info, shape, false, false, false, 0, 3) {}
};
class GemmHelper
@ -1416,6 +1428,7 @@ public:
};
using ShapeInferenceHelper_Conv = ConvHelper;
using ShapeInferenceHelper_NhwcConv = NhwcConvHelper;
using ShapeInferenceHelper_ConvTranspose = ConvTransposeHelper;
using ShapeInferenceHelper_ConvTransposeWithDynamicPads = ConvTransposeWithDynamicPadsHelper;
using ShapeInferenceHelper_ConvInteger = ConvHelper;

View file

@ -410,6 +410,7 @@ namespace OperatorHelper
static const int sc_sinceVer_Attention = 1;
static const int sc_sinceVer_SkipLayerNormalization = 1;
static const int sc_sinceVer_EmbedLayerNormalization = 1;
static const int sc_sinceVer_NhwcConv = 1;
static const int sc_sinceVer_BiasAdd = 1;
static const int sc_sinceVer_QuickGelu = 1;
static const int sc_sinceVer_GroupNorm = 1;

View file

@ -32,8 +32,9 @@ void TestNhwcConvOp(const NhwcConvOpAndTestAttributes& attributes,
int min_cuda_architecture = use_float16 ? 530 : 0;
bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get());
bool enable_dml = (nullptr != DefaultDmlExecutionProvider().get());
if (enable_cuda || enable_rocm) {
if (enable_cuda || enable_rocm || enable_dml) {
OpTester test("NhwcConv", 1, onnxruntime::kMSDomain);
test.AddAttribute("group", attributes.group);
test.AddAttribute("kernel_shape", attributes.kernel_shape);
@ -82,6 +83,10 @@ void TestNhwcConvOp(const NhwcConvOpAndTestAttributes& attributes,
execution_providers.push_back(DefaultRocmExecutionProvider());
}
if (enable_dml) {
execution_providers.push_back(DefaultDmlExecutionProvider());
}
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
}