Revert QlinearAdd1

This commit is contained in:
Jeff Bloomfield 2023-08-07 14:30:08 -07:00
parent 49791b5dec
commit 4dfeef48eb
5 changed files with 8 additions and 105 deletions

View file

@ -2,18 +2,6 @@
// Licensed under the MIT License.
#pragma once
// Operator description for DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1.
// Members annotated _Maybenull_ may be null; unlike the other scale tensors,
// BScaleTensor is nullable in this variant.
// The member order matches the order of the entries in
// DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA_FIELDS.
struct DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC
{
const DML_TENSOR_DESC* ATensor;
const DML_TENSOR_DESC* AScaleTensor;
_Maybenull_ const DML_TENSOR_DESC* AZeroPointTensor;
const DML_TENSOR_DESC* BTensor;
_Maybenull_ const DML_TENSOR_DESC* BScaleTensor;
_Maybenull_ const DML_TENSOR_DESC* BZeroPointTensor;
const DML_TENSOR_DESC* OutputScaleTensor; // This is an input tensor
_Maybenull_ const DML_TENSOR_DESC* OutputZeroPointTensor; // This is an input tensor
const DML_TENSOR_DESC* OutputTensor;
};
struct DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC
{
@ -26,9 +14,7 @@ struct DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC
_Maybenull_ const DML_TENSOR_DESC* BiasTensor;
const DML_TENSOR_DESC* OutputTensor;
};
// Operator type values declared as plain ints rather than DML_OPERATOR_TYPE
// enumerators; uses of the ADD1 value cast it to DML_OPERATOR_TYPE.
// NOTE(review): the first and third lines below are identical - presumably the
// removed and added sides of the same line in this diff view; confirm.
const int DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT = 0x80000011;
const int DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1 = 0x8000000e;
const int DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT = 0x80000011;
namespace ApiTraits
{
@ -1045,12 +1031,6 @@ struct OperatorDescTraits<DML_DIAGONAL_MATRIX1_OPERATOR_DESC>
static constexpr DML_OPERATOR_TYPE Type = DML_OPERATOR_DIAGONAL_MATRIX1;
};
// Maps the QuantizedLinearAdd1 desc struct to its operator type value.
template <>
struct OperatorDescTraits<DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC>
{
// The cast is required because the operator value is declared as a const int.
static constexpr DML_OPERATOR_TYPE Type = (DML_OPERATOR_TYPE) DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1;
};
template <>
struct OperatorDescTraits<DML_ACTIVATION_ELU_OPERATOR_DESC>
{
@ -2190,12 +2170,6 @@ struct OperatorTypeTraits<(DML_OPERATOR_TYPE)DML_OPERATOR_ACTIVATION_GELU>
using DescType = DML_ACTIVATION_GELU_OPERATOR_DESC;
};
// Reverse mapping: from the QuantizedLinearAdd1 operator type value to its desc struct.
template <>
struct OperatorTypeTraits<(DML_OPERATOR_TYPE)DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1>
{
using DescType = DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC;
};
template <>
struct OperatorTypeTraits<(DML_OPERATOR_TYPE)DML_OPERATOR_MULTIHEAD_ATTENTION>
{
@ -2548,8 +2522,6 @@ auto OperatorTypeVisitor(DML_OPERATOR_TYPE type, Visitor&& visitor, Ts&&... args
#pragma warning(push)
#pragma warning(disable: 4063)
case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1:
return std::invoke(std::forward<Visitor>(visitor), DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC{}, std::forward<Ts>(args)...);
case DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT:
return std::invoke(std::forward<Visitor>(visitor), DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC{}, std::forward<Ts>(args)...);
#pragma warning(pop)
@ -2708,7 +2680,6 @@ inline gsl::czstring ToString(DML_OPERATOR_TYPE value)
case DML_OPERATOR_DIAGONAL_MATRIX1: return "DML_OPERATOR_DIAGONAL_MATRIX1";
#pragma warning(push)
#pragma warning(disable: 4063)
case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1: return "DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1";
case DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT: return "DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT";
case DML_OPERATOR_MULTIHEAD_ATTENTION: return "DML_OPERATOR_MULTIHEAD_ATTENTION";
#pragma warning(pop)

View file

@ -2692,23 +2692,4 @@ constexpr DML_SCHEMA_FIELD DML_RNN_ZERO_OPERATOR_SCHEMA_FIELDS[3] {
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_OUTPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputTensor", false },
};
// Schema field table for the QuantizedLinearAdd1 operator: eight input tensor
// fields followed by one output tensor field.
// The trailing bool is true exactly for the fields declared _Maybenull_ in
// DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC (presumably the
// "optional" flag - confirm against the DML_SCHEMA_FIELD definition).
constexpr DML_SCHEMA_FIELD DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA_FIELDS[9] {
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "ATensor", false },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "AScaleTensor", false },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "AZeroPointTensor", true },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "BTensor", false },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "BScaleTensor", true },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "BZeroPointTensor", true },
// The output scale and zero point are supplied as inputs, hence KIND_INPUT_TENSOR.
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputScaleTensor", false },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputZeroPointTensor", true },
DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_OUTPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputTensor", false },
};
// Schema record for the QuantizedLinearAdd1 operator: name string, operator
// type, support flags, field count, and the field table.
constexpr DML_OPERATOR_SCHEMA DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA {
"DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1",
static_cast<DML_OPERATOR_TYPE>(DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1),
DML_SCHEMA_OPERATOR_SUPPORT_FLAG_NONE,
// Field count; equals the declared length of the _SCHEMA_FIELDS array.
9,
DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA_FIELDS,
};
} // extern "C"

View file

@ -1646,21 +1646,6 @@ inline std::vector<OperatorField> GetFields(const DML_ACTIVATION_GELU_OPERATOR_D
OperatorField(&DML_ACTIVATION_GELU_OPERATOR_SCHEMA.Fields[1], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.OutputTensor))),
};
}
// Builds the OperatorField list for a QuantizedLinearAdd1 desc.
// Each desc member is paired with the schema field at the same index, so the
// entries below follow the order of
// DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA_FIELDS
// (indices 0-7 are inputs, index 8 is the output tensor).
inline std::vector<OperatorField> GetFields(const DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC& desc)
{
return {
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[0], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.ATensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[1], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.AScaleTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[2], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.AZeroPointTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[3], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.BTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[4], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.BScaleTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[5], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.BZeroPointTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[6], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.OutputScaleTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[7], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.OutputZeroPointTensor))),
OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[8], ToOperatorFieldType(static_cast<const DML_TENSOR_DESC*>(desc.OutputTensor))),
};
}
inline const DML_OPERATOR_SCHEMA& GetSchema(DML_OPERATOR_TYPE operatorType)
{
switch (operatorType)
@ -1831,7 +1816,6 @@ inline const DML_OPERATOR_SCHEMA& GetSchema(DML_OPERATOR_TYPE operatorType)
case DML_OPERATOR_ACTIVATION_GELU: return DML_ACTIVATION_GELU_OPERATOR_SCHEMA;
#pragma warning(push)
#pragma warning(disable: 4063)
case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1: return DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA;
case DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT: return DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_SCHEMA;
#pragma warning(pop)
default:
@ -2508,10 +2492,6 @@ inline AbstractOperatorDesc ConvertOperatorDesc(const DML_OPERATOR_DESC& opDesc)
return AbstractOperatorDesc(
&DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_SCHEMA,
GetFields(*static_cast<const DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC*>(opDesc.Desc)));
case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1:
return AbstractOperatorDesc(
&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA,
GetFields(*static_cast<const DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC*>(opDesc.Desc)));
#pragma warning(pop)
default:

View file

@ -16,35 +16,14 @@ class DmlOperatorQLinearAdd : public DmlOperator
IN_B_SCALE,
IN_B_ZERO_POINT,
IN_C_SCALE,
IN_C_ZERO_POINT,
IN_COUNT
IN_C_ZERO_POINT
};
public:
DmlOperatorQLinearAdd(const MLOperatorKernelCreationContext& kernelInfo)
: DmlOperator(kernelInfo)
{
std::vector<std::optional<uint32_t>> kernelInputIndices(8);
for (uint32_t i = 0; i < IN_COUNT; ++i)
{
kernelInputIndices[i] = i;
}
std::optional<MLOperatorTensor> constantAScaleTensor = kernelInfo.TryGetConstantInputTensor(IN_A_SCALE);
std::optional<MLOperatorTensor> constantBScaleTensor = kernelInfo.TryGetConstantInputTensor(IN_B_SCALE);
if (constantAScaleTensor && constantBScaleTensor)
{
if (constantAScaleTensor->GetUnalignedTensorByteSize() == constantBScaleTensor->GetUnalignedTensorByteSize() &&
!memcmp(constantAScaleTensor->GetByteData(), constantBScaleTensor->GetByteData(), constantAScaleTensor->GetUnalignedTensorByteSize()))
{
kernelInputIndices[IN_B_SCALE] = std::nullopt;
}
}
DmlOperator::Initialize(kernelInfo, kernelInputIndices);
DmlOperator::Initialize(kernelInfo);
std::vector<DimensionType> outputShape = kernelInfo.GetTensorShapeDescription().GetOutputTensorShape(0);
@ -57,15 +36,7 @@ public:
m_inputTensorDescs[IN_A_SCALE] = CreateTensorDescFromInput(kernelInfo, 1/*A Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize);
m_inputTensorDescs[IN_A_ZERO_POINT] = CreateTensorDescFromInput(kernelInfo, 2/*A Zero point OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize);
if (kernelInputIndices[IN_B_SCALE])
{
m_inputTensorDescs[IN_B_SCALE] = CreateTensorDescFromInput(kernelInfo, 4/*B Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize);
}
else
{
m_inputTensorDescs[IN_B_SCALE] = TensorDesc();
}
m_inputTensorDescs[IN_B_SCALE] = CreateTensorDescFromInput(kernelInfo, 4/*B Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize);
m_inputTensorDescs[IN_B_ZERO_POINT] = CreateTensorDescFromInput(kernelInfo, 5/*B Zero point OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize);
m_inputTensorDescs[IN_C_SCALE] = CreateTensorDescFromInput(kernelInfo, 6/*C Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize);
@ -77,18 +48,18 @@ public:
std::vector<DML_TENSOR_DESC> inputDescs = GetDmlInputDescs();
std::vector<DML_TENSOR_DESC> outputDescs = GetDmlOutputDescs();
DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC AddDesc = {};
DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD_OPERATOR_DESC AddDesc = {};
AddDesc.ATensor = &inputDescs[IN_A];
AddDesc.AScaleTensor = &inputDescs[IN_A_SCALE];
AddDesc.AZeroPointTensor = inputDescs[IN_A_ZERO_POINT].Desc != nullptr ? &inputDescs[IN_A_ZERO_POINT] : nullptr;
AddDesc.BTensor = &inputDescs[IN_B];
AddDesc.BScaleTensor = inputDescs[IN_B_SCALE].Desc != nullptr ? &inputDescs[IN_B_SCALE] : nullptr;
AddDesc.BScaleTensor = &inputDescs[IN_B_SCALE];
AddDesc.BZeroPointTensor = inputDescs[IN_B_ZERO_POINT].Desc != nullptr ? &inputDescs[IN_B_ZERO_POINT] : nullptr;
AddDesc.OutputScaleTensor = &inputDescs[IN_C_SCALE];
AddDesc.OutputZeroPointTensor = inputDescs[IN_C_ZERO_POINT].Desc != nullptr ? &inputDescs[IN_C_ZERO_POINT] : nullptr;
AddDesc.OutputTensor = &outputDescs[0];
DML_OPERATOR_DESC opDesc = { (DML_OPERATOR_TYPE) DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1, &AddDesc };
DML_OPERATOR_DESC opDesc = { DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD, &AddDesc };
SetDmlOperatorDesc(opDesc, kernelInfo);
}
};

View file

@ -82,7 +82,7 @@ IMLOperatorKernelCreationContextPrivate : public IMLOperatorKernelCreationContex
uint32_t inputIndex,
_Outptr_ IMLOperatorTensor** tensor
) const noexcept PURE;
STDMETHOD_(bool, IsDmlGraphNode)() const noexcept PURE;
STDMETHOD(SetDmlOperator)(