From 4dfeef48ebb062ecd84ce5624017c2c8edf1537c Mon Sep 17 00:00:00 2001 From: Jeff Bloomfield Date: Mon, 7 Aug 2023 14:30:08 -0700 Subject: [PATCH] Revert QlinearAdd1 --- .../src/External/DirectMLHelpers/ApiTraits.h | 31 +------------- .../External/DirectMLHelpers/DirectMLSchema.h | 19 --------- .../DirectMLHelpers/GeneratedSchemaHelpers.h | 20 --------- .../src/Operators/DmlOperatorQLinearAdd.cpp | 41 +++---------------- .../MLOperatorAuthorPrivate.h | 2 +- 5 files changed, 8 insertions(+), 105 deletions(-) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h index 8c66e610df..f6d71ce629 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/ApiTraits.h @@ -2,18 +2,6 @@ // Licensed under the MIT License. #pragma once -struct DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC -{ - const DML_TENSOR_DESC* ATensor; - const DML_TENSOR_DESC* AScaleTensor; - _Maybenull_ const DML_TENSOR_DESC* AZeroPointTensor; - const DML_TENSOR_DESC* BTensor; - _Maybenull_ const DML_TENSOR_DESC* BScaleTensor; - _Maybenull_ const DML_TENSOR_DESC* BZeroPointTensor; - const DML_TENSOR_DESC* OutputScaleTensor; // This is an input tensor - _Maybenull_ const DML_TENSOR_DESC* OutputZeroPointTensor; // This is an input tensor - const DML_TENSOR_DESC* OutputTensor; -}; struct DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC { @@ -26,9 +14,7 @@ struct DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC _Maybenull_ const DML_TENSOR_DESC* BiasTensor; const DML_TENSOR_DESC* OutputTensor; }; - -const int DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT = 0x80000011; -const int DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1 = 0x8000000e; +const int DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT = 0x80000011; namespace ApiTraits { @@ -1045,12 +1031,6 @@ struct OperatorDescTraits static constexpr DML_OPERATOR_TYPE Type = DML_OPERATOR_DIAGONAL_MATRIX1; }; -template <> -struct OperatorDescTraits -{ - static constexpr DML_OPERATOR_TYPE Type = (DML_OPERATOR_TYPE) DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1; -}; - template <> struct OperatorDescTraits { @@ -2190,12 +2170,6 @@ struct OperatorTypeTraits<(DML_OPERATOR_TYPE)DML_OPERATOR_ACTIVATION_GELU> using DescType = DML_ACTIVATION_GELU_OPERATOR_DESC; }; -template <> -struct OperatorTypeTraits<(DML_OPERATOR_TYPE)DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1> -{ - using DescType = DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC; -}; - template <> struct OperatorTypeTraits<(DML_OPERATOR_TYPE)DML_OPERATOR_MULTIHEAD_ATTENTION> { @@ -2548,8 +2522,6 @@ auto OperatorTypeVisitor(DML_OPERATOR_TYPE type, Visitor&& visitor, Ts&&... args #pragma warning(push) #pragma warning(disable: 4063) - case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1: - return std::invoke(std::forward(visitor), DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC{}, std::forward(args)...); case DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT: return std::invoke(std::forward(visitor), DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_DESC{}, std::forward(args)...); #pragma warning(pop) @@ -2708,7 +2680,6 @@ inline gsl::czstring ToString(DML_OPERATOR_TYPE value) case DML_OPERATOR_DIAGONAL_MATRIX1: return "DML_OPERATOR_DIAGONAL_MATRIX1"; #pragma warning(push) #pragma warning(disable: 4063) - case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1: return "DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1"; case DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT: return "DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT"; case DML_OPERATOR_MULTIHEAD_ATTENTION: return "DML_OPERATOR_MULTIHEAD_ATTENTION"; #pragma warning(pop) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/DirectMLSchema.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/DirectMLSchema.h index 9071760f9f..f3a3aec50e 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/DirectMLSchema.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/DirectMLSchema.h @@ -2692,23 +2692,4 @@ constexpr DML_SCHEMA_FIELD DML_RNN_ZERO_OPERATOR_SCHEMA_FIELDS[3] { DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_OUTPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputTensor", false }, }; -constexpr DML_SCHEMA_FIELD DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA_FIELDS[9] { - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "ATensor", false }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "AScaleTensor", false }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "AZeroPointTensor", true }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "BTensor", false }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "BScaleTensor", true }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "BZeroPointTensor", true }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputScaleTensor", false }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_INPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputZeroPointTensor", true }, - DML_SCHEMA_FIELD { DML_SCHEMA_FIELD_KIND_OUTPUT_TENSOR, DML_SCHEMA_FIELD_TYPE_TENSOR_DESC, "OutputTensor", false }, -}; - -constexpr DML_OPERATOR_SCHEMA DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA { - "DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1", - static_cast(DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1), - DML_SCHEMA_OPERATOR_SUPPORT_FLAG_NONE, - 9, - DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA_FIELDS, -}; } // extern "C" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/GeneratedSchemaHelpers.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/GeneratedSchemaHelpers.h index ef5bbc036a..90915c7e75 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/GeneratedSchemaHelpers.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/External/DirectMLHelpers/GeneratedSchemaHelpers.h @@ -1646,21 +1646,6 @@ inline std::vector GetFields(const DML_ACTIVATION_GELU_OPERATOR_D OperatorField(&DML_ACTIVATION_GELU_OPERATOR_SCHEMA.Fields[1], ToOperatorFieldType(static_cast(desc.OutputTensor))), }; } -inline std::vector GetFields(const DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC& desc) -{ - return { - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[0], ToOperatorFieldType(static_cast(desc.ATensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[1], ToOperatorFieldType(static_cast(desc.AScaleTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[2], ToOperatorFieldType(static_cast(desc.AZeroPointTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[3], ToOperatorFieldType(static_cast(desc.BTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[4], ToOperatorFieldType(static_cast(desc.BScaleTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[5], ToOperatorFieldType(static_cast(desc.BZeroPointTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[6], ToOperatorFieldType(static_cast(desc.OutputScaleTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[7], ToOperatorFieldType(static_cast(desc.OutputZeroPointTensor))), - OperatorField(&DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA.Fields[8], ToOperatorFieldType(static_cast(desc.OutputTensor))), - }; -} - inline const DML_OPERATOR_SCHEMA& GetSchema(DML_OPERATOR_TYPE operatorType) { switch (operatorType) @@ -1831,7 +1816,6 @@ inline const DML_OPERATOR_SCHEMA& GetSchema(DML_OPERATOR_TYPE operatorType) case DML_OPERATOR_ACTIVATION_GELU: return DML_ACTIVATION_GELU_OPERATOR_SCHEMA; #pragma warning(push) #pragma warning(disable: 4063) - case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1: return DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA; case DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT: return DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_SCHEMA; #pragma warning(pop) default: @@ -2508,10 +2492,6 @@ inline AbstractOperatorDesc ConvertOperatorDesc(const DML_OPERATOR_DESC& opDesc) return AbstractOperatorDesc( &DML_MATRIX_MULTIPLY_INTEGER_TO_FLOAT_OPERATOR_SCHEMA, GetFields(*static_cast(opDesc.Desc))); - case DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1: - return AbstractOperatorDesc( - &DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_SCHEMA, - GetFields(*static_cast(opDesc.Desc))); #pragma warning(pop) default: diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorQLinearAdd.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorQLinearAdd.cpp index a2deab6b93..7b50dfb9ff 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorQLinearAdd.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorQLinearAdd.cpp @@ -16,35 +16,14 @@ class DmlOperatorQLinearAdd : public DmlOperator IN_B_SCALE, IN_B_ZERO_POINT, IN_C_SCALE, - IN_C_ZERO_POINT, - IN_COUNT + IN_C_ZERO_POINT }; public: DmlOperatorQLinearAdd(const MLOperatorKernelCreationContext& kernelInfo) : DmlOperator(kernelInfo) { - - std::vector> kernelInputIndices(8); - - for (uint32_t i = 0; i < IN_COUNT; ++i) - { - kernelInputIndices[i] = i; - } - - std::optional constantAScaleTensor = kernelInfo.TryGetConstantInputTensor(IN_A_SCALE); - std::optional constantBScaleTensor = kernelInfo.TryGetConstantInputTensor(IN_B_SCALE); - - if (constantAScaleTensor && constantBScaleTensor) - { - if (constantAScaleTensor->GetUnalignedTensorByteSize() == constantBScaleTensor->GetUnalignedTensorByteSize() && - !memcmp(constantAScaleTensor->GetByteData(), constantBScaleTensor->GetByteData(), constantAScaleTensor->GetUnalignedTensorByteSize())) - { - kernelInputIndices[IN_B_SCALE] = std::nullopt; - } - } - - DmlOperator::Initialize(kernelInfo, kernelInputIndices); + DmlOperator::Initialize(kernelInfo); std::vector outputShape = kernelInfo.GetTensorShapeDescription().GetOutputTensorShape(0); @@ -57,15 +36,7 @@ public: m_inputTensorDescs[IN_A_SCALE] = CreateTensorDescFromInput(kernelInfo, 1/*A Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize); m_inputTensorDescs[IN_A_ZERO_POINT] = CreateTensorDescFromInput(kernelInfo, 2/*A Zero point OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize); - if (kernelInputIndices[IN_B_SCALE]) - { - m_inputTensorDescs[IN_B_SCALE] = CreateTensorDescFromInput(kernelInfo, 4/*B Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize); - } - else - { - m_inputTensorDescs[IN_B_SCALE] = TensorDesc(); - } - + m_inputTensorDescs[IN_B_SCALE] = CreateTensorDescFromInput(kernelInfo, 4/*B Scale OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize); m_inputTensorDescs[IN_B_ZERO_POINT] = CreateTensorDescFromInput(kernelInfo, 5/*B Zero point OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize); m_inputTensorDescs[IN_C_SCALE] = CreateTensorDescFromInput(kernelInfo, 6/*C Zero point OnnxIndex*/, TensorAxis::DoNotCoerce, TensorAxis::W, TensorAxis::RightAligned, std::nullopt, dmlDimSize); @@ -77,18 +48,18 @@ public: std::vector inputDescs = GetDmlInputDescs(); std::vector outputDescs = GetDmlOutputDescs(); - DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1_OPERATOR_DESC AddDesc = {}; + DML_ELEMENT_WISE_QUANTIZED_LINEAR_ADD_OPERATOR_DESC AddDesc = {}; AddDesc.ATensor = &inputDescs[IN_A]; AddDesc.AScaleTensor = &inputDescs[IN_A_SCALE]; AddDesc.AZeroPointTensor = inputDescs[IN_A_ZERO_POINT].Desc != nullptr ? &inputDescs[IN_A_ZERO_POINT] : nullptr; AddDesc.BTensor = &inputDescs[IN_B]; - AddDesc.BScaleTensor = inputDescs[IN_B_SCALE].Desc != nullptr ? &inputDescs[IN_B_SCALE] : nullptr; + AddDesc.BScaleTensor = &inputDescs[IN_B_SCALE]; AddDesc.BZeroPointTensor = inputDescs[IN_B_ZERO_POINT].Desc != nullptr ? &inputDescs[IN_B_ZERO_POINT] : nullptr; AddDesc.OutputScaleTensor = &inputDescs[IN_C_SCALE]; AddDesc.OutputZeroPointTensor = inputDescs[IN_C_ZERO_POINT].Desc != nullptr ? &inputDescs[IN_C_ZERO_POINT] : nullptr; AddDesc.OutputTensor = &outputDescs[0]; - DML_OPERATOR_DESC opDesc = { (DML_OPERATOR_TYPE) DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD1, &AddDesc }; + DML_OPERATOR_DESC opDesc = { DML_OPERATOR_ELEMENT_WISE_QUANTIZED_LINEAR_ADD, &AddDesc }; SetDmlOperatorDesc(opDesc, kernelInfo); } }; diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorPrivate.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorPrivate.h index d388418891..3bec8d3864 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorPrivate.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorPrivate.h @@ -82,7 +82,7 @@ IMLOperatorKernelCreationContextPrivate : public IMLOperatorKernelCreationContex uint32_t inputIndex, _Outptr_ IMLOperatorTensor** tensor ) const noexcept PURE; - + STDMETHOD_(bool, IsDmlGraphNode)() const noexcept PURE; STDMETHOD(SetDmlOperator)(