diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp index 10ab559560..c9437ee0fd 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp @@ -301,7 +301,7 @@ constexpr static std::array supportedTypeListLogica constexpr static std::array supportedTypeListLogicalComparison9 = /* A&B,C */ { SupportedTensorDataTypes::Float16to32|SupportedTensorDataTypes::Ints8to64, SupportedTensorDataTypes::Bool }; constexpr static std::array supportedTypeListSigned = { SupportedTensorDataTypes::Float16to32 | SupportedTensorDataTypes::Int64 | SupportedTensorDataTypes::Int32 | SupportedTensorDataTypes::Int16 | SupportedTensorDataTypes::Int8 }; constexpr static std::array supportedTypeListRange = {SupportedTensorDataTypes::Int16|SupportedTensorDataTypes::Int32|SupportedTensorDataTypes::Int64|SupportedTensorDataTypes::Float32}; -constexpr static std::array supportedTypeListResize11 = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32 /* float32 ROI read by CPU */}; +constexpr static std::array supportedTypeListResize11 = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32 /* ROI read by CPU */}; constexpr static std::array supportedTypeListInteger = {SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8, SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8, SupportedTensorDataTypes::Int32 }; constexpr static std::array supportedTypeListInteger8 = {SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8 }; constexpr static std::array supportedTypeListRoiAlign = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Int32|SupportedTensorDataTypes::Int64 }; diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp index bb501f4cd9..f3b9ddceb2 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp @@ -32,130 +32,6 @@ namespace OperatorHelper } } - void ReadCpuLocalTensorIntoInt32( - const MLOperatorTensor& tensor, - std::vector& result - ) - { - result.clear(); - ML_CHECK_VALID_ARGUMENT(tensor.IsCpuData(), "Tensor must be CPU Tensor."); - - const std::vector& tensorDimensions = tensor.GetShape(); - const uint32_t elementCount = ComputeElementCountFromDimensions(tensorDimensions); - - switch (tensor.GetTensorDataType()) - { - case MLOperatorTensorDataType::Int32: - { - const int32_t* data = tensor.GetData(); - result.assign(data, data + elementCount); - } - break; - - case MLOperatorTensorDataType::Int64: - { - const int64_t* data = tensor.GetData(); - result.reserve(elementCount); - - // Use clamped cast rather than static_cast/narrow_cast, - // because it's not uncommon for a model to specify a - // 64-bit INTMAX constant as a sentinel value to mean - // the largest possible value (even though the actual - // dimension values come nowhere close to that, far - // less than 32-bit INTMAX). - for (auto d : gsl::make_span(data, data + elementCount)) - { - result.push_back(clamp_cast(d)); - } - } - break; - - default: - ML_INVALID_ARGUMENT("Expecting CPU local tensor of type int32 or int64."); - break; - } - } - - void ReadCpuLocalTensorIntoFloat32( - const MLOperatorTensor& tensor, - std::vector& result - ) - { - result.clear(); - ML_CHECK_VALID_ARGUMENT(tensor.IsCpuData(), "Tensor must be CPU Tensor."); - - const std::vector& tensorDimensions = tensor.GetShape(); - const uint32_t elementCount = ComputeElementCountFromDimensions(tensorDimensions); - result.resize(elementCount); - - switch (tensor.GetTensorDataType()) - { - case MLOperatorTensorDataType::Float16: - { - const onnxruntime::MLFloat16* data = tensor.GetData(); - std::transform(result.begin(), result.end(), result.begin(), [](auto v) {return static_cast(v); }); - } - break; - - case MLOperatorTensorDataType::/*Float32*/Float: - { - const float* data = tensor.GetData(); - result.assign(data, data + elementCount); - } - break; - - case MLOperatorTensorDataType::/*Float64*/Double: - { - const double* data = tensor.GetData(); - std::transform(result.begin(), result.end(), result.begin(), [](auto v) {return static_cast(v); }); - } - break; - - case MLOperatorTensorDataType::Int32: - { - const int32_t* data = tensor.GetData(); - std::transform(result.begin(), result.end(), result.begin(), [](auto v) {return static_cast(v); }); - } - break; - - case MLOperatorTensorDataType::UInt32: - { - const uint32_t* data = tensor.GetData(); - std::transform(result.begin(), result.end(), result.begin(), [](auto v) {return static_cast(v); }); - } - break; - - case MLOperatorTensorDataType::Int64: - { - const int64_t* data = tensor.GetData(); - std::transform(result.begin(), result.end(), result.begin(), [](auto v) {return static_cast(v); }); - } - break; - - case MLOperatorTensorDataType::UInt64: - { - const uint64_t* data = tensor.GetData(); - std::transform(result.begin(), result.end(), result.begin(), [](auto v) {return static_cast(v); }); - } - break; - - default: - ML_INVALID_ARGUMENT("Expecting CPU local tensor of type float32."); - break; - } - } - - void DowncastDimensions(gsl::span inputDimensions, std::vector& outputDimensions) - { - outputDimensions.reserve(inputDimensions.size()); - outputDimensions.clear(); - - for (int64_t dim : inputDimensions) - { - outputDimensions.push_back(gsl::narrow_cast(std::clamp(dim, INT32_MIN, INT32_MAX))); - } - } - float CastFloat16ToFloat32(uint16_t input) { // Promote float16m10e5s1 to float32m23e8s1. @@ -244,6 +120,130 @@ namespace OperatorHelper } #pragma warning(pop) + void ReadCpuLocalTensorIntoInt32( + const MLOperatorTensor& tensor, + std::vector& result + ) + { + result.clear(); + ML_CHECK_VALID_ARGUMENT(tensor.IsCpuData(), "Tensor must be CPU Tensor."); + + const std::vector& tensorDimensions = tensor.GetShape(); + const uint32_t elementCount = ComputeElementCountFromDimensions(tensorDimensions); + + switch (tensor.GetTensorDataType()) + { + case MLOperatorTensorDataType::Int32: + { + const int32_t* data = tensor.GetData(); + result.assign(data, data + elementCount); + } + break; + + case MLOperatorTensorDataType::Int64: + { + const int64_t* data = tensor.GetData(); + result.reserve(elementCount); + + // Use clamped cast rather than static_cast/narrow_cast, + // because it's not uncommon for a model to specify a + // 64-bit INTMAX constant as a sentinel value to mean + // the largest possible value (even though the actual + // dimension values come nowhere close to that, far + // less than 32-bit INTMAX). + for (auto d : gsl::make_span(data, data + elementCount)) + { + result.push_back(clamp_cast(d)); + } + } + break; + + default: + ML_INVALID_ARGUMENT("Expecting CPU local tensor of type int32 or int64."); + break; + } + } + + void ReadCpuLocalTensorIntoFloat32( + const MLOperatorTensor& tensor, + std::vector& result + ) + { + result.clear(); + ML_CHECK_VALID_ARGUMENT(tensor.IsCpuData(), "Tensor must be CPU Tensor."); + + const std::vector& tensorDimensions = tensor.GetShape(); + const uint32_t elementCount = ComputeElementCountFromDimensions(tensorDimensions); + result.resize(elementCount); + + switch (tensor.GetTensorDataType()) + { + case MLOperatorTensorDataType::Float16: + { + const uint16_t* data = reinterpret_cast(tensor.GetByteData()); + std::transform(data, data + elementCount, result.begin(), CastFloat16ToFloat32); + } + break; + + case MLOperatorTensorDataType::/*Float32*/Float: + { + const float* data = tensor.GetData(); + result.assign(data, data + elementCount); + } + break; + + case MLOperatorTensorDataType::/*Float64*/Double: + { + const double* data = tensor.GetData(); + std::transform(data, data + elementCount, result.begin(), [](auto v) {return static_cast(v); }); + } + break; + + case MLOperatorTensorDataType::Int32: + { + const int32_t* data = tensor.GetData(); + std::transform(data, data + elementCount, result.begin(), [](auto v) {return static_cast(v); }); + } + break; + + case MLOperatorTensorDataType::UInt32: + { + const uint32_t* data = tensor.GetData(); + std::transform(data, data + elementCount, result.begin(), [](auto v) {return static_cast(v); }); + } + break; + + case MLOperatorTensorDataType::Int64: + { + const int64_t* data = tensor.GetData(); + std::transform(data, data + elementCount, result.begin(), [](auto v) {return static_cast(v); }); + } + break; + + case MLOperatorTensorDataType::UInt64: + { + const uint64_t* data = tensor.GetData(); + std::transform(data, data + elementCount, result.begin(), [](auto v) {return static_cast(v); }); + } + break; + + default: + ML_INVALID_ARGUMENT("Expecting CPU local tensor of type float32."); + break; + } + } + + void DowncastDimensions(gsl::span inputDimensions, std::vector& outputDimensions) + { + outputDimensions.reserve(inputDimensions.size()); + outputDimensions.clear(); + + for (int64_t dim : inputDimensions) + { + outputDimensions.push_back(gsl::narrow_cast(std::clamp(dim, INT32_MIN, INT32_MAX))); + } + } + int64_t IsFloatDataType(MLOperatorTensorDataType tensorDataType) { switch (tensorDataType) @@ -1132,7 +1132,7 @@ namespace OperatorHelper ML_CHECK_VALID_ARGUMENT(inputCount + 1 == m_components.size(), "Mismatch between input tensor count and string equation component count."); ML_CHECK_VALID_ARGUMENT(outputCount == 1, "EinSum expects exactly 1 output tensor."); - std::vector labelSizes(m_labelIndices.size(), static_cast(INT_MIN)); + std::vector labelSizes(m_labelIndices.size(), UINT_MAX); // Read every input tensor, comparing labels to ensure consistent sizes from the equation parsed earlier. for (uint32_t i = 0; i < inputCount; ++i) @@ -1153,7 +1153,7 @@ namespace OperatorHelper uint32_t labelIndex = labelIndices[j]; assert(labelIndex < labelSizes.size()); - if (labelSizes[labelIndex] == static_cast(INT_MIN)) + if (labelSizes[labelIndex] == UINT_MAX) { labelSizes[labelIndex] = dimensionSize; }