Merged PR 5861108: Allow nodes in DML graph partitions with empty shapes on constant CPU inputs

Resize is spec'd to ignore the "roi" tensor in certain modes. For some reason, converters specify an arbitrary value for this tensor even though it is optional, and that value may have an empty shape. This change makes the graph partitioner skip its check for empty shape dimensions on tensors such as this one, which the DML kernel registers as constant CPU inputs. Without this change, the node is not included in DML graph partitions, because the DML graph doesn't handle empty dimensions. Related work items: #32221164
2026-06-06 00:03:22 +00:00 · 2021-03-31 19:06:08 +00:00 · 2021-03-31 19:06:08 +00:00 · f87527c0df
commit f87527c0df
parent 915931384a
3 changed files with 7 additions and 6 deletions
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp
@ -477,9 +477,9 @@ namespace Dml
                        std::optional<uint32_t> requiredInputCount = internalRegInfo->graphNodeFactoryRegistration->requiredInputCount;
                        if (requiredCpuInputsConstant &&
                            TryGetStaticInputShapes( node, graphNodeProperty.first->second.inputShapes) &&
-                            !ContainsEmptyDimensions(graphNodeProperty.first->second.inputShapes) &&
+                            !ContainsEmptyDimensions(graphNodeProperty.first->second.inputShapes, internalRegInfo->requiredConstantCpuInputs) &&
                            TryGetStaticOutputShapes(node, graphNodeProperty.first->second.outputShapes) &&
-                            !ContainsEmptyDimensions(graphNodeProperty.first->second.outputShapes) &&
+                            !ContainsEmptyDimensions(graphNodeProperty.first->second.outputShapes, internalRegInfo->requiredConstantCpuInputs) &&
                            (requiredInputCount == std::nullopt || *requiredInputCount == node.InputDefs().size()))
                        {
                            *isDmlGraphNode = true;
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
@ -1876,12 +1876,13 @@ bool TryGetStaticOutputShapes(const onnxruntime::Node& node, EdgeShapes& outputS
  return true;
 }

-bool ContainsEmptyDimensions(const EdgeShapes& shapes) {
+bool ContainsEmptyDimensions(const EdgeShapes& shapes, gsl::span<const uint32_t> ignoredShapeIndices) {
  for (size_t i = 0; i < shapes.EdgeCount(); i++) {
    const std::vector<uint32_t>& shape = shapes.GetShape(i);

-    if (std::find(shape.begin(), shape.end(), 0) != shape.end()) {
-      return true;
+    if (std::find(shape.begin(), shape.end(), 0) != shape.end() && 
+        std::find(ignoredShapeIndices.begin(), ignoredShapeIndices.end(), i) == ignoredShapeIndices.end()) {
+          return true;
    }
  }

--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h
@ -637,7 +637,7 @@ onnx::AttributeProto_AttributeType ToProto(MLOperatorAttributeType type);

 bool TryGetStaticInputShapes(const onnxruntime::Node& node, EdgeShapes& inputShapes);
 bool TryGetStaticOutputShapes(const onnxruntime::Node& node, EdgeShapes& outputShapes);
-bool ContainsEmptyDimensions(const EdgeShapes& shapes);
+bool ContainsEmptyDimensions(const EdgeShapes& shapes, gsl::span<const uint32_t> ignoredShapeIndices = gsl::span<const uint32_t>());

 std::tuple<std::unique_ptr<std::byte[]>, size_t> UnpackTensor(const onnx::TensorProto& initializer);
 }    // namespace Windows::AI::MachineLearning::Adapter