Minor optimization: if a node has already been placed, there's no need to find a kernel for it. (#2417)

2026-07-30 20:18:08 +00:00 · 2019-11-17 20:08:33 -08:00 · 2019-11-17 20:08:33 -08:00 · f268e69c79
commit f268e69c79
parent 5ab7041fa7
1 changed files with 3 additions and 2 deletions
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -1204,8 +1204,9 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
      continue;

    const auto& node = *p_node;
-    const auto* cuda_kernel_def = GetKernelRegistry()->TryFindKernel(node, Type());
-    if (cuda_kernel_def == nullptr || !node.GetExecutionProviderType().empty()) {
+    const KernelCreateInfo* cuda_kernel_def = nullptr;
+    if (!node.GetExecutionProviderType().empty() ||
+        !(cuda_kernel_def = GetKernelRegistry()->TryFindKernel(node, Type()))) {
      // node is not in cuda execution provider if no kernel def found,
      // or if other execution provider already assigned to it
      defs_outside_cuda.insert(node.OutputDefs().cbegin(), node.OutputDefs().cend());