diff --git a/onnxruntime/core/framework/execution_providers.h b/onnxruntime/core/framework/execution_providers.h index 80e6a28baa..8a26944ef5 100644 --- a/onnxruntime/core/framework/execution_providers.h +++ b/onnxruntime/core/framework/execution_providers.h @@ -88,6 +88,12 @@ class ExecutionProviders { const std::vector& GetIds() const { return exec_provider_ids_; } const ProviderOptionsMap& GetAllProviderOptions() const { return exec_provider_options_; } + bool GetCpuProviderWasImplicitlyAdded() const { return cpu_execution_provider_was_implicitly_added_; } + + void SetCpuProviderWasImplicitlyAdded(bool cpu_execution_provider_was_implicitly_added) { + cpu_execution_provider_was_implicitly_added_ = cpu_execution_provider_was_implicitly_added; + } + private: // Some compilers emit incomprehensive output if this is allowed // with a container that has unique_ptr or something move-only. @@ -99,5 +105,9 @@ class ExecutionProviders { // maps for fast lookup of an index into exec_providers_ std::unordered_map provider_idx_map_; + + // Whether the CPU provider was implicitly added to a session for fallback (true), + // or whether it was explicitly added by the caller. + bool cpu_execution_provider_was_implicitly_added_ = false; }; } // namespace onnxruntime diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 5f25d23225..15d15ef9a0 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -1137,9 +1137,11 @@ static void ComputeConstantInitializerUseCount(const Graph& graph, InlinedHashMa } using NodePlacementMap = std::unordered_map>; +using NodePlacementSet = std::unordered_set; static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_verbose, - NodePlacementMap& node_placements) { + NodePlacementMap& node_placements, + NodePlacementSet& node_placement_provider_set) { for (const auto& node : graph.Nodes()) { const auto& node_provider = node.GetExecutionProviderType(); if (node_provider.empty()) { @@ -1148,6 +1150,8 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver node.OpType(), "(", node.SinceVersion(), ") node with name '", node.Name(), "'"); } + node_placement_provider_set.insert(node_provider); + #if !defined(ORT_MINIMAL_BUILD) if (is_verbose) { // TODO: should we disable this if the number of nodes is above a certain threshold? const std::string node_str = node.OpType() + " (" + node.Name() + ")"; @@ -1159,7 +1163,8 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver if (node.ContainsSubgraph()) { const auto subgraphs = node.GetSubgraphs(); for (const auto& subgraph : subgraphs) { - ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(*subgraph, is_verbose, node_placements)); + ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(*subgraph, is_verbose, node_placements, + node_placement_provider_set)); } } } @@ -1167,8 +1172,10 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver return Status::OK(); } -static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::Logger& logger) { +static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::Logger& logger, + const ExecutionProviders& providers) { NodePlacementMap node_placements{}; + NodePlacementSet node_placement_provider_set{}; #if !defined(ORT_MINIMAL_BUILD) const bool is_verbose_mode = logger.GetSeverity() == logging::Severity::kVERBOSE; #else @@ -1176,7 +1183,7 @@ static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging:: const bool is_verbose_mode = false; #endif // !defined(ORT_MINIMAL_BUILD) - ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(graph, is_verbose_mode, node_placements)); + ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(graph, is_verbose_mode, node_placements, node_placement_provider_set)); #if !defined(ORT_MINIMAL_BUILD) // print placement info @@ -1196,6 +1203,17 @@ static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging:: } #endif // !defined(ORT_MINIMAL_BUILD) + // Silent fallback from GPU/NPU to CPU nodes can cause performance issues due to memory copies and frequent stalls. + // If the user explicitly included the CPU provider anyway, then remain silent, but if it was implicitly added, + // and unexpected fallback happened to a non-preferred provider, warn the user. + size_t explicit_provider_count = providers.NumProviders() - (providers.GetCpuProviderWasImplicitlyAdded() ? 1 : 0); + if (node_placement_provider_set.size() > explicit_provider_count) { + LOGS(logger, WARNING) << "Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf."; + if (!is_verbose_mode) { + LOGS(logger, WARNING) << "Rerunning with verbose output on a non-minimal build will show node assignments."; + } + } + return Status::OK(); } @@ -1213,10 +1231,10 @@ Status SessionState::FinalizeSessionState(const std::basic_string(epi); ORT_RETURN_IF_ERROR_SESSIONID_(RegisterExecutionProvider(std::move(p_cpu_exec_provider))); + execution_providers_.SetCpuProviderWasImplicitlyAdded(true); } // re-acquire mutex