From 568950e28cbd03711b07b0ceb72adce514fdbc2c Mon Sep 17 00:00:00 2001 From: Dwayne Robinson Date: Tue, 13 Sep 2022 15:53:17 -0700 Subject: [PATCH] Warn on node EP silent fallback from preferred provider (#10831) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Warn on node EP fallback from preferred provider * Clarify with comment * Update to ORT's weird coding style for ragged parameter wrap * Android build error: unused parameter ‘providers’ * Update logic to be more robust * Updates from Pranav's feedback about messaging to rerun with verbose and respecting explicit vs implicit EP addition. Also merge from main. * brace style patch up * Update with feedback from Pranav and Scott McKay * Restore node_placement_set after realizing it only applies when is_verbose is true * Fix build warning on Android * Renamed to node_placement_provider_set per Pranav's suggestion --- .../core/framework/execution_providers.h | 10 +++++++ onnxruntime/core/framework/session_state.cc | 30 +++++++++++++++---- onnxruntime/core/session/inference_session.cc | 1 + 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/framework/execution_providers.h b/onnxruntime/core/framework/execution_providers.h index 80e6a28baa..8a26944ef5 100644 --- a/onnxruntime/core/framework/execution_providers.h +++ b/onnxruntime/core/framework/execution_providers.h @@ -88,6 +88,12 @@ class ExecutionProviders { const std::vector& GetIds() const { return exec_provider_ids_; } const ProviderOptionsMap& GetAllProviderOptions() const { return exec_provider_options_; } + bool GetCpuProviderWasImplicitlyAdded() const { return cpu_execution_provider_was_implicitly_added_; } + + void SetCpuProviderWasImplicitlyAdded(bool cpu_execution_provider_was_implicitly_added) { + cpu_execution_provider_was_implicitly_added_ = cpu_execution_provider_was_implicitly_added; + } + private: // Some compilers emit incomprehensive output if this is allowed // with a container that has unique_ptr or something move-only. @@ -99,5 +105,9 @@ class ExecutionProviders { // maps for fast lookup of an index into exec_providers_ std::unordered_map provider_idx_map_; + + // Whether the CPU provider was implicitly added to a session for fallback (true), + // or whether it was explicitly added by the caller. + bool cpu_execution_provider_was_implicitly_added_ = false; }; } // namespace onnxruntime diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 5f25d23225..15d15ef9a0 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -1137,9 +1137,11 @@ static void ComputeConstantInitializerUseCount(const Graph& graph, InlinedHashMa } using NodePlacementMap = std::unordered_map>; +using NodePlacementSet = std::unordered_set; static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_verbose, - NodePlacementMap& node_placements) { + NodePlacementMap& node_placements, + NodePlacementSet& node_placement_provider_set) { for (const auto& node : graph.Nodes()) { const auto& node_provider = node.GetExecutionProviderType(); if (node_provider.empty()) { @@ -1148,6 +1150,8 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver node.OpType(), "(", node.SinceVersion(), ") node with name '", node.Name(), "'"); } + node_placement_provider_set.insert(node_provider); + #if !defined(ORT_MINIMAL_BUILD) if (is_verbose) { // TODO: should we disable this if the number of nodes is above a certain threshold? const std::string node_str = node.OpType() + " (" + node.Name() + ")"; @@ -1159,7 +1163,8 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver if (node.ContainsSubgraph()) { const auto subgraphs = node.GetSubgraphs(); for (const auto& subgraph : subgraphs) { - ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(*subgraph, is_verbose, node_placements)); + ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(*subgraph, is_verbose, node_placements, + node_placement_provider_set)); } } } @@ -1167,8 +1172,10 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver return Status::OK(); } -static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::Logger& logger) { +static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::Logger& logger, + const ExecutionProviders& providers) { NodePlacementMap node_placements{}; + NodePlacementSet node_placement_provider_set{}; #if !defined(ORT_MINIMAL_BUILD) const bool is_verbose_mode = logger.GetSeverity() == logging::Severity::kVERBOSE; #else @@ -1176,7 +1183,7 @@ static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging:: const bool is_verbose_mode = false; #endif // !defined(ORT_MINIMAL_BUILD) - ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(graph, is_verbose_mode, node_placements)); + ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(graph, is_verbose_mode, node_placements, node_placement_provider_set)); #if !defined(ORT_MINIMAL_BUILD) // print placement info @@ -1196,6 +1203,17 @@ static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging:: } #endif // !defined(ORT_MINIMAL_BUILD) + // Silent fallback from GPU/NPU to CPU nodes can cause performance issues due to memory copies and frequent stalls. + // If the user explicitly included the CPU provider anyway, then remain silent, but if it was implicitly added, + // and unexpected fallback happened to a non-preferred provider, warn the user. + size_t explicit_provider_count = providers.NumProviders() - (providers.GetCpuProviderWasImplicitlyAdded() ? 1 : 0); + if (node_placement_provider_set.size() > explicit_provider_count) { + LOGS(logger, WARNING) << "Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf."; + if (!is_verbose_mode) { + LOGS(logger, WARNING) << "Rerunning with verbose output on a non-minimal build will show node assignments."; + } + } + return Status::OK(); } @@ -1213,10 +1231,10 @@ Status SessionState::FinalizeSessionState(const std::basic_string(epi); ORT_RETURN_IF_ERROR_SESSIONID_(RegisterExecutionProvider(std::move(p_cpu_exec_provider))); + execution_providers_.SetCpuProviderWasImplicitlyAdded(true); } // re-acquire mutex