Warn on node EP silent fallback from preferred provider (#10831)

* Warn on node EP fallback from preferred provider
* Clarify with comment
* Update to ORT's weird coding style for ragged parameter wrap
* Android build error: unused parameter ‘providers’
* Update logic to be more robust
* Updates from Pranav's feedback about messaging to rerun with verbose and respecting explicit vs implicit EP addition. Also merge from main.
* brace style patch up
* Update with feedback from Pranav and Scott McKay
* Restore node_placement_set after realizing it only applies when is_verbose is true
* Fix build warning on Android
* Renamed to node_placement_provider_set per Pranav's suggestion
This commit is contained in:
Dwayne Robinson 2022-09-13 15:53:17 -07:00 committed by GitHub
parent 78bc53f91d
commit 568950e28c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 6 deletions

View file

@ -88,6 +88,12 @@ class ExecutionProviders {
// Returns exec_provider_ids_ by const reference (no copy is made).
const std::vector<std::string>& GetIds() const { return exec_provider_ids_; }
// Returns exec_provider_options_ by const reference (no copy is made).
const ProviderOptionsMap& GetAllProviderOptions() const { return exec_provider_options_; }
// Reports whether the CPU provider was implicitly added for fallback (true)
// rather than explicitly added by the caller.
bool GetCpuProviderWasImplicitlyAdded() const { return cpu_execution_provider_was_implicitly_added_; }
void SetCpuProviderWasImplicitlyAdded(bool cpu_execution_provider_was_implicitly_added) {
cpu_execution_provider_was_implicitly_added_ = cpu_execution_provider_was_implicitly_added;
}
private:
// Some compilers emit incomprehensible output if this is allowed
// with a container that has unique_ptr or something move-only.
@ -99,5 +105,9 @@ class ExecutionProviders {
// maps for fast lookup of an index into exec_providers_
std::unordered_map<std::string, size_t> provider_idx_map_;
// True when the CPU provider was implicitly added to a session for fallback;
// false when it was explicitly added by the caller. Defaults to false and is
// changed through SetCpuProviderWasImplicitlyAdded().
bool cpu_execution_provider_was_implicitly_added_ = false;
};
} // namespace onnxruntime

View file

@ -1137,9 +1137,11 @@ static void ComputeConstantInitializerUseCount(const Graph& graph, InlinedHashMa
}
// Placement data passed through VerifyEachNodeIsAssignedToAnEpImpl.
// NOTE(review): presumably keyed by execution provider type, with one descriptive string per node
// placed on that provider -- the code that fills the map is not visible here; confirm.
using NodePlacementMap = std::unordered_map<std::string, std::vector<std::string>>;
// Distinct execution provider type strings seen on the nodes that were visited.
using NodePlacementSet = std::unordered_set<std::string>;
static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_verbose,
NodePlacementMap& node_placements) {
NodePlacementMap& node_placements,
NodePlacementSet& node_placement_provider_set) {
for (const auto& node : graph.Nodes()) {
const auto& node_provider = node.GetExecutionProviderType();
if (node_provider.empty()) {
@ -1148,6 +1150,8 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver
node.OpType(), "(", node.SinceVersion(), ") node with name '", node.Name(), "'");
}
node_placement_provider_set.insert(node_provider);
#if !defined(ORT_MINIMAL_BUILD)
if (is_verbose) { // TODO: should we disable this if the number of nodes is above a certain threshold?
const std::string node_str = node.OpType() + " (" + node.Name() + ")";
@ -1159,7 +1163,8 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver
if (node.ContainsSubgraph()) {
const auto subgraphs = node.GetSubgraphs();
for (const auto& subgraph : subgraphs) {
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(*subgraph, is_verbose, node_placements));
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(*subgraph, is_verbose, node_placements,
node_placement_provider_set));
}
}
}
@ -1167,8 +1172,10 @@ static Status VerifyEachNodeIsAssignedToAnEpImpl(const Graph& graph, bool is_ver
return Status::OK();
}
static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::Logger& logger) {
static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::Logger& logger,
const ExecutionProviders& providers) {
NodePlacementMap node_placements{};
NodePlacementSet node_placement_provider_set{};
#if !defined(ORT_MINIMAL_BUILD)
const bool is_verbose_mode = logger.GetSeverity() == logging::Severity::kVERBOSE;
#else
@ -1176,7 +1183,7 @@ static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::
const bool is_verbose_mode = false;
#endif // !defined(ORT_MINIMAL_BUILD)
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(graph, is_verbose_mode, node_placements));
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEpImpl(graph, is_verbose_mode, node_placements, node_placement_provider_set));
#if !defined(ORT_MINIMAL_BUILD)
// print placement info
@ -1196,6 +1203,17 @@ static Status VerifyEachNodeIsAssignedToAnEp(const Graph& graph, const logging::
}
#endif // !defined(ORT_MINIMAL_BUILD)
// Silent fallback from GPU/NPU to CPU nodes can cause performance issues due to memory copies and frequent stalls.
// If the user explicitly included the CPU provider anyway, then remain silent, but if it was implicitly added,
// and unexpected fallback happened to a non-preferred provider, warn the user.
size_t explicit_provider_count = providers.NumProviders() - (providers.GetCpuProviderWasImplicitlyAdded() ? 1 : 0);
if (node_placement_provider_set.size() > explicit_provider_count) {
LOGS(logger, WARNING) << "Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.";
if (!is_verbose_mode) {
LOGS(logger, WARNING) << "Rerunning with verbose output on a non-minimal build will show node assignments.";
}
}
return Status::OK();
}
@ -1213,10 +1231,10 @@ Status SessionState::FinalizeSessionState(const std::basic_string<PATH_CHAR_TYPE
if (serialized_session_state) {
ORT_RETURN_IF_ERROR(LoadFromOrtFormat(*serialized_session_state, kernel_registry_manager));
// LoadFromOrtFormat() may assign node EPs so check afterwards
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEp(graph_, logger_));
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEp(graph_, logger_, execution_providers_));
} else {
#if !defined(ORT_MINIMAL_BUILD)
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEp(graph_, logger_));
ORT_RETURN_IF_ERROR(VerifyEachNodeIsAssignedToAnEp(graph_, logger_, execution_providers_));
ORT_RETURN_IF_ERROR(PopulateKernelCreateInfo(kernel_registry_manager, saving_ort_format));
#else
ORT_UNUSED_PARAMETER(graph_location);

View file

@ -1233,6 +1233,7 @@ common::Status InferenceSession::Initialize() {
CPUExecutionProviderInfo epi{session_options_.enable_cpu_mem_arena};
auto p_cpu_exec_provider = std::make_unique<CPUExecutionProvider>(epi);
ORT_RETURN_IF_ERROR_SESSIONID_(RegisterExecutionProvider(std::move(p_cpu_exec_provider)));
execution_providers_.SetCpuProviderWasImplicitlyAdded(true);
}
// re-acquire mutex