mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
Correctly handle implicit inputs for fused nodes (#2390)
* Correctly handle implicit inputs for fused nodes Previously, nuphar's partitioning function didn't include node's implicit inputs into the inputs list of MetaDef, and hence a crash was triggered in the onnx graph checker. This commit fixed the issue. Furthermore, it also fixed a related issue where we didn't add implicit inputs into graph_inputs_excluding_initializers_ in Graph::SetGraphInputsOutputs. the issue was that graph_inputs_including_initializers_ populated by SetInputs (e.g. called by FunctionImpl::FunctionImpl) may contain implicit inputs which were not of any node's initializers in the graph. Because they were not part of any initializers, these implicit inputs couldn't be visited by going through all nodes' inputs. Consequently, they would *not* be added into graph_inputs_excluding_initializers_. We fixed the issue by first copying the populated graph_inputs_including_initializers_ into graph_inputs_excluding_initalizers_, which then had both initializers and non-initializers as its initial content. Later, we erase initializers from the list. In this way, we can ensure all implicit inputs to remain in graph_inputs_excluding_initializers_. * refined comments and fixed duplicates Address CR by revisiting comments in terms of implicit inputs Also fixed an issue by skipping duplicates while copying inputs from graph_inputs_including_initializers_. * address CR explain why we need to collect nodes' implicit inputs * don't rely on pointer values for iterating std::set Previously, openvino relied on iterating a set of NodeArg pointers to construct inputs and outputs for a fused graph. It could cause non-determinism. The reason was that although iterating std::set by itself is stable, pointer values of NodeArgs may vary. Consequently, we could end up visiting the set's elements in different orders for different runs for the same test, which resulted in constructing inputs (and outputs) with different orders to the fused graph. For example, for the same test, we may have inputs [A, B] in some runs but inputs[B, A] in others. Let's use std::string as the key type to avoid such nondeterminism. This commit also added implicit inputs into meta->inputs while returning the capability from the openvino provider. * Fixed another latent issue in openvino's GetCapability function The issue was that we couldn't simply erase fused_inputs and fused_outputs while iterating the nodes. For example, an output NodeArg may have multiple uses, and it's wrong if we erase it from fused_outputs when we encounter only one of its uses as input.
This commit is contained in:
parent
6bcf95477e
commit
d486481455
4 changed files with 110 additions and 50 deletions
|
|
@ -140,18 +140,21 @@ std::unique_ptr<ComputeCapability> ToCapacity(const onnxruntime::GraphViewer& gr
|
|||
|
||||
for (const auto& node_index : subgraph->nodes) {
|
||||
const auto& node = *graph.GetNode(node_index);
|
||||
auto process_input_fn =
|
||||
[&meta_def, &node, &node_indices](const onnxruntime::NodeArg& def, size_t) {
|
||||
const onnxruntime::Node* input_node = GetInputNode(node, &def);
|
||||
bool input_from_subgraph = (input_node && node_indices.count(input_node->Index()));
|
||||
if (!input_from_subgraph) {
|
||||
// input is from weights or outside of graph
|
||||
meta_def->inputs.push_back(def.Name());
|
||||
}
|
||||
return Status::OK();
|
||||
};
|
||||
// handle current graph's inputs
|
||||
node.ForEachWithIndex(
|
||||
node.InputDefs(),
|
||||
[&meta_def, &node, &node_indices](const onnxruntime::NodeArg& def, size_t) {
|
||||
const onnxruntime::Node* input_node = GetInputNode(node, &def);
|
||||
bool input_from_subgraph = (input_node && node_indices.count(input_node->Index()));
|
||||
if (!input_from_subgraph) {
|
||||
// input is from weights or outside of graph
|
||||
meta_def->inputs.push_back(def.Name());
|
||||
}
|
||||
return Status::OK();
|
||||
});
|
||||
node.ForEachWithIndex(node.InputDefs(), process_input_fn);
|
||||
// nodes' implicit inputs also need to be collected. They need to
|
||||
// be promoted to being explicit inputs for everything to work.
|
||||
node.ForEachWithIndex(node.ImplicitInputDefs(), process_input_fn);
|
||||
|
||||
// Handle outouts
|
||||
// two cases are considerd as outputs
|
||||
|
|
@ -211,15 +214,16 @@ std::unique_ptr<ComputeCapability> ToCapacity(const onnxruntime::GraphViewer& gr
|
|||
// Therefore, the all inner nested subgraph's initializers should be already in the immediate nested subgraph's inputs.
|
||||
if (nullptr != immediate_nested_subgraph) {
|
||||
for (auto& n : immediate_nested_subgraph->Nodes()) {
|
||||
n.ForEachWithIndex(
|
||||
n.InputDefs(),
|
||||
[&meta_def, &all_initializers](const onnxruntime::NodeArg& def, size_t) {
|
||||
auto iter = all_initializers.find(def.Name());
|
||||
if (iter != all_initializers.end()) {
|
||||
meta_def->inputs.push_back(def.Name());
|
||||
}
|
||||
return Status::OK();
|
||||
});
|
||||
auto add_input_fn =
|
||||
[&meta_def, &all_initializers](const onnxruntime::NodeArg& def, size_t) {
|
||||
auto iter = all_initializers.find(def.Name());
|
||||
if (iter != all_initializers.end()) {
|
||||
meta_def->inputs.push_back(def.Name());
|
||||
}
|
||||
return Status::OK();
|
||||
};
|
||||
n.ForEachWithIndex(n.InputDefs(), add_input_fn);
|
||||
n.ForEachWithIndex(n.ImplicitInputDefs(), add_input_fn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2446,10 +2446,6 @@ void Graph::CleanUnusedInitializers() {
|
|||
|
||||
GSL_SUPPRESS(es .84) // warning about ignoring return value from insert(...)
|
||||
Status Graph::SetGraphInputsOutputs() {
|
||||
// Reset graph inputs excluding initializers/value_info.
|
||||
graph_inputs_excluding_initializers_.clear();
|
||||
value_info_.clear();
|
||||
|
||||
// Flag indicates that this graph is loaded from model file.
|
||||
// If it's true, then graph inputs and outputs will keep the same
|
||||
// as what are specified in the model, otherwise, graph inputs
|
||||
|
|
@ -2457,6 +2453,10 @@ Status Graph::SetGraphInputsOutputs() {
|
|||
const bool loaded_from_model_file = GraphLoadedFromModelFile(graph_proto_);
|
||||
|
||||
if (loaded_from_model_file) {
|
||||
// Reset graph inputs excluding initializers/value_info.
|
||||
graph_inputs_excluding_initializers_.clear();
|
||||
value_info_.clear();
|
||||
|
||||
// Reset graph inputs/outputs.
|
||||
graph_inputs_including_initializers_.clear();
|
||||
graph_outputs_.clear();
|
||||
|
|
@ -2531,13 +2531,36 @@ Status Graph::SetGraphInputsOutputs() {
|
|||
}
|
||||
|
||||
} else {
|
||||
// Reset value_info.
|
||||
value_info_.clear();
|
||||
|
||||
std::unordered_map<std::string, size_t> output_name_to_node_arg_index;
|
||||
std::vector<const NodeArg*> output_node_args_in_order;
|
||||
|
||||
// if something is coming from outer scope, consider it already added
|
||||
std::unordered_set<std::string> added_input_names{outer_scope_node_arg_names_};
|
||||
graph_inputs_excluding_initializers_.clear();
|
||||
if (!graph_inputs_manually_set_) {
|
||||
graph_inputs_including_initializers_.clear();
|
||||
} else {
|
||||
// If we've set graph_inputs_including_initializers_ by calling SetInputs,
|
||||
// we copy its non-duplicate elements to graph_inputs_excluding_initializers_.
|
||||
// Later, we will erase initializers from graph_inputs_excluding_initializers_
|
||||
// if graph_inputs_manually_set_ is true.
|
||||
// In this way, we can ensure graph_inputs_excluding_initializers_ is the full
|
||||
// set of inputs less initializers, which could be a graph input used only
|
||||
// by a subgraph and thereby only an implicit input to a node, or a graph input
|
||||
// not used anywhere.
|
||||
// We also make sure graph_inputs_excluding_initializers_ list doesn't have any
|
||||
// duplicate names.
|
||||
std::unordered_set<std::string> existing_names;
|
||||
for (auto arg : graph_inputs_including_initializers_) {
|
||||
const std::string& name = arg->Name();
|
||||
if (existing_names.count(name) == 0) {
|
||||
graph_inputs_excluding_initializers_.push_back(arg);
|
||||
existing_names.insert(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!graph_outputs_manually_set_) {
|
||||
|
|
@ -2581,6 +2604,10 @@ Status Graph::SetGraphInputsOutputs() {
|
|||
if (!is_initializer || ir_version_ < 4) {
|
||||
graph_inputs_including_initializers_.push_back(input_arg);
|
||||
}
|
||||
if (!is_initializer) {
|
||||
// If input_arg is not of an initializer, we add it into graph_inputs_excluding_initializers_.
|
||||
graph_inputs_excluding_initializers_.push_back(input_arg);
|
||||
}
|
||||
} else {
|
||||
// graph_inputs_including_initializers_ has been manually populated by SetInputs.
|
||||
// Validation: the <input_arg> must be in graph inputs or initializers when it's manually set.
|
||||
|
|
@ -2591,13 +2618,18 @@ Status Graph::SetGraphInputsOutputs() {
|
|||
return Status(ONNXRUNTIME, FAIL,
|
||||
name + " must be either specified in graph inputs or graph initializers.");
|
||||
}
|
||||
} else {
|
||||
// If arg_input is of an initializer, we remove it from graph_inputs_excluding_initializers_
|
||||
// whose initial content has both initializers and non-initializers.
|
||||
auto input_pos = std::find(graph_inputs_excluding_initializers_.begin(),
|
||||
graph_inputs_excluding_initializers_.end(),
|
||||
input_arg);
|
||||
if (input_pos != graph_inputs_excluding_initializers_.end()) {
|
||||
graph_inputs_excluding_initializers_.erase(input_pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_initializer) {
|
||||
graph_inputs_excluding_initializers_.push_back(input_arg);
|
||||
}
|
||||
|
||||
added_input_names.insert(name);
|
||||
}
|
||||
} else if (graph_output_args.erase(output_arg_iter->first) >= 1) {
|
||||
|
|
|
|||
|
|
@ -523,7 +523,22 @@ std::vector<std::unique_ptr<ComputeCapability>> OpenVINOExecutionProvider::GetCa
|
|||
|
||||
auto model_proto = GetModelProtoFromFusedNode(graph_viewer);
|
||||
|
||||
std::set<const onnxruntime::NodeArg *> fused_inputs, fused_outputs;
|
||||
// Make sure *not* to use a pointer type for the key, which can cause nondeterminism
|
||||
// for iterating the set elements. The reason is that although iterating std::set by
|
||||
// itself is stable, pointer values of NodeArgs may vary. Consequently, we may end up
|
||||
// visiting the set's elements in different orders for different runs for the same test,
|
||||
// which will result in constructing inputs (and outputs) with different orders to
|
||||
// the fused graph. For example, for the same test, we may have inputs [A, B] in some
|
||||
// runs but inputs[B, A] in others.
|
||||
// Let's use std::string as the key type to avoid such nondeterminism.
|
||||
std::set<std::string> fused_inputs, fused_outputs;
|
||||
|
||||
// Keep inputs and outputs inside the fused graph, i.e. those NodeArgs
|
||||
// being consumed by nodes within the fused graph. Note that we cannot
|
||||
// simply erase those inner args while iterating the nodes. For example,
|
||||
// an output arg may have multiple uses, it would be wrong if we erase it
|
||||
// from fused_outputs when we encounter only one of its uses as inputs.
|
||||
std::set<std::string> inner_inputs, inner_outputs;
|
||||
|
||||
try {
|
||||
CheckGraphSupported(graph_viewer, device_id);
|
||||
|
|
@ -552,21 +567,32 @@ std::vector<std::unique_ptr<ComputeCapability>> OpenVINOExecutionProvider::GetCa
|
|||
sub_graph->nodes.push_back(index);
|
||||
const auto node = graph_viewer.GetNode(index);
|
||||
|
||||
// Track graph inputs and initializers
|
||||
for (const auto& input_def : node->InputDefs()) {
|
||||
if (fused_outputs.find(input_def) == fused_outputs.end()) {
|
||||
fused_inputs.insert(input_def);
|
||||
} else {
|
||||
fused_outputs.erase(input_def);
|
||||
}
|
||||
}
|
||||
auto process_input_fn =
|
||||
[&fused_inputs, &fused_outputs, &inner_inputs, &inner_outputs, &node](
|
||||
const onnxruntime::NodeArg& input_def, size_t) {
|
||||
|
||||
const auto& name = input_def.Name();
|
||||
if (fused_outputs.find(name) == fused_outputs.end()) {
|
||||
fused_inputs.insert(name);
|
||||
} else {
|
||||
inner_outputs.insert(name);
|
||||
}
|
||||
return Status::OK();
|
||||
};
|
||||
|
||||
// Track graph inputs (both explicit and implicit) and initializers
|
||||
node->ForEachWithIndex(node->InputDefs(), process_input_fn);
|
||||
// Implicit inputs will be promoted to be explicit inputs of the fused
|
||||
// graph later by ORT.
|
||||
node->ForEachWithIndex(node->ImplicitInputDefs(), process_input_fn);
|
||||
|
||||
// Track graph outputs
|
||||
for (const auto& output_def : node->OutputDefs()) {
|
||||
if (fused_inputs.find(output_def) == fused_inputs.end()) {
|
||||
fused_outputs.insert(output_def);
|
||||
const auto& name = output_def->Name();
|
||||
if (fused_inputs.find(name) == fused_inputs.end()) {
|
||||
fused_outputs.insert(name);
|
||||
} else {
|
||||
fused_inputs.erase(output_def);
|
||||
inner_inputs.insert(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -588,12 +614,16 @@ std::vector<std::unique_ptr<ComputeCapability>> OpenVINOExecutionProvider::GetCa
|
|||
meta_def->domain = "OpenVINO";
|
||||
meta_def->since_version = 1;
|
||||
|
||||
for (auto input : fused_inputs) {
|
||||
meta_def->inputs.push_back(input->Name());
|
||||
for (const auto& name : fused_inputs) {
|
||||
if (inner_inputs.count(name) == 0) {
|
||||
meta_def->inputs.push_back(name);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto output : fused_outputs) {
|
||||
meta_def->outputs.push_back(output->Name());
|
||||
for (const auto& name : fused_outputs) {
|
||||
if (inner_outputs.count(name) == 0) {
|
||||
meta_def->outputs.push_back(name);
|
||||
}
|
||||
}
|
||||
|
||||
sub_graph->SetMetaDef(meta_def);
|
||||
|
|
|
|||
|
|
@ -536,12 +536,6 @@ static void OuterScopeAccess_ShapeInMainGraph_NoTypeAndShapeInSubgraph(bool is_v
|
|||
|
||||
options.include_outer_scope_add = true;
|
||||
|
||||
// Scan9.OuterScopeAccess_ShapeInMainGraph_NoTypeAndShapeInSubgraph fails with nuphar. See Bug 525222.
|
||||
// Remove this once that is fixed.
|
||||
if (is_v8 == false) {
|
||||
options.excluded_provider_types.insert(kNupharExecutionProvider);
|
||||
}
|
||||
|
||||
ShortSequenceOneInBatchOneLoopStateVar(options);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue