mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-27 03:11:28 +00:00
Add UseCount for External Outputs (#6894)
* add usecount for external outputs * ut
This commit is contained in:
parent
f1ade14e44
commit
91c6a330c0
2 changed files with 48 additions and 10 deletions
|
|
@ -427,6 +427,15 @@ class PlannerImpl {
|
|||
plan_.allocation_plan.resize(num_ml_values);
|
||||
}
|
||||
|
||||
// Reports whether the kernel chosen for `node` is flagged as having external outputs.
// The kernel's create-info is looked up in kernel_create_info_map_ by the node's index.
// Returns false when that create-info carries no kernel definition; otherwise returns
// the kernel definition's ExternalOutputs() value.
bool ExternalOutputs(const Node& node) const {
|
||||
const KernelCreateInfo& ci = GetKernelCreateInfo(kernel_create_info_map_, node.Index());
|
||||
// No kernel definition available: treat the node as not having external outputs.
if (ci.kernel_def == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return ci.kernel_def->ExternalOutputs();
|
||||
}
|
||||
|
||||
Status ComputeUseCounts() {
|
||||
// Note: for every ml-value, its definition must appear before all its uses in a topological sort of a valid model
|
||||
std::unordered_set<std::string> graph_inputs;
|
||||
|
|
@ -511,12 +520,14 @@ class PlannerImpl {
|
|||
|
||||
auto outputs = pnode->OutputDefs();
|
||||
auto num_outputs = outputs.size();
|
||||
bool external_outputs = ExternalOutputs(*pnode);
|
||||
for (size_t i = 0; i < num_outputs; ++i) {
|
||||
auto* node_output = outputs[i];
|
||||
if (!node_output->Exists()) continue;
|
||||
OrtValueIndex index = Index(node_output->Name());
|
||||
ProcessDef(index, node_output);
|
||||
++UseCount(index);
|
||||
// Ensures external outputs will not be reused.
|
||||
UseCount(index) += (external_outputs ? 2 : 1);
|
||||
auto allocator = exec_provider->GetAllocator(0, p_kernel_def->OutputMemoryType(i));
|
||||
ORT_ENFORCE(allocator);
|
||||
plan_.SetLocation(static_cast<size_t>(index),
|
||||
|
|
@ -600,15 +611,6 @@ class PlannerImpl {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
// Reports whether the kernel chosen for `node` is flagged as having external outputs.
// The kernel's create-info is looked up in kernel_create_info_map_ by the node's index.
// Returns false when that create-info carries no kernel definition; otherwise returns
// the kernel definition's ExternalOutputs() value.
bool ExternalOutputs(const Node& node) const {
|
||||
const KernelCreateInfo& ci = GetKernelCreateInfo(kernel_create_info_map_, node.Index());
|
||||
// No kernel definition available: treat the node as not having external outputs.
if (ci.kernel_def == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return ci.kernel_def->ExternalOutputs();
|
||||
}
|
||||
|
||||
// Should only be used after ProcessDef()
|
||||
Status ComputeReusePlan() {
|
||||
std::vector<SequentialExecutionPlan::NodeExecutionPlan>& execution_plan(plan_.execution_plan);
|
||||
|
|
|
|||
|
|
@ -156,6 +156,7 @@ class PlannerTest : public ::testing::Test {
|
|||
|
||||
std::unique_ptr<::onnxruntime::KernelDef> std_kernel_; // a unary kernel with no-aliasing and no-in-place
|
||||
std::unique_ptr<::onnxruntime::KernelDef> in_place_kernel_; // a unary kernel with in-place
|
||||
std::unique_ptr<::onnxruntime::KernelDef> external_outputs_kernel_; // a unary kernel with external outputs
|
||||
|
||||
std::unordered_map<std::string, onnxruntime::NodeArg*> name_to_arg_;
|
||||
std::vector<std::unique_ptr<UnaryNode>> nodes_;
|
||||
|
|
@ -178,6 +179,8 @@ class PlannerTest : public ::testing::Test {
|
|||
std_kernel_ = KernelDefBuilder().SetName("Transpose").Provider(kCpuExecutionProvider).SinceVersion(1, 10).Build();
|
||||
in_place_kernel_ =
|
||||
KernelDefBuilder().SetName("Relu").Provider(kCpuExecutionProvider).SinceVersion(1, 10).MayInplace(0, 0).Build();
|
||||
external_outputs_kernel_ =
|
||||
KernelDefBuilder().SetName("Tanh").Provider(kCpuExecutionProvider).SinceVersion(1, 10).ExternalOutputs().Build();
|
||||
CPUExecutionProviderInfo epi;
|
||||
auto execution_provider = onnxruntime::make_unique<CPUExecutionProvider>(epi);
|
||||
execution_providers_.Add("CPUExecutionProvider", std::move(execution_provider));
|
||||
|
|
@ -209,6 +212,10 @@ class PlannerTest : public ::testing::Test {
|
|||
return AddNode(*in_place_kernel_, input, output);
|
||||
}
|
||||
|
||||
// Adds a node that uses external_outputs_kernel_ (the kernel definition built with
// ExternalOutputs()) by forwarding `input` and `output` to AddNode.
// Returns whatever node pointer AddNode returns.
onnxruntime::Node* AddExternalOutputsNode(std::string& input, std::string& output) {
|
||||
return AddNode(*external_outputs_kernel_, input, output);
|
||||
}
|
||||
|
||||
void BindKernel(onnxruntime::Node* p_node, ::onnxruntime::KernelDef& kernel_def, KernelRegistry* reg,
|
||||
std::unordered_map<NodeIndex, gsl::not_null<const KernelCreateInfo*>>& kernel_create_info_map) {
|
||||
const IExecutionProvider* ep = execution_providers_.Get(*p_node);
|
||||
|
|
@ -403,6 +410,35 @@ TEST_F(PlannerTest, InPlaceTest) {
|
|||
CheckFreed(2, {X2});
|
||||
}
|
||||
|
||||
// ExternalOutputsTest: Check that the output of an external-outputs kernel is planned as
// kPreExisting and never shows up in a free list, while a downstream temporary is still
// allocated and freed as usual.
TEST_F(PlannerTest, ExternalOutputsTest) {
|
||||
// tensor variables:
|
||||
std::string X1("X1"), X2("X2"), X3("X3"), X4("X4");
|
||||
|
||||
// graph structure:
|
||||
AddExternalOutputsNode(X1, X2); // external-outputs operator; X1: input; X2: temporary
|
||||
AddNormalNode(X2, X3); // normal operator; X3: temporary
|
||||
AddNormalNode(X3, X4); // normal operator; X4: output
|
||||
|
||||
// simulate shape-inference results:
|
||||
Shape shape1{"M", "N"};
|
||||
auto shape = &shape1.value;
|
||||
SetShape({{X1, shape}, {X2, shape}, {X3, shape}, {X4, shape}});
|
||||
|
||||
CreatePlan();
|
||||
|
||||
// check allocation kind:
|
||||
CheckAllocKind(X1, AllocKind::kPreExisting);
|
||||
// X2 comes from the external-outputs node, so it is expected to be kPreExisting
|
||||
// rather than kAllocate even though it is an intermediate value in this graph.
CheckAllocKind(X2, AllocKind::kPreExisting);
|
||||
CheckAllocKind(X3, AllocKind::kAllocate);
|
||||
CheckAllocKind(X4, AllocKind::kAllocateOutput);
|
||||
|
||||
// check each ml-value is freed at appropriate step
|
||||
// X2 will not be reused and will not be freed. X3 will be allocated and will be freed.
|
||||
CheckFreed(0, {});
|
||||
CheckFreed(1, {});
|
||||
CheckFreed(2, {X3});
|
||||
}
|
||||
|
||||
// InPlaceSizeMismatchTest: Check that Inplace reuse is not allowed when sizes don't match.
|
||||
// Also tests reuse of disjoint lifetime tensors.
|
||||
TEST_F(PlannerTest, InPlaceSizeMismatchTest) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue