Add UseCount for External Outputs (#6894)

* add usecount for external outputs

* add unit test
This commit is contained in:
Vincent Wang 2021-03-09 17:06:27 +08:00 committed by GitHub
parent f1ade14e44
commit 91c6a330c0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 10 deletions

View file

@ -427,6 +427,15 @@ class PlannerImpl {
plan_.allocation_plan.resize(num_ml_values);
}
bool ExternalOutputs(const Node& node) const {
const KernelCreateInfo& ci = GetKernelCreateInfo(kernel_create_info_map_, node.Index());
if (ci.kernel_def == nullptr) {
return false;
}
return ci.kernel_def->ExternalOutputs();
}
Status ComputeUseCounts() {
// Note: for every ml-value, its definition must appear before all its uses in a topological sort of a valid model
std::unordered_set<std::string> graph_inputs;
@ -511,12 +520,14 @@ class PlannerImpl {
auto outputs = pnode->OutputDefs();
auto num_outputs = outputs.size();
bool external_outputs = ExternalOutputs(*pnode);
for (size_t i = 0; i < num_outputs; ++i) {
auto* node_output = outputs[i];
if (!node_output->Exists()) continue;
OrtValueIndex index = Index(node_output->Name());
ProcessDef(index, node_output);
++UseCount(index);
// Ensures external outputs will not be reused.
UseCount(index) += (external_outputs ? 2 : 1);
auto allocator = exec_provider->GetAllocator(0, p_kernel_def->OutputMemoryType(i));
ORT_ENFORCE(allocator);
plan_.SetLocation(static_cast<size_t>(index),
@ -600,15 +611,6 @@ class PlannerImpl {
return Status::OK();
}
bool ExternalOutputs(const Node& node) const {
const KernelCreateInfo& ci = GetKernelCreateInfo(kernel_create_info_map_, node.Index());
if (ci.kernel_def == nullptr) {
return false;
}
return ci.kernel_def->ExternalOutputs();
}
// Should only be used after ProcessDef()
Status ComputeReusePlan() {
std::vector<SequentialExecutionPlan::NodeExecutionPlan>& execution_plan(plan_.execution_plan);

View file

@ -156,6 +156,7 @@ class PlannerTest : public ::testing::Test {
std::unique_ptr<::onnxruntime::KernelDef> std_kernel_; // a unary kernel with no-aliasing and no-in-place
std::unique_ptr<::onnxruntime::KernelDef> in_place_kernel_; // a unary kernel with in-place
std::unique_ptr<::onnxruntime::KernelDef> external_outputs_kernel_; // a unary kernel with external outputs
std::unordered_map<std::string, onnxruntime::NodeArg*> name_to_arg_;
std::vector<std::unique_ptr<UnaryNode>> nodes_;
@ -178,6 +179,8 @@ class PlannerTest : public ::testing::Test {
std_kernel_ = KernelDefBuilder().SetName("Transpose").Provider(kCpuExecutionProvider).SinceVersion(1, 10).Build();
in_place_kernel_ =
KernelDefBuilder().SetName("Relu").Provider(kCpuExecutionProvider).SinceVersion(1, 10).MayInplace(0, 0).Build();
external_outputs_kernel_ =
KernelDefBuilder().SetName("Tanh").Provider(kCpuExecutionProvider).SinceVersion(1, 10).ExternalOutputs().Build();
CPUExecutionProviderInfo epi;
auto execution_provider = onnxruntime::make_unique<CPUExecutionProvider>(epi);
execution_providers_.Add("CPUExecutionProvider", std::move(execution_provider));
@ -209,6 +212,10 @@ class PlannerTest : public ::testing::Test {
return AddNode(*in_place_kernel_, input, output);
}
// Adds a node bound to the external-outputs kernel definition, reading `input` and
// producing `output`; returns whatever AddNode returns for the new node.
onnxruntime::Node* AddExternalOutputsNode(std::string& input, std::string& output) {
  ::onnxruntime::KernelDef& kernel_def = *external_outputs_kernel_;
  return AddNode(kernel_def, input, output);
}
void BindKernel(onnxruntime::Node* p_node, ::onnxruntime::KernelDef& kernel_def, KernelRegistry* reg,
std::unordered_map<NodeIndex, gsl::not_null<const KernelCreateInfo*>>& kernel_create_info_map) {
const IExecutionProvider* ep = execution_providers_.Get(*p_node);
@ -403,6 +410,35 @@ TEST_F(PlannerTest, InPlaceTest) {
CheckFreed(2, {X2});
}
// ExternalOutputsTest: Check that the output of an external-outputs kernel is planned as
// pre-existing (not reused, not freed) while the downstream temporary is allocated and freed.
TEST_F(PlannerTest, ExternalOutputsTest) {
  // tensor variables, named by their role in the chain:
  std::string graph_input("X1");
  std::string external_value("X2");
  std::string temp_value("X3");
  std::string graph_output("X4");

  // graph structure: X1 -> [external-outputs op] -> X2 -> [normal op] -> X3 -> [normal op] -> X4
  AddExternalOutputsNode(graph_input, external_value);  // X1: input; X2: temporary
  AddNormalNode(external_value, temp_value);            // X3: temporary
  AddNormalNode(temp_value, graph_output);              // X4: output

  // simulate shape-inference results: every tensor shares one symbolic shape
  Shape symbolic_shape{"M", "N"};
  auto shape_ptr = &symbolic_shape.value;
  SetShape({{graph_input, shape_ptr},
            {external_value, shape_ptr},
            {temp_value, shape_ptr},
            {graph_output, shape_ptr}});

  CreatePlan();

  // check allocation kind:
  CheckAllocKind(graph_input, AllocKind::kPreExisting);
  CheckAllocKind(external_value, AllocKind::kPreExisting);
  CheckAllocKind(temp_value, AllocKind::kAllocate);
  CheckAllocKind(graph_output, AllocKind::kAllocateOutput);

  // check each ml-value is freed at the appropriate step:
  // X2 is neither reused nor freed; X3 is allocated and then freed after the last node.
  CheckFreed(0, {});
  CheckFreed(1, {});
  CheckFreed(2, {temp_value});
}
// InPlaceSizeMismatchTest: Check that Inplace reuse is not allowed when sizes don't match.
// Also tests reuse of disjoint lifetime tensors.
TEST_F(PlannerTest, InPlaceSizeMismatchTest) {