Fix bug in allocation planner while planning location for initializers (#9306)

This commit is contained in:
Hariharan Seshadri 2021-10-07 19:05:07 -07:00 committed by GitHub
parent 68601fc296
commit 5f5f28bf14
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -698,6 +698,16 @@ class PlannerImpl {
}
auto wt_index = Index(def_name);
// TODO: Identify error cases where-in an initializer is used on different
// devices within the same graph level.
// If we ever encounter that, it means that there is a severe bug in Memcpy
// transformer and the model will crash while running. The Memcpy transformer
// is supposed to duplicate initializers being used on different devices within
// the same graph level and hence we should never see an initializer being used
// on different devices here.
// The same initializer being used on different devices across graph levels
// (subgraphs) is okay and utils::CopyInputsAcrossDevices() will take it to
// the right device before subgraph execution.
locations[wt_index].emplace_back(
GetLocationForNodeInput(node_input_index, node, kernel_create_info_map));
}
@ -730,6 +740,20 @@ class PlannerImpl {
}
Status GeneratePlanForWeights() {
// TODO: Move away from usage of vector of `OrtMemoryInfo`s per weight (initializer)
// We do not need to maintain a vector of locations that a weight is used in.
// We only need to know the location of its first usage because:
// (1) If the initializer is used in the graph level it is introduced in, then it can
// only be used on one device as the Memcpy transformer will duplicate the initializer
// (with a different name) in case it is used on multiple devices.
// If the initializer is also additionally used in one of the subgraphs, we rely
// on the utils::CopyInputsAcrossDevices() to copy it over to the appropriate device
// before the subgraphs are executed.
// (2) If the initializer is NOT used in the level it is introduced in and only used
// in subgraphs, even then knowing its first usage location is enough as it can't be
// used on different devices within the same graph level (see (1) for reason), and for
// nested subgraphs, we can rely on the utils::CopyInputsAcrossDevices() to copy it
// over to the appropriate device before the subgraphs are executed.
std::vector<std::vector<OrtMemoryInfo>> locations(plan_.allocation_plan.size());
GeneratePlanForWeightsHelper(graph_viewer_, graph_viewer_.GetAllInitializedTensors(),
@ -739,6 +763,7 @@ class PlannerImpl {
const std::vector<OrtMemoryInfo>& loc = locations[i];
if (loc.empty()) continue;
plan_.allocation_plan[i].alloc_kind = AllocKind::kAllocateStatically;
// The planned location for an initializer is the location of its first usage.
plan_.allocation_plan[i].location = loc[0];
#if !defined(ORT_MINIMAL_BUILD) && defined(ORT_MEMORY_PROFILE)
size_t max_pc = plan_.execution_plan.size();
@ -748,13 +773,6 @@ class PlannerImpl {
plan_.allocation_plan[i].value_type = utils::GetMLDataType(*node_arg);
plan_.allocation_plan[i].life_interval = std::pair<size_t, size_t>(0, max_pc);
#endif
for (size_t j = 0; j != loc.size(); ++j) {
if (loc[j] != loc[0]) {
// set the location to CPU
plan_.allocation_plan[i].location = execution_providers_.GetDefaultCpuMemoryInfo();
break;
}
}
}
return Status::OK();
}