mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
Fix bug in allocation planner while planning location for initializers (#9306)
This commit is contained in:
parent
68601fc296
commit
5f5f28bf14
1 changed files with 25 additions and 7 deletions
|
|
@ -698,6 +698,16 @@ class PlannerImpl {
|
|||
}
|
||||
|
||||
auto wt_index = Index(def_name);
|
||||
// TODO: Identify error cases where-in an initializer is used on different
|
||||
// devices within the same graph level.
|
||||
// If we ever encounter that, it means that there is a severe bug in Memcpy
|
||||
// transformer and the model will crash while running. The Memcpy transformer
|
||||
// is supposed to duplicate initializers being used on different devices within
|
||||
// the same graph level and hence we should never see an initializer being used
|
||||
// on different devices here.
|
||||
// The same initializer being used on different devices across graph levels
|
||||
// (subgraphs) is okay and utils::CopyInputsAcrossDevices() will take it to
|
||||
// the right device before subgraph execution.
|
||||
locations[wt_index].emplace_back(
|
||||
GetLocationForNodeInput(node_input_index, node, kernel_create_info_map));
|
||||
}
|
||||
|
|
@ -730,6 +740,20 @@ class PlannerImpl {
|
|||
}
|
||||
|
||||
Status GeneratePlanForWeights() {
|
||||
// TODO: Move away from usage of vector of `OrtMemoryInfo`s per weight (initializer)
|
||||
// We do not need to maintain a vector of locations that a weight is used in.
|
||||
// We only need to know the location of its first usage because:
|
||||
// (1) If the initializer is used in the graph level it is introduced in, then it can
|
||||
// only be used on one device as the Memcpy transformer will duplicate the initializer
|
||||
// (with a different name) in case it is used on multiple devices.
|
||||
// If the initializer is also additionally used in one of the subgraphs, we rely
|
||||
// on the utils::CopyInputsAcrossDevices() to copy it over to the appropriate device
|
||||
// before the subgraphs are executed.
|
||||
// (2) If the initializer is NOT used in the level it is introduced in and only used
|
||||
// in subgraphs, even then knowing its first usage location is enough as it can't be
|
||||
// used on different devices within the same graph level (see (1) for reason), and for
|
||||
// nested subgraphs, we can rely on the utils::CopyInputsAcrossDevices() to copy it
|
||||
// over to the appropriate device before the subgraphs are executed.
|
||||
std::vector<std::vector<OrtMemoryInfo>> locations(plan_.allocation_plan.size());
|
||||
|
||||
GeneratePlanForWeightsHelper(graph_viewer_, graph_viewer_.GetAllInitializedTensors(),
|
||||
|
|
@ -739,6 +763,7 @@ class PlannerImpl {
|
|||
const std::vector<OrtMemoryInfo>& loc = locations[i];
|
||||
if (loc.empty()) continue;
|
||||
plan_.allocation_plan[i].alloc_kind = AllocKind::kAllocateStatically;
|
||||
// The planned location for an initializer is the location of its first usage.
|
||||
plan_.allocation_plan[i].location = loc[0];
|
||||
#if !defined(ORT_MINIMAL_BUILD) && defined(ORT_MEMORY_PROFILE)
|
||||
size_t max_pc = plan_.execution_plan.size();
|
||||
|
|
@ -748,13 +773,6 @@ class PlannerImpl {
|
|||
plan_.allocation_plan[i].value_type = utils::GetMLDataType(*node_arg);
|
||||
plan_.allocation_plan[i].life_interval = std::pair<size_t, size_t>(0, max_pc);
|
||||
#endif
|
||||
for (size_t j = 0; j != loc.size(); ++j) {
|
||||
if (loc[j] != loc[0]) {
|
||||
// set the location to CPU
|
||||
plan_.allocation_plan[i].location = execution_providers_.GetDefaultCpuMemoryInfo();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue