mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-28 22:56:32 +00:00
Optimize constant sharing perf (#20143)
### Optimize constant sharing perf by avoiding renaming the first time we detect a constant pattern. Currently, every time we run ConstantSharing, for each initializer whose pattern does not yet exist, we create a new NodeArg with a unique name. Later, if other initializers share the same pattern, they are replaced by that NodeArg. The problem is: even when there are no real constant sharing cases, we still modify the graph for each initializer. This is not needed. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
This commit is contained in:
parent
07b5377f7c
commit
81005e2c92
2 changed files with 19 additions and 18 deletions
|
|
@ -32,10 +32,9 @@ using SupportedTypeList = boost::mp11::mp_list<MLFloat16, float, double, int32_t
|
|||
// A threshold is defined here to restrict the graph transformation only applied to small tensors.
|
||||
// Note: a bigger threshold means more overhead when we do the graph transformations.
|
||||
// `8` is chosen to cover common constant use cases in some Reshape/Gather/Concat's inputs.
|
||||
// TODO(pengwa): we can gradually increase this threshold if we see more benefits (memory saving
|
||||
// TODO(pengwa): we can gradually increase this threshold if we see more benefits (memory-saving
|
||||
// or more CSE optimizations triggered). Should be careful to cover test cases that assume initializer
|
||||
// name did not change after transformation then.
|
||||
static constexpr char SHARED_INITIALIZER_PREFIX[] = "ortshared_";
|
||||
|
||||
bool IsAllowedToShare(const ONNX_NAMESPACE::TensorShapeProto* input_shape,
|
||||
int64_t& num_elements) {
|
||||
|
|
@ -78,7 +77,7 @@ bool PrepareInputPortsToReplace(Graph& graph, const NodeArg* origin_initializer_
|
|||
}
|
||||
|
||||
// Iterate all input defs to replace those that are equal to origin_initializer_node_arg,
|
||||
// Then it would be safe to remove the consumer node afterwards.
|
||||
// Then it would be safe to remove the consumer node afterward.
|
||||
for (int i = 0; i < static_cast<int>(const_node->InputDefs().size()); ++i) {
|
||||
if (const_node->InputDefs()[i] == origin_initializer_node_arg) {
|
||||
consumer_node_to_input_ports_map[const_node].push_back(i);
|
||||
|
|
@ -233,24 +232,17 @@ Status ConstantSharing::ApplyImpl(Graph& graph, bool& modified, int /*graph_leve
|
|||
size_t value_id = GetOrAddValueInConstantStore(std::move(init_value), const_value_store, data_store_key);
|
||||
|
||||
// Construct a string by data type, value, and rank. Used as a key in pattern_key_to_shared_arg_map.
|
||||
const std::string pattern_key = MakeString(SHARED_INITIALIZER_PREFIX, data_store_key, "_", value_id);
|
||||
const std::string pattern_key = MakeString(data_store_key, "_", value_id);
|
||||
|
||||
// If there is no such existing scalar pattern, add a new one.
|
||||
if (pattern_key_to_shared_arg_map.find(pattern_key) == pattern_key_to_shared_arg_map.end()) {
|
||||
// Do a copy and rename the TensorProto.
|
||||
ONNX_NAMESPACE::TensorProto constant_tensor_proto_as_replacement(*tensor_proto);
|
||||
constant_tensor_proto_as_replacement.set_name(graph.GenerateNodeArgName(pattern_key));
|
||||
NodeArg& shared_scalar_initializer_node_arg = graph_utils::AddInitializer(graph,
|
||||
constant_tensor_proto_as_replacement);
|
||||
pattern_key_to_shared_arg_map[pattern_key] = &shared_scalar_initializer_node_arg;
|
||||
pattern_key_to_shared_arg_map[pattern_key] = origin_initializer_node_arg;
|
||||
} else {
|
||||
shared_count += 1;
|
||||
ReplaceInputsToUseSharedInitializer(graph, consumer_node_to_input_ports_map, origin_initializer_node_arg,
|
||||
pattern_key_to_shared_arg_map[pattern_key]);
|
||||
modified = true;
|
||||
}
|
||||
|
||||
ReplaceInputsToUseSharedInitializer(graph, consumer_node_to_input_ports_map, origin_initializer_node_arg,
|
||||
pattern_key_to_shared_arg_map[pattern_key]);
|
||||
|
||||
modified = true;
|
||||
}
|
||||
if (shared_count > 0) {
|
||||
LOGS(logger, INFO) << "Total shared scalar initializer count: " << shared_count;
|
||||
|
|
|
|||
|
|
@ -14,13 +14,13 @@ namespace onnxruntime {
|
|||
@class ConstantSharing
|
||||
|
||||
Transformer that traverses the graph top-down and performs constant sharing, i.e.,
|
||||
constant initializers having same dtype, value and shape, will be replaced by one single (newly created) initializer.
|
||||
Currently, only scalar valued initializers are handled.
|
||||
constant initializers having same data type, value and shape, will be replaced by one single initializer.
|
||||
Currently, only scalar-valued initializers are handled.
|
||||
*/
|
||||
class ConstantSharing : public GraphTransformer {
|
||||
public:
|
||||
/**
|
||||
* @param compatible_execution_providers comptatible execution provider list for considered nodes.
|
||||
* @param compatible_execution_providers compatible execution provider list for considered nodes.
|
||||
* @param excluded_initializers explicitly excluded initializer names that should not be changed.
|
||||
*/
|
||||
ConstantSharing(const InlinedHashSet<std::string_view>& compatible_execution_providers = {},
|
||||
|
|
@ -29,6 +29,15 @@ class ConstantSharing : public GraphTransformer {
|
|||
excluded_initializers_(excluded_initializers) {
|
||||
}
|
||||
|
||||
bool ShouldOnlyApplyOnce() const override {
|
||||
#if defined(ENABLE_TRAINING)
|
||||
return false;
|
||||
#else
|
||||
// Reduce model processing time by applying this optimization only once for inference.
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
static constexpr int64_t TENSOR_ELEM_COUNT_THRESHOLD = 8;
|
||||
|
||||
private:
|
||||
|
|
|
|||
Loading…
Reference in a new issue