Optimize constant sharing perf (#20143)

### Optimize constant sharing perf

by avoiding renaming the first time we detect a constant pattern.

Currently, every time ConstantSharing runs, for each initializer whose
pattern does not exist yet, we create a new NodeArg with a unique name.
Later, if other initializers share the same pattern, they are replaced
by that NodeArg.

The problem is: even when there are no real constant sharing cases, we
still modify the graph for each initializer. This is not needed.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
This commit is contained in:
pengwa 2024-04-09 12:04:36 +08:00 committed by GitHub
parent 07b5377f7c
commit 81005e2c92
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 19 additions and 18 deletions

View file

@ -32,10 +32,9 @@ using SupportedTypeList = boost::mp11::mp_list<MLFloat16, float, double, int32_t
// A threshold is defined here to restrict the graph transformation only applied to small tensors.
// Note: a bigger threshold means more overhead when we do the graph transformations.
// `8` is chosen to cover common constant use cases in some Reshape/Gather/Concat's inputs.
// TODO(pengwa): we can gradually increase this threshold if we see more benefits (memory saving
// TODO(pengwa): we can gradually increase this threshold if we see more benefits (memory-saving
// or more CSE optimizations triggered). Should be careful to cover test cases that assume initializer
// name did not change after transformation then.
static constexpr char SHARED_INITIALIZER_PREFIX[] = "ortshared_";
bool IsAllowedToShare(const ONNX_NAMESPACE::TensorShapeProto* input_shape,
int64_t& num_elements) {
@ -78,7 +77,7 @@ bool PrepareInputPortsToReplace(Graph& graph, const NodeArg* origin_initializer_
}
// Iterate all input defs to replace those that are equal to origin_initializer_node_arg,
// Then it would be safe to remove the consumer node afterwards.
// Then it would be safe to remove the consumer node afterward.
for (int i = 0; i < static_cast<int>(const_node->InputDefs().size()); ++i) {
if (const_node->InputDefs()[i] == origin_initializer_node_arg) {
consumer_node_to_input_ports_map[const_node].push_back(i);
@ -233,24 +232,17 @@ Status ConstantSharing::ApplyImpl(Graph& graph, bool& modified, int /*graph_leve
size_t value_id = GetOrAddValueInConstantStore(std::move(init_value), const_value_store, data_store_key);
// Construct a string by data type, value, and rank. Used as a key in pattern_key_to_shared_arg_map.
const std::string pattern_key = MakeString(SHARED_INITIALIZER_PREFIX, data_store_key, "_", value_id);
const std::string pattern_key = MakeString(data_store_key, "_", value_id);
// If there is no such existing scalar pattern, add a new one.
if (pattern_key_to_shared_arg_map.find(pattern_key) == pattern_key_to_shared_arg_map.end()) {
// Do a copy and rename the TensorProto.
ONNX_NAMESPACE::TensorProto constant_tensor_proto_as_replacement(*tensor_proto);
constant_tensor_proto_as_replacement.set_name(graph.GenerateNodeArgName(pattern_key));
NodeArg& shared_scalar_initializer_node_arg = graph_utils::AddInitializer(graph,
constant_tensor_proto_as_replacement);
pattern_key_to_shared_arg_map[pattern_key] = &shared_scalar_initializer_node_arg;
pattern_key_to_shared_arg_map[pattern_key] = origin_initializer_node_arg;
} else {
shared_count += 1;
ReplaceInputsToUseSharedInitializer(graph, consumer_node_to_input_ports_map, origin_initializer_node_arg,
pattern_key_to_shared_arg_map[pattern_key]);
modified = true;
}
ReplaceInputsToUseSharedInitializer(graph, consumer_node_to_input_ports_map, origin_initializer_node_arg,
pattern_key_to_shared_arg_map[pattern_key]);
modified = true;
}
if (shared_count > 0) {
LOGS(logger, INFO) << "Total shared scalar initializer count: " << shared_count;

View file

@ -14,13 +14,13 @@ namespace onnxruntime {
@class ConstantSharing
Transformer that traverses the graph top-down and performs constant sharing, i.e.,
constant initializers having same dtype, value and shape, will be replaced by one single (newly created) initializer.
Currently, only scalar valued initializers are handled.
constant initializers having same data type, value and shape, will be replaced by one single initializer.
Currently, only scalar-valued initializers are handled.
*/
class ConstantSharing : public GraphTransformer {
public:
/**
* @param compatible_execution_providers comptatible execution provider list for considered nodes.
* @param compatible_execution_providers compatible execution provider list for considered nodes.
* @param excluded_initializers explicitly excluded initializer names that should not be changed.
*/
ConstantSharing(const InlinedHashSet<std::string_view>& compatible_execution_providers = {},
@ -29,6 +29,15 @@ class ConstantSharing : public GraphTransformer {
excluded_initializers_(excluded_initializers) {
}
// Reports whether this transformer should be applied only a single time.
// The answer is fixed at compile time:
//   - ENABLE_TRAINING defined: returns false.
//   - otherwise:               returns true.
// NOTE(review): how the caller acts on this value is not visible here; presumably the
// optimizer skips repeated passes of this transformer when true is returned. Confirm
// against the GraphTransformer base class.
bool ShouldOnlyApplyOnce() const override {
#if defined(ENABLE_TRAINING)
return false;
#else
// Reduce model processing time by applying this optimization only once for inference.
return true;
#endif
}
static constexpr int64_t TENSOR_ELEM_COUNT_THRESHOLD = 8;
private: