From 81005e2c92e4d629d5ea3e12bd2b23468c23845f Mon Sep 17 00:00:00 2001 From: pengwa Date: Tue, 9 Apr 2024 12:04:36 +0800 Subject: [PATCH] Optimize constant sharing perf (#20143) ### Optimize constant sharing perf by avoiding renaming the first time we detect a constant pattern. Currently, every time we run ConstantSharing, for each initializer whose pattern does not exist yet, we create a new NodeArg with a unique name. Later, if other initializers share the same pattern, they are replaced by that NodeArg. The problem is: even when there are no real constant sharing cases, we still modify the graph for each initializer. This is not needed. ### Motivation and Context --- .../core/optimizer/constant_sharing.cc | 22 ++++++------------- onnxruntime/core/optimizer/constant_sharing.h | 15 ++++++++++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/onnxruntime/core/optimizer/constant_sharing.cc b/onnxruntime/core/optimizer/constant_sharing.cc index a3c5a72ee7..e2a5732d59 100644 --- a/onnxruntime/core/optimizer/constant_sharing.cc +++ b/onnxruntime/core/optimizer/constant_sharing.cc @@ -32,10 +32,9 @@ using SupportedTypeList = boost::mp11::mp_list(const_node->InputDefs().size()); ++i) { if (const_node->InputDefs()[i] == origin_initializer_node_arg) { consumer_node_to_input_ports_map[const_node].push_back(i); @@ -233,24 +232,17 @@ Status ConstantSharing::ApplyImpl(Graph& graph, bool& modified, int /*graph_leve size_t value_id = GetOrAddValueInConstantStore(std::move(init_value), const_value_store, data_store_key); // Construct a string by data type, value, and rank. Used as a key in pattern_key_to_shared_arg_map. - const std::string pattern_key = MakeString(SHARED_INITIALIZER_PREFIX, data_store_key, "_", value_id); + const std::string pattern_key = MakeString(data_store_key, "_", value_id); // If there is no such existing scalar pattern, add a new one. 
if (pattern_key_to_shared_arg_map.find(pattern_key) == pattern_key_to_shared_arg_map.end()) { - // Do a copy and rename the TensorProto. - ONNX_NAMESPACE::TensorProto constant_tensor_proto_as_replacement(*tensor_proto); - constant_tensor_proto_as_replacement.set_name(graph.GenerateNodeArgName(pattern_key)); - NodeArg& shared_scalar_initializer_node_arg = graph_utils::AddInitializer(graph, - constant_tensor_proto_as_replacement); - pattern_key_to_shared_arg_map[pattern_key] = &shared_scalar_initializer_node_arg; + pattern_key_to_shared_arg_map[pattern_key] = origin_initializer_node_arg; } else { shared_count += 1; + ReplaceInputsToUseSharedInitializer(graph, consumer_node_to_input_ports_map, origin_initializer_node_arg, + pattern_key_to_shared_arg_map[pattern_key]); + modified = true; } - - ReplaceInputsToUseSharedInitializer(graph, consumer_node_to_input_ports_map, origin_initializer_node_arg, - pattern_key_to_shared_arg_map[pattern_key]); - - modified = true; } if (shared_count > 0) { LOGS(logger, INFO) << "Total shared scalar initializer count: " << shared_count; diff --git a/onnxruntime/core/optimizer/constant_sharing.h b/onnxruntime/core/optimizer/constant_sharing.h index 3d0cb875da..cfe252b3ed 100644 --- a/onnxruntime/core/optimizer/constant_sharing.h +++ b/onnxruntime/core/optimizer/constant_sharing.h @@ -14,13 +14,13 @@ namespace onnxruntime { @class ConstantSharing Transformer that traverses the graph top-down and performs constant sharing, i.e., -constant initializers having same dtype, value and shape, will be replaced by one single (newly created) initializer. -Currently, only scalar valued initializers are handled. +constant initializers having same data type, value and shape, will be replaced by one single initializer. +Currently, only scalar-valued initializers are handled. */ class ConstantSharing : public GraphTransformer { public: /** - * @param compatible_execution_providers comptatible execution provider list for considered nodes. 
+ * @param compatible_execution_providers compatible execution provider list for considered nodes. * @param excluded_initializers explicitly excluded initializer names that should not changed. */ ConstantSharing(const InlinedHashSet& compatible_execution_providers = {}, @@ -29,6 +29,15 @@ class ConstantSharing : public GraphTransformer { excluded_initializers_(excluded_initializers) { } + bool ShouldOnlyApplyOnce() const override { +#if defined(ENABLE_TRAINING) + return false; +#else + // Reduce model processing time by applying this optimization only once for inference. + return true; +#endif + } + static constexpr int64_t TENSOR_ELEM_COUNT_THRESHOLD = 8; private: