diff --git a/onnxruntime/python/tools/transformers/convert_generation.py b/onnxruntime/python/tools/transformers/convert_generation.py
index 162713e54b..90ae35a931 100644
--- a/onnxruntime/python/tools/transformers/convert_generation.py
+++ b/onnxruntime/python/tools/transformers/convert_generation.py
@@ -889,6 +889,7 @@ def remove_shared_initializers(
     graph2: GraphProto,
     shared_prefix: str = "shared_",
     min_elements: int = 1024,
+    require_raw_data: bool = False,
 ):
     """Remove initializers with same value from two graphs.
 
@@ -897,6 +898,7 @@ def remove_shared_initializers(
         graph2 (GraphProto): the second graph to process
         shared_prefix (str): add prefix to the shared initializers among two graphs
         min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.
+        require_raw_data (bool, optional): only share tensors stored as raw_data (faster). Defaults to False.
     """
 
     mapping_initializers_1 = {}
@@ -913,7 +915,7 @@ def remove_shared_initializers(
             if not (initializer2.dims and sum(initializer2.dims) >= min_elements):
                 continue
 
-            if OnnxModel.has_same_value(initializer1, initializer2):
+            if OnnxModel.has_same_value(initializer1, initializer2, require_raw_data=require_raw_data):
                 mapping_initializers_1[initializer1.name] = shared_prefix + initializer2.name
                 shared_initializers_1.append(initializer1)
 
@@ -986,14 +988,14 @@ def remove_shared_initializers(
     return shared_initializers_2
 
 
-def get_shared_initializers(encoder_model: ModelProto, decoder_model: ModelProto):
+def get_shared_initializers(encoder_model: ModelProto, decoder_model: ModelProto, require_raw_data: bool = False):
     encoder = OnnxModel(encoder_model)
     decoder = OnnxModel(decoder_model)
     encoder.add_prefix_to_names("e_")
     decoder.add_prefix_to_names("d_")
-    encoder.remove_duplicated_initializer()
-    decoder.remove_duplicated_initializer()
-    initializers = remove_shared_initializers(encoder.model.graph, decoder.model.graph, "s_")
+    encoder.remove_duplicated_initializer(require_raw_data)
+    decoder.remove_duplicated_initializer(require_raw_data)
+    initializers = remove_shared_initializers(decoder.model.graph, encoder.model.graph, "s_", require_raw_data)
     return initializers
 
 
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
index d3a47200c5..1a20cbd101 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
@@ -141,7 +141,7 @@ def chain_model(args):
 
     # Initializers/opsets
     # Delete shared data between decoder/encoder and move to larger graph initializers
-    initializers = get_shared_initializers(encoder_model, decoder_model)
+    initializers = get_shared_initializers(encoder_model, decoder_model, require_raw_data=True)
     node.attribute.extend(
         [
             helper.make_attribute("decoder", decoder_model.graph),
diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py
index 3b1c624720..ead61df9f3 100644
--- a/onnxruntime/python/tools/transformers/onnx_model.py
+++ b/onnxruntime/python/tools/transformers/onnx_model.py
@@ -1092,13 +1092,15 @@ class OnnxModel:
         return op_count
 
     @staticmethod
-    def has_same_value(tensor1: TensorProto, tensor2: TensorProto) -> bool:
+    def has_same_value(tensor1: TensorProto, tensor2: TensorProto, require_raw_data: bool = False) -> bool:
         """Returns True when two tensors have same value.
            Note that name can be different.
 
         Args:
             tensor1 (TensorProto): initializer 1
             tensor2 (TensorProto): initializer 2
+            require_raw_data (bool): return False unless both tensors store raw_data
+                Note: skips the numpy conversion, which can speed up runtime significantly
 
         Returns:
             bool: True when two intializers has same value.
@@ -1107,11 +1109,15 @@ class OnnxModel:
             return False
         if tensor1.HasField("raw_data") and tensor2.HasField("raw_data"):
             return tensor1.raw_data == tensor2.raw_data
+        if require_raw_data:
+            return False
+
         return (numpy_helper.to_array(tensor1) == numpy_helper.to_array(tensor2)).all()
 
-    def remove_duplicated_initializer(self):
+    def remove_duplicated_initializer(self, require_raw_data: bool = False):
         """Remove initializers with duplicated values, and only keep the first one.
         It could help reduce size of models (like ALBert) with shared weights.
+        If require_raw_data is True, only initializers stored as raw_data are compared, to speed up runtime.
         Note: this function does not process subgraph.
         """
         if len(self.graphs()) > 1:
@@ -1124,7 +1130,9 @@ class OnnxModel:
             if same[i] >= 0:
                 continue
             for j in range(i + 1, initializer_count):
-                if OnnxModel.has_same_value(self.model.graph.initializer[i], self.model.graph.initializer[j]):
+                if OnnxModel.has_same_value(
+                    self.model.graph.initializer[i], self.model.graph.initializer[j], require_raw_data
+                ):
                     same[j] = i
 
         count = 0