mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
Speed Up Whisper Export (#16504)
### Description
Add a greedy option to the initializer deduplication process in the Whisper export.

Currently, to detect shared initializers, ORT compares every initializer against every other initializer (O(n^2) comparisons). In the comparison operator, if the two initializers have different data types (e.g. raw_data and int64), both initializers are converted to numpy arrays and the converted results are compared. This conversion happens in every such comparison, so its cost is multiplied across the O(n^2) comparisons and dominates the runtime of finding shared initializers. It is the bottleneck of the current Whisper export script.

The conversion to a numpy array is useful for detecting equal initializer values across nodes of different data types (e.g. recognizing that a bias value of 0.0 is the same as a slice index of 0), but it isn't triggered when comparing initializers of the same data type (e.g. weight value of 0.6 == weight value of 0.6). The latter case is where the majority of the utility is for Whisper, so by skipping the numpy-array comparison path for initializers we save a lot of time at minimal cost.

In other words, this PR adds an option that removes the ability to detect shared initializers of different types (e.g. Slice Index and MatMul Constant) while retaining the ability to deduplicate weights.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve? -->
The current time to export Whisper-large is prohibitive.

---------

Co-authored-by: Peter McAughan <petermca@microsoft.com>
This commit is contained in:
parent
708dec5d95
commit
47f136e2d3
3 changed files with 19 additions and 9 deletions
|
|
@ -889,6 +889,7 @@ def remove_shared_initializers(
|
|||
graph2: GraphProto,
|
||||
shared_prefix: str = "shared_",
|
||||
min_elements: int = 1024,
|
||||
require_raw_data: bool = False,
|
||||
):
|
||||
"""Remove initializers with same value from two graphs.
|
||||
|
||||
|
|
@ -897,6 +898,7 @@ def remove_shared_initializers(
|
|||
graph2 (GraphProto): the second graph to process
|
||||
shared_prefix (str): add prefix to the shared initializers among two graphs
|
||||
min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.
|
||||
require_raw_data (bool, optional): Only remove tensors with raw_data field to speed up method
|
||||
"""
|
||||
|
||||
mapping_initializers_1 = {}
|
||||
|
|
@ -913,7 +915,7 @@ def remove_shared_initializers(
|
|||
if not (initializer2.dims and sum(initializer2.dims) >= min_elements):
|
||||
continue
|
||||
|
||||
if OnnxModel.has_same_value(initializer1, initializer2):
|
||||
if OnnxModel.has_same_value(initializer1, initializer2, require_raw_data=True):
|
||||
mapping_initializers_1[initializer1.name] = shared_prefix + initializer2.name
|
||||
shared_initializers_1.append(initializer1)
|
||||
|
||||
|
|
@ -986,14 +988,14 @@ def remove_shared_initializers(
|
|||
return shared_initializers_2
|
||||
|
||||
|
||||
def get_shared_initializers(encoder_model: ModelProto, decoder_model: ModelProto):
|
||||
def get_shared_initializers(encoder_model: ModelProto, decoder_model: ModelProto, require_raw_data: bool = False):
|
||||
encoder = OnnxModel(encoder_model)
|
||||
decoder = OnnxModel(decoder_model)
|
||||
encoder.add_prefix_to_names("e_")
|
||||
decoder.add_prefix_to_names("d_")
|
||||
encoder.remove_duplicated_initializer()
|
||||
decoder.remove_duplicated_initializer()
|
||||
initializers = remove_shared_initializers(encoder.model.graph, decoder.model.graph, "s_")
|
||||
encoder.remove_duplicated_initializer(require_raw_data)
|
||||
decoder.remove_duplicated_initializer(require_raw_data)
|
||||
initializers = remove_shared_initializers(decoder.model.graph, encoder.model.graph, "s_", require_raw_data)
|
||||
return initializers
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ def chain_model(args):
|
|||
|
||||
# Initializers/opsets
|
||||
# Delete shared data between decoder/encoder and move to larger graph initializers
|
||||
initializers = get_shared_initializers(encoder_model, decoder_model)
|
||||
initializers = get_shared_initializers(encoder_model, decoder_model, require_raw_data=True)
|
||||
node.attribute.extend(
|
||||
[
|
||||
helper.make_attribute("decoder", decoder_model.graph),
|
||||
|
|
|
|||
|
|
@ -1092,13 +1092,15 @@ class OnnxModel:
|
|||
return op_count
|
||||
|
||||
@staticmethod
|
||||
def has_same_value(tensor1: TensorProto, tensor2: TensorProto) -> bool:
|
||||
def has_same_value(tensor1: TensorProto, tensor2: TensorProto, require_raw_data: bool = False) -> bool:
|
||||
"""Returns True when two tensors have same value.
|
||||
Note that name can be different.
|
||||
|
||||
Args:
|
||||
tensor1 (TensorProto): initializer 1
|
||||
tensor2 (TensorProto): initializer 2
|
||||
require_raw_data (bool): ignore tensors without raw_data
|
||||
Note: Flag can speed up runtime significantly
|
||||
|
||||
Returns:
|
||||
bool: True when the two initializers have the same value.
|
||||
|
|
@ -1107,11 +1109,15 @@ class OnnxModel:
|
|||
return False
|
||||
if tensor1.HasField("raw_data") and tensor2.HasField("raw_data"):
|
||||
return tensor1.raw_data == tensor2.raw_data
|
||||
if require_raw_data:
|
||||
return False
|
||||
|
||||
return (numpy_helper.to_array(tensor1) == numpy_helper.to_array(tensor2)).all()
|
||||
|
||||
def remove_duplicated_initializer(self):
|
||||
def remove_duplicated_initializer(self, require_raw_data: bool = False):
|
||||
"""Remove initializers with duplicated values, and only keep the first one.
|
||||
It could help reduce size of models (like ALBert) with shared weights.
|
||||
If require_raw_data passed, method will only compare raw_data initializers to speed runtime
|
||||
Note: this function does not process subgraph.
|
||||
"""
|
||||
if len(self.graphs()) > 1:
|
||||
|
|
@ -1124,7 +1130,9 @@ class OnnxModel:
|
|||
if same[i] >= 0:
|
||||
continue
|
||||
for j in range(i + 1, initializer_count):
|
||||
if OnnxModel.has_same_value(self.model.graph.initializer[i], self.model.graph.initializer[j]):
|
||||
if OnnxModel.has_same_value(
|
||||
self.model.graph.initializer[i], self.model.graph.initializer[j], require_raw_data
|
||||
):
|
||||
same[j] = i
|
||||
|
||||
count = 0
|
||||
|
|
|
|||
Loading…
Reference in a new issue