mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
Some improvements on transformers tool (#5383)
* modify tensorflow benchmark gpu setting * add export from tf choice in script * fix typo * match more embedlayernorm pattern * format
This commit is contained in:
parent
fab7f799a7
commit
90f976d060
4 changed files with 60 additions and 11 deletions
|
|
@ -240,6 +240,31 @@ def run_pytorch(use_gpu, model_names, model_class, precision, num_threads, batch
|
|||
return results
|
||||
|
||||
|
||||
def run_with_tf_optimizations(do_eager_mode: bool, use_xla: bool):
    """Build a decorator that runs a function directly or as a ``tf.function``.

    Args:
        do_eager_mode: When exactly ``True``, the decorated function is called
            as-is, without any ``tf.function`` wrapping.
        use_xla: Forwarded as ``experimental_compile`` to ``tf.function`` in
            graph mode. Must be exactly ``False`` when ``do_eager_mode`` is
            ``True``.

    Returns:
        A decorator taking a callable and returning a wrapper with the same
        call signature (metadata preserved through ``functools.wraps``).

    Raises:
        AssertionError: Raised by the returned decorator when eager mode is
            requested together with XLA.
    """
    from functools import wraps

    eager = do_eager_mode is True
    if not eager:
        # TensorFlow is only needed to build the graph-mode wrapper.
        import tensorflow as tf

    def run_func(func):
        if eager:
            # Explicit raise instead of `assert` so the check is not stripped
            # when Python runs with -O; the exception type is unchanged.
            if use_xla is not False:
                raise AssertionError(
                    "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`."
                )

            @wraps(func)
            def run_in_eager_mode(*args, **kwargs):
                return func(*args, **kwargs)

            return run_in_eager_mode

        @wraps(func)
        @tf.function(experimental_compile=use_xla)
        def run_in_graph_mode(*args, **kwargs):
            return func(*args, **kwargs)

        return run_in_graph_mode

    return run_func
|
||||
|
||||
|
||||
def run_tensorflow(use_gpu, model_names, model_class, precision, num_threads, batch_sizes, sequence_lengths,
|
||||
repeat_times, cache_dir, verbose):
|
||||
results = []
|
||||
|
|
@ -258,6 +283,8 @@ def run_tensorflow(use_gpu, model_names, model_class, precision, num_threads, ba
|
|||
physical_devices = tf.config.list_physical_devices('GPU')
|
||||
try:
|
||||
tf.config.set_visible_devices(physical_devices[0], 'GPU')
|
||||
tf.config.experimental.set_memory_growth(physical_devices[0], True)
|
||||
tf.distribute.OneDeviceStrategy(device='/gpu:0')
|
||||
except RuntimeError as e:
|
||||
logger.exception(e)
|
||||
|
||||
|
|
@ -295,10 +322,12 @@ def run_tensorflow(use_gpu, model_names, model_class, precision, num_threads, ba
|
|||
input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32)
|
||||
|
||||
try:
|
||||
|
||||
# Disable both for better inference perf
|
||||
@run_with_tf_optimizations(do_eager_mode=False, use_xla=False)
|
||||
def encoder_forward():
|
||||
return model(input_ids, training=False)
|
||||
|
||||
@run_with_tf_optimizations(do_eager_mode=False, use_xla=False)
|
||||
def encoder_decoder_forward():
|
||||
return model(input_ids, decoder_input_ids=input_ids, training=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -130,13 +130,13 @@ class FusionEmbedLayerNoMask(Fusion):
|
|||
|
||||
input_ids = word_embedding_gather.input[1]
|
||||
|
||||
position_embedding_expand = None
|
||||
position_embedding_node_before_gather = None
|
||||
position_embedding_shape = None
|
||||
|
||||
position_embedding_path = self.model.match_parent_path(normalize_node, ['Gather', 'Expand'],
|
||||
[1, 1]) # for distill-bert
|
||||
if position_embedding_path is not None:
|
||||
position_embedding_weight_node, position_embedding_expand = position_embedding_path
|
||||
position_embedding_weight_node, position_embedding_node_before_gather = position_embedding_path
|
||||
else:
|
||||
position_embedding_path = self.model.match_parent_path(normalize_node, ['Reshape', 'Slice'], [1, 0])
|
||||
if position_embedding_path is not None:
|
||||
|
|
@ -145,28 +145,33 @@ class FusionEmbedLayerNoMask(Fusion):
|
|||
position_embedding_path = self.model.match_parent_path(add_node, ['Gather', 'Expand', 'Shape'],
|
||||
[1, 1, 1])
|
||||
if position_embedding_path is not None:
|
||||
position_embedding_weight_node, position_embedding_expand, position_embedding_shape = position_embedding_path
|
||||
position_embedding_weight_node, position_embedding_node_before_gather, position_embedding_shape = position_embedding_path
|
||||
else:
|
||||
position_embedding_path = self.model.match_parent_path(
|
||||
add_node, ['Gather', 'Expand', 'Concat', 'Unsqueeze', 'Gather', 'Shape'], [1, 1, 1, 1, 0, 0])
|
||||
if position_embedding_path is not None:
|
||||
position_embedding_weight_node, position_embedding_expand, _, _, _, position_embedding_shape = position_embedding_path
|
||||
position_embedding_weight_node, position_embedding_node_before_gather, _, _, _, position_embedding_shape = position_embedding_path
|
||||
else:
|
||||
# Here we do not try to get an exact match. Instead, we only try to identify the position embedding weights.
|
||||
position_embedding_path = self.model.match_parent_path(add_node, ['Gather', 'Expand'], [1, 1])
|
||||
if position_embedding_path is not None:
|
||||
position_embedding_weight_node, position_embedding_expand = position_embedding_path
|
||||
position_embedding_weight_node, position_embedding_node_before_gather = position_embedding_path
|
||||
else:
|
||||
logger.info("Position embedding path is not found. Embed layer cannot be fused.")
|
||||
return
|
||||
position_embedding_path = self.model.match_parent_path(add_node, ['Gather', 'Slice'],
|
||||
[1, 1])
|
||||
if position_embedding_path is not None:
|
||||
position_embedding_weight_node, position_embedding_node_before_gather = position_embedding_path
|
||||
else:
|
||||
logger.info("Position embedding path is not found. Embed layer cannot be fused.")
|
||||
return
|
||||
|
||||
if position_embedding_shape is not None and position_embedding_shape.input[0] != input_ids:
|
||||
logger.info("position and word embedding is expected to be applied on same input")
|
||||
return
|
||||
|
||||
if position_embedding_expand and position_embedding_shape:
|
||||
if position_embedding_node_before_gather and position_embedding_shape:
|
||||
input_parent = self.model.get_parent(position_embedding_shape, 0, output_name_to_node)
|
||||
subgraph_nodes = self.model.get_parent_subgraph_nodes(position_embedding_expand,
|
||||
subgraph_nodes = self.model.get_parent_subgraph_nodes(position_embedding_node_before_gather,
|
||||
[input_parent] if input_parent else [],
|
||||
output_name_to_node)
|
||||
self.nodes_to_remove.extend(subgraph_nodes)
|
||||
|
|
|
|||
|
|
@ -261,7 +261,11 @@ def load_pt_model(model_name, model_class, cache_dir):
|
|||
def load_tf_model(model_name, model_class, cache_dir):
    """Load the configuration and the TensorFlow model for ``model_name``.

    Args:
        model_name: Name passed to ``AutoConfig.from_pretrained`` and
            ``load_pretrained_model``.
        model_class: Forwarded to ``load_pretrained_model`` as
            ``custom_model_class``.
        cache_dir: Cache directory forwarded to both loaders.

    Returns:
        A ``(config, model)`` tuple.
    """
    config = AutoConfig.from_pretrained(model_name, cache_dir=cache_dir)

    # Load the model exactly once; ``is_tf_model=True`` requests the
    # TensorFlow variant from the shared loader.
    model = load_pretrained_model(model_name,
                                  config=config,
                                  cache_dir=cache_dir,
                                  custom_model_class=model_class,
                                  is_tf_model=True)

    return config, model
|
||||
|
||||
|
|
@ -366,6 +370,9 @@ def export_onnx_model_from_pt(model_name, opset_version, use_external_data_forma
|
|||
def export_onnx_model_from_tf(model_name, opset_version, use_external_data_format, model_type, model_class, cache_dir,
|
||||
onnx_dir, input_names, use_gpu, precision, optimize_onnx, validate_onnx,
|
||||
use_raw_attention_mask, overwrite, model_fusion_statistics):
|
||||
# Use CPU to export
|
||||
import tensorflow as tf
|
||||
tf.config.set_visible_devices([], 'GPU')
|
||||
|
||||
config, model = load_tf_model(model_name, model_class, cache_dir)
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@ run_torch=false
|
|||
run_torchscript=true
|
||||
run_tensorflow=false
|
||||
|
||||
# Onnx model source (default is from pytorch, set export_onnx_from_tf=true to convert from tensorflow model)
|
||||
export_onnx_from_tf=false
|
||||
|
||||
# Devices to test (You can run either CPU or GPU, but not both: GPU needs onnxruntime-gpu, and CPU needs onnxruntime).
|
||||
run_gpu_fp32=true
|
||||
run_gpu_fp16=true
|
||||
|
|
@ -100,6 +103,11 @@ fi
|
|||
onnx_export_options="-i $input_counts -v -b 0 --overwrite -f fusion.csv -c $cache_dir --onnx_dir $onnx_dir"
|
||||
benchmark_options="-b $batch_sizes -s $sequence_lengths -t $average_over -f fusion.csv -r result.csv -d detail.csv -c $cache_dir --onnx_dir $onnx_dir"
|
||||
|
||||
if [ "$export_onnx_from_tf" = true ] ; then
|
||||
onnx_export_options="$onnx_export_options --model_source tf"
|
||||
benchmark_options="$benchmark_options --model_source tf"
|
||||
fi
|
||||
|
||||
if [ "$use_optimizer" = true ] ; then
|
||||
onnx_export_options="$onnx_export_options -o"
|
||||
benchmark_options="$benchmark_options -o"
|
||||
|
|
|
|||
Loading…
Reference in a new issue