Some improvements on transformers tool (#5383)

* modify tensorflow benchmark gpu setting

* add export from tf choice in script

* fix typo

* match more embedlayernorm pattern

* format
This commit is contained in:
Ye Wang 2020-10-08 19:35:17 -07:00 committed by GitHub
parent fab7f799a7
commit 90f976d060
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 11 deletions

View file

@ -240,6 +240,31 @@ def run_pytorch(use_gpu, model_names, model_class, precision, num_threads, batch
return results
def run_with_tf_optimizations(do_eager_mode: bool, use_xla: bool):
    """Build a decorator that runs a function eagerly or as a ``tf.function``.

    Args:
        do_eager_mode: When ``True``, the decorated function is called directly
            and no ``tf.function`` is created.
        use_xla: Forwarded as ``experimental_compile`` to ``tf.function`` in
            graph mode. Must be ``False`` when ``do_eager_mode`` is ``True``.

    Returns:
        A decorator. Applied to ``func`` it returns a wrapper (carrying
        ``func``'s metadata via ``functools.wraps``) that forwards all
        positional and keyword arguments to ``func`` and returns its result.

    Raises:
        AssertionError: When the decorator is applied with
            ``do_eager_mode=True`` and ``use_xla`` is not ``False``.
    """
    from functools import wraps

    def run_func(func):
        # Identity checks (rather than truthiness) keep the strict bool
        # handling of the flags unchanged for existing callers.
        if do_eager_mode is True:
            if use_xla is not False:
                # Raised explicitly instead of via an ``assert`` statement so
                # the check is not stripped when Python runs with ``-O``; the
                # exception type stays AssertionError for compatibility.
                raise AssertionError(
                    "Cannot run model in XLA, if `do_eager_mode` is set to `True`. "
                    "Please set `do_eager_mode=False`.")

            @wraps(func)
            def run_in_eager_mode(*args, **kwargs):
                return func(*args, **kwargs)

            return run_in_eager_mode

        # TensorFlow is only needed to build the graph-mode wrapper, so it is
        # imported here and no unused tf.function is created on the eager path.
        import tensorflow as tf

        @wraps(func)
        @tf.function(experimental_compile=use_xla)
        def run_in_graph_mode(*args, **kwargs):
            return func(*args, **kwargs)

        return run_in_graph_mode

    return run_func
def run_tensorflow(use_gpu, model_names, model_class, precision, num_threads, batch_sizes, sequence_lengths,
repeat_times, cache_dir, verbose):
results = []
@ -258,6 +283,8 @@ def run_tensorflow(use_gpu, model_names, model_class, precision, num_threads, ba
physical_devices = tf.config.list_physical_devices('GPU')
try:
tf.config.set_visible_devices(physical_devices[0], 'GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
tf.distribute.OneDeviceStrategy(device='/gpu:0')
except RuntimeError as e:
logger.exception(e)
@ -295,10 +322,12 @@ def run_tensorflow(use_gpu, model_names, model_class, precision, num_threads, ba
input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32)
try:
# Disable both for better inference perf
@run_with_tf_optimizations(do_eager_mode=False, use_xla=False)
def encoder_forward():
return model(input_ids, training=False)
@run_with_tf_optimizations(do_eager_mode=False, use_xla=False)
def encoder_decoder_forward():
return model(input_ids, decoder_input_ids=input_ids, training=False)

View file

@ -130,13 +130,13 @@ class FusionEmbedLayerNoMask(Fusion):
input_ids = word_embedding_gather.input[1]
position_embedding_expand = None
position_embedding_node_before_gather = None
position_embedding_shape = None
position_embedding_path = self.model.match_parent_path(normalize_node, ['Gather', 'Expand'],
[1, 1]) # for distill-bert
if position_embedding_path is not None:
position_embedding_weight_node, position_embedding_expand = position_embedding_path
position_embedding_weight_node, position_embedding_node_before_gather = position_embedding_path
else:
position_embedding_path = self.model.match_parent_path(normalize_node, ['Reshape', 'Slice'], [1, 0])
if position_embedding_path is not None:
@ -145,28 +145,33 @@ class FusionEmbedLayerNoMask(Fusion):
position_embedding_path = self.model.match_parent_path(add_node, ['Gather', 'Expand', 'Shape'],
[1, 1, 1])
if position_embedding_path is not None:
position_embedding_weight_node, position_embedding_expand, position_embedding_shape = position_embedding_path
position_embedding_weight_node, position_embedding_node_before_gather, position_embedding_shape = position_embedding_path
else:
position_embedding_path = self.model.match_parent_path(
add_node, ['Gather', 'Expand', 'Concat', 'Unsqueeze', 'Gather', 'Shape'], [1, 1, 1, 1, 0, 0])
if position_embedding_path is not None:
position_embedding_weight_node, position_embedding_expand, _, _, _, position_embedding_shape = position_embedding_path
position_embedding_weight_node, position_embedding_node_before_gather, _, _, _, position_embedding_shape = position_embedding_path
else:
# Here we do not try to get an exact match. Instead, we only try to identify the position embedding weights.
position_embedding_path = self.model.match_parent_path(add_node, ['Gather', 'Expand'], [1, 1])
if position_embedding_path is not None:
position_embedding_weight_node, position_embedding_expand = position_embedding_path
position_embedding_weight_node, position_embedding_node_before_gather = position_embedding_path
else:
logger.info("Position embedding path is not found. Embed layer cannot be fused.")
return
position_embedding_path = self.model.match_parent_path(add_node, ['Gather', 'Slice'],
[1, 1])
if position_embedding_path is not None:
position_embedding_weight_node, position_embedding_node_before_gather = position_embedding_path
else:
logger.info("Position embedding path is not found. Embed layer cannot be fused.")
return
if position_embedding_shape is not None and position_embedding_shape.input[0] != input_ids:
logger.info("position and word embedding is expected to be applied on same input")
return
if position_embedding_expand and position_embedding_shape:
if position_embedding_node_before_gather and position_embedding_shape:
input_parent = self.model.get_parent(position_embedding_shape, 0, output_name_to_node)
subgraph_nodes = self.model.get_parent_subgraph_nodes(position_embedding_expand,
subgraph_nodes = self.model.get_parent_subgraph_nodes(position_embedding_node_before_gather,
[input_parent] if input_parent else [],
output_name_to_node)
self.nodes_to_remove.extend(subgraph_nodes)

View file

@ -261,7 +261,11 @@ def load_pt_model(model_name, model_class, cache_dir):
def load_tf_model(model_name, model_class, cache_dir):
    """Load the configuration and the model for a pretrained checkpoint.

    Args:
        model_name: Name passed to ``AutoConfig.from_pretrained`` and to
            ``load_pretrained_model``.
        model_class: Forwarded as ``custom_model_class``.
        cache_dir: Cache directory forwarded to both loaders.

    Returns:
        A ``(config, model)`` tuple.
    """
    config = AutoConfig.from_pretrained(model_name, cache_dir=cache_dir)
    # Load the model exactly once; a second identical call would only repeat
    # the work and overwrite the first result.
    model = load_pretrained_model(model_name,
                                  config=config,
                                  cache_dir=cache_dir,
                                  custom_model_class=model_class,
                                  is_tf_model=True)
    return config, model
@ -366,6 +370,9 @@ def export_onnx_model_from_pt(model_name, opset_version, use_external_data_forma
def export_onnx_model_from_tf(model_name, opset_version, use_external_data_format, model_type, model_class, cache_dir,
onnx_dir, input_names, use_gpu, precision, optimize_onnx, validate_onnx,
use_raw_attention_mask, overwrite, model_fusion_statistics):
# Use CPU to export
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')
config, model = load_tf_model(model_name, model_class, cache_dir)

View file

@ -22,6 +22,9 @@ run_torch=false
run_torchscript=true
run_tensorflow=false
# ONNX model source (default is PyTorch; set export_onnx_from_tf=true to convert from a TensorFlow model instead)
export_onnx_from_tf=false
# Devices to test (you can run either CPU or GPU, but not both: GPU needs onnxruntime-gpu, and CPU needs onnxruntime).
run_gpu_fp32=true
run_gpu_fp16=true
@ -100,6 +103,11 @@ fi
onnx_export_options="-i $input_counts -v -b 0 --overwrite -f fusion.csv -c $cache_dir --onnx_dir $onnx_dir"
benchmark_options="-b $batch_sizes -s $sequence_lengths -t $average_over -f fusion.csv -r result.csv -d detail.csv -c $cache_dir --onnx_dir $onnx_dir"
if [ "$export_onnx_from_tf" = true ] ; then
onnx_export_options="$onnx_export_options --model_source tf"
benchmark_options="$benchmark_options --model_source tf"
fi
if [ "$use_optimizer" = true ] ; then
onnx_export_options="$onnx_export_options -o"
benchmark_options="$benchmark_options -o"