update transformers required package versions (#6315)

This commit is contained in:
Tianlei Wu 2021-01-12 00:10:56 -08:00 committed by GitHub
parent c43ca45c4f
commit a038924bee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 66 additions and 69 deletions

View file

@ -53,10 +53,6 @@ def create_onnxruntime_session(onnx_model_path,
if num_threads > 0:
sess_options.intra_op_num_threads = num_threads
logger.debug(f"Session option: intra_op_num_threads={sess_options.intra_op_num_threads}")
elif (not use_gpu) and (version.parse(onnxruntime_version) < version.parse('1.3.0')):
# Set intra_op_num_threads = 1 to enable OpenMP for onnxruntime 1.2.0 (cpu)
# onnxruntime-gpu is not built with openmp so it is better to use default (0) or cpu_count instead.
sess_options.intra_op_num_threads = 1
if verbose:
sess_options.log_severity_level = 0
@ -98,9 +94,10 @@ def prepare_environment(cache_dir, output_dir, use_gpu):
logger.info(f'Transformers Version:{transformers.__version__}')
logger.info(f'Onnxruntime Version:{onnxruntime.__version__}')
# Support three major versions of PyTorch and OnnxRuntime, and up to 6 months of transformers.
from packaging import version
assert version.parse(torch.__version__) >= version.parse('1.4.0')
assert version.parse(transformers.__version__) >= version.parse('2.11.0')
assert version.parse(torch.__version__) >= version.parse('1.5.0')
assert version.parse(transformers.__version__) >= version.parse('3.0.0')
assert version.parse(onnxruntime.__version__) >= version.parse('1.4.0')
@ -223,8 +220,8 @@ def inference_ort_with_io_binding(ort_session,
allocateOutputBuffers(output_buffers, output_buffer_max_sizes, device)
for i in range(len(ort_output_names)):
io_binding.bind_output(ort_output_names[i], output_buffers[i].device.type, 0, numpy.float32, ort_outputs[i].shape,
output_buffers[i].data_ptr())
io_binding.bind_output(ort_output_names[i], output_buffers[i].device.type, 0, numpy.float32,
ort_outputs[i].shape, output_buffers[i].data_ptr())
runtimes = timeit.repeat(lambda: ort_session.run_with_iobinding(io_binding), number=1, repeat=repeat_times)
result.update(result_template)
result.update({"io_binding": True})

View file

@ -132,7 +132,6 @@ class BertOnnxModel(OnnxModel):
new_graph_inputs = []
casted_bert_graph_inputs = self.get_graph_inputs_from_fused_nodes(casted=True)
for input in graph.input:
if input.name in casted_bert_graph_inputs:
@ -178,13 +177,13 @@ class BertOnnxModel(OnnxModel):
def preprocess(self):
self.adjust_reshape_and_expand()
return
def adjust_reshape_and_expand(self):
nodes_to_remove = []
for node in self.nodes():
if node.op_type == 'Reshape':
if node.op_type == 'Reshape':
# Clean up unnecessary reshape nodes.
# Find reshape nodes with no actual data in "shape" attribute and remove them.
# Find reshape nodes with no actual data in "shape" attribute and remove them.
reshape_shape = self.get_constant_value(node.input[1])
if reshape_shape is not None and reshape_shape.size == 0:
nodes_to_remove.extend([node])
@ -192,9 +191,9 @@ class BertOnnxModel(OnnxModel):
continue
# Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by
# changing current reshape's input to output of slice.
# changing current reshape's input to output of slice.
reshape_path = self.match_parent_path(node, ['Expand', 'Expand', 'Reshape', 'Slice'], [0, 0, 0, 0],
self.output_name_to_node())
self.output_name_to_node())
if reshape_path is not None:
expand_node = reshape_path[-3]
expand_shape_value = self.get_constant_value(expand_node.input[1])
@ -203,8 +202,9 @@ class BertOnnxModel(OnnxModel):
shape_value = self.get_constant_value(reshape_before_expand.input[1])
slice_node = reshape_path[-1]
if expand_shape_value is not None and shape_value is not None and len(expand_shape_value) is 2 and len(
shape_value) is 1 and expand_shape_value[1] == shape_value[0]:
if expand_shape_value is not None and shape_value is not None and len(
expand_shape_value) is 2 and len(
shape_value) is 1 and expand_shape_value[1] == shape_value[0]:
node.input[0] = slice_node.output[0]
self.remove_nodes(nodes_to_remove)
logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}")
@ -340,6 +340,6 @@ class BertOnnxModel(OnnxModel):
logger.debug("Embed Layer not fused")
if attention == 0:
logger.debug("Attention not fused")
logger.warning("Attention not fused")
return is_perfect

View file

@ -214,7 +214,10 @@ def _parse_arguments():
default=0,
help="onnxruntime optimization level. 0 will disable onnxruntime.")
parser.add_argument('--use_external_data_format', required=False, action='store_true', help="use external data format")
parser.add_argument('--use_external_data_format',
required=False,
action='store_true',
help="use external data format")
parser.set_defaults(use_external_data_format=False)
args = parser.parse_args()
@ -306,8 +309,9 @@ def optimize_model(input,
os.remove(temp_model_path)
logger.debug("Remove tempoary model: {}".format(temp_model_path))
optimizer.model.producer_name = "onnxruntime_tools"
optimizer.model.producer_version = "1.5.2"
optimizer.model.producer_name = "onnxruntime.transformers"
from onnxruntime import __version__ as onnxruntime_version
optimizer.model.producer_version = onnxruntime_version
return optimizer
@ -325,9 +329,7 @@ def main():
_setup_logger(args.verbose)
if os.path.realpath(args.input) == os.path.realpath(args.output):
logger.warning(
f"Specified the same input and output path. Note that this may overwrite the original model"
)
logger.warning(f"Specified the same input and output path. Note that this may overwrite the original model")
optimization_options = _get_optimization_options(args)

View file

@ -75,7 +75,7 @@ def parse_arguments(argv=None):
required=False,
default='default',
choices=['bert', 'gpt2', 'longformer', 'default'],
help="Way to create dummy inputs. If your model is not aa")
help="Type of dummy inputs. The default will create inputs with ones.")
parser.add_argument('-g', '--use_gpu', required=False, action='store_true', help="use GPU")
parser.set_defaults(use_gpu=False)
@ -96,8 +96,7 @@ def parse_arguments(argv=None):
parser.add_argument('-v', '--verbose', required=False, action='store_true')
parser.set_defaults(verbose=False)
args = parser.parse_args(argv)
return args
return parser.parse_args(argv)
def create_bert_inputs(model, batch_size, sequence_length, samples, input_ids_name, segment_ids_name, input_mask_name):
@ -116,7 +115,7 @@ def create_bert_inputs(model, batch_size, sequence_length, samples, input_ids_na
return all_inputs
def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, batch_size, sequence_length, all_inputs):
def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, all_inputs):
from benchmark_helper import create_onnxruntime_session
session = create_onnxruntime_session(onnx_model_path,
@ -135,8 +134,8 @@ def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, batch_
def load_profile_json(profile_file):
print(f"loading profile output {profile_file} ...")
with open(profile_file, "r") as f:
sess_time = json.load(f)
with open(profile_file, "r") as opened_file:
sess_time = json.load(opened_file)
assert isinstance(sess_time, list)
return sess_time
@ -167,18 +166,18 @@ def parse_profile_results(sess_time, kernel_time_only=False, threshold=0):
node_time[item["name"]] = item["dur"]
total += item["dur"]
results = []
lines = []
if (threshold > 0):
results.append(f"Threshold of Percentage > {threshold:.2f}%")
lines.append(f"Threshold of Percentage > {threshold:.2f}%")
results.append(f"Duration\tPercentage\tProvider\tName")
lines.append("Duration\tPercentage\tProvider\tName")
for k, v in sorted(node_time.items(), key=lambda x: x[1], reverse=True):
provider = node_provider[k] if k in node_provider else ""
ratio = v / total
if ratio > threshold:
results.append(f"{v}\t{ratio * 100.0:5.2f}\t{provider}\t{k}")
lines.append(f"{v}\t{ratio * 100.0:5.2f}\t{provider}\t{k}")
return results
return lines
def group_profile_results(sess_time, kernel_time_only=False, threshold=0):
@ -215,15 +214,15 @@ def group_profile_results(sess_time, kernel_time_only=False, threshold=0):
op_cpu_time[op_name] = item["dur"]
op_cpu_records[op_name] = 1
results = [f"Duration\tPercentage\tCalls\tCpu_Duration\tCpu_Calls\tName"]
lines = ["Duration\tPercentage\tCalls\tCpu_Duration\tCpu_Calls\tName"]
for k, v in sorted(op_time.items(), key=lambda x: x[1], reverse=True):
calls = op_records[k]
cpu_time = op_cpu_time[k] if k in op_cpu_time else 0
cpu_calls = op_cpu_records[k] if k in op_cpu_records else 0
ratio = v / total
if ratio > threshold:
results.append(f"{v}\t{ratio * 100.0:5.2f}\t{calls}\t{cpu_time}\t{cpu_calls}\t{k}")
return results
lines.append(f"{v}\t{ratio * 100.0:5.2f}\t{calls}\t{cpu_time}\t{cpu_calls}\t{k}")
return lines
def get_dim_from_type_proto(dim):
@ -240,11 +239,11 @@ def create_dummy_inputs(onnx_model_path, batch_size, sequence_length, samples):
onnx_model = OnnxModel(onnx.load(onnx_model_path))
dummy_inputs = {}
for input in onnx_model.get_graph_inputs_excluding_initializers():
shape = get_shape_from_type_proto(input.type)
for graph_input in onnx_model.get_graph_inputs_excluding_initializers():
shape = get_shape_from_type_proto(graph_input.type)
symbol_dims = []
for i, dim in enumerate(shape):
if type(dim) == str:
if isinstance(dim, str):
symbol_dims.append(i)
# allowed symbolic dimensions: batch_size and sequence_length
@ -255,12 +254,12 @@ def create_dummy_inputs(onnx_model_path, batch_size, sequence_length, samples):
if len(symbol_dims) > 1:
shape[symbol_dims[1]] = sequence_length
elem_type = input.type.tensor_type.elem_type
elem_type = graph_input.type.tensor_type.elem_type
assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64]
data_type = numpy.float32 if elem_type == TensorProto.FLOAT else (
numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32)
data = numpy.ones(shape, dtype=data_type)
dummy_inputs[input.name] = data
dummy_inputs[graph_input.name] = data
all_inputs = [dummy_inputs for _ in range(samples)]
return all_inputs
@ -280,20 +279,20 @@ def create_gpt2_inputs(onnx_model_path, batch_size, sequence_length, past_sequen
}
dummy_inputs = {}
for input in onnx_model.get_graph_inputs_excluding_initializers():
shape = get_shape_from_type_proto(input.type)
for graph_input in onnx_model.get_graph_inputs_excluding_initializers():
shape = get_shape_from_type_proto(graph_input.type)
for i, dim in enumerate(shape):
if type(dim) == str and dim not in symbols.keys():
if isinstance(dim, str) and dim not in symbols.keys():
raise RuntimeError(f"symbol is not supported: {dim}")
else:
shape[i] = symbols[dim]
elem_type = input.type.tensor_type.elem_type
elem_type = graph_input.type.tensor_type.elem_type
assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64]
data_type = numpy.float32 if elem_type == TensorProto.FLOAT else (
numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32)
data = numpy.ones(shape, dtype=data_type)
dummy_inputs[input.name] = data
dummy_inputs[graph_input.name] = data
all_inputs = [dummy_inputs for _ in range(samples)]
return all_inputs
@ -307,25 +306,25 @@ def create_longformer_inputs(onnx_model_path, batch_size, sequence_length, globa
symbols = {'batch_size': batch_size, 'sequence_length': sequence_length}
dummy_inputs = {}
for input in onnx_model.get_graph_inputs_excluding_initializers():
shape = get_shape_from_type_proto(input.type)
for graph_input in onnx_model.get_graph_inputs_excluding_initializers():
shape = get_shape_from_type_proto(graph_input.type)
for i, dim in enumerate(shape):
if type(dim) == str and dim not in symbols.keys():
if isinstance(dim, str) and dim not in symbols.keys():
raise RuntimeError(f"symbol is not supported: {dim}")
else:
shape[i] = symbols[dim]
elem_type = input.type.tensor_type.elem_type
elem_type = graph_input.type.tensor_type.elem_type
assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64]
data_type = numpy.float32 if elem_type == TensorProto.FLOAT else (
numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32)
if "global" in input.name:
if "global" in graph_input.name:
data = numpy.zeros(shape, dtype=data_type)
data[:, :global_length] = 1
else:
data = numpy.ones(shape, dtype=data_type)
dummy_inputs[input.name] = data
dummy_inputs[graph_input.name] = data
all_inputs = [dummy_inputs for _ in range(samples)]
return all_inputs
@ -351,8 +350,7 @@ def run(args):
else: # default
all_inputs = create_dummy_inputs(args.model, args.batch_size, args.sequence_length, args.samples)
profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, args.batch_size,
args.sequence_length, all_inputs)
profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, all_inputs)
profile_records = load_profile_json(profile_file)
@ -365,13 +363,13 @@ def run(args):
if __name__ == '__main__':
args = parse_arguments()
print("Arguments", args)
arguments = parse_arguments()
print("Arguments", arguments)
from benchmark_helper import setup_logger
setup_logger(args.verbose)
setup_logger(arguments.verbose)
results = run(args)
results = run(arguments)
print("Results:")
print("-" * 64)

View file

@ -5,10 +5,10 @@ psutil
py-cpuinfo
py3nvml
packaging
# Gpt2 and Albert models need a change in transformers (https://github.com/huggingface/transformers/pull/4244) for exporting ONNX models.
# The change does not exist in 2.9.0 so we install from source instead.
git+https://github.com/huggingface/transformers.git
transformers
onnxruntime
onnxconverter_common
--find-links https://download.pytorch.org/whl/torch_stable.html
torch==1.5.0+cpu
torchvision==0.6.0+cpu
torch==1.7.1+cpu
torchvision==0.8.2+cpu
torchaudio===0.7.2

View file

@ -5,10 +5,10 @@ psutil
py-cpuinfo
py3nvml
packaging
# Gpt2 and Albert models need a change in transformers (https://github.com/huggingface/transformers/pull/4244) for exporting ONNX models.
# The change does not exist in 2.9.0 so we install from source instead.
git+https://github.com/huggingface/transformers.git
transformers
onnxruntime-gpu
onnxconverter_common
--find-links https://download.pytorch.org/whl/torch_stable.html
torch==1.5.0+cu101
torchvision==0.6.0+cu101
torch===1.7.1
torchvision===0.8.2
torchaudio===0.7.2