From a038924bee0ddd3f258cadaf576fce6b7d6448d4 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Tue, 12 Jan 2021 00:10:56 -0800 Subject: [PATCH] update transformers required package versions (#6315) --- .../tools/transformers/benchmark_helper.py | 13 ++-- .../tools/transformers/onnx_model_bert.py | 18 ++--- .../python/tools/transformers/optimizer.py | 14 ++-- .../python/tools/transformers/profiler.py | 70 +++++++++---------- .../tools/transformers/requirements_cpu.txt | 10 +-- .../tools/transformers/requirements_gpu.txt | 10 +-- 6 files changed, 66 insertions(+), 69 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index 32a1cf0192..22a2d8fe36 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -53,10 +53,6 @@ def create_onnxruntime_session(onnx_model_path, if num_threads > 0: sess_options.intra_op_num_threads = num_threads logger.debug(f"Session option: intra_op_num_threads={sess_options.intra_op_num_threads}") - elif (not use_gpu) and (version.parse(onnxruntime_version) < version.parse('1.3.0')): - # Set intra_op_num_threads = 1 to enable OpenMP for onnxruntime 1.2.0 (cpu) - # onnxruntime-gpu is not built with openmp so it is better to use default (0) or cpu_count instead. - sess_options.intra_op_num_threads = 1 if verbose: sess_options.log_severity_level = 0 @@ -98,9 +94,10 @@ def prepare_environment(cache_dir, output_dir, use_gpu): logger.info(f'Transformers Version:{transformers.__version__}') logger.info(f'Onnxruntime Version:{onnxruntime.__version__}') + # Support three major versions of PyTorch and OnnxRuntime, and up to 6 months of transformers. from packaging import version - assert version.parse(torch.__version__) >= version.parse('1.4.0') - assert version.parse(transformers.__version__) >= version.parse('2.11.0') + assert version.parse(torch.__version__) >= version.parse('1.5.0') + assert version.parse(transformers.__version__) >= version.parse('3.0.0') assert version.parse(onnxruntime.__version__) >= version.parse('1.4.0') @@ -223,8 +220,8 @@ def inference_ort_with_io_binding(ort_session, allocateOutputBuffers(output_buffers, output_buffer_max_sizes, device) for i in range(len(ort_output_names)): - io_binding.bind_output(ort_output_names[i], output_buffers[i].device.type, 0, numpy.float32, ort_outputs[i].shape, - output_buffers[i].data_ptr()) + io_binding.bind_output(ort_output_names[i], output_buffers[i].device.type, 0, numpy.float32, + ort_outputs[i].shape, output_buffers[i].data_ptr()) runtimes = timeit.repeat(lambda: ort_session.run_with_iobinding(io_binding), number=1, repeat=repeat_times) result.update(result_template) result.update({"io_binding": True}) diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert.py b/onnxruntime/python/tools/transformers/onnx_model_bert.py index 0c1c887e70..3595713faf 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bert.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py @@ -132,7 +132,6 @@ class BertOnnxModel(OnnxModel): new_graph_inputs = [] casted_bert_graph_inputs = self.get_graph_inputs_from_fused_nodes(casted=True) - for input in graph.input: if input.name in casted_bert_graph_inputs: @@ -178,13 +177,13 @@ class BertOnnxModel(OnnxModel): def preprocess(self): self.adjust_reshape_and_expand() return - + def adjust_reshape_and_expand(self): nodes_to_remove = [] for node in self.nodes(): - if node.op_type == 'Reshape': + if node.op_type == 'Reshape': # Clean up unneccessary reshape nodes. - # Find reshape nodes with no actually data in "shape" attribute and remove. + # Find reshape nodes with no actually data in "shape" attribute and remove. reshape_shape = self.get_constant_value(node.input[1]) if reshape_shape is not None and reshape_shape.size == 0: nodes_to_remove.extend([node]) @@ -192,9 +191,9 @@ class BertOnnxModel(OnnxModel): continue # Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by - # changing current reshape's input to output of slice. + # changing current reshape's input to output of slice. reshape_path = self.match_parent_path(node, ['Expand', 'Expand', 'Reshape', 'Slice'], [0, 0, 0, 0], - self.output_name_to_node()) + self.output_name_to_node()) if reshape_path is not None: expand_node = reshape_path[-3] expand_shape_value = self.get_constant_value(expand_node.input[1]) @@ -203,8 +202,9 @@ class BertOnnxModel(OnnxModel): shape_value = self.get_constant_value(reshape_before_expand.input[1]) slice_node = reshape_path[-1] - if expand_shape_value is not None and shape_value is not None and len(expand_shape_value) is 2 and len( - shape_value) is 1 and expand_shape_value[1] == shape_value[0]: + if expand_shape_value is not None and shape_value is not None and len( + expand_shape_value) is 2 and len( + shape_value) is 1 and expand_shape_value[1] == shape_value[0]: node.input[0] = slice_node.output[0] self.remove_nodes(nodes_to_remove) logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}") @@ -340,6 +340,6 @@ class BertOnnxModel(OnnxModel): logger.debug("Embed Layer not fused") if attention == 0: - logger.debug("Attention not fused") + logger.warning("Attention not fused") return is_perfect diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py index cbadea2446..9a917a0fc4 100644 --- a/onnxruntime/python/tools/transformers/optimizer.py +++ b/onnxruntime/python/tools/transformers/optimizer.py @@ -214,7 +214,10 @@ def _parse_arguments(): default=0, help="onnxruntime optimization level. 0 will disable onnxruntime.") - parser.add_argument('--use_external_data_format', required=False, action='store_true', help="use external data format") + parser.add_argument('--use_external_data_format', + required=False, + action='store_true', + help="use external data format") parser.set_defaults(use_external_data_format=False) args = parser.parse_args() @@ -306,8 +309,9 @@ def optimize_model(input, os.remove(temp_model_path) logger.debug("Remove tempoary model: {}".format(temp_model_path)) - optimizer.model.producer_name = "onnxruntime_tools" - optimizer.model.producer_version = "1.5.2" + optimizer.model.producer_name = "onnxruntime.transformers" + from onnxruntime import __version__ as onnxruntime_version + optimizer.model.producer_version = onnxruntime_version return optimizer @@ -325,9 +329,7 @@ def main(): _setup_logger(args.verbose) if os.path.realpath(args.input) == os.path.realpath(args.output): - logger.warning( - f"Specified the same input and output path. Note that this may overwrite the original model" - ) + logger.warning(f"Specified the same input and output path. Note that this may overwrite the original model") optimization_options = _get_optimization_options(args) diff --git a/onnxruntime/python/tools/transformers/profiler.py b/onnxruntime/python/tools/transformers/profiler.py index dff252a744..05b1e9b957 100644 --- a/onnxruntime/python/tools/transformers/profiler.py +++ b/onnxruntime/python/tools/transformers/profiler.py @@ -75,7 +75,7 @@ def parse_arguments(argv=None): required=False, default='default', choices=['bert', 'gpt2', 'longformer', 'default'], - help="Way to create dummy inputs. If your model is not aa") + help="Type of dummy inputs. The default will create inputs with ones.") parser.add_argument('-g', '--use_gpu', required=False, action='store_true', help="use GPU") parser.set_defaults(use_gpu=False) @@ -96,8 +96,7 @@ def parse_arguments(argv=None): parser.add_argument('-v', '--verbose', required=False, action='store_true') parser.set_defaults(verbose=False) - args = parser.parse_args(argv) - return args + return parser.parse_args(argv) def create_bert_inputs(model, batch_size, sequence_length, samples, input_ids_name, segment_ids_name, input_mask_name): @@ -116,7 +115,7 @@ def create_bert_inputs(model, batch_size, sequence_length, samples, input_ids_na return all_inputs -def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, batch_size, sequence_length, all_inputs): +def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, all_inputs): from benchmark_helper import create_onnxruntime_session session = create_onnxruntime_session(onnx_model_path, @@ -135,8 +134,8 @@ def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, batch_ def load_profile_json(profile_file): print(f"loading profile output {profile_file} ...") - with open(profile_file, "r") as f: - sess_time = json.load(f) + with open(profile_file, "r") as opened_file: + sess_time = json.load(opened_file) assert isinstance(sess_time, list) return sess_time @@ -167,18 +166,18 @@ def parse_profile_results(sess_time, kernel_time_only=False, threshold=0): node_time[item["name"]] = item["dur"] total += item["dur"] - results = [] + lines = [] if (threshold > 0): - results.append(f"Threshold of Percentage > {threshold:.2f}%") + lines.append(f"Threshold of Percentage > {threshold:.2f}%") - results.append(f"Duration\tPercentage\tProvider\tName") + lines.append("Duration\tPercentage\tProvider\tName") for k, v in sorted(node_time.items(), key=lambda x: x[1], reverse=True): provider = node_provider[k] if k in node_provider else "" ratio = v / total if ratio > threshold: - results.append(f"{v}\t{ratio * 100.0:5.2f}\t{provider}\t{k}") + lines.append(f"{v}\t{ratio * 100.0:5.2f}\t{provider}\t{k}") - return results + return lines def group_profile_results(sess_time, kernel_time_only=False, threshold=0): @@ -215,15 +214,15 @@ def group_profile_results(sess_time, kernel_time_only=False, threshold=0): op_cpu_time[op_name] = item["dur"] op_cpu_records[op_name] = 1 - results = [f"Duration\tPercentage\tCalls\tCpu_Duration\tCpu_Calls\tName"] + lines = ["Duration\tPercentage\tCalls\tCpu_Duration\tCpu_Calls\tName"] for k, v in sorted(op_time.items(), key=lambda x: x[1], reverse=True): calls = op_records[k] cpu_time = op_cpu_time[k] if k in op_cpu_time else 0 cpu_calls = op_cpu_records[k] if k in op_cpu_records else 0 ratio = v / total if ratio > threshold: - results.append(f"{v}\t{ratio * 100.0:5.2f}\t{calls}\t{cpu_time}\t{cpu_calls}\t{k}") - return results + lines.append(f"{v}\t{ratio * 100.0:5.2f}\t{calls}\t{cpu_time}\t{cpu_calls}\t{k}") + return lines def get_dim_from_type_proto(dim): @@ -240,11 +239,11 @@ def create_dummy_inputs(onnx_model_path, batch_size, sequence_length, samples): onnx_model = OnnxModel(onnx.load(onnx_model_path)) dummy_inputs = {} - for input in onnx_model.get_graph_inputs_excluding_initializers(): - shape = get_shape_from_type_proto(input.type) + for graph_input in onnx_model.get_graph_inputs_excluding_initializers(): + shape = get_shape_from_type_proto(graph_input.type) symbol_dims = [] for i, dim in enumerate(shape): - if type(dim) == str: + if isinstance(dim, str): symbol_dims.append(i) # allowed symbolic dimensions: batch_size and sequence_length @@ -255,12 +254,12 @@ def create_dummy_inputs(onnx_model_path, batch_size, sequence_length, samples): if len(symbol_dims) > 1: shape[symbol_dims[1]] = sequence_length - elem_type = input.type.tensor_type.elem_type + elem_type = graph_input.type.tensor_type.elem_type assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64] data_type = numpy.float32 if elem_type == TensorProto.FLOAT else ( numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32) data = numpy.ones(shape, dtype=data_type) - dummy_inputs[input.name] = data + dummy_inputs[graph_input.name] = data all_inputs = [dummy_inputs for _ in range(samples)] return all_inputs @@ -280,20 +279,20 @@ def create_gpt2_inputs(onnx_model_path, batch_size, sequence_length, past_sequen } dummy_inputs = {} - for input in onnx_model.get_graph_inputs_excluding_initializers(): - shape = get_shape_from_type_proto(input.type) + for graph_input in onnx_model.get_graph_inputs_excluding_initializers(): + shape = get_shape_from_type_proto(graph_input.type) for i, dim in enumerate(shape): - if type(dim) == str and dim not in symbols.keys(): + if isinstance(dim, str) and dim not in symbols.keys(): raise RuntimeError(f"symbol is not supported: {dim}") else: shape[i] = symbols[dim] - elem_type = input.type.tensor_type.elem_type + elem_type = graph_input.type.tensor_type.elem_type assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64] data_type = numpy.float32 if elem_type == TensorProto.FLOAT else ( numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32) data = numpy.ones(shape, dtype=data_type) - dummy_inputs[input.name] = data + dummy_inputs[graph_input.name] = data all_inputs = [dummy_inputs for _ in range(samples)] return all_inputs @@ -307,25 +306,25 @@ def create_longformer_inputs(onnx_model_path, batch_size, sequence_length, globa symbols = {'batch_size': batch_size, 'sequence_length': sequence_length} dummy_inputs = {} - for input in onnx_model.get_graph_inputs_excluding_initializers(): - shape = get_shape_from_type_proto(input.type) + for graph_input in onnx_model.get_graph_inputs_excluding_initializers(): + shape = get_shape_from_type_proto(graph_input.type) for i, dim in enumerate(shape): - if type(dim) == str and dim not in symbols.keys(): + if isinstance(dim, str) and dim not in symbols.keys(): raise RuntimeError(f"symbol is not supported: {dim}") else: shape[i] = symbols[dim] - elem_type = input.type.tensor_type.elem_type + elem_type = graph_input.type.tensor_type.elem_type assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64] data_type = numpy.float32 if elem_type == TensorProto.FLOAT else ( numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32) - if "global" in input.name: + if "global" in graph_input.name: data = numpy.zeros(shape, dtype=data_type) data[:, :global_length] = 1 else: data = numpy.ones(shape, dtype=data_type) - dummy_inputs[input.name] = data + dummy_inputs[graph_input.name] = data all_inputs = [dummy_inputs for _ in range(samples)] return all_inputs @@ -351,8 +350,7 @@ def run(args): else: # default all_inputs = create_dummy_inputs(args.model, args.batch_size, args.sequence_length, args.samples) - profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, args.batch_size, - args.sequence_length, all_inputs) + profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, all_inputs) profile_records = load_profile_json(profile_file) @@ -365,13 +363,13 @@ def run(args): if __name__ == '__main__': - args = parse_arguments() - print("Arguments", args) + arguments = parse_arguments() + print("Arguments", arguments) from benchmark_helper import setup_logger - setup_logger(args.verbose) + setup_logger(arguments.verbose) - results = run(args) + results = run(arguments) print("Results:") print("-" * 64) diff --git a/onnxruntime/python/tools/transformers/requirements_cpu.txt b/onnxruntime/python/tools/transformers/requirements_cpu.txt index 7603b88b6e..ff4e066ed9 100644 --- a/onnxruntime/python/tools/transformers/requirements_cpu.txt +++ b/onnxruntime/python/tools/transformers/requirements_cpu.txt @@ -5,10 +5,10 @@ psutil py-cpuinfo py3nvml packaging -# Gpt2 and Albert models need a change in transformers (https://github.com/huggingface/transformers/pull/4244) for exporting ONNX models. -# The change does not exist in 2.9.0 so we install from source instead. -git+https://github.com/huggingface/transformers.git +transformers onnxruntime +onnxconverter_common --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.5.0+cpu -torchvision==0.6.0+cpu +torch==1.7.1+cpu +torchvision==0.8.2+cpu +torchaudio===0.7.2 diff --git a/onnxruntime/python/tools/transformers/requirements_gpu.txt b/onnxruntime/python/tools/transformers/requirements_gpu.txt index c15bbc891d..d3dc4e8aee 100644 --- a/onnxruntime/python/tools/transformers/requirements_gpu.txt +++ b/onnxruntime/python/tools/transformers/requirements_gpu.txt @@ -5,10 +5,10 @@ psutil py-cpuinfo py3nvml packaging -# Gpt2 and Albert models need a change in transformers (https://github.com/huggingface/transformers/pull/4244) for exporting ONNX models. -# The change does not exist in 2.9.0 so we install from source instead. -git+https://github.com/huggingface/transformers.git +transformers onnxruntime-gpu +onnxconverter_common --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.5.0+cu101 -torchvision==0.6.0+cu101 +torch===1.7.1 +torchvision===0.8.2 +torchaudio===0.7.2 \ No newline at end of file