update transformers required package versions (#6315)

2026-07-13 18:08:13 +00:00 · 2021-01-12 00:10:56 -08:00 · 2021-01-12 00:10:56 -08:00 · a038924bee
commit a038924bee
parent c43ca45c4f
6 changed files with 66 additions and 69 deletions
--- a/onnxruntime/python/tools/transformers/benchmark_helper.py
+++ b/onnxruntime/python/tools/transformers/benchmark_helper.py
@ -53,10 +53,6 @@ def create_onnxruntime_session(onnx_model_path,
        if num_threads > 0:
            sess_options.intra_op_num_threads = num_threads
            logger.debug(f"Session option: intra_op_num_threads={sess_options.intra_op_num_threads}")
-        elif (not use_gpu) and (version.parse(onnxruntime_version) < version.parse('1.3.0')):
-            # Set intra_op_num_threads = 1 to enable OpenMP for onnxruntime 1.2.0 (cpu)
-            # onnxruntime-gpu is not built with openmp so it is better to use default (0) or cpu_count instead.
-            sess_options.intra_op_num_threads = 1

        if verbose:
            sess_options.log_severity_level = 0
@ -98,9 +94,10 @@ def prepare_environment(cache_dir, output_dir, use_gpu):
    logger.info(f'Transformers Version:{transformers.__version__}')
    logger.info(f'Onnxruntime Version:{onnxruntime.__version__}')

+    # Support three major versions of PyTorch and OnnxRuntime, and up to 6 months of transformers.
    from packaging import version
-    assert version.parse(torch.__version__) >= version.parse('1.4.0')
-    assert version.parse(transformers.__version__) >= version.parse('2.11.0')
+    assert version.parse(torch.__version__) >= version.parse('1.5.0')
+    assert version.parse(transformers.__version__) >= version.parse('3.0.0')
    assert version.parse(onnxruntime.__version__) >= version.parse('1.4.0')


@ -223,8 +220,8 @@ def inference_ort_with_io_binding(ort_session,
        allocateOutputBuffers(output_buffers, output_buffer_max_sizes, device)

    for i in range(len(ort_output_names)):
-        io_binding.bind_output(ort_output_names[i], output_buffers[i].device.type, 0, numpy.float32, ort_outputs[i].shape,
-                           output_buffers[i].data_ptr())
+        io_binding.bind_output(ort_output_names[i], output_buffers[i].device.type, 0, numpy.float32,
+                               ort_outputs[i].shape, output_buffers[i].data_ptr())
    runtimes = timeit.repeat(lambda: ort_session.run_with_iobinding(io_binding), number=1, repeat=repeat_times)
    result.update(result_template)
    result.update({"io_binding": True})
--- a/onnxruntime/python/tools/transformers/onnx_model_bert.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py
@ -132,7 +132,6 @@ class BertOnnxModel(OnnxModel):

        new_graph_inputs = []
        casted_bert_graph_inputs = self.get_graph_inputs_from_fused_nodes(casted=True)
-        

        for input in graph.input:
            if input.name in casted_bert_graph_inputs:
@ -178,13 +177,13 @@ class BertOnnxModel(OnnxModel):
    def preprocess(self):
        self.adjust_reshape_and_expand()
        return
-    
+
    def adjust_reshape_and_expand(self):
        nodes_to_remove = []
        for node in self.nodes():
-            if node.op_type == 'Reshape':        
+            if node.op_type == 'Reshape':
                # Clean up unneccessary reshape nodes.
-                # Find reshape nodes with no actually data in "shape" attribute and remove. 
+                # Find reshape nodes with no actual data in "shape" attribute and remove them.
                reshape_shape = self.get_constant_value(node.input[1])
                if reshape_shape is not None and reshape_shape.size == 0:
                    nodes_to_remove.extend([node])
@ -192,9 +191,9 @@ class BertOnnxModel(OnnxModel):
                    continue

                # Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by
-                # changing current reshape's input to output of slice. 
+                # changing current reshape's input to output of slice.
                reshape_path = self.match_parent_path(node, ['Expand', 'Expand', 'Reshape', 'Slice'], [0, 0, 0, 0],
-                                                            self.output_name_to_node())
+                                                      self.output_name_to_node())
                if reshape_path is not None:
                    expand_node = reshape_path[-3]
                    expand_shape_value = self.get_constant_value(expand_node.input[1])
@ -203,8 +202,9 @@ class BertOnnxModel(OnnxModel):
                    shape_value = self.get_constant_value(reshape_before_expand.input[1])

                    slice_node = reshape_path[-1]
-                    if expand_shape_value is not None and shape_value is not None and len(expand_shape_value) is 2 and len(
-                            shape_value) is 1 and expand_shape_value[1] == shape_value[0]:
+                    if expand_shape_value is not None and shape_value is not None and len(
+                            expand_shape_value) is 2 and len(
+                                shape_value) is 1 and expand_shape_value[1] == shape_value[0]:
                        node.input[0] = slice_node.output[0]
        self.remove_nodes(nodes_to_remove)
        logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}")
@ -340,6 +340,6 @@ class BertOnnxModel(OnnxModel):
            logger.debug("Embed Layer not fused")

        if attention == 0:
-            logger.debug("Attention not fused")
+            logger.warning("Attention not fused")

        return is_perfect
--- a/onnxruntime/python/tools/transformers/optimizer.py
+++ b/onnxruntime/python/tools/transformers/optimizer.py
@ -214,7 +214,10 @@ def _parse_arguments():
                        default=0,
                        help="onnxruntime optimization level. 0 will disable onnxruntime.")

-    parser.add_argument('--use_external_data_format', required=False, action='store_true', help="use external data format")
+    parser.add_argument('--use_external_data_format',
+                        required=False,
+                        action='store_true',
+                        help="use external data format")
    parser.set_defaults(use_external_data_format=False)

    args = parser.parse_args()
@ -306,8 +309,9 @@ def optimize_model(input,
        os.remove(temp_model_path)
        logger.debug("Remove tempoary model: {}".format(temp_model_path))

-    optimizer.model.producer_name = "onnxruntime_tools"
-    optimizer.model.producer_version = "1.5.2"
+    optimizer.model.producer_name = "onnxruntime.transformers"
+    from onnxruntime import __version__ as onnxruntime_version
+    optimizer.model.producer_version = onnxruntime_version

    return optimizer

@ -325,9 +329,7 @@ def main():
    _setup_logger(args.verbose)

    if os.path.realpath(args.input) == os.path.realpath(args.output):
-        logger.warning(
-            f"Specified the same input and output path. Note that this may overwrite the original model"
-        )
+        logger.warning(f"Specified the same input and output path. Note that this may overwrite the original model")

    optimization_options = _get_optimization_options(args)

--- a/onnxruntime/python/tools/transformers/profiler.py
+++ b/onnxruntime/python/tools/transformers/profiler.py
@ -75,7 +75,7 @@ def parse_arguments(argv=None):
                        required=False,
                        default='default',
                        choices=['bert', 'gpt2', 'longformer', 'default'],
-                        help="Way to create dummy inputs. If your model is not aa")
+                        help="Type of dummy inputs. The default will create inputs with ones.")

    parser.add_argument('-g', '--use_gpu', required=False, action='store_true', help="use GPU")
    parser.set_defaults(use_gpu=False)
@ -96,8 +96,7 @@ def parse_arguments(argv=None):
    parser.add_argument('-v', '--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)

-    args = parser.parse_args(argv)
-    return args
+    return parser.parse_args(argv)


 def create_bert_inputs(model, batch_size, sequence_length, samples, input_ids_name, segment_ids_name, input_mask_name):
@ -116,7 +115,7 @@ def create_bert_inputs(model, batch_size, sequence_length, samples, input_ids_na
    return all_inputs


-def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, batch_size, sequence_length, all_inputs):
+def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, all_inputs):
    from benchmark_helper import create_onnxruntime_session

    session = create_onnxruntime_session(onnx_model_path,
@ -135,8 +134,8 @@ def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, batch_
 def load_profile_json(profile_file):
    print(f"loading profile output {profile_file} ...")

-    with open(profile_file, "r") as f:
-        sess_time = json.load(f)
+    with open(profile_file, "r") as opened_file:
+        sess_time = json.load(opened_file)

    assert isinstance(sess_time, list)
    return sess_time
@ -167,18 +166,18 @@ def parse_profile_results(sess_time, kernel_time_only=False, threshold=0):
                node_time[item["name"]] = item["dur"]
            total += item["dur"]

-    results = []
+    lines = []
    if (threshold > 0):
-        results.append(f"Threshold of Percentage > {threshold:.2f}%")
+        lines.append(f"Threshold of Percentage > {threshold:.2f}%")

-    results.append(f"Duration\tPercentage\tProvider\tName")
+    lines.append("Duration\tPercentage\tProvider\tName")
    for k, v in sorted(node_time.items(), key=lambda x: x[1], reverse=True):
        provider = node_provider[k] if k in node_provider else ""
        ratio = v / total
        if ratio > threshold:
-            results.append(f"{v}\t{ratio * 100.0:5.2f}\t{provider}\t{k}")
+            lines.append(f"{v}\t{ratio * 100.0:5.2f}\t{provider}\t{k}")

-    return results
+    return lines


 def group_profile_results(sess_time, kernel_time_only=False, threshold=0):
@ -215,15 +214,15 @@ def group_profile_results(sess_time, kernel_time_only=False, threshold=0):
                    op_cpu_time[op_name] = item["dur"]
                    op_cpu_records[op_name] = 1

-    results = [f"Duration\tPercentage\tCalls\tCpu_Duration\tCpu_Calls\tName"]
+    lines = ["Duration\tPercentage\tCalls\tCpu_Duration\tCpu_Calls\tName"]
    for k, v in sorted(op_time.items(), key=lambda x: x[1], reverse=True):
        calls = op_records[k]
        cpu_time = op_cpu_time[k] if k in op_cpu_time else 0
        cpu_calls = op_cpu_records[k] if k in op_cpu_records else 0
        ratio = v / total
        if ratio > threshold:
-            results.append(f"{v}\t{ratio * 100.0:5.2f}\t{calls}\t{cpu_time}\t{cpu_calls}\t{k}")
-    return results
+            lines.append(f"{v}\t{ratio * 100.0:5.2f}\t{calls}\t{cpu_time}\t{cpu_calls}\t{k}")
+    return lines


 def get_dim_from_type_proto(dim):
@ -240,11 +239,11 @@ def create_dummy_inputs(onnx_model_path, batch_size, sequence_length, samples):

    onnx_model = OnnxModel(onnx.load(onnx_model_path))
    dummy_inputs = {}
-    for input in onnx_model.get_graph_inputs_excluding_initializers():
-        shape = get_shape_from_type_proto(input.type)
+    for graph_input in onnx_model.get_graph_inputs_excluding_initializers():
+        shape = get_shape_from_type_proto(graph_input.type)
        symbol_dims = []
        for i, dim in enumerate(shape):
-            if type(dim) == str:
+            if isinstance(dim, str):
                symbol_dims.append(i)

        # allowed symbolic dimensions: batch_size and sequence_length
@ -255,12 +254,12 @@ def create_dummy_inputs(onnx_model_path, batch_size, sequence_length, samples):
        if len(symbol_dims) > 1:
            shape[symbol_dims[1]] = sequence_length

-        elem_type = input.type.tensor_type.elem_type
+        elem_type = graph_input.type.tensor_type.elem_type
        assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64]
        data_type = numpy.float32 if elem_type == TensorProto.FLOAT else (
            numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32)
        data = numpy.ones(shape, dtype=data_type)
-        dummy_inputs[input.name] = data
+        dummy_inputs[graph_input.name] = data

    all_inputs = [dummy_inputs for _ in range(samples)]
    return all_inputs
@ -280,20 +279,20 @@ def create_gpt2_inputs(onnx_model_path, batch_size, sequence_length, past_sequen
    }

    dummy_inputs = {}
-    for input in onnx_model.get_graph_inputs_excluding_initializers():
-        shape = get_shape_from_type_proto(input.type)
+    for graph_input in onnx_model.get_graph_inputs_excluding_initializers():
+        shape = get_shape_from_type_proto(graph_input.type)
        for i, dim in enumerate(shape):
-            if type(dim) == str and dim not in symbols.keys():
+            if isinstance(dim, str) and dim not in symbols.keys():
                raise RuntimeError(f"symbol is not supported: {dim}")
            else:
                shape[i] = symbols[dim]

-        elem_type = input.type.tensor_type.elem_type
+        elem_type = graph_input.type.tensor_type.elem_type
        assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64]
        data_type = numpy.float32 if elem_type == TensorProto.FLOAT else (
            numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32)
        data = numpy.ones(shape, dtype=data_type)
-        dummy_inputs[input.name] = data
+        dummy_inputs[graph_input.name] = data

    all_inputs = [dummy_inputs for _ in range(samples)]
    return all_inputs
@ -307,25 +306,25 @@ def create_longformer_inputs(onnx_model_path, batch_size, sequence_length, globa
    symbols = {'batch_size': batch_size, 'sequence_length': sequence_length}

    dummy_inputs = {}
-    for input in onnx_model.get_graph_inputs_excluding_initializers():
-        shape = get_shape_from_type_proto(input.type)
+    for graph_input in onnx_model.get_graph_inputs_excluding_initializers():
+        shape = get_shape_from_type_proto(graph_input.type)
        for i, dim in enumerate(shape):
-            if type(dim) == str and dim not in symbols.keys():
+            if isinstance(dim, str) and dim not in symbols.keys():
                raise RuntimeError(f"symbol is not supported: {dim}")
            else:
                shape[i] = symbols[dim]

-        elem_type = input.type.tensor_type.elem_type
+        elem_type = graph_input.type.tensor_type.elem_type
        assert elem_type in [TensorProto.FLOAT, TensorProto.INT32, TensorProto.INT64]
        data_type = numpy.float32 if elem_type == TensorProto.FLOAT else (
            numpy.int64 if elem_type == TensorProto.INT64 else numpy.int32)

-        if "global" in input.name:
+        if "global" in graph_input.name:
            data = numpy.zeros(shape, dtype=data_type)
            data[:, :global_length] = 1
        else:
            data = numpy.ones(shape, dtype=data_type)
-        dummy_inputs[input.name] = data
+        dummy_inputs[graph_input.name] = data

    all_inputs = [dummy_inputs for _ in range(samples)]
    return all_inputs
@ -351,8 +350,7 @@ def run(args):
    else:  # default
        all_inputs = create_dummy_inputs(args.model, args.batch_size, args.sequence_length, args.samples)

-    profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, args.batch_size,
-                               args.sequence_length, all_inputs)
+    profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, all_inputs)

    profile_records = load_profile_json(profile_file)

@ -365,13 +363,13 @@ def run(args):


 if __name__ == '__main__':
-    args = parse_arguments()
-    print("Arguments", args)
+    arguments = parse_arguments()
+    print("Arguments", arguments)

    from benchmark_helper import setup_logger
-    setup_logger(args.verbose)
+    setup_logger(arguments.verbose)

-    results = run(args)
+    results = run(arguments)

    print("Results:")
    print("-" * 64)
--- a/onnxruntime/python/tools/transformers/requirements_cpu.txt
+++ b/onnxruntime/python/tools/transformers/requirements_cpu.txt
@ -5,10 +5,10 @@ psutil
 py-cpuinfo
 py3nvml
 packaging
-# Gpt2 and Albert models need a change in transformers (https://github.com/huggingface/transformers/pull/4244) for exporting ONNX models.
-# The change does not exist in 2.9.0 so we install from source instead.
-git+https://github.com/huggingface/transformers.git
+transformers
 onnxruntime
+onnxconverter_common
 --find-links https://download.pytorch.org/whl/torch_stable.html
-torch==1.5.0+cpu
-torchvision==0.6.0+cpu
+torch==1.7.1+cpu
+torchvision==0.8.2+cpu
+torchaudio===0.7.2
--- a/onnxruntime/python/tools/transformers/requirements_gpu.txt
+++ b/onnxruntime/python/tools/transformers/requirements_gpu.txt
@ -5,10 +5,10 @@ psutil
 py-cpuinfo
 py3nvml
 packaging
-# Gpt2 and Albert models need a change in transformers (https://github.com/huggingface/transformers/pull/4244) for exporting ONNX models.
-# The change does not exist in 2.9.0 so we install from source instead.
-git+https://github.com/huggingface/transformers.git
+transformers
 onnxruntime-gpu
+onnxconverter_common
 --find-links https://download.pytorch.org/whl/torch_stable.html
-torch==1.5.0+cu101
-torchvision==0.6.0+cu101
+torch===1.7.1
+torchvision===0.8.2
+torchaudio===0.7.2