# onnxruntime/tools/python/util/pytorch_export_helpers.py

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import inspect
from collections import abc

import torch


def _parse_inputs_for_onnx_export(all_input_parameters, inputs, kwargs):
    """Work out the flattened, ordered input names for one call of a module's forward().

    The parameters of forward() are walked in signature order and matched against the supplied
    positional and keyword values. None values contribute no name. Sequences contribute one name
    per element (`<name>_<index>`) and mappings one name per entry (`<name>_<key>`), recursively.
    Strings are treated as a single value rather than as a sequence of characters.

    :param all_input_parameters: Iterable of `inspect.Parameter` describing forward(), in order.
    :param inputs: Positional argument values used for the call.
    :param kwargs: Dict of keyword argument values used for the call.
    :return: List of input names in the order they were discovered.
    """
    # extracted from https://github.com/microsoft/onnxruntime/blob/239c6ad3f021ff7cc2e6247eb074bd4208dc11e2/orttraining/orttraining/python/training/ortmodule/_io.py#L433

    input_names = []

    def _add_input(name, value):
        """Append the name(s) that `value` expands to onto `input_names`."""
        if value is None:
            # None inputs are dropped entirely.
            return

        # A str is an abc.Sequence whose elements are again str, so expanding it would recurse
        # without end. Treat it as a single (leaf) value instead.
        if isinstance(value, abc.Sequence) and not isinstance(value, str):
            # The sequence itself is not an input; each element is, named with its index appended.
            for i, element in enumerate(value):
                _add_input(f"{name}_{i}", element)
            return

        if isinstance(value, abc.Mapping):
            # The mapping itself is not an input; each entry is, named with its key appended.
            for key, element in value.items():
                _add_input(f"{name}_{key}", element)
            return

        input_names.append(name)

    named_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )

    # Number of positional values absorbed by *args so far. Used both to name those values and to
    # shift the positional index of any parameter that follows *args in the signature.
    var_positional_count = 0

    # Keyword arguments that were already consumed by a named parameter. Their flattened names
    # (e.g. `x_0`) differ from the keyword itself (`x`), so `input_names` alone cannot tell us that
    # they have been handled when the **kwargs parameter is processed.
    bound_kwarg_names = set()

    for param_idx, parameter in enumerate(all_input_parameters):
        if parameter.kind == inspect.Parameter.VAR_POSITIONAL:
            # *args carries every remaining positional value from the call.
            for value in inputs[param_idx:]:
                _add_input(f"{parameter.name}_{var_positional_count}", value)
                var_positional_count += 1
        elif parameter.kind in named_kinds:
            # All parameters other than *args and **kwargs are processed here.
            name = parameter.name
            positional_idx = param_idx + var_positional_count
            value = None
            if positional_idx < len(inputs) and inputs[positional_idx] is not None:
                value = inputs[positional_idx]
            elif kwargs.get(name) is not None:
                value = kwargs[name]
                bound_kwarg_names.add(name)
            _add_input(name, value)
        elif parameter.kind == inspect.Parameter.VAR_KEYWORD:
            # **kwargs is always the last argument of forward(); it receives whatever keyword
            # arguments were not bound to a named parameter above.
            for name, value in kwargs.items():
                if name in bound_kwarg_names or name in input_names:
                    continue
                _add_input(name, value)

    return input_names


def _flatten_module_input(names, args, kwargs):
    """Combine positional and keyword values into one tuple of sample inputs.

    Positional values come first, in call order. They are followed by the keyword values whose
    key appears in `names`, in the order given by `names`. Values whose type is exactly int, bool
    or float are wrapped with `torch.tensor`; everything else is passed through unchanged.

    :param names: Ordered input names; selects and orders the keyword values.
    :param args: Positional values.
    :param kwargs: Keyword values.
    :return: Tuple of the collected values, with a trailing empty dict when `kwargs` is empty.
    """
    # extracted from https://github.com/microsoft/onnxruntime/blob/239c6ad3f021ff7cc2e6247eb074bd4208dc11e2/orttraining/orttraining/python/training/ortmodule/_io.py#L110

    scalar_types = frozenset((int, bool, float))

    def _as_export_value(value):
        # Exact type match on purpose: subclasses of the scalar types are passed through as-is.
        if type(value) in scalar_types:
            return torch.tensor(value)
        return value

    flattened = []
    for positional in args:
        flattened.append(_as_export_value(positional))
    for name in names:
        if name in kwargs:
            flattened.append(_as_export_value(kwargs[name]))

    # With no kwargs, an empty dictionary is appended at the end of the sample inputs to make the
    # exporter happy; it is otherwise confused between kwargs and dictionary inputs.
    if not kwargs:
        flattened.append({})

    return tuple(flattened)


def infer_input_info(module: torch.nn.Module, *inputs, **kwargs):
    """
    Derive the input names and the matching tuple of input values for exporting a PyTorch module
    with torch.onnx.export, based on the arguments the module would be executed with.
    Assumes model is on CPU. Use `module.to(torch.device('cpu'))` if it isn't.

    Example usage:
    input_names, inputs_as_tuple = infer_input_info(module, ...)
    torch.onnx.export(module, inputs_as_tuple, 'model.onnx', input_names=input_names, output_names=[...], ...)

    :param module: Module whose `forward` signature is inspected.
    :param inputs: Positional inputs
    :param kwargs: Keyword argument inputs
    :return: Tuple of (ordered input names, input values as a tuple). These can be used directly with
            torch.onnx.export as the `input_names` and `inputs` arguments.
    """
    forward_signature = inspect.signature(module.forward)
    names = _parse_inputs_for_onnx_export(forward_signature.parameters.values(), inputs, kwargs)
    return names, _flatten_module_input(names, inputs, kwargs)
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00			`# Copyright (c) Microsoft Corporation. All rights reserved.`
			`# Licensed under the MIT License.`

			`import inspect`
			`from collections import abc`

Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`import torch`

Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00
			`def _parse_inputs_for_onnx_export(all_input_parameters, inputs, kwargs):`
Adopt linrtunner as the linting tool - take 2 (#15085) ### Description `lintrunner` is a linter runner successfully used by pytorch, onnx and onnx-script. It provides a uniform experience running linters locally and in CI. It supports all major dev systems: Windows, Linux and MacOs. The checks are enforced by the `Python format` workflow. This PR adopts `lintrunner` to onnxruntime and fixed ~2000 flake8 errors in Python code. `lintrunner` now runs all required python lints including `ruff`(replacing `flake8`), `black` and `isort`. Future lints like `clang-format` can be added. Most errors are auto-fixed by `ruff` and the fixes should be considered robust. Lints that are more complicated to fix are applied `# noqa` for now and should be fixed in follow up PRs. ### Notable changes 1. This PR removed some suboptimal patterns: - `not xxx in` -> `xxx not in` membership checks - bare excepts (`except:` -> `except Exception`) - unused imports The follow up PR will remove: - `import *` - mutable values as default in function definitions (`def func(a=[])`) - more unused imports - unused local variables 2. Use `ruff` to replace `flake8`. `ruff` is much (40x) faster than flake8 and is more robust. We are using it successfully in onnx and onnx-script. It also supports auto-fixing many flake8 errors. 3. Removed the legacy flake8 ci flow and updated docs. 4. The added workflow supports SARIF code scanning reports on github, example snapshot: ![image](https://user-images.githubusercontent.com/11205048/212598953-d60ce8a9-f242-4fa8-8674-8696b704604a.png) 5. Removed `onnxruntime-python-checks-ci-pipeline` as redundant ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Unified linting experience in CI and local. 
Replacing https://github.com/microsoft/onnxruntime/pull/14306 --------- Signed-off-by: Justin Chu <justinchu@microsoft.com> 2023-03-24 22:29:03 +00:00			`# extracted from https://github.com/microsoft/onnxruntime/blob/239c6ad3f021ff7cc2e6247eb074bd4208dc11e2/orttraining/orttraining/python/training/ortmodule/_io.py#L433`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00
			`def _add_input(name, input):`
			`"""Returns number of expanded inputs that _add_input processed"""`

			`if input is None:`
			`# Drop all None inputs and return 0.`
			`return 0`

			`num_expanded_non_none_inputs = 0`
			`if isinstance(input, abc.Sequence):`
			`# If the input is a sequence (like a list), expand the list so that`
			`# each element of the list is an input by itself.`
			`for i, val in enumerate(input):`
			`# Name each input with the index appended to the original name of the`
			`# argument.`
			`num_expanded_non_none_inputs += _add_input(f"{name}_{i}", val)`

			`# Return here since the list by itself is not a valid input.`
			`# All the elements of the list have already been added as inputs individually.`
			`return num_expanded_non_none_inputs`
			`elif isinstance(input, abc.Mapping):`
			`# If the input is a mapping (like a dict), expand the dict so that`
			`# each element of the dict is an input by itself.`
			`for key, val in input.items():`
			`num_expanded_non_none_inputs += _add_input(f"{name}_{key}", val)`

			`# Return here since the dict by itself is not a valid input.`
			`# All the elements of the dict have already been added as inputs individually.`
			`return num_expanded_non_none_inputs`

			`# InputInfo should contain all the names irrespective of whether they are`
			`# a part of the onnx graph or not.`
			`input_names.append(name)`

			`# A single input non none input was processed, return 1`
			`return 1`

			`input_names = []`
			`var_positional_idx = 0`
			`num_expanded_non_none_positional_inputs = 0`

			`for input_idx, input_parameter in enumerate(all_input_parameters):`
			`if input_parameter.kind == inspect.Parameter.VAR_POSITIONAL:`
			`# VAR_POSITIONAL parameter carries all *args parameters from original forward method`
			`for args_i in range(input_idx, len(inputs)):`
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`name = f"{input_parameter.name}_{var_positional_idx}"`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00			`var_positional_idx += 1`
			`inp = inputs[args_i]`
			`num_expanded_non_none_positional_inputs += _add_input(name, inp)`
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`elif (`
			`input_parameter.kind == inspect.Parameter.POSITIONAL_ONLY`
			`or input_parameter.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD`
			`or input_parameter.kind == inspect.Parameter.KEYWORD_ONLY`
			`):`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00			`# All positional non-*args and non-**kwargs are processed here`
			`name = input_parameter.name`
			`inp = None`
Enable pylint and numpy rules (#15218) ### Description Enable pylint and numpy rules ### Motivation and Context Modernize numpy usage and enable more quality checks 2023-03-28 03:37:53 +00:00			`input_idx += var_positional_idx # noqa: PLW2901`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00			`is_positional = True`
			`if input_idx < len(inputs) and inputs[input_idx] is not None:`
			`inp = inputs[input_idx]`
			`elif name in kwargs and kwargs[name] is not None:`
			`inp = kwargs[name]`
			`is_positional = False`
			`num_expanded_non_none_inputs_local = _add_input(name, inp)`
			`if is_positional:`
			`num_expanded_non_none_positional_inputs += num_expanded_non_none_inputs_local`
			`elif input_parameter.kind == inspect.Parameter.VAR_KEYWORD:`
			`# **kwargs is always the last argument of forward()`
			`for name, inp in kwargs.items():`
			`if name not in input_names:`
			`_add_input(name, inp)`

			`return input_names`


			`def _flatten_module_input(names, args, kwargs):`
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`"""Flatten args and kwargs in a single tuple of tensors."""`
Adopt linrtunner as the linting tool - take 2 (#15085) ### Description `lintrunner` is a linter runner successfully used by pytorch, onnx and onnx-script. It provides a uniform experience running linters locally and in CI. It supports all major dev systems: Windows, Linux and MacOs. The checks are enforced by the `Python format` workflow. This PR adopts `lintrunner` to onnxruntime and fixed ~2000 flake8 errors in Python code. `lintrunner` now runs all required python lints including `ruff`(replacing `flake8`), `black` and `isort`. Future lints like `clang-format` can be added. Most errors are auto-fixed by `ruff` and the fixes should be considered robust. Lints that are more complicated to fix are applied `# noqa` for now and should be fixed in follow up PRs. ### Notable changes 1. This PR removed some suboptimal patterns: - `not xxx in` -> `xxx not in` membership checks - bare excepts (`except:` -> `except Exception`) - unused imports The follow up PR will remove: - `import *` - mutable values as default in function definitions (`def func(a=[])`) - more unused imports - unused local variables 2. Use `ruff` to replace `flake8`. `ruff` is much (40x) faster than flake8 and is more robust. We are using it successfully in onnx and onnx-script. It also supports auto-fixing many flake8 errors. 3. Removed the legacy flake8 ci flow and updated docs. 4. The added workflow supports SARIF code scanning reports on github, example snapshot: ![image](https://user-images.githubusercontent.com/11205048/212598953-d60ce8a9-f242-4fa8-8674-8696b704604a.png) 5. Removed `onnxruntime-python-checks-ci-pipeline` as redundant ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Unified linting experience in CI and local. 
Replacing https://github.com/microsoft/onnxruntime/pull/14306 --------- Signed-off-by: Justin Chu <justinchu@microsoft.com> 2023-03-24 22:29:03 +00:00			`# extracted from https://github.com/microsoft/onnxruntime/blob/239c6ad3f021ff7cc2e6247eb074bd4208dc11e2/orttraining/orttraining/python/training/ortmodule/_io.py#L110`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`def is_primitive_type(value):`
			`return type(value) in {int, bool, float}`

			`def to_tensor(value):`
			`return torch.tensor(value)`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00
			`ret = [to_tensor(arg) if is_primitive_type(arg) else arg for arg in args]`
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`ret += [`
			`to_tensor(kwargs[name]) if is_primitive_type(kwargs[name]) else kwargs[name] for name in names if name in kwargs`
			`]`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00
			`# if kwargs is empty, append an empty dictionary at the end of the sample inputs to make exporter`
			`# happy. This is because the exporter is confused with kwargs and dictionary inputs otherwise.`
			`if not kwargs:`
			`ret.append({})`

			`return tuple(ret)`


			`def infer_input_info(module: torch.nn.Module, *inputs, **kwargs):`
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`"""`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00			`Infer the input names and order from the arguments used to execute a PyTorch module for usage exporting`
			`the model via torch.onnx.export.`
			Assumes model is on CPU. Use `module.to(torch.device('cpu'))` if it isn't.

			`Example usage:`
			`input_names, inputs_as_tuple = infer_input_info(module, ...)`
			`torch.onnx.export(module, inputs_as_type, 'model.onnx', input_names=input_names, output_names=[...], ...)`

			`:param module: Module`
			`:param inputs: Positional inputs`
			`:param kwargs: Keyword argument inputs`
			`:return: Tuple of ordered input names and input values. These can be used directly with torch.onnx.export as the`
			`input_names` and `inputs` arguments.
Format all python files under onnxruntime with black and isort (#11324) Description: Format all python files under onnxruntime with black and isort. After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame. #11315, #11316 2022-04-26 16:35:16 +00:00			`"""`
Add range of helpers for making usage of ORT Mobile easier. (#10458) * Add range of helpers for making usage of ORT Mobile easier. 2022-02-17 21:35:25 +00:00			`module_parameters = inspect.signature(module.forward).parameters.values()`
			`input_names = _parse_inputs_for_onnx_export(module_parameters, inputs, kwargs)`
			`inputs_as_tuple = _flatten_module_input(input_names, inputs, kwargs)`

			`return input_names, inputs_as_tuple`