onnxruntime/tools/python/ort_test_dir_utils.py

import glob
import numpy as np
import onnx
import onnx_test_data_utils
import onnxruntime as ort
import os
import shutil

from onnx import numpy_helper


def _get_numpy_type(model_info, name):
    """
    Look up the numpy type of a named tensor input or output.

    :param model_info: Iterable of entries (e.g. model.graph.input or model.graph.output) to search by name.
    :param name: Name of the entry to find.
    :return: The numpy type mapped from the tensor's element type.
    :raises ValueError: If no entry has the given name, or the first matching entry is not a tensor.
    """
    # only the first entry with a matching name is considered
    entry = next((candidate for candidate in model_info if candidate.name == name), None)
    if entry is None:
        raise ValueError(f"{name} was not found in the model info.")

    value_kind = entry.type.WhichOneof('value')
    if value_kind != 'tensor_type':
        raise ValueError(f"Type is not handled: {value_kind}")

    return onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[entry.type.tensor_type.elem_type]


def _create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map):
    """
    Fill name_input_map in place with random data for every model input that has no value yet.

    An input is treated as missing when its name is absent from name_input_map or is mapped to None.

    :param model_inputs: model.graph.input from an onnx model
    :param name_input_map: Map of input names to values to update. Can be empty. Existing values are preserved.
    :param symbolic_dim_values_map: Map of symbolic dimension names to values to use if creating data.
    :raises ValueError: If an input needing data is not a tensor, uses a symbolic dimension with no value in
                        symbolic_dim_values_map, or has a dimension with neither a value nor a symbolic name.
    """
    for graph_input in model_inputs:
        # a value was supplied by the caller, so leave it alone
        if name_input_map.get(graph_input.name) is not None:
            continue

        value_kind = graph_input.type.WhichOneof('value')
        if value_kind != 'tensor_type':
            raise ValueError(f'Unsupported model. Need to handle input type of {value_kind}')

        tensor_type = graph_input.type.tensor_type

        # resolve every dimension to a concrete size
        concrete_shape = []
        for axis in tensor_type.shape.dim:
            axis_kind = axis.WhichOneof('value')
            if axis_kind == 'dim_value':
                concrete_shape.append(axis.dim_value)
                continue

            if axis_kind != 'dim_param':
                # TODO: see if we need to provide a way to specify these values. could ask for the whole
                # shape for the input name instead.
                raise ValueError("Unsupported model. Unknown dim with no value or symbolic name.")

            symbol = axis.dim_param
            if symbol not in symbolic_dim_values_map:
                raise ValueError(f"Value for symbolic dim {symbol} was not provided.")

            concrete_shape.append(symbolic_dim_values_map[symbol])

        element_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[tensor_type.elem_type]
        # scale the standard normal samples by 10 so a conversion to an integer type is not all 0s and 1s
        scaled_samples = np.random.standard_normal(concrete_shape) * 10
        name_input_map[graph_input.name] = scaled_samples.astype(element_type)


def create_test_dir(model_path, root_path, test_name,
                    name_input_map=None, symbolic_dim_values_map=None,
                    name_output_map=None):
    """
    Create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test.
    Generates random input data for any missing inputs.
    Saves output from running the model if name_output_map is not provided.

    The model is copied into <root_path>/<test_name> and the data is written as input_<idx>.pb / output_<idx>.pb
    files in <root_path>/<test_name>/test_data_set_0.

    Graph inputs that are backed by an initializer already have a value stored in the model, so random data is
    only generated for them if they are explicitly named in name_input_map.

    :param model_path: Path to the onnx model file to use.
    :param root_path: Root path to create the test directory in.
    :param test_name: Name for test. Will be added to the root_path to create the test directory name.
    :param name_input_map: Map of input names to numpy ndarray data for each input. If a non-empty map is provided
                           it is updated in place with any generated input data.
    :param symbolic_dim_values_map: Map of symbolic dimension names to values to use for the input data if creating
                                    using random data.
    :param name_output_map: Optional map of output names to numpy ndarray expected output data.
                            If not provided, the model will be run with the input to generate output data to save.
    :return: None
    """

    model_path = os.path.abspath(model_path)
    root_path = os.path.abspath(root_path)
    test_dir = os.path.join(root_path, test_name)
    test_data_dir = os.path.join(test_dir, "test_data_set_0")

    # creates test_dir as an intermediate directory. exist_ok allows an existing test directory to be reused.
    os.makedirs(test_data_dir, exist_ok=True)

    model_filename = os.path.basename(model_path)
    test_model_filename = os.path.join(test_dir, model_filename)
    # shutil.copy raises SameFileError if the model is already in the test directory, so skip the copy in that case
    if not (os.path.exists(test_model_filename) and os.path.samefile(model_path, test_model_filename)):
        shutil.copy(model_path, test_model_filename)

    model = onnx.load(model_path)
    model_inputs = model.graph.input
    model_outputs = model.graph.output

    def save_data(prefix, name_data_map, model_info):
        # the index advances for skipped entries too, so file numbering follows the position in the map
        for idx, (name, data) in enumerate(name_data_map.items()):
            if isinstance(data, (dict, list)):
                # ignore. dict is map<T1, T2> and list is vector<map<T1,T2>> from traditional ML ops.
                # e.g. ZipMap output
                continue

            np_type = _get_numpy_type(model_info, name)
            tensor = numpy_helper.from_array(data.astype(np_type), name)
            filename = os.path.join(test_data_dir, f"{prefix}_{idx}.pb")
            with open(filename, 'wb') as f:
                f.write(tensor.SerializeToString())

    if not name_input_map:
        name_input_map = {}

    if not symbolic_dim_values_map:
        symbolic_dim_values_map = {}

    # don't overwrite initializer values (e.g. weights) with random data. an initializer that the caller
    # explicitly listed in name_input_map is still processed so a None placeholder gets filled in.
    initializer_names = {initializer.name for initializer in model.graph.initializer}
    inputs_needing_data = [i for i in model_inputs
                           if i.name not in initializer_names or i.name in name_input_map]

    _create_missing_input_data(inputs_needing_data, name_input_map, symbolic_dim_values_map)

    # use the full list of graph inputs for the type lookup so overridden initializers can be saved as well
    save_data("input", name_input_map, model_inputs)

    # save expected output data if provided. run model to create if not.
    if not name_output_map:
        output_names = [o.name for o in model_outputs]
        sess = ort.InferenceSession(test_model_filename)
        outputs = sess.run(output_names, name_input_map)
        name_output_map = dict(zip(output_names, outputs))

    save_data("output", name_output_map, model_outputs)


def read_test_dir(dir_name):
    """
    Load the tensors stored as .pb files in a test data directory.
    Files matching 'input_*.pb' are read as inputs.
    Files matching 'output_*.pb' are read as outputs. There may be none of these.
    :param dir_name: Directory to read files from
    :return: tuple(dictionary of input name to data read from the input files,
                   dictionary of output name to data read from the output files)
    """

    input_paths = glob.glob(os.path.join(dir_name, 'input_*.pb'))
    output_paths = glob.glob(os.path.join(dir_name, 'output_*.pb'))

    def load_all(pb_paths):
        # each file yields a (name, data) pair. a later file with the same name replaces an earlier one.
        loaded = {}
        for pb_path in pb_paths:
            tensor_name, tensor_data = onnx_test_data_utils.read_tensorproto_pb_file(pb_path)
            loaded[tensor_name] = tensor_data

        return loaded

    return load_all(input_paths), load_all(output_paths)


def _outputs_match(expected, actual, rtol=1.e-3, atol=1.e-3):
    """
    Compare an expected output with the output produced by running the model.

    :param expected: Expected output data.
    :param actual: Output data from the run.
    :param rtol: Relative tolerance used for numeric data.
    :param atol: Absolute tolerance used for numeric data.
    :return: True if the shapes are equal and all values match, otherwise False.
    """
    expected = np.asarray(expected)
    actual = np.asarray(actual)

    # np.isclose broadcasts, which would hide a shape difference or raise if the shapes are incompatible
    if expected.shape != actual.shape:
        return False

    numeric_kinds = 'iufc'
    if expected.dtype.kind in numeric_kinds and actual.dtype.kind in numeric_kinds:
        return bool(np.isclose(expected, actual, rtol=rtol, atol=atol).all())

    # np.isclose can't handle types such as strings, so require an exact match for everything else
    return bool(np.array_equal(expected, actual))


def run_test_dir(model_or_dir):
    """
    Run the test/s from a directory in ONNX test format.
    All subdirectories with a prefix of 'test' are considered test input for one test run.

    For each test directory the directory name is printed, the model is run with the inputs read from it, and
    'PASS' or 'FAILED' is printed. If the directory has no expected output files nothing is compared and the
    test is reported as 'PASS'.

    :param model_or_dir: Path to onnx model in test directory,
                         or the test directory name if the directory only contains one .onnx model.
    :return: None
    :raises ValueError: If a directory is provided that does not contain exactly one .onnx file, or if no
                        subdirectories with a name starting with 'test' are found.
    """

    if os.path.isdir(model_or_dir):
        model_dir = os.path.abspath(model_or_dir)
        # check there's only one onnx file
        models = glob.glob(os.path.join(model_dir, '*.onnx'))
        if len(models) > 1:
            raise ValueError(f"Multiple .onnx files found in {model_dir}. "
                             "Please provide specific .onnx file as input.")
        elif not models:
            raise ValueError(f"No .onnx files found in {model_dir}.")

        model_path = models[0]
    else:
        model_path = os.path.abspath(model_or_dir)
        model_dir = os.path.dirname(model_path)

    print(f'Running tests in {model_dir}')

    # glob returns entries in arbitrary order. sort so the tests run in the same order each time.
    test_dirs = sorted(d for d in glob.glob(os.path.join(model_dir, 'test*')) if os.path.isdir(d))
    if not test_dirs:
        raise ValueError(f"No directories with name starting with 'test' were found in {model_dir}.")

    sess = ort.InferenceSession(model_path)

    for d in test_dirs:
        print(d)
        inputs, expected_outputs = read_test_dir(d)

        # only request the outputs we have expected data for. request all outputs if there is none.
        if expected_outputs:
            output_names = list(expected_outputs)
        else:
            output_names = [o.name for o in sess.get_outputs()]

        run_outputs = sess.run(output_names, inputs)
        failed = False
        if expected_outputs:
            # run_outputs is in the same order as output_names
            for output_name, actual in zip(output_names, run_outputs):
                expected = expected_outputs[output_name]

                if not _outputs_match(expected, actual):
                    print(f'Mismatch for {output_name}:\nExpected:{expected}\nGot:{actual}')
                    failed = True

        print('FAILED' if failed else 'PASS')
Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`import glob`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`import numpy as np`
			`import onnx`
Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`import onnx_test_data_utils`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`import onnxruntime as ort`
			`import os`
			`import shutil`

			`from onnx import numpy_helper`


			`def _get_numpy_type(model_info, name):`
			`for i in model_info:`
			`if i.name == name:`
			`type_name = i.type.WhichOneof('value')`
			`if type_name == 'tensor_type':`
			`return onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[i.type.tensor_type.elem_type]`
			`else:`
			`raise ValueError(f"Type is not handled: {type_name}")`

			`raise ValueError(f"{name} was not found in the model info.")`


Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`def _create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map):`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`"""`
			`Update name_input_map with random input for any missing values in the model inputs.`

			`:param model_inputs: model.graph.input from an onnx model`
			`:param name_input_map: Map of input names to values to update. Can be empty. Existing values are preserved.`
			`:param symbolic_dim_values_map: Map of symbolic dimension names to values to use if creating data.`
			`"""`
			`for input in model_inputs:`
			`if input.name in name_input_map and name_input_map[input.name] is not None:`
			`continue`

			`input_type = input.type.WhichOneof('value')`
			`if input_type != 'tensor_type':`
			`raise ValueError(f'Unsupported model. Need to handle input type of {input_type}')`

			`shape = input.type.tensor_type.shape`
			`dims = []`
			`for dim in shape.dim:`
			`dim_type = dim.WhichOneof('value')`
			`if dim_type == 'dim_value':`
			`dims.append(dim.dim_value)`
			`elif dim_type == 'dim_param':`
			`if dim.dim_param not in symbolic_dim_values_map:`
			`raise ValueError(f"Value for symbolic dim {dim.dim_param} was not provided.")`

			`dims.append(symbolic_dim_values_map[dim.dim_param])`
			`else:`
			`# TODO: see if we need to provide a way to specify these values. could ask for the whole`
			`# shape for the input name instead.`
			`raise ValueError("Unsupported model. Unknown dim with no value or symbolic name.")`

			`np_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[input.type.tensor_type.elem_type]`
Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`# create random data. give it range -10 to 10 so if we convert to an integer type it's not all 0s and 1s`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`data = (np.random.standard_normal(dims) * 10).astype(np_type)`
Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`name_input_map[input.name] = data`


			`def create_test_dir(model_path, root_path, test_name,`
Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`name_input_map=None, symbolic_dim_values_map=None,`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`name_output_map=None):`
			`"""`
			`Create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test.`
			`Generates random input data for any missing inputs.`
			`Saves output from running the model if name_output_map is not provided.`

			`:param model_path: Path to the onnx model file to use.`
			`:param root_path: Root path to create the test directory in.`
			`:param test_name: Name for test. Will be added to the root_path to create the test directory name.`
			`:param name_input_map: Map of input names to numpy ndarray data for each input.`
			`:param symbolic_dim_values_map: Map of symbolic dimension names to values to use for the input data if creating`
			`using random data.`
			`:param name_output_map: Optional map of output names to numpy ndarray expected output data.`
			`If not provided, the model will be run with the input to generate output data to save.`
			`:return: None`
			`"""`

			`model_path = os.path.abspath(model_path)`
			`root_path = os.path.abspath(root_path)`
			`test_dir = os.path.join(root_path, test_name)`
Enable running PEP8 on python scripts using flake8 (#3928) * Enable running PEP8 checks via flake8 as part of the build if flake8 is installed. Update scripts in \tools and \onnxruntime\python. Excluding \onnxruntime\python\tools which needs a lot more work to be PEP8 compliant. Also excluding orttraining\tools for the same reason. Install flake8 as part of the static_analysis build task in the Win-CPU CI so the checks are run in one CI build. Update coding standards doc. 2020-05-14 21:15:06 +00:00			`test_data_dir = os.path.join(test_dir, "test_data_set_0")`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00
			`if not os.path.exists(test_dir) or not os.path.exists(test_data_dir):`
			`os.makedirs(test_data_dir)`

Add option for onnx_test_runner can pause after launch, make create_test_dir work on non-windows os (#4618) * minor fix for test dir util * add pause option for onnx_test_runner * add flush std to show pause prompt text Co-authored-by: gwang0000 <62914304+gwang0000@users.noreply.github.com> 2020-07-29 18:47:01 +00:00			`model_filename = os.path.split(model_path)[-1]`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00			`test_model_filename = os.path.join(test_dir, model_filename)`
			`shutil.copy(model_path, test_model_filename)`

			`model = onnx.load(model_path)`
			`model_inputs = model.graph.input`
			`model_outputs = model.graph.output`

			`def save_data(prefix, name_data_map, model_info):`
			`idx = 0`
			`for name, data in name_data_map.items():`
			`if isinstance(data, dict):`
			`# ignore. map<T1, T2> from traditional ML ops`
			`pass`
			`elif isinstance(data, list):`
			`# ignore. vector<map<T1,T2>> from traditional ML ops. e.g. ZipMap output`
			`pass`
			`else:`
			`np_type = _get_numpy_type(model_info, name)`
			`tensor = numpy_helper.from_array(data.astype(np_type), name)`
			`filename = os.path.join(test_data_dir, f"{prefix}_{idx}.pb")`
			`with open(filename, 'wb') as f:`
			`f.write(tensor.SerializeToString())`

			`idx += 1`

Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`if not name_input_map:`
			`name_input_map = {}`

			`if not symbolic_dim_values_map:`
			`symbolic_dim_values_map = {}`

			`_create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map)`
Add a couple of utility scripts to tools/python (#3621) * Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts. * Fix some flake8 warnings. Add example of specifying attribute for op. * Add ability for test dir creation to fill in all missing input data with random values. Add example of using test dir creation this way 2020-05-02 07:35:43 +00:00
			`save_data("input", name_input_map, model_inputs)`

			`# save expected output data if provided. run model to create if not.`
			`if not name_output_map:`
			`output_names = [o.name for o in model_outputs]`
			`sess = ort.InferenceSession(test_model_filename)`
			`outputs = sess.run(output_names, name_input_map)`
			`name_output_map = {}`
			`for name, data in zip(output_names, outputs):`
			`name_output_map[name] = data`

			`save_data("output", name_output_map, model_outputs)`

Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00
			`def read_test_dir(dir_name):`
			`"""`
			`Read the input and output .pb files from the provided directory.`
			`Input files should have a prefix of 'input_'`
			`Output files, which are optional, should have a prefix of 'output_'`
			`:param dir_name: Directory to read files from`
			`:return: tuple(dictionary of input name to numpy.ndarray of data,`
			`dictionary of output name to numpy.ndarray)`
			`"""`

			`inputs = {}`
			`outputs = {}`
			`input_files = glob.glob(os.path.join(dir_name, 'input_*.pb'))`
			`output_files = glob.glob(os.path.join(dir_name, 'output_*.pb'))`

			`for i in input_files:`
			`name, data = onnx_test_data_utils.read_tensorproto_pb_file(i)`
			`inputs[name] = data`

			`for o in output_files:`
			`name, data = onnx_test_data_utils.read_tensorproto_pb_file(o)`
			`outputs[name] = data`

			`return inputs, outputs`


			`def run_test_dir(model_or_dir):`
			`"""`
			`Run the test/s from a directory in ONNX test format.`
			`All subdirectories with a prefix of 'test' are considered test input for one test run.`

			`:param model_or_dir: Path to onnx model in test directory,`
			`or the test directory name if the directory only contains one .onnx model.`
			`:return: None`
			`"""`

			`if os.path.isdir(model_or_dir):`
			`model_dir = os.path.abspath(model_or_dir)`
			`# check there's only one onnx file`
			`models = glob.glob(os.path.join(model_dir, '*.onnx'))`
			`if len(models) > 1:`
			`raise ValueError(f"'Multiple .onnx files found in {model_dir}. '"`
Enable running PEP8 on python scripts using flake8 (#3928) * Enable running PEP8 checks via flake8 as part of the build if flake8 is installed. Update scripts in \tools and \onnxruntime\python. Excluding \onnxruntime\python\tools which needs a lot more work to be PEP8 compliant. Also excluding orttraining\tools for the same reason. Install flake8 as part of the static_analysis build task in the Win-CPU CI so the checks are run in one CI build. Update coding standards doc. 2020-05-14 21:15:06 +00:00			`"'Please provide specific .onnx file as input.")`
Add a few more things to the helper python scripts. (#3842) * Add a few more things to the helper python scripts. Add documentation with usage examples. * Address PR comments 2020-05-11 23:59:40 +00:00			`elif len(models) == 0:`
			`raise ValueError(f"'No .onnx files found in {model_dir}.")`

			`model_path = models[0]`
			`else:`
			`model_path = os.path.abspath(model_or_dir)`
			`model_dir = os.path.dirname(model_path)`

			`print(f'Running tests in {model_dir}')`

			`test_dirs = [d for d in glob.glob(os.path.join(model_dir, 'test*')) if os.path.isdir(d)]`
			`if not test_dirs:`
			`raise ValueError(f"No directories with name starting with 'test' were found in {model_dir}.")`

			`sess = ort.InferenceSession(model_path)`

			`for d in test_dirs:`
			`print(d)`
			`inputs, expected_outputs = read_test_dir(d)`

			`if expected_outputs:`
			`output_names = list(expected_outputs.keys())`
			`else:`
			`output_names = [o.name for o in sess.get_outputs()]`

			`run_outputs = sess.run(output_names, inputs)`
			`failed = False`
			`if expected_outputs:`
			`for idx in range(len(output_names)):`
			`expected = expected_outputs[output_names[idx]]`
			`actual = run_outputs[idx]`

			`if not np.isclose(expected, actual, rtol=1.e-3, atol=1.e-3).all():`
			`print(f'Mismatch for {output_names[idx]}:\nExpected:{expected}\nGot:{actual}')`
			`failed = True`

			`print('FAILED' if failed else 'PASS')`