Add a couple of utility scripts to tools/python (#3621)

* Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test.
Add example script that can be used as a base for performance testing a model with a variety of input sizes.
Add __init__.py so files in this directory can be imported in other scripts.

* Fix some flake8 warnings.
Add example of specifying attribute for op.

* Add ability for test dir creation to fill in all missing input data with random values.
Add example of using test dir creation this way
This commit is contained in:
Scott McKay 2020-05-02 17:35:43 +10:00 committed by GitHub
parent 440f361363
commit 42cf971ca2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 274 additions and 0 deletions

0
tools/python/__init__.py Normal file
View file

View file

@ -0,0 +1,146 @@
"""
Example python code for creating a model with a single operator and performance testing it with various
input combinations.
"""
import onnx
from onnx import helper
from onnx import TensorProto
import numpy as np
import time
import timeit
import onnxruntime as rt
# if you copy this script elsewhere you may need to add the tools\python dir to the sys.path for this
# import to work.
# e.g. sys.path.append(r'<path to onnxruntime source>\tools\python')
import ort_test_dir_utils
# Seed NumPy's global random generator so the randomly generated inputs are reproducible between runs.
np.random.seed(123)
#
# Example code to create a model with just the operator to test. Adjust as necessary for what you want to test.
#
def create_model(model_name):
    """
    Create an ONNX model containing a single TopK node and save it to disk.

    The model is validated with the onnx checker before being written.
    :param model_name: Path of the file to save the model to.
    """
    # attributes are also key-value pairs using the attribute name and appropriate type
    topk_node = helper.make_node(op_type="TopK",
                                 inputs=['X', 'K'],
                                 outputs=['Values', 'Indices'],
                                 name='topk',
                                 largest=1)

    # create inputs with symbolic dims so we can use any input sizes
    graph_inputs = [
        helper.make_tensor_value_info("X", TensorProto.FLOAT, ['batch', 'items']),
        helper.make_tensor_value_info("K", TensorProto.INT64, [1]),
    ]

    graph_outputs = [
        helper.make_tensor_value_info("Values", TensorProto.FLOAT, ['batch', 'k']),
        helper.make_tensor_value_info("Indices", TensorProto.INT64, ['batch', 'k']),
    ]

    graph_def = helper.make_graph(nodes=[topk_node],
                                  name='test-model',
                                  inputs=graph_inputs,
                                  outputs=graph_outputs,
                                  initializer=[])

    # the default domain ("") at opset 11 is used for the operator
    opset = helper.make_operatorsetid("", 11)
    model = helper.make_model(graph_def, opset_imports=[opset])

    onnx.checker.check_model(model)
    onnx.save_model(model, model_name)
#
# Example code to create random input. Adjust as necessary for the input your model requires
#
def create_test_input(n, num_items, k):
    """
    Build the input feed for one run of the example model.

    :param n: Size of the first dimension of the random 'X' input.
    :param num_items: Size of the second dimension of the random 'X' input.
    :param k: Value stored in the single-element int64 'K' input.
    :return: Dict mapping the input names 'X' and 'K' to numpy arrays.
    """
    return {
        # normally distributed random values, converted to float32
        'X': np.random.randn(n, num_items).astype(np.float32),
        # 1-D tensor holding just k
        'K': np.asarray([k]).astype(np.int64),
    }
#
# Example code that tests various combinations of input sizes.
#
def run_perf_tests(model_path, num_threads=1):
    """
    Run the model with a range of input size combinations and print the average time per execution.

    For every combination of batch count, item count and k (skipping combinations where k is not smaller than
    the item count) new input is created and the model is executed repeatedly for roughly one second of
    measured time. The printed 'avg' is total measured nanoseconds divided by the number of executions.
    :param model_path: Path to the onnx model file to load.
    :param num_threads: Value to use for SessionOptions.intra_op_num_threads.
    :return: None
    """
    so = rt.SessionOptions()
    so.intra_op_num_threads = num_threads
    sess = rt.InferenceSession(model_path, sess_options=so)

    batches = [10, 25, 50]
    item_counts = [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    k_vals = [1, 2, 4, 6, 8, 16, 24, 32, 48, 64, 128]

    # exploit scope to access variables from below for each iteration.
    # 'inputs', 'n', 'num_items' and 'k' are read from the enclosing function at call time.
    def run_test():
        measure_budget_ns = 1 * 1000 * 1000 * 1000  # one second, in ns
        estimate_budget_ns = 5 * 1000 * 1000  # 5 ms, in ns

        iters = 0
        total = 0
        total_iters = 0

        # For a simple model execution can be faster than the clock updates. Due to this we want to estimate
        # a number of iterations per measurement.
        # Estimate based on iterations in 5ms, but note that 5ms includes all the clock reads
        # which are excluded in the real measurement. The actual time that many iterations
        # takes will be much lower if the individual execution time is very small.
        #
        # perf_counter_ns is used rather than time_ns as it is monotonic and has the highest available
        # resolution, whereas the wall clock can be adjusted while a measurement is in progress.
        start = time.perf_counter_ns()
        while time.perf_counter_ns() - start < estimate_budget_ns:
            sess.run(None, inputs)
            iters += 1

        # run the model and measure time after 'iters' calls
        while total < measure_budget_ns:
            start = time.perf_counter_ns()
            for _ in range(iters):
                # ignore the outputs as we're not validating them in a performance test
                sess.run(None, inputs)
            end = time.perf_counter_ns()

            # sanity check that the clock advanced during the measurement
            assert (end - start > 0)
            total += end - start
            total_iters += iters

        # Adjust the output you want as needed
        print(f'n={n},items={num_items},k={k},avg:{total / total_iters:.4f}')

    # combine the various input parameters and create input for each test
    for n in batches:
        for num_items in item_counts:
            for k in k_vals:
                if k < num_items:
                    # adjust as necessary for the inputs your model requires
                    inputs = create_test_input(n, num_items, k)

                    # use timeit to disable gc etc. but let each test measure total time and average time
                    # as multiple iterations may be required between each measurement
                    timeit.timeit(run_test, number=1)
#
# example for creating a test directory for use with onnx_test_runner or onnxruntime_perf_test
# so that the model can be easily run directly or from a debugger.
#
def create_example_test_directory():
    """
    Create the example test directory 'PerfTests/test1' for the 'topk.onnx' model.

    Only the 'K' input is given an explicit value. The symbolic dimension values are passed along so that
    data can be generated for any input that is not explicitly provided.
    """
    # the inputs that we want to use specific values for
    fixed_inputs = {'K': np.asarray([64]).astype(np.int64)}

    # values for the symbolic dims used by the model inputs
    dim_values = {'batch': 25, 'items': 256}

    # create the directory. random input will be created for any missing inputs.
    # the model will be run and the output will be saved as expected output for future runs
    ort_test_dir_utils.create_test_dir('topk.onnx', 'PerfTests', 'test1', fixed_inputs, dim_values)
# The steps below run in order whenever this file is executed; each step uses the model file written by the first.
# this will create the model file ('topk.onnx') in the current directory
create_model('topk.onnx')
# this will create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test
create_example_test_directory()
# this can loop over various combinations of input, using the specified number of threads (1 here)
run_perf_tests('topk.onnx', 1)

View file

@ -0,0 +1,128 @@
import numpy as np
import onnx
import onnxruntime as ort
import os
import shutil
from onnx import numpy_helper
def _get_numpy_type(model_info, name):
    """
    Look up the numpy type for the named entry in model_info.

    :param model_info: Iterable of entries that have a 'name' and a 'type', e.g. model.graph.input or
                       model.graph.output from an onnx model.
    :param name: Name of the entry to find.
    :return: The numpy type that the tensor element type of the entry maps to.
    :raises ValueError: If the entry is not a tensor type, or no entry has the requested name.
    """
    for entry in model_info:
        if entry.name != name:
            continue

        kind = entry.type.WhichOneof('value')
        if kind != 'tensor_type':
            raise ValueError(f"Type is not handled: {kind}")

        return onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[entry.type.tensor_type.elem_type]

    raise ValueError(f"{name} was not found in the model info.")
def create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map):
    """
    Fill name_input_map with random data for every model input that does not have a value yet.

    Entries that are already present with a value other than None are left untouched.
    :param model_inputs: model.graph.input from an onnx model
    :param name_input_map: Map of input names to values. Updated in place. Can be empty.
    :param symbolic_dim_values_map: Map of symbolic dimension names to the values to use when creating data.
    :raises ValueError: If an input needing data is not a tensor, or one of its dimensions cannot be resolved.
    """
    for model_input in model_inputs:
        input_name = model_input.name
        if name_input_map.get(input_name) is not None:
            # the caller supplied data for this input
            continue

        input_type = model_input.type.WhichOneof('value')
        if input_type != 'tensor_type':
            raise ValueError(f'Unsupported model. Need to handle input type of {input_type}')

        tensor_type = model_input.type.tensor_type

        # resolve every dimension of the input to a concrete value
        dims = []
        for dim in tensor_type.shape.dim:
            dim_kind = dim.WhichOneof('value')
            if dim_kind == 'dim_value':
                dims.append(dim.dim_value)
                continue

            if dim_kind != 'dim_param':
                # TODO: see if we need to provide a way to specify these values. could ask for the whole
                # shape for the input name instead.
                raise ValueError("Unsupported model. Unknown dim with no value or symbolic name.")

            symbolic_name = dim.dim_param
            if symbolic_name not in symbolic_dim_values_map:
                raise ValueError(f"Value for symbolic dim {symbolic_name} was not provided.")

            dims.append(symbolic_dim_values_map[symbolic_name])

        # create random data. give it range -10 to 10 so if we convert to an integer type it's not all 0s and 1s
        # TODO: consider if the range should be configurable
        np_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[tensor_type.elem_type]
        scaled = np.random.standard_normal(dims) * 10
        name_input_map[input_name] = scaled.astype(np_type)
def create_test_dir(model_path, root_path, test_name,
                    name_input_map=None, symbolic_dim_values_map=None,
                    name_output_map=None):
    """
    Create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test.
    Generates random input data for any missing inputs.
    Saves output from running the model if name_output_map is not provided.

    The directory layout created is <root_path>/<test_name>/<model file> for the copy of the model, and
    <root_path>/<test_name>/test_data_set_0/{input,output}_<idx>.pb for the serialized tensors.

    :param model_path: Path to the onnx model file to use.
    :param root_path: Root path to create the test directory in.
    :param test_name: Name for test. Will be added to the root_path to create the test directory name.
    :param name_input_map: Optional map of input names to numpy ndarray data for each input.
                           If a map is provided it is updated in place with any generated input data.
    :param symbolic_dim_values_map: Optional map of symbolic dimension names to values to use for the input data
                                    if creating using random data.
    :param name_output_map: Optional map of output names to numpy ndarray expected output data.
                            If not provided, the model will be run with the input to generate output data to save.
    :return: None
    """
    # Use a fresh dict per call. A shared mutable default would carry generated input data over
    # from one call to the next, as name_input_map is updated with the random inputs below.
    if name_input_map is None:
        name_input_map = {}

    if symbolic_dim_values_map is None:
        symbolic_dim_values_map = {}

    model_path = os.path.abspath(model_path)
    root_path = os.path.abspath(root_path)
    test_dir = os.path.join(root_path, test_name)
    test_data_dir = os.path.join(test_dir, "test_data_set_0")

    # creates test_dir as well if it is missing. no-op if the directories already exist.
    os.makedirs(test_data_dir, exist_ok=True)

    # basename handles the path separator of the current platform. Splitting on '\\' only works on Windows;
    # elsewhere it leaves the full path, which would make the copy below target the source file itself.
    model_filename = os.path.basename(model_path)
    test_model_filename = os.path.join(test_dir, model_filename)
    shutil.copy(model_path, test_model_filename)

    model = onnx.load(model_path)
    model_inputs = model.graph.input
    model_outputs = model.graph.output

    def save_data(prefix, name_data_map, model_info):
        # Write each tensor value to <prefix>_<idx>.pb in the test data directory.
        # idx only advances for values that are written, so the file numbering has no gaps.
        idx = 0
        for name, data in name_data_map.items():
            if isinstance(data, dict):
                # ignore. map<T1, T2> from traditional ML ops
                pass
            elif isinstance(data, list):
                # ignore. vector<map<T1,T2>> from traditional ML ops. e.g. ZipMap output
                pass
            else:
                # convert to the type the model declares for this input/output before serializing
                np_type = _get_numpy_type(model_info, name)
                tensor = numpy_helper.from_array(data.astype(np_type), name)
                filename = os.path.join(test_data_dir, f"{prefix}_{idx}.pb")
                with open(filename, 'wb') as f:
                    f.write(tensor.SerializeToString())

                idx += 1

    create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map)

    save_data("input", name_input_map, model_inputs)

    # save expected output data if provided. run model to create if not.
    if not name_output_map:
        output_names = [o.name for o in model_outputs]
        sess = ort.InferenceSession(test_model_filename)
        outputs = sess.run(output_names, name_input_map)
        name_output_map = dict(zip(output_names, outputs))

    save_data("output", name_output_map, model_outputs)