mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-11 00:49:31 +00:00
Add a couple of utility scripts to tools/python (#3621)
* Add a helper script to more easily create a test directory for use with onnx_test_runner or onnxruntime_perf_test. Add an example script that can be used as a base for performance testing a model with a variety of input sizes. Add __init__.py so files in this directory can be imported in other scripts.
* Fix some flake8 warnings. Add an example of specifying an attribute for an op.
* Add the ability for test dir creation to fill in all missing input data with random values. Add an example of using test dir creation this way.
This commit is contained in:
parent
440f361363
commit
42cf971ca2
3 changed files with 274 additions and 0 deletions
0
tools/python/__init__.py
Normal file
0
tools/python/__init__.py
Normal file
146
tools/python/example_operator_perf_test.py
Normal file
146
tools/python/example_operator_perf_test.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""
|
||||
Example python code for creating a model with a single operator and performance testing it with various
|
||||
input combinations.
|
||||
"""
|
||||
|
||||
import onnx
|
||||
from onnx import helper
|
||||
from onnx import TensorProto
|
||||
import numpy as np
|
||||
import time
|
||||
import timeit
|
||||
import onnxruntime as rt
|
||||
|
||||
# if you copy this script elsewhere you may need to add the tools\python dir to the sys.path for this
|
||||
# import to work.
|
||||
# e.g. sys.path.append(r'<path to onnxruntime source>\tools\python')
|
||||
import ort_test_dir_utils
|
||||
|
||||
# make input deterministic
|
||||
np.random.seed(123)
|
||||
|
||||
|
||||
#
|
||||
# Example code to create a model with just the operator to test. Adjust as necessary for what you want to test.
|
||||
#
|
||||
def create_model(model_name):
    """
    Build an ONNX model that contains a single TopK node and save it to disk.

    The graph has two inputs, 'X' (float, dims 'batch' x 'items') and 'K' (int64, one element), and two
    outputs, 'Values' (float) and 'Indices' (int64), both with dims 'batch' x 'k'. The model is validated
    with the ONNX checker before it is written.

    :param model_name: Path of the file the model is saved to.
    """
    # attributes are passed to make_node as extra key-value pairs using the attribute name and appropriate type
    topk_node = helper.make_node(op_type="TopK", inputs=['X', 'K'], outputs=['Values', 'Indices'], name='topk',
                                 largest=1)

    # symbolic dims on the inputs mean the same model can be run with any input sizes
    graph_inputs = [
        helper.make_tensor_value_info("X", TensorProto.FLOAT, ['batch', 'items']),
        helper.make_tensor_value_info("K", TensorProto.INT64, [1]),
    ]

    graph_outputs = [
        helper.make_tensor_value_info("Values", TensorProto.FLOAT, ['batch', 'k']),
        helper.make_tensor_value_info("Indices", TensorProto.INT64, ['batch', 'k']),
    ]

    graph_def = helper.make_graph(
        nodes=[topk_node],
        name='test-model',
        inputs=graph_inputs,
        outputs=graph_outputs,
        initializer=[],
    )

    # the model imports opset version 11 for the '' domain
    model = helper.make_model(graph_def, opset_imports=[helper.make_operatorsetid("", 11)])
    onnx.checker.check_model(model)

    onnx.save_model(model, model_name)
|
||||
|
||||
|
||||
#
|
||||
# Example code to create random input. Adjust as necessary for the input your model requires
|
||||
#
|
||||
def create_test_input(n, num_items, k):
    """
    Create a random input feed for the example model.

    :param n: Number of rows in the 'X' array.
    :param num_items: Number of columns in the 'X' array.
    :param k: Value stored in the single-element 'K' array.
    :return: Dict mapping 'X' to a float32 array of shape (n, num_items) drawn from np.random.randn,
             and 'K' to an int64 array containing just k.
    """
    values = np.random.randn(n, num_items).astype(np.float32)
    top_k = np.asarray([k]).astype(np.int64)

    return {'X': values, 'K': top_k}
|
||||
|
||||
|
||||
#
|
||||
# Example code that tests various combinations of input sizes.
|
||||
#
|
||||
def run_perf_tests(model_path, num_threads=1):
    """
    Time the model over a grid of input sizes and print the average latency for each combination.

    Every combination of batch count, item count and k (only where k < item count) is measured for about
    one second of accumulated run time. One line is printed per combination; the 'avg' value is the
    average time of a single sess.run call in nanoseconds.

    :param model_path: Path to the onnx model to load.
    :param num_threads: Value assigned to SessionOptions.intra_op_num_threads.
    """

    so = rt.SessionOptions()
    so.intra_op_num_threads = num_threads
    sess = rt.InferenceSession(model_path, sess_options=so)

    batches = [10, 25, 50]
    item_counts = [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    k_vals = [1, 2, 4, 6, 8, 16, 24, 32, 48, 64, 128]

    calibration_ns = 5 * 1000 * 1000  # 5 ms
    measurement_ns = 1 * 1000 * 1000 * 1000  # 1 second in ns

    def run_test(n, num_items, k, inputs):
        # perf_counter_ns is monotonic and uses the highest resolution clock available, so a measured
        # interval can never be negative. The wall clock (time.time_ns) can be adjusted while we measure.
        clock = time.perf_counter_ns

        # For a simple model a single execution can be faster than the clock resolution, so we time a
        # group of iterations per measurement instead of one call.
        # Estimate the group size from how many runs fit in 5ms. That 5ms includes the clock calls,
        # which are excluded from the real measurement, so the group will take much less than 5ms
        # if the individual execution time is very small.
        iters = 0
        start = clock()
        while clock() - start < calibration_ns:
            sess.run(None, inputs)
            iters += 1

        # always measure at least one run, otherwise the loop below could never accumulate any time
        iters = max(iters, 1)

        total = 0
        total_iters = 0

        # run the model and read the clock only after each group of 'iters' calls
        while total < measurement_ns:
            start = clock()
            for _ in range(iters):
                # ignore the outputs as we're not validating them in a performance test
                sess.run(None, inputs)
            total += clock() - start
            total_iters += iters

        # Adjust the output you want as needed
        print(f'n={n},items={num_items},k={k},avg:{total / total_iters:.4f}')

    # combine the various input parameters and create input for each test
    for n in batches:
        for num_items in item_counts:
            for k in k_vals:
                if k >= num_items:
                    continue

                # adjust as necessary for the inputs your model requires
                inputs = create_test_input(n, num_items, k)

                # use timeit to disable gc etc. but let each test measure total time and average time
                # as multiple iterations may be required between each measurement.
                # the lambda is invoked immediately, so it sees the current loop values.
                timeit.timeit(lambda: run_test(n, num_items, k, inputs), number=1)
|
||||
|
||||
|
||||
#
|
||||
# example for creating a test directory for use with onnx_test_runner or onnxruntime_perf_test
|
||||
# so that the model can be easily run directly or from a debugger.
|
||||
#
|
||||
def create_example_test_directory():
    """
    Create the example test directory 'test1' under 'PerfTests' for the 'topk.onnx' model.

    Only the 'K' input is given an explicit value. create_test_dir generates random data for any input
    that is not supplied, using the symbolic dim values provided here, and runs the model to save the
    output as the expected output for future runs.
    """

    # the inputs that we want to use specific values for
    explicit_inputs = {'K': np.asarray([64]).astype(np.int64)}

    # values to use for the symbolic dims when input data is generated
    dim_values = {'batch': 25, 'items': 256}

    ort_test_dir_utils.create_test_dir('topk.onnx', 'PerfTests', 'test1', explicit_inputs, dim_values)
|
||||
|
||||
|
||||
# Running this file performs the whole example end to end.

# 1. write the single operator model to 'topk.onnx' in the current directory
create_model('topk.onnx')

# 2. build a test directory that can be used with onnx_test_runner or onnxruntime_perf_test
create_example_test_directory()

# 3. loop over the various combinations of input, using the specified number of threads
run_perf_tests('topk.onnx', num_threads=1)
|
||||
128
tools/python/ort_test_dir_utils.py
Normal file
128
tools/python/ort_test_dir_utils.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
import numpy as np
|
||||
import onnx
|
||||
import onnxruntime as ort
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from onnx import numpy_helper
|
||||
|
||||
|
||||
def _get_numpy_type(model_info, name):
|
||||
for i in model_info:
|
||||
if i.name == name:
|
||||
type_name = i.type.WhichOneof('value')
|
||||
if type_name == 'tensor_type':
|
||||
return onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[i.type.tensor_type.elem_type]
|
||||
else:
|
||||
raise ValueError(f"Type is not handled: {type_name}")
|
||||
|
||||
raise ValueError(f"{name} was not found in the model info.")
|
||||
|
||||
|
||||
def create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map):
    """
    Fill name_input_map in place with random data for each model input that does not have a value yet.

    An input is left alone when name_input_map already holds a value other than None for its name.

    :param model_inputs: model.graph.input from an onnx model
    :param name_input_map: Map of input names to values to update. Can be empty. Existing values are preserved.
    :param symbolic_dim_values_map: Map of symbolic dimension names to values to use if creating data.
    :raises ValueError: If an input needing data is not a tensor, uses a symbolic dim that has no entry in
                        symbolic_dim_values_map, or has a dim with neither a value nor a symbolic name.
    """
    def resolve_dim(dim):
        # turn one dim of the shape into a concrete size
        kind = dim.WhichOneof('value')
        if kind == 'dim_value':
            return dim.dim_value

        if kind == 'dim_param':
            if dim.dim_param not in symbolic_dim_values_map:
                raise ValueError(f"Value for symbolic dim {dim.dim_param} was not provided.")

            return symbolic_dim_values_map[dim.dim_param]

        # TODO: see if we need to provide a way to specify these values. could ask for the whole
        # shape for the input name instead.
        raise ValueError("Unsupported model. Unknown dim with no value or symbolic name.")

    for model_input in model_inputs:
        if name_input_map.get(model_input.name) is not None:
            continue

        input_type = model_input.type.WhichOneof('value')
        if input_type != 'tensor_type':
            raise ValueError(f'Unsupported model. Need to handle input type of {input_type}')

        tensor_type = model_input.type.tensor_type
        dims = [resolve_dim(dim) for dim in tensor_type.shape.dim]

        # create random data. scale the standard normal values by 10 so that converting to an
        # integer type does not leave mostly 0s and 1s
        # TODO: consider if the range should be configurable
        np_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[tensor_type.elem_type]
        name_input_map[model_input.name] = (np.random.standard_normal(dims) * 10).astype(np_type)
|
||||
|
||||
|
||||
def create_test_dir(model_path, root_path, test_name,
                    name_input_map=None, symbolic_dim_values_map=None,
                    name_output_map=None):
    """
    Create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test.
    Generates random input data for any missing inputs.
    Saves output from running the model if name_output_map is not provided.

    The model file is copied into the test directory, and the input and output data is written to a
    'test_data_set_0' sub-directory as input_<n>.pb and output_<n>.pb files.

    :param model_path: Path to the onnx model file to use.
    :param root_path: Root path to create the test directory in.
    :param test_name: Name for test. Will be added to the root_path to create the test directory name.
    :param name_input_map: Optional map of input names to numpy ndarray data for each input.
                           If a map is provided it is updated in place with any generated input data.
    :param symbolic_dim_values_map: Optional map of symbolic dimension names to values to use for the input
                                    data if creating using random data.
    :param name_output_map: Optional map of output names to numpy ndarray expected output data.
                            If not provided, the model will be run with the input to generate output data to save.
    :return: None
    """

    # Use a fresh dict per call. create_missing_input_data adds entries to name_input_map, so a shared
    # default dict would carry generated inputs from one call into the next.
    if name_input_map is None:
        name_input_map = {}

    if symbolic_dim_values_map is None:
        symbolic_dim_values_map = {}

    model_path = os.path.abspath(model_path)
    root_path = os.path.abspath(root_path)
    test_dir = os.path.join(root_path, test_name)
    test_data_dir = os.path.join(test_dir, "test_data_set_0")

    # creates test_dir as well, and is a no-op if the directories already exist
    os.makedirs(test_data_dir, exist_ok=True)

    # basename handles the path separator of the current platform. Splitting on '\\' returned the whole
    # path on non-Windows systems, which made the copy below target the source file itself.
    model_filename = os.path.basename(model_path)
    test_model_filename = os.path.join(test_dir, model_filename)
    shutil.copy(model_path, test_model_filename)

    model = onnx.load(model_path)
    model_inputs = model.graph.input
    model_outputs = model.graph.output

    def save_data(prefix, name_data_map, model_info):
        # idx counts every entry, including the skipped ones, so the file number is the entry's position
        for idx, (name, data) in enumerate(name_data_map.items()):
            if isinstance(data, (dict, list)):
                # ignore. map<T1, T2> or vector<map<T1,T2>> from traditional ML ops. e.g. ZipMap output
                continue

            np_type = _get_numpy_type(model_info, name)
            tensor = numpy_helper.from_array(data.astype(np_type), name)
            filename = os.path.join(test_data_dir, f"{prefix}_{idx}.pb")
            with open(filename, 'wb') as f:
                f.write(tensor.SerializeToString())

    create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map)

    save_data("input", name_input_map, model_inputs)

    # save expected output data if provided. run model to create if not.
    if not name_output_map:
        output_names = [o.name for o in model_outputs]
        sess = ort.InferenceSession(test_model_filename)
        outputs = sess.run(output_names, name_input_map)
        name_output_map = dict(zip(output_names, outputs))

    save_data("output", name_output_map, model_outputs)
|
||||
|
||||
Loading…
Reference in a new issue