diff --git a/tools/python/__init__.py b/tools/python/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tools/python/example_operator_perf_test.py b/tools/python/example_operator_perf_test.py
new file mode 100644
index 0000000000..4192a616d0
--- /dev/null
+++ b/tools/python/example_operator_perf_test.py
@@ -0,0 +1,146 @@
+"""
+Example python code for creating a model with a single operator and performance testing it with various
+input combinations.
+"""
+
+import onnx
+from onnx import helper
+from onnx import TensorProto
+import numpy as np
+import time
+import timeit
+import onnxruntime as rt
+
+# if you copy this script elsewhere you may need to add the tools\python dir to the sys.path for this
+# import to work.
+# e.g. sys.path.append(r'<path to onnxruntime source>\tools\python')
+import ort_test_dir_utils
+
+# make input deterministic
+np.random.seed(123)
+
+
+#
+# Example code to create a model with just the operator to test. Adjust as necessary for what you want to test.
+#
+def create_model(model_name):
+    """Create a model containing a single TopK node, validate it and save it to model_name."""
+    # attributes are passed as additional keyword arguments using the attribute name and appropriate type
+    topk_node = helper.make_node(op_type="TopK", inputs=['X', 'K'], outputs=['Values', 'Indices'],
+                                 name='topk', largest=1)
+
+    # create inputs with symbolic dims so we can use any input sizes
+    graph_inputs = [
+        helper.make_tensor_value_info("X", TensorProto.FLOAT, ['batch', 'items']),
+        helper.make_tensor_value_info("K", TensorProto.INT64, [1]),
+    ]
+    graph_outputs = [
+        helper.make_tensor_value_info("Values", TensorProto.FLOAT, ['batch', 'k']),
+        helper.make_tensor_value_info("Indices", TensorProto.INT64, ['batch', 'k']),
+    ]
+
+    graph_def = helper.make_graph(nodes=[topk_node], name='test-model', inputs=graph_inputs,
+                                  outputs=graph_outputs, initializer=[])
+
+    # declare opset 11 for the "" domain and validate the model before it is written out
+    opset = helper.make_operatorsetid("", 11)
+    model = helper.make_model(graph_def, opset_imports=[opset])
+    onnx.checker.check_model(model)
+
+    onnx.save_model(model, model_name)
+
+
+#
+# Example code to create random input. Adjust as necessary for the input your model requires
+#
+def create_test_input(n, num_items, k):
+    """Return a feed dict: random float32 'X' of shape (n, num_items) and int64 'K' holding [k]."""
+    return {
+        'X': np.random.randn(n, num_items).astype(np.float32),
+        'K': np.asarray([k]).astype(np.int64),
+    }
+
+
+#
+# Example code that tests various combinations of input sizes.
+#
+def run_perf_tests(model_path, num_threads=1):
+    """Run the model over a grid of input sizes with num_threads intra-op threads, printing the avg ns per run."""
+    so = rt.SessionOptions()
+    so.intra_op_num_threads = num_threads
+    sess = rt.InferenceSession(model_path, sess_options=so)
+
+    batches = [10, 25, 50]
+    batch_size = [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
+    k_vals = [1, 2, 4, 6, 8, 16, 24, 32, 48, 64, 128]
+
+    # exploit scope to access variables from below for each iteration
+    def run_test():
+        num_seconds = 1 * 1000 * 1000 * 1000  # seconds in ns
+        iters = 0
+        total = 0
+        total_iters = 0
+
+        # For a simple model execution can be faster than the clock updates. Due to this we want to estimate
+        # a number of iterations per measurement. time.perf_counter_ns() is used instead of time.time_ns() as it
+        # is monotonic and has the highest available resolution, so wall clock adjustments can't skew results.
+        # Estimate based on iterations in 5ms, but note that 5ms includes all the perf_counter_ns calls which
+        # are excluded in the real measurement, so that many iterations may take much less than 5ms.
+        start = time.perf_counter_ns()
+        while time.perf_counter_ns() - start < 5 * 1000 * 1000:  # 5 ms
+            sess.run(None, inputs)
+            iters += 1
+
+        # run the model and measure time after 'iters' calls
+        while total < num_seconds:
+            start = time.perf_counter_ns()
+            for _ in range(iters):
+                # ignore the outputs as we're not validating them in a performance test
+                sess.run(None, inputs)
+            end = time.perf_counter_ns()
+            assert (end - start > 0)
+            total += end - start
+            total_iters += iters
+
+        # Adjust the output you want as needed. avg is the mean nanoseconds per sess.run call.
+        print(f'n={n},items={num_items},k={k},avg:{total / total_iters:.4f}')
+
+    # combine the various input parameters and create input for each test
+    for n in batches:
+        for num_items in batch_size:
+            for k in k_vals:
+                if k < num_items:
+                    # adjust as necessary for the inputs your model requires
+                    inputs = create_test_input(n, num_items, k)
+
+                    # use timeit to disable gc etc. but let each test measure total time and average time
+                    # as multiple iterations may be required between each measurement
+                    timeit.timeit(run_test, number=1)
+
+
+#
+# example for creating a test directory for use with onnx_test_runner or onnxruntime_perf_test
+# so that the model can be easily run directly or from a debugger.
+#
+def create_example_test_directory():
+    """Create the example 'test1' test directory under 'PerfTests' for 'topk.onnx' with K=64, batch=25, items=256."""
+    # only the inputs that need specific values are provided
+    fixed_inputs = {'K': np.asarray([64]).astype(np.int64)}
+
+    # sizes for the symbolic dims, used when data is generated for the remaining inputs
+    dim_values = {'batch': 25, 'items': 256}
+
+    # create the directory. random input will be created for any missing inputs.
+    # the model will be run and the output will be saved as expected output for future runs
+    ort_test_dir_utils.create_test_dir(model_path='topk.onnx', root_path='PerfTests', test_name='test1',
+                                       name_input_map=fixed_inputs, symbolic_dim_values_map=dim_values)
+
+
+# this will create the model file in the current directory. it must run first as both steps below load 'topk.onnx'
+create_model('topk.onnx')
+
+# this will create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test
+create_example_test_directory()
+
+# this loops over the input combinations defined in run_perf_tests, using the specified number of threads (1)
+run_perf_tests('topk.onnx', 1)
diff --git a/tools/python/ort_test_dir_utils.py b/tools/python/ort_test_dir_utils.py
new file mode 100644
index 0000000000..97c0c7fb53
--- /dev/null
+++ b/tools/python/ort_test_dir_utils.py
@@ -0,0 +1,128 @@
+import numpy as np
+import onnx
+import onnxruntime as ort
+import os
+import shutil
+
+from onnx import numpy_helper
+
+
+def _get_numpy_type(model_info, name):
+    """Return the numpy type of tensor `name` in model_info; raise ValueError if it is missing or not a tensor."""
+    for value_info in model_info:
+        if value_info.name != name:
+            continue
+        kind = value_info.type.WhichOneof('value')
+        if kind != 'tensor_type':
+            raise ValueError(f"Type is not handled: {kind}")
+        return onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[value_info.type.tensor_type.elem_type]
+    raise ValueError(f"{name} was not found in the model info.")
+
+
+def create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map):
+    """
+    Fill name_input_map in place with random data for every model input that has no value yet.
+
+    :param model_inputs: model.graph.input from an onnx model
+    :param name_input_map: Map of input name to value. May be empty. Entries that are not None are left untouched.
+    :param symbolic_dim_values_map: Map of symbolic dimension name to the value to use when creating data.
+    :raises ValueError: If an input is not a tensor, or one of its dims can not be resolved to a value.
+    """
+    for model_input in model_inputs:
+        if name_input_map.get(model_input.name) is not None:
+            continue
+
+        value_kind = model_input.type.WhichOneof('value')
+        if value_kind != 'tensor_type':
+            raise ValueError(f'Unsupported model. Need to handle input type of {value_kind}')
+
+        tensor_type = model_input.type.tensor_type
+        dims = []
+        for dim in tensor_type.shape.dim:
+            dim_kind = dim.WhichOneof('value')
+            if dim_kind == 'dim_value':
+                dims.append(dim.dim_value)
+                continue
+            if dim_kind != 'dim_param':
+                # TODO: see if we need to provide a way to specify these values. could ask for the whole
+                # shape for the input name instead.
+                raise ValueError("Unsupported model. Unknown dim with no value or symbolic name.")
+
+            if dim.dim_param not in symbolic_dim_values_map:
+                raise ValueError(f"Value for symbolic dim {dim.dim_param} was not provided.")
+            dims.append(symbolic_dim_values_map[dim.dim_param])
+
+        # scale the standard normal samples by 10 so converting to an integer type doesn't give all 0s and 1s
+        # TODO: consider if the range should be configurable
+        np_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[tensor_type.elem_type]
+        name_input_map[model_input.name] = (np.random.standard_normal(dims) * 10).astype(np_type)
+
+
+def create_test_dir(model_path, root_path, test_name,
+                    name_input_map=None, symbolic_dim_values_map=None,
+                    name_output_map=None):
+    """
+    Create a test directory that can be used with onnx_test_runner or onnxruntime_perf_test.
+    Generates random input data for any missing inputs.
+    Saves output from running the model if name_output_map is not provided.
+
+    :param model_path: Path to the onnx model file to use.
+    :param root_path: Root path to create the test directory in.
+    :param test_name: Name for test. Will be added to the root_path to create the test directory name.
+    :param name_input_map: Optional map of input names to numpy ndarray data for each input. If provided, it is
+                           updated in place with the random data generated for any missing inputs.
+    :param symbolic_dim_values_map: Optional map of symbolic dimension names to values to use for the input data
+                                    if creating using random data.
+    :param name_output_map: Optional map of output names to numpy ndarray expected output data.
+                            If not provided, the model will be run with the input to generate output data to save.
+    :return: None
+    :raises ValueError: If an input or output name is not found in the model, a type is not handled, or a symbolic
+                        dim has no value provided.
+    """
+
+    # use a fresh dict per call. a shared `{}` default would keep the inputs generated by previous calls.
+    name_input_map = {} if name_input_map is None else name_input_map
+    symbolic_dim_values_map = {} if symbolic_dim_values_map is None else symbolic_dim_values_map
+
+    model_path = os.path.abspath(model_path)
+    root_path = os.path.abspath(root_path)
+    test_dir = os.path.join(root_path, test_name)
+    test_data_dir = os.path.join(test_dir, "test_data_set_0")
+
+    # exist_ok allows an existing test directory to be reused
+    os.makedirs(test_data_dir, exist_ok=True)
+
+    # basename uses the separator of the current platform. splitting on '\\' only worked on Windows.
+    test_model_filename = os.path.join(test_dir, os.path.basename(model_path))
+    shutil.copy(model_path, test_model_filename)
+
+    model = onnx.load(model_path)
+    model_inputs = model.graph.input
+    model_outputs = model.graph.output
+
+    def save_data(prefix, name_data_map, model_info):
+        # write each tensor to <prefix>_<idx>.pb in the test data dir. idx also counts the skipped entries.
+        for idx, (name, data) in enumerate(name_data_map.items()):
+            if isinstance(data, (dict, list)):
+                # ignore. map<T1, T2> or vector<map<T1,T2>> from traditional ML ops. e.g. ZipMap output
+                continue
+
+            np_type = _get_numpy_type(model_info, name)
+            tensor = numpy_helper.from_array(data.astype(np_type), name)
+            filename = os.path.join(test_data_dir, f"{prefix}_{idx}.pb")
+            with open(filename, 'wb') as f:
+                f.write(tensor.SerializeToString())
+
+    create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map)
+
+    save_data("input", name_input_map, model_inputs)
+
+    # save expected output data if provided. run model to create if not.
+    if not name_output_map:
+        output_names = [o.name for o in model_outputs]
+        sess = ort.InferenceSession(test_model_filename)
+        outputs = sess.run(output_names, name_input_map)
+        name_output_map = dict(zip(output_names, outputs))
+
+    save_data("output", name_output_map, model_outputs)
+