onnxruntime/tools/python/onnx_test_data_utils.py
Justin Chu fdce4fa6af
Format all python files under onnxruntime with black and isort (#11324)
Description: Format all python files under onnxruntime with black and isort.

After checking in, we can use .git-blame-ignore-revs to ignore the formatting PR in git blame.

#11315, #11316
2022-04-26 09:35:16 -07:00

211 lines
7.7 KiB
Python

import argparse
import glob
import os
import sys
import numpy as np
import onnx
from onnx import numpy_helper
def read_tensorproto_pb_file(filename):
    """Load a serialized TensorProto from a pb file.

    Returns a ``(name, array)`` tuple: the TensorProto's name and its data
    converted to a numpy.ndarray.
    """
    proto = onnx.load_tensor(filename)
    return proto.name, numpy_helper.to_array(proto)
def dump_tensorproto_pb_file(filename):
    """Print the name, shape and values of the TensorProto stored in a pb file."""
    tensor_name, values = read_tensorproto_pb_file(filename)
    # print() joins its arguments with a single space, giving "Name: <name>" / "Shape: <shape>".
    print("Name:", tensor_name)
    print("Shape:", values.shape)
    print(values)
def dump_pb(dir_or_filename):
    """Dump a single .pb file, or every .pb file directly inside a directory.

    Each file is expected to contain a serialized TensorProto. For a directory,
    the path of each file is printed before its contents.
    """
    # Anything that is not a directory is treated as a single pb file.
    if not os.path.isdir(dir_or_filename):
        dump_tensorproto_pb_file(dir_or_filename)
        return

    pattern = os.path.join(dir_or_filename, "*.pb")
    for pb_path in glob.glob(pattern):
        print(pb_path)
        dump_tensorproto_pb_file(pb_path)
def numpy_to_pb(name, np_data, out_filename):
    """Wrap numpy data in a TensorProto called `name` and save it to `out_filename`."""
    onnx.save_tensor(numpy_helper.from_array(np_data, name), out_filename)
def image_to_numpy(filename, shape, channels_last, add_batch_dim):
    """Convert an image file into a float32 numpy array.

    Args:
        filename: Path of the image file to load with Pillow.
        shape: Optional ``[height, width]`` to resize to. The image is scaled,
            preserving aspect ratio, until it covers the target size and is then
            center cropped to exactly that size. Falsy means no resizing.
        channels_last: If False the array is transposed from HWC to CHW.
            If True the layout produced by Pillow/numpy is kept.
        add_batch_dim: If True a leading dimension of size 1 is added.

    Returns:
        numpy.ndarray of dtype float32 holding the pixel values.
    """
    import PIL.Image  # from 'Pillow' package

    # Use the image as a context manager so the underlying file is closed once the
    # pixel data has been read, instead of leaking the handle.
    with PIL.Image.open(filename) as source:
        img = source

        if shape:
            w, h = img.size
            new_h, new_w = shape[0], shape[1]

            # use the dimension that needs to shrink the least to resize to an image where that dimension
            # matches the target size.
            ratio = max(new_w / w, new_h / h)

            # Floating point rounding can leave w * ratio marginally below the target (e.g. 6.999...),
            # which int() would truncate to one pixel short. Clamp so the interim image always covers
            # the crop window.
            interim_w = max(new_w, int(w * ratio))
            interim_h = max(new_h, int(h * ratio))

            # LANCZOS is the filter the old ANTIALIAS constant referred to; ANTIALIAS itself was removed
            # in Pillow 10.
            img = img.resize((interim_w, interim_h), PIL.Image.LANCZOS)

            # center crop to the final target size. Integer coordinates are used, with right/bottom derived
            # from left/top, so the result is exactly new_w x new_h and does not depend on how fractional
            # box values would be rounded.
            left = (interim_w - new_w) // 2
            top = (interim_h - new_h) // 2
            img = img.crop((left, top, left + new_w, top + new_h))

        img_as_np = np.array(img).astype(np.float32)

    if not channels_last:
        # HWC to CHW
        img_as_np = np.transpose(img_as_np, (2, 0, 1))

    if add_batch_dim:
        # to NCHW or NHWC
        img_as_np = np.expand_dims(img_as_np, axis=0)

    return img_as_np
def create_random_data(shape, type, minvalue, maxvalue, seed):
    """Create an array of random values scaled into the range minvalue..maxvalue.

    Args:
        shape: Shape of the array to create.
        type: Anything accepted by ``numpy.dtype`` (e.g. "f4", "i8"). The scaled
            floating point samples are converted to this dtype with ``astype``.
        minvalue: Lower bound of the range the samples are scaled into.
        maxvalue: Upper bound of the range the samples are scaled into.
        seed: Seed for the random generator so results are reproducible, or None
            for non-deterministic values.

    Returns:
        numpy.ndarray with the requested shape and dtype.
    """
    nptype = np.dtype(type)

    # A private RandomState yields the same sequence as seeding the global generator with
    # np.random.seed(seed), but leaves the process-wide random state of the caller untouched.
    rng = np.random.RandomState(seed)
    scaled = (maxvalue - minvalue) * rng.random_sample(shape) + minvalue
    return scaled.astype(nptype)
def update_name_in_pb(filename, name, output_filename):
    """Set TensorProto.name in a pb file to `name`.

    The result is written to `output_filename`, or back over `filename` when no
    output filename is given.
    """
    proto = onnx.load_tensor(filename)
    proto.name = name
    # Fall back to overwriting the input file when no output was requested.
    onnx.save_tensor(proto, output_filename or filename)
def _parse_int_list(value):
    """Parse a comma separated string such as "200,300" into a list of ints."""
    try:
        return [int(item) for item in value.split(",")]
    except ValueError:
        # ArgumentTypeError makes argparse show this message instead of "invalid <lambda> value".
        raise argparse.ArgumentTypeError(
            "expected comma separated integers, got '{}'".format(value)
        ) from None


def get_arg_parser():
    """Build the command line parser for this script.

    Returns:
        argparse.ArgumentParser with the required --action option, the shared
        --input/--name/--output options, and the option groups specific to the
        image_to_pb and random_to_pb actions.
    """
    parser = argparse.ArgumentParser(
        description="""
Utilities for working with the input/output protobuf files used by the ONNX test cases and onnx_test_runner.
These are expected to only contain a serialized TensorProto.
dump_pb: Dumps the TensorProto data from an individual pb file, or all pb files in a directory.
numpy_to_pb: Convert numpy array saved to a file with numpy.save() to a TensorProto, and serialize to a pb file.
image_to_pb: Convert data from an image file into a TensorProto, and serialize to a pb file.
random_to_pb: Create a TensorProto with random data, and serialize to a pb file.
update_name_in_pb: Update the TensorProto.name value in a pb file.
Updates the input file unless --output <filename> is specified.
""",
        # Keep the line breaks of the description above as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--action",
        help="Action to perform",
        choices=["dump_pb", "numpy_to_pb", "image_to_pb", "random_to_pb", "update_name_in_pb"],
        required=True,
    )

    # Options shared between actions. Which of them are mandatory depends on the action, so that
    # is left to the caller to validate after parsing.
    parser.add_argument("--input", help="The input filename or directory name")
    parser.add_argument("--name", help="The value to set TensorProto.name to if creating/updating one.")
    parser.add_argument("--output", help="Filename to serialize the TensorProto to.")

    image_to_pb_group = parser.add_argument_group("image_to_pb", "image_to_pb specific options")
    image_to_pb_group.add_argument(
        "--resize",
        default=None,
        type=_parse_int_list,
        help="Provide the height and width to resize to as comma separated values."
        " e.g. --resize 200,300 will resize to height 200 and width 300.",
    )
    image_to_pb_group.add_argument(
        "--channels_last",
        action="store_true",
        help="Keep the image data in channels last (HWC) layout instead of transposing it to "
        "channels first (CHW).",
    )
    image_to_pb_group.add_argument(
        "--add_batch_dim",
        action="store_true",
        help="Prepend a batch dimension with value of 1 to the shape. i.e. convert from CHW to NCHW",
    )

    random_to_pb_group = parser.add_argument_group("random_to_pb", "random_to_pb specific options")
    random_to_pb_group.add_argument(
        "--shape",
        type=_parse_int_list,
        help="Provide the shape as comma separated values e.g. --shape 200,200",
    )
    random_to_pb_group.add_argument(
        "--datatype",
        help="numpy dtype value for the data type. e.g. f4=float32, i8=int64. "
        "See: https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html",
    )
    random_to_pb_group.add_argument(
        "--min_value", default=0, type=int, help="Limit the generated values to this minimum."
    )
    random_to_pb_group.add_argument(
        "--max_value", default=1, type=int, help="Limit the generated values to this maximum."
    )
    random_to_pb_group.add_argument(
        "--seed", default=None, type=int, help="seed to use for the random values so they're deterministic."
    )

    return parser
if __name__ == "__main__":
    # Script entry point: parse the command line, check that the options needed by the selected
    # action were provided, then run that action.
    arg_parser = get_arg_parser()
    args = arg_parser.parse_args()
    action = args.action

    if action == "dump_pb":
        if not args.input:
            print("Missing argument. Need input to be specified.", file=sys.stderr)
            sys.exit(-1)

        # print values with more digits than the numpy default
        np.set_printoptions(precision=10)
        dump_pb(args.input)

    elif action == "numpy_to_pb":
        if not (args.input and args.output and args.name):
            print("Missing argument. Need input, output and name to be specified.", file=sys.stderr)
            sys.exit(-1)

        # read data saved with numpy
        numpy_to_pb(args.name, np.load(args.input), args.output)

    elif action == "image_to_pb":
        if not (args.input and args.output and args.name):
            print("Missing argument. Need input, output, name to be specified.", file=sys.stderr)
            sys.exit(-1)

        pixels = image_to_numpy(args.input, args.resize, args.channels_last, args.add_batch_dim)
        numpy_to_pb(args.name, pixels, args.output)

    elif action == "random_to_pb":
        if not (args.output and args.shape and args.datatype and args.name):
            print("Missing argument. Need output, shape, datatype and name to be specified.", file=sys.stderr)
            sys.exit(-1)

        random_values = create_random_data(
            args.shape, args.datatype, args.min_value, args.max_value, args.seed
        )
        numpy_to_pb(args.name, random_values, args.output)

    elif action == "update_name_in_pb":
        if not (args.input and args.name):
            print("Missing argument. Need input and name to be specified.", file=sys.stderr)
            sys.exit(-1)

        update_name_in_pb(args.input, args.name, args.output)

    else:
        # Fallback for an action value that none of the branches above handle.
        print("Unknown action.", file=sys.stderr)
        arg_parser.print_help(sys.stderr)