From d198fbc4d5e6e60ddf80bc2dcfbf7c3cf9cb62d4 Mon Sep 17 00:00:00 2001 From: Ye Wang <52801275+wangyems@users.noreply.github.com> Date: Wed, 16 Feb 2022 14:40:03 -0800 Subject: [PATCH] Add a script for randomizing onnx weights (#10551) * Add a script for randomizing onnx weights Required by customer that when sharing an onnx model for 3rd party debugging, a tool is needed to randomize all the weights in the model. * Update onnx_randomizer.py more comments --- onnxruntime/python/tools/onnx_randomizer.py | 90 +++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 onnxruntime/python/tools/onnx_randomizer.py diff --git a/onnxruntime/python/tools/onnx_randomizer.py b/onnxruntime/python/tools/onnx_randomizer.py new file mode 100644 index 0000000000..f4b736fa0a --- /dev/null +++ b/onnxruntime/python/tools/onnx_randomizer.py @@ -0,0 +1,90 @@ +#------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +#-------------------------------------------------------------------------- + +# An offline standalone script to declassify an ONNX model by randomizing the tensor data in initializers. +# The ORT Performance may change especially on generative models. + +import argparse +import numpy as np +from onnx import onnx_pb, numpy_helper, save_model, load_model +from pathlib import Path + +# An experimental small value for differentiating shape data and weights. +# The tensor data with larger size can't be shape data. +# User may adjust this value as needed. +SIZE_THRESHOLD = 10 + +def graph_iterator(model, func): + graph_queue = [model.graph] + while graph_queue: + graph = graph_queue.pop(0) + func(graph) + for node in graph.node: + for attr in node.attribute: + if attr.type == onnx_pb.AttributeProto.AttributeType.GRAPH: + assert (isinstance(attr.g, onnx_pb.GraphProto)) + graph_queue.append(attr.g) + if attr.type == onnx_pb.AttributeProto.AttributeType.GRAPHS: + for g in attr.graphs: + assert (isinstance(g, onnx_pb.GraphProto)) + graph_queue.append(g) + + +def randomize_graph_initializer(graph): + for i_tensor in graph.initializer: + array = numpy_helper.to_array(i_tensor) + # TODO: need to find a better way to differentiate shape data and weights. + if array.size > SIZE_THRESHOLD: + random_array = np.random.uniform(array.min(), + array.max(), + size=array.shape).astype( + array.dtype) + o_tensor = numpy_helper.from_array(random_array, i_tensor.name) + i_tensor.CopyFrom(o_tensor) + + +def main(): + parser = argparse.ArgumentParser( + description='Randomize the weights of an ONNX model') + parser.add_argument('-m', + type=str, + required=True, + help='input onnx model path') + parser.add_argument('-o', + type=str, + required=True, + help='output onnx model path') + parser.add_argument("--use_external_data_format", + required=False, + action="store_true", + help="Store or Save in external data format") + parser.add_argument("--all_tensors_to_one_file", + required=False, + action="store_true", + help="Save all tensors to one file") + args = parser.parse_args() + + data_path = None + if args.use_external_data_format: + if Path(args.m).parent == Path(args.o).parent: + raise RuntimeError( + "Please specify output directory with different parent path to input directory." + ) + if args.all_tensors_to_one_file: + data_path = Path(args.o).name + ".data" + + Path(args.o).parent.mkdir(parents=True, exist_ok=True) + onnx_model = load_model(args.m, + load_external_data=args.use_external_data_format) + graph_iterator(onnx_model, randomize_graph_initializer) + save_model(onnx_model, + args.o, + save_as_external_data=args.use_external_data_format, + all_tensors_to_one_file=args.all_tensors_to_one_file, + location=data_path) + + +if __name__ == '__main__': + main()