diff --git a/onnxruntime/python/tools/automl/README.md b/onnxruntime/python/tools/automl/README.md new file mode 100644 index 0000000000..5eda02a491 --- /dev/null +++ b/onnxruntime/python/tools/automl/README.md @@ -0,0 +1,27 @@ +# DataFrameTool overview + +This tool helps to feed data from an instance of pandas DataFrame to a loaded ONNX model using ONNX Runtime API. + +## Example of usage + +See example of usage in data_frame_tool_test.py in the same directory. + +```python +import onnxruntime as onnxrt +import numpy as np +import pandas as pd + +from data_frame_tool import DataFrameTool + +# Create the session options; the tool loads the onnx model itself +sess_options = onnxrt.SessionOptions() +sess_options.enable_profiling = args.profile +df_tool = DataFrameTool(args.model_path, sess_options) + +df = pd.DataFrame([['string_input', True, np.float32(0.25)]], index=[0], columns=['F2', 'Label', 'F1']) + +# Only the first row of the DataFrame is fed to the model inputs +outputs = df_tool.execute(df, []) + +print('Outputs: ', outputs) +``` diff --git a/onnxruntime/python/tools/automl/create_test_model.py b/onnxruntime/python/tools/automl/create_test_model.py new file mode 100644 index 0000000000..ae85677716 --- /dev/null +++ b/onnxruntime/python/tools/automl/create_test_model.py @@ -0,0 +1,91 @@ +#------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
def parse_arguments():
    """
    Parse the command line of the test model generator.

    :return: the argparse namespace; its ``output_file`` attribute is the
             (required) file name the generated model is saved to.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_file", required=True, help="Model file name to save")
    return parser.parse_args()


def _make_identity(prefix, elem_type, node_name):
    """
    Build one Identity node together with the value infos of its input and output.

    :param prefix: name prefix; the tensors are named '<prefix>Input' and '<prefix>Output'
    :param elem_type: TensorProto element type shared by the input and the output
    :param node_name: name given to the Identity node
    :return: tuple of (input value info, output value info, node def)
    """
    input_name = prefix + 'Input'
    output_name = prefix + 'Output'
    value_in = helper.make_tensor_value_info(input_name, elem_type, [1, 1])
    value_out = helper.make_tensor_value_info(output_name, elem_type, [1, 1])
    node_def = helper.make_node('Identity', inputs=[input_name], outputs=[output_name], name=node_name)
    return value_in, value_out, node_def


def _polish_model(model_def):
    """
    Validate the model and annotate it with inferred shapes.

    Uses onnx.utils.polish_model when the installed onnx provides it. That helper
    is absent from some onnx releases, in which case the model is checked and
    shape inference is run directly instead of failing with AttributeError.
    """
    polish = getattr(onnx.utils, 'polish_model', None)
    if polish is not None:
        return polish(model_def)

    # Local import: only needed on the fallback path.
    from onnx import checker, shape_inference
    checker.check_model(model_def)
    return shape_inference.infer_shapes(model_def)


def create_model():
    """
    Create the test feed model and save it to the file given by --output_file.

    The graph consists of eight independent Identity nodes, one per element type
    (bool, string, double, int8, int16, int32, int64, float32). Every input and
    output has shape [1, 1]. 'Float32Input' also has an initializer, which makes
    it an optional input with a default value of 0.0.
    """
    args = parse_arguments()

    # (tensor name prefix, element type, node name) in graph order.
    identity_specs = [
        ('Bool', TensorProto.BOOL, 'BoolIdentity'),
        ('String', TensorProto.STRING, 'StringIdentity'),
        ('Double', TensorProto.DOUBLE, 'DoubleIdentity'),
        ('Int8', TensorProto.INT8, 'Int8Identity'),
        ('Int16', TensorProto.INT16, 'Int16Identity'),
        ('Int32', TensorProto.INT32, 'Int32Identity'),
        ('Int64', TensorProto.INT64, 'Int64Identity'),
        # Optional input as it has an initializer. This one is interesting because
        # it needs float32, which is not a type pandas infers by default.
        ('Float32', TensorProto.FLOAT, 'OptionalIdentity'),
    ]

    graph_inputs = []
    graph_outputs = []
    nodes = []
    for prefix, elem_type, node_name in identity_specs:
        value_in, value_out, node_def = _make_identity(prefix, elem_type, node_name)
        graph_inputs.append(value_in)
        graph_outputs.append(value_out)
        nodes.append(node_def)

    # Default initializer for Float32Input. With raw=False the values are given
    # as a flat sequence; dims carries the [1, 1] shape.
    tensor_float32 = helper.make_tensor(name='Float32Input', data_type=TensorProto.FLOAT, dims=[1, 1],
                                        vals=[0.0], raw=False)

    # Make a graph
    graph_def = helper.make_graph(nodes=nodes,
                                  name='optional_input_graph',
                                  inputs=graph_inputs,
                                  outputs=graph_outputs,
                                  initializer=[tensor_float32])

    model_def = helper.make_model(graph_def, producer_name='feed_inputs_test')
    final_model = _polish_model(model_def)
    onnx.save(final_model, args.output_file)


if __name__ == "__main__":
    sys.exit(create_model())
# numpy types of the floating point model inputs that a float DataFrame column may be cast to.
ort_float_set = {np.float32, np.float64}

# Names of the DataFrame column dtypes accepted as a source for any type in ort_float_set.
pd_float_set = {'float64'}

# numpy types of the integer model inputs that an integer DataFrame column may be cast to.
ort_int_set = {np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64}

# Names of the DataFrame column dtypes accepted as a source for any type in ort_int_set.
pd_int_set = {'int64'}

# Maps the type string of a model input to the numpy type of the array fed to it.
types_dict = {
    'tensor(float16)': np.float16,
    'tensor(float)': np.float32,
    'tensor(double)': np.float64,

    'tensor(int8)': np.int8,
    'tensor(uint8)': np.uint8,
    'tensor(int16)': np.int16,
    'tensor(uint16)': np.uint16,
    'tensor(int32)': np.int32,
    'tensor(uint32)': np.uint32,
    'tensor(int64)': np.int64,
    'tensor(uint64)': np.uint64,

    # np.bool_ / np.object_ instead of the np.bool / np.object aliases, which were
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    'tensor(bool)': np.bool_,
    'tensor(string)': np.object_,
}


class DataFrameTool:
    """
    This is a utility class used to run a model with pandas.DataFrame input
    """

    def __init__(self, model_path, sess_options=None):
        """
        :param model_path: path to the model to be loaded
        :param sess_options: see onnxruntime.SessionsOptions
        """
        self._model_path = model_path
        self._sess_options = sess_options
        self._sess = onnxrt.InferenceSession(self._model_path, self._sess_options)

    def _process_input_list(self, df, input_metas, require):
        """
        Return a dictionary of input_name : a typed and shaped np.array of values for the given input_metas.
        The function does the heavy lifting for _get_input_feeds()

        A column is accepted for an input when any of the following holds:
          - the input is 'tensor(string)' (any column dtype is taken and stored in an object array);
          - the column dtype matches the input type exactly (the only option for bool and float16);
          - the input is float32/float64 and the column dtype is listed in pd_float_set;
          - the input is an integer type and the column dtype is listed in pd_int_set.

        :param df: See :class:`pandas.DataFrame`. Only the first row is used.
        :param input_metas: a list of objects with ``name``, ``type`` and ``shape`` attributes
        :param require: a boolean. If True this helper raises on a missing input,
                        otherwise inputs without a matching column are skipped.
        :return: dict of input name to np.array
        :raises TypeError: if the input type is not in types_dict or the column dtype cannot be used for it
        :raises RuntimeError: if ``require`` is True and the DataFrame has no column for an input
        """
        feeds = {}
        for input_meta in input_metas:
            name = input_meta.name
            # Dimensions that are not a concrete non-zero integer (None, 0 or a
            # symbolic name) are fed as 1 because only a single row is supplied.
            shape = [dim if isinstance(dim, int) and dim else 1 for dim in input_meta.shape]

            # Raise instead of assert so the check is not stripped under -O.
            if input_meta.type not in types_dict:
                raise TypeError("Input {} has unsupported type {}. Update types_dict for the new type".format(
                    name, input_meta.type))
            expected_type = types_dict[input_meta.type]

            if name not in df.columns:
                if require:
                    raise RuntimeError(
                        "This model requires input {} of type {} but it is not found in the DataFrame".format(
                            name, expected_type))
                continue

            column = df[name]
            column_dtype = column.dtype
            convertible = (
                input_meta.type == 'tensor(string)'
                or column_dtype == expected_type
                or (expected_type in ort_float_set and str(column_dtype) in pd_float_set)
                or (expected_type in ort_int_set and str(column_dtype) in pd_int_set)
            )
            if not convertible:
                raise TypeError("Input {} expected to be of type: {} got {} ".format(
                    name, expected_type, column_dtype))

            # iloc picks the first row by position; indexing with [0] is label based
            # and fails when the DataFrame index does not contain the label 0.
            first_value = column.iloc[0]
            feeds[name] = np.array([first_value]).astype(expected_type).reshape(shape)
        return feeds

    def _get_input_feeds(self, df, sess):
        """
        Return a dictionary of input_name : a typed and shaped np.array of values
        This function accepts Pandas DataFrame as the first argument and onnxruntime
        session with a loaded model. The function interrogates the model for the inputs
        and matches the model input names to the DataFrame instance column names.
        It requires exact matches for bool and float16 types. Any column is accepted
        when a string input is required.
        float64 columns are converted to the floating type of the input and int64
        columns to the integer type of the input without requiring an exact match.

        :param df: See :class:`pandas.DataFrame`. The function only considers the first row of each column
            and feeds the data to the appropriate model inputs.
            For example: pd.DataFrame([[0, 4, 20]], columns=['A', 'B', 'C'])

        :param sess: See :class:`onnxruntime.InferenceSession`.

        :return: dict of input name to np.array
        :raises RuntimeError: if the DataFrame is empty or a mandatory input has no column
        """
        if df.empty:
            raise RuntimeError('input DataFrame is empty')

        # Process mandatory inputs. Raise an error if anything is not present
        feeds = self._process_input_list(df, sess.get_inputs(), True)
        # Process optional overridable initializers. If present the initializer value
        # is overridden by the input. If not, the initializer value embedded in the model takes effect.
        initializers = self._process_input_list(df, sess.get_overridable_initializers(), False)

        feeds.update(initializers)

        return feeds

    def execute(self, df, output_names, run_options=None):
        """
        Compute the predictions.
        Return a list of output values restricted to output names if not empty

        :param df: See :class:`pandas.DataFrame`.
        :param output_names: name of the outputs that we are interested in
        :param run_options: See :class:`onnxruntime.RunOptions`.
        :return: the result of ``sess.run(output_names, input_feed, run_options)``
        """
        input_feed = self._get_input_feeds(df, self._sess)
        return self._sess.run(output_names, input_feed, run_options)