mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-27 03:11:28 +00:00
Add DataFrameTool (#2456)
Add DataFrameTool to feed inputs from Panda DataFrame
This commit is contained in:
parent
89824b35e9
commit
ec88f6d8d6
4 changed files with 291 additions and 0 deletions
27
onnxruntime/python/tools/automl/README.md
Normal file
27
onnxruntime/python/tools/automl/README.md
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# DataFrameTool overview
|
||||
|
||||
This tool helps to feed data from an instance of a pandas DataFrame to a loaded ONNX model using the ONNX Runtime API.
|
||||
|
||||
## Example of usage
|
||||
|
||||
See the example of usage in data_frame_tool_test.py in the same directory.
|
||||
|
||||
```python
|
||||
import onnxruntime as onnxrt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from data_frame_tool import DataFrameTool
|
||||
|
||||
# Load the onnx model
|
||||
sess_options = onnxrt.SessionOptions()
|
||||
sess_options.enable_profiling = args.profile
|
||||
df_tool = DataFrameTool(args.model_path, sess_options)
|
||||
|
||||
df = pd.DataFrame([['string_input', True, np.float32(0.25)]], index=[0], columns=['F2', 'Label', 'F1'])
|
||||
|
||||
outputs = df_tool.execute(df, [])
|
||||
```
|
||||
91
onnxruntime/python/tools/automl/create_test_model.py
Normal file
91
onnxruntime/python/tools/automl/create_test_model.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
#-------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
import onnx
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from onnx import numpy_helper
|
||||
from onnx import helper
|
||||
from onnx import utils
|
||||
from onnx import AttributeProto, TensorProto, GraphProto
|
||||
|
||||
def parse_arguments():
    """
    Parse the command line of the model generator script.

    :return: the argparse namespace; its ``output_file`` attribute holds the
             file name the generated model is saved to (mandatory option).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--output_file",
        required=True,
        help="Model file name to save",
    )
    parsed = arg_parser.parse_args()
    return parsed
|
||||
|
||||
def create_model():
    """
    Create the test feed model and save it to the file given by --output_file.

    The graph consists of independent Identity nodes, one per tensor element
    type, each mapping a [1,1] input to a [1,1] output of the same type.
    """
    args = parse_arguments()

    # (name prefix, element type, node name) for every Identity node, in graph order.
    # Float32 comes last: it is the optional input because it also has an initializer,
    # and it is interesting because it needs float32 which pandas does not produce by default.
    identity_specs = [
        ('Bool', TensorProto.BOOL, 'BoolIdentity'),
        ('String', TensorProto.STRING, 'StringIdentity'),
        ('Double', TensorProto.DOUBLE, 'DoubleIdentity'),
        ('Int8', TensorProto.INT8, 'Int8Identity'),
        ('Int16', TensorProto.INT16, 'Int16Identity'),
        ('Int32', TensorProto.INT32, 'Int32Identity'),
        ('Int64', TensorProto.INT64, 'Int64Identity'),
        ('Float32', TensorProto.FLOAT, 'OptionalIdentity'),
    ]

    graph_inputs = []
    graph_outputs = []
    graph_nodes = []
    for prefix, elem_type, node_name in identity_specs:
        input_name = prefix + 'Input'
        output_name = prefix + 'Output'
        graph_inputs.append(helper.make_tensor_value_info(input_name, elem_type, [1,1]))
        graph_outputs.append(helper.make_tensor_value_info(output_name, elem_type, [1,1]))
        graph_nodes.append(helper.make_node('Identity', inputs=[input_name], outputs=[output_name],
                                            name=node_name))

    # Default value for Float32Input, supplied through an initializer of the same name.
    default_values = np.array([[.0]]).astype(np.float32)
    tensor_float32 = helper.make_tensor(name='Float32Input', data_type=TensorProto.FLOAT, dims=[1,1],
                                        vals=default_values, raw=False)

    # Assemble the graph and the model
    graph_def = helper.make_graph(nodes=graph_nodes,
                                  name='optional_input_graph',
                                  inputs=graph_inputs,
                                  outputs=graph_outputs,
                                  initializer=[tensor_float32])
    model_def = helper.make_model(graph_def, producer_name='feed_inputs_test')

    # NOTE(review): onnx.utils.polish_model appears to be unavailable in newer onnx
    # releases - confirm against the onnx version this script is run with.
    final_model = onnx.utils.polish_model(model_def)
    onnx.save(final_model, args.output_file)
|
||||
|
||||
# Script entry point: generate the model and use the result as the process exit status.
if __name__ == "__main__":
    exit_status = create_model()
    sys.exit(exit_status)
|
||||
|
||||
|
||||
138
onnxruntime/python/tools/automl/data_frame_tool.py
Normal file
138
onnxruntime/python/tools/automl/data_frame_tool.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
#-------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime as onnxrt
|
||||
|
||||
# Model floating point element types that a pandas floating point column may be converted to.
ort_float_set = {np.float32, np.float64}

# Names of the pandas column dtypes accepted as a source for any type in ort_float_set.
pd_float_set = {'float64'}

# Model integer element types that a pandas integer column may be converted to.
ort_int_set = {np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64}

# Names of the pandas column dtypes accepted as a source for any type in ort_int_set.
pd_int_set = {'int64'}

# Maps the onnxruntime input type string to the numpy type used to build the feed.
# np.bool_ / np.object_ are used instead of the np.bool / np.object aliases,
# which were deprecated in NumPy 1.20 and removed in 1.24.
types_dict = {
    'tensor(float16)': np.float16,
    'tensor(float)'  : np.float32,
    'tensor(double)' : np.float64,

    'tensor(int8)'   : np.int8,
    'tensor(uint8)'  : np.uint8,
    'tensor(int16)'  : np.int16,
    'tensor(uint16)' : np.uint16,
    'tensor(int32)'  : np.int32,
    'tensor(uint32)' : np.uint32,
    'tensor(int64)'  : np.int64,
    'tensor(uint64)' : np.uint64,

    'tensor(bool)'   : np.bool_,
    'tensor(string)' : np.object_
}
|
||||
|
||||
class DataFrameTool():
    """
    This is a utility class used to run a model with pandas.DataFrame input.

    The model is loaded into an onnxruntime.InferenceSession on construction.
    Model input names are matched to DataFrame column names and the first row
    of every matching column is fed to the model.
    """
    def __init__(self, model_path, sess_options=None):
        """
        :param model_path: path to the model to be loaded
        :param sess_options: see onnxruntime.SessionOptions
        """
        self._model_path = model_path
        self._sess_options = sess_options
        self._sess = onnxrt.InferenceSession(self._model_path, self._sess_options)

    def _process_input_list(self, df, input_metas, require):
        """
        Return a dictionary of input_name : a typed and shaped np.array of values for a given input_meta
        The function does the heavy lifting for _get_input_feeds()

        :param df: See :class:`pandas.DataFrame`.
        :param input_metas: a list of input descriptions exposing name, type and shape
        :param require: a boolean. If True this helper raises on a missing input.

        :raises TypeError: if a column type can not be converted to the model input type
        :raises RuntimeError: if require is True and the input has no matching column
        """
        feeds = {}
        for input_meta in input_metas:
            # Unknown dimensions (None, 0 or a symbolic name given as a string)
            # are fed as 1 since only a single row is fed.
            shape = [1 if (not dim or isinstance(dim, str)) else dim for dim in input_meta.shape]
            # We fully expect all the types are in the above dictionary
            assert input_meta.type in types_dict, "Update types_dict for the new type"
            expected_type = types_dict[input_meta.type]

            if input_meta.name not in df.columns:
                if require:
                    raise RuntimeError("This model requires input {} of type {} but it is not found in the DataFrame".format(
                        input_meta.name, expected_type))
                continue

            column = df[input_meta.name]
            column_type = column.dtype
            # float16 and bool always require an exact match.
            # Any type is converted to a string if a string is required.
            # Floating types convert to each other and so do the integer types.
            convertible = (input_meta.type == 'tensor(string)'
                           or expected_type == column_type
                           or (expected_type in ort_float_set and str(column_type) in pd_float_set)
                           or (expected_type in ort_int_set and str(column_type) in pd_int_set))
            if not convertible:
                raise TypeError("Input {} expected to be of type: {} got {} ".format(
                    input_meta.name, expected_type, column_type))

            # Take the first row by position (iloc) so that a DataFrame whose index
            # has no label 0 still works. The value is put into a flat array,
            # cast to the expected type and then reshaped.
            feeds[input_meta.name] = np.array([column.iloc[0]]).astype(expected_type).reshape(shape)
        return feeds

    def _get_input_feeds(self, df, sess):
        """
        Return a dictionary of input_name : a typed and shaped np.array of values
        This function accepts Pandas DataFrame as the first argument and onnxruntime
        session with a loaded model. The function interrogates the model for the inputs
        and matches the model input names to the DataFrame instance column names.
        It requires exact matches for bool and float16 types. It attempts to convert to
        string any input type if string is required.
        It attempts to convert floating types to each other and does the same for all of the
        integer types without requiring an exact match.

        :param df: See :class:`pandas.DataFrame`. The function only considers the first row of each column
            and feeds the data to the appropriate model inputs.

        :param sess: See :class:`onnxruntime.InferenceSession`.

        :raises RuntimeError: if the DataFrame is empty or a mandatory input is missing

        ::
            For example: pd.DataFrame([[0, 4, 20]], index=[0], columns=['A', 'B', 'C'])

        """
        if df.empty:
            raise RuntimeError('input DataFrame is empty')

        # Process mandatory inputs. Raise an error if anything is not present
        feeds = self._process_input_list(df, sess.get_inputs(), True)
        # Process optional overridable initializers. If present the initializer value
        # is overridden by the input. If not, the initializer value embedded in the model takes effect.
        initializers = self._process_input_list(df, sess.get_overridable_initializers(), False)

        feeds.update(initializers)

        return feeds

    def execute(self, df, output_names, run_options=None):
        """
        Compute the predictions.
        Return a list of output values restricted to output names if not empty

        :param df: See :class:`pandas.DataFrame`.
        :param output_names: name of the outputs that we are interested in
        :param run_options: See :class:`onnxruntime.RunOptions`.

        ::

            sess.run([output_name], {input_name: x})
        """
        input_feed = self._get_input_feeds(df, self._sess)
        return self._sess.run(output_names, input_feed, run_options)
|
||||
|
||||
35
onnxruntime/python/tools/automl/data_frame_tool_test.py
Normal file
35
onnxruntime/python/tools/automl/data_frame_tool_test.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#-------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
import argparse
|
||||
import onnxruntime as onnxrt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from data_frame_tool import DataFrameTool
|
||||
import os
|
||||
import sys
|
||||
|
||||
def main():
    """
    Load the model given on the command line, run it on a one-row DataFrame
    that has a column for each model input and print the outputs.
    """
    arg_parser = argparse.ArgumentParser(description='Test Feed Inputs utility')
    arg_parser.add_argument('model_path', help='model path')
    arg_parser.add_argument('-profile', action='store_true',
                            help='enable chrome timeline trace profiling.')
    cli_args = arg_parser.parse_args()

    # Session options and the tool under test
    options = onnxrt.SessionOptions()
    options.enable_profiling = cli_args.profile
    df_tool = DataFrameTool(cli_args.model_path, options)

    # One row holding a value for every model input, each in its own column
    column_names = ['StringInput', 'DoubleInput', 'Int8Input', 'Int16Input', 'Int32Input', 'Int64Input',
                    'BoolInput', 'Float32Input']
    first_row = ['string_input', 3.25, 8, 16, 32, 64, True, 0.25]
    df = pd.DataFrame([first_row], columns=column_names)

    outputs = df_tool.execute(df, [])
    print('Outputs: ', outputs)
|
||||
|
||||
# Script entry point: run the test and use the result as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|
||||
Loading…
Reference in a new issue