Add DataFrameTool (#2456)

Add DataFrameTool to feed inputs from a pandas DataFrame
This commit is contained in:
Dmitri Smirnov 2019-12-02 10:12:03 -08:00 committed by GitHub
parent 89824b35e9
commit ec88f6d8d6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 291 additions and 0 deletions

View file

@ -0,0 +1,27 @@
# DataFrameTool overview
This tool helps to feed data from an instance of a pandas DataFrame to a loaded ONNX model using the ONNX Runtime API.
## Example of usage
See example of usage in feed_inputs_test.py in the same directory.
```python
import onnxruntime as onnxrt
import numpy as np
import pandas as pd
from data_frame_tool import DataFrameTool
# Create the session options; DataFrameTool loads the onnx model itself
sess_options = onnxrt.SessionOptions()
sess_options.enable_profiling = args.profile
df_tool = DataFrameTool(args.model_path, sess_options)
df = pd.DataFrame([['string_input', True, np.float32(0.25)]], index=[0], columns=['F2', 'Label', 'F1'])
# An empty list of output names is passed through to InferenceSession.run()
outputs = df_tool.execute(df, [])
```

View file

@ -0,0 +1,91 @@
#-------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
#--------------------------------------------------------------------------
import onnx
import numpy as np
import os
import sys
import argparse
from onnx import numpy_helper
from onnx import helper
from onnx import utils
from onnx import AttributeProto, TensorProto, GraphProto
def parse_arguments():
    """
    Parse the command line and return the resulting namespace.

    The only option is the mandatory ``--output_file``, the file name the
    generated model is saved under.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--output_file", required=True, help="Model file name to save")
    parsed = arg_parser.parse_args()
    return parsed
def create_model():
    """
    Build the test feed model and save it to the file given by --output_file.

    The graph contains one independent Identity node per element type (bool,
    string, double, int8, int16, int32, int64 and float32). Each node maps a
    [1,1] graph input named '<Type>Input' to a [1,1] graph output named
    '<Type>Output'. 'Float32Input' is additionally listed as a graph
    initializer so that it carries a default value.
    """
    args = parse_arguments()

    def make_identity(prefix, elem_type, node_name):
        # Value infos first, then the node that connects them.
        source_name = prefix + 'Input'
        sink_name = prefix + 'Output'
        source = helper.make_tensor_value_info(source_name, elem_type, [1, 1])
        sink = helper.make_tensor_value_info(sink_name, elem_type, [1, 1])
        node = helper.make_node('Identity', inputs=[source_name], outputs=[sink_name], name=node_name)
        return source, sink, node

    # (name prefix, ONNX element type, node name); the order here is the order
    # of the nodes, inputs and outputs in the resulting graph.
    identity_specs = [
        ('Bool', TensorProto.BOOL, 'BoolIdentity'),
        ('String', TensorProto.STRING, 'StringIdentity'),
        ('Double', TensorProto.DOUBLE, 'DoubleIdentity'),
        ('Int8', TensorProto.INT8, 'Int8Identity'),
        ('Int16', TensorProto.INT16, 'Int16Identity'),
        ('Int32', TensorProto.INT32, 'Int32Identity'),
        ('Int64', TensorProto.INT64, 'Int64Identity'),
        # Intended as the optional input: it is backed by the initializer
        # created below. It is float32 on purpose, since a DataFrame column of
        # Python floats does not come out as float32.
        ('Float32', TensorProto.FLOAT, 'OptionalIdentity'),
    ]

    graph_inputs = []
    graph_outputs = []
    graph_nodes = []
    for prefix, elem_type, node_name in identity_specs:
        source, sink, node = make_identity(prefix, elem_type, node_name)
        graph_inputs.append(source)
        graph_outputs.append(sink)
        graph_nodes.append(node)

    # Default value for 'Float32Input', supplied through an initializer of the same name.
    default_float32 = helper.make_tensor(name='Float32Input', data_type=TensorProto.FLOAT, dims=[1, 1],
                                         vals=np.array([[.0]]).astype(np.float32), raw=False)

    graph_def = helper.make_graph(nodes=graph_nodes,
                                  name='optional_input_graph',
                                  inputs=graph_inputs,
                                  outputs=graph_outputs,
                                  initializer=[default_float32])
    model_def = helper.make_model(graph_def, producer_name='feed_inputs_test')
    onnx.save(onnx.utils.polish_model(model_def), args.output_file)
if __name__ == "__main__":
    # Script entry point: generate the model and hand the result to sys.exit().
    exit_status = create_model()
    sys.exit(exit_status)

View file

@ -0,0 +1,138 @@
#-------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
#--------------------------------------------------------------------------
import numpy as np
import onnxruntime as onnxrt
# numpy floating point types a model input may expect; a DataFrame column whose
# dtype name is in pd_float_set is converted to any of them.
ort_float_set = {np.float32, np.float64}
pd_float_set = {'float64'}

# numpy integer types a model input may expect; a DataFrame column whose dtype
# name is in pd_int_set is converted to any of them.
ort_int_set = {np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64}
pd_int_set = {'int64'}

# Maps the type string reported for a model input to the numpy type of the
# array that is fed for it.
types_dict = {
    'tensor(float16)': np.float16,
    'tensor(float)': np.float32,
    'tensor(double)': np.float64,
    'tensor(int8)': np.int8,
    'tensor(uint8)': np.uint8,
    'tensor(int16)': np.int16,
    'tensor(uint16)': np.uint16,
    'tensor(int32)': np.int32,
    'tensor(uint32)': np.uint32,
    'tensor(int64)': np.int64,
    'tensor(uint64)': np.uint64,
    # The builtins are used instead of np.bool / np.object: those names were
    # plain aliases of the builtins and have been removed from NumPy.
    'tensor(bool)': bool,
    'tensor(string)': object,
}
class DataFrameTool():
    """
    This is a utility class used to run a model with pandas.DataFrame input
    """

    def __init__(self, model_path, sess_options=None):
        """
        Load the model into a new onnxruntime.InferenceSession.

        :param model_path: path to the model to be loaded
        :param sess_options: see onnxruntime.SessionOptions
        """
        self._model_path = model_path
        self._sess_options = sess_options
        self._sess = onnxrt.InferenceSession(self._model_path, self._sess_options)

    def _process_input_list(self, df, input_metas, require):
        """
        Return a dictionary of input_name : a typed and shaped np.array of values for the given input_metas.
        The function does the heavy lifting for _get_input_feeds().

        Only the first row of each matching column is used. A column is accepted when the input is a
        string (any value is converted), when the column dtype matches the expected type exactly, or
        when both are floating point / both are integer according to the module level type sets.

        :param df: See :class:`pandas.DataFrame`.
        :param input_metas: a list of input descriptions exposing ``name``, ``type`` and ``shape``
        :param require: a boolean. If True this helper raises on a missing input.
        :return: dict of input name to np.array; inputs without a matching column are left out
        :raises AssertionError: if an input type is not present in types_dict
        :raises TypeError: if a column dtype cannot be converted to the expected input type
        :raises RuntimeError: if ``require`` is True and an input has no matching column
        """
        feeds = {}
        for input_meta in input_metas:
            # Unknown dimensions (None, 0 or a symbolic name) become 1 because a single value is fed.
            shape = [1 if (not dim or isinstance(dim, str)) else dim for dim in input_meta.shape]

            # We fully expect all the types are in types_dict. This is raised explicitly rather
            # than with an assert statement so the check is not stripped when running with -O.
            if input_meta.type not in types_dict:
                raise AssertionError("Update types_dict for the new type")
            expected_type = types_dict[input_meta.type]

            if input_meta.name not in df.columns:
                if require:
                    raise RuntimeError("This model requires input {} of type {} but it is not found in the DataFrame".format(
                        input_meta.name, expected_type))
                continue

            column = df[input_meta.name]
            column_dtype = column.dtype
            # float16 and bool always require an exact match.
            # Any type is converted to a string if a string is required.
            convertible = (
                input_meta.type == 'tensor(string)'
                or expected_type == column_dtype
                or (expected_type in ort_float_set and str(column_dtype) in pd_float_set)
                or (expected_type in ort_int_set and str(column_dtype) in pd_int_set)
            )
            if not convertible:
                raise TypeError("Input {} expected to be of type: {} got {} ".format(
                    input_meta.name, expected_type, column_dtype))

            # iloc[0] takes the first row by position; [0] would be a label lookup that fails
            # whenever the index of the DataFrame has no label 0.
            feeds[input_meta.name] = np.array([column.iloc[0]]).astype(expected_type).reshape(shape)
        return feeds

    def _get_input_feeds(self, df, sess):
        """
        Return a dictionary of input_name : a typed and shaped np.array of values
        This function accepts Pandas DataFrame as the first argument and onnxruntime
        session with a loaded model. The function interrogates the model for the inputs
        and matches the model input names to the DataFrame instance column names.
        It requires exact matches for bool and float16 types. It attempts to convert to
        string any input type if string is required.
        It attempts to convert floating types to each other and does the same for all of the
        integer types without requiring an exact match.

        :param df: See :class:`pandas.DataFrame`. The function only considers the first row of each column
            and feeds the data to the appropriate model inputs.
        :param sess: See :class:`onnxruntime.InferenceSession`.
        :raises RuntimeError: if the DataFrame is empty or a mandatory input has no matching column

        ::
            For example: pd.DataFrame([[0, 4, 20]], index=[0], columns=['A', 'B', 'C'])
        """
        if df.empty:
            raise RuntimeError('input DataFrame is empty')

        # Process mandatory inputs. Raise an error if anything is not present
        feeds = self._process_input_list(df, sess.get_inputs(), True)
        # Process optional overridable initializers. If present the initializer value
        # is overridden by the input. If not, the initializer value embedded in the model takes effect.
        initializers = self._process_input_list(df, sess.get_overridable_initializers(), False)
        feeds.update(initializers)
        return feeds

    def execute(self, df, output_names, run_options=None):
        """
        Compute the predictions.
        Return a list of output values restricted to output names if not empty.

        :param df: See :class:`pandas.DataFrame`.
        :param output_names: name of the outputs that we are interested in
        :param run_options: See :class:`onnxruntime.RunOptions`.

        ::
            sess.run([output_name], {input_name: x})
        """
        input_feed = self._get_input_feeds(df, self._sess)
        return self._sess.run(output_names, input_feed, run_options)

View file

@ -0,0 +1,35 @@
#-------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
#--------------------------------------------------------------------------
import argparse
import onnxruntime as onnxrt
import numpy as np
import pandas as pd
from data_frame_tool import DataFrameTool
import os
import sys
def main():
    """
    Run the model given on the command line against a one-row DataFrame and print the outputs.

    Command line: the positional ``model_path`` and the optional ``-profile``
    flag, which turns on profiling in the session options.
    """
    cli = argparse.ArgumentParser(description='Test Feed Inputs utility')
    cli.add_argument('model_path', help='model path')
    cli.add_argument('-profile', action='store_true',
                     help='enable chrome timeline trace profiling.')
    args = cli.parse_args()

    # Session options are handed to the tool, which loads the model itself.
    options = onnxrt.SessionOptions()
    options.enable_profiling = args.profile
    df_tool = DataFrameTool(args.model_path, options)

    # One row with a value for each of the eight columns, in column order.
    named_values = [
        ('StringInput', 'string_input'),
        ('DoubleInput', 3.25),
        ('Int8Input', 8),
        ('Int16Input', 16),
        ('Int32Input', 32),
        ('Int64Input', 64),
        ('BoolInput', True),
        ('Float32Input', 0.25),
    ]
    column_names = [name for name, _ in named_values]
    first_row = [value for _, value in named_values]
    df = pd.DataFrame([first_row], columns=column_names)

    # An empty list of output names is passed straight through to the tool.
    outputs = df_tool.execute(df, [])
    print('Outputs: ', outputs)
if __name__ == "__main__":
    # Script entry point: run the test and hand the result to sys.exit().
    exit_status = main()
    sys.exit(exit_status)