C#: Add CreateFromMemory to FixedBufferOnnxValue to allow binding user buffers and passing custom binary compatible types (#5886)

Add CreateFromMemory to FixedBufferOnnxValue so users can bind their own custom binary compatible buffers to feed/fetch data.
This commit is contained in:
Dmitri Smirnov 2020-11-24 14:10:14 -08:00 committed by GitHub
parent 705d093167
commit c2d610066a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 173 additions and 34 deletions

View file

@ -1,3 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Buffers;
@ -5,7 +8,7 @@ using System.Buffers;
namespace Microsoft.ML.OnnxRuntime
{
/// <summary>
/// Represents an Onnx Value with its underlying buffer pinned
/// Represents an OrtValue with its underlying buffer pinned
/// </summary>
public class FixedBufferOnnxValue : IDisposable
{
@ -28,11 +31,14 @@ namespace Microsoft.ML.OnnxRuntime
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="value"></param>
/// <returns></returns>
/// <returns>a disposable instance of FixedBufferOnnxValue</returns>
public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
{
MemoryHandle? memHandle;
var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
// memHandle will have a value when CreateFromTensorObject() pins managed memory and that will have to be
// disposed (unpinned) when all is said and done. This is the case for blittable types but does not
// happen for string type where each element has its own allocation.
if (memHandle.HasValue)
{
return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
@ -43,6 +49,84 @@ namespace Microsoft.ML.OnnxRuntime
}
}
/// <summary>
/// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
/// on top of a buffer. Internally, it will pin the managed buffer and will create
/// an OrtValue containing a tensor that will not own the memory.
/// Such an instance of FixedBufferOnnxValue can be used both as input and output in InferenceSession.Run()
/// overload. As compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
/// that are blittable as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types
/// I.e. those that have the same binary representation as the original type. This includes all existing types
/// but may also allow using custom types for Float16 and BFloat16 providing they have the same layout and size.
/// The resulting instance must be disposed of to release pinned memory and deallocate native OrtValue.
/// See example below.
/// </summary>
/// <typeparam name="T">Blittable data type, compatible with supported types</typeparam>
/// <param name="memoryInfo">memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance</param>
/// <param name="memory">managed buffer to pin; the created tensor refers to this memory and does not own it</param>
/// <param name="elementType">TensorElementType. TensorElementType.String is not supported</param>
/// <param name="shape">shape of the tensor to be created</param>
/// <param name="bytesSize">size of the allocation in bytes</param>
/// <returns>a disposable instance of FixedBufferOnnxValue</returns>
/// <exception cref="ArgumentException">thrown when elementType is TensorElementType.String</exception>
/// <example>
/// Here is an example of using a 3rd party library class for processing float16/bfloat16.
/// Currently, to pass tensor data and create a tensor one must copy data to Float16/BFloat16 structures
/// so DenseTensor can recognize it.
///
/// If you are using a library that has a class Half and it is blittable, that is its managed in memory representation
/// matches native one and its size is 16-bits, you can use the following conceptual example
/// to feed/fetch data for inference using Half array. This allows you to avoid copying data from your Half[] to Float16[]
///
/// \code{.cs}
/// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
/// Half[] input = new Half[] { 5646, 12345 };
/// var input_shape = new long[] {input.Length};
/// Half[] output = new Half[40]; // Whatever the expected len/shape is must match
/// var output_shape = new long[] {output.Length};
///
/// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
///
/// using(var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
///                         input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
/// using(var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
///                         output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
/// {
///    FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[]{fixedBufferInput};
///    FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[]{fixedBufferOutput};
///    session.Run(inputNames, inputValues, outputNames, outputValues);
///   // Output is now in output[]
/// }
/// \endcode
/// </example>
public static FixedBufferOnnxValue CreateFromMemory<T>(OrtMemoryInfo memoryInfo, Memory<T> memory,
    TensorElementType elementType, long[] shape, long bytesSize)
{
    // Reject strings before pinning anything so there is nothing to clean up on this path.
    if (elementType == TensorElementType.String)
    {
        throw new ArgumentException("String data type is not supported");
    }

    // Pin the buffer so its address can be handed to the native tensor.
    var memHandle = memory.Pin();
    try
    {
        IntPtr memPtr;
        unsafe
        {
            memPtr = (IntPtr)memHandle.Pointer;
        }

        var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
                                                          elementType,
                                                          shape,
                                                          memPtr, bytesSize);
        // On success the pin handle is passed to the returned instance, which must be disposed by the caller.
        return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
    }
    catch (Exception)
    {
        // Unpin the buffer on failure, then rethrow with a bare `throw` so the
        // original stack trace is preserved (`throw e` would reset it).
        memHandle.Dispose();
        throw;
    }
}
#region IDisposable Support
/// <summary>
@ -51,7 +135,7 @@ namespace Microsoft.ML.OnnxRuntime
/// <param name="disposing">true if invoked from Dispose()</param>
protected virtual void Dispose(bool disposing)
{
if(_disposed)
if (_disposed)
{
return;
}

View file

@ -455,7 +455,7 @@ namespace Microsoft.ML.OnnxRuntime
/// Create OrtIoBinding instance to bind pre-allocated buffers
/// to input/output
/// </summary>
/// <returns></returns>
/// <returns>A new instance of OrtIoBinding</returns>
public OrtIoBinding CreateIoBinding()
{
return new OrtIoBinding(this);
@ -469,8 +469,8 @@ namespace Microsoft.ML.OnnxRuntime
/// the expense of fetching them and pairing with names.
/// You can still fetch the outputs by calling OrtIoBinding.GetOutputValues()
/// </summary>
/// <param name="runOptions"></param>
/// <param name="ioBinding"></param>
/// <param name="runOptions">runOptions</param>
/// <param name="ioBinding">ioBinding instance to use</param>
public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));

View file

@ -2,7 +2,6 @@
// Licensed under the MIT License.
using System;
using System.Runtime.InteropServices;
namespace Microsoft.ML.OnnxRuntime
{

View file

@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
/// <summary>
/// This class enables binding inputs and outputs to pre-allocated
/// memory. This enables interesting scenarios. For example, if your input
/// already resides in some pre-allocated memory even if on a device you bind
/// already resides in some pre-allocated memory like GPU, you can bind
/// that piece of memory to an input name and shape and onnxruntime will use that as input.
/// Other traditional inputs can also be bound that already exists as Tensors
/// Other traditional inputs can also be bound that already exists as Tensors.
///
/// Note that this arrangement is designed to minimize data copies and to that effect
/// your memory allocations must match what is expected by the model, whether you run on
/// CPU or GPU. A data copy will still be made if your pre-allocated memory location does not
/// match the one expected by the model. However, copies with OrtIoBinding are only done once,
/// at the time of the binding, not at run time. This means that if your input data required a copy,
/// your further input modifications would not be seen by onnxruntime unless you rebind it, even if it is
/// the same buffer. If you require the scenario where data is copied, OrtIoBinding may not be the best match
/// for your use case.
///
/// The fact that data copy is not made during runtime also has performance implications.
/// </summary>
public class OrtIoBinding : SafeHandle
{

View file

@ -90,14 +90,14 @@ namespace Microsoft.ML.OnnxRuntime
public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
long[] shape,
IntPtr dataBuffer,
uint bufferLength)
long bufferLength)
{
Type type;
int width;
TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
if(width == 0)
if(width < 1)
{
throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
}
var shapeSize = ArrayUtilities.GetSizeForShape(shape);

View file

@ -4,8 +4,6 @@
using System;
using System.Runtime.InteropServices;
using System.Text;
using System.Runtime.InteropServices;
using System.IO;
namespace Microsoft.ML.OnnxRuntime
{

View file

@ -229,16 +229,26 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
// Set the graph optimization level for this session.
SessionOptions options = new SessionOptions();
options.GraphOptimizationLevel = graphOptimizationLevel;
if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
using (var session = new InferenceSession(modelPath, options))
using (var cleanUp = new DisposableList<IDisposable>())
{
// Set the graph optimization level for this session.
SessionOptions options = new SessionOptions();
options.GraphOptimizationLevel = graphOptimizationLevel;
if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
cleanUp.Add(options);
var session = new InferenceSession(modelPath, options);
cleanUp.Add(session);
var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var container = new List<NamedOnnxValue>();
float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
ReadOnlySpan<int> expectedOutputDimensions = expectedDimensions;
string[] expectedOutputNames = new string[] { "softmaxout_1" };
float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model
foreach (var name in inputMeta.Keys)
@ -249,8 +259,6 @@ namespace Microsoft.ML.OnnxRuntime.Tests
container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
}
ReadOnlySpan<int> expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
string[] expectedOutputNames = new string[] { "softmaxout_1" };
// Run inference with named inputs and outputs created within Run()
using (var results = session.Run(container)) // results is an IReadOnlyList<NamedOnnxValue> container
@ -291,9 +299,40 @@ namespace Microsoft.ML.OnnxRuntime.Tests
}
}
// Run inference with outputs pinned from buffers
using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
using(var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
{
var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
// Create inputs
Assert.Single(inputMeta.Keys);
var inputNames = inputMeta.Keys.ToArray();
var inputName = inputNames[0];
Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
Assert.True(inputMeta[inputName].IsTensor);
var longShape = Array.ConvertAll<int, long>(inputMeta[inputName].Dimensions, d => d);
var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
TensorElementType.Float, longShape, byteSize));
// Prepare output buffer
Assert.Single(outputMeta.Keys);
var outputNames = outputMeta.Keys.ToArray();
var outputName = outputNames[0];
Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
Assert.True(outputMeta[outputName].IsTensor);
longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
float[] outputBuffer = new float[expectedOutput.Length];
pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputBuffer,
TensorElementType.Float, longShape, byteSize));
session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
}
float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
// Run inference with named inputs and named outputs
{
// correct pre-allocated outputs
@ -1954,6 +1993,10 @@ namespace Microsoft.ML.OnnxRuntime.Tests
var inputTensor = tuple.Item3;
var outputData = tuple.Item4;
dispList.Add(session);
var runOptions = new RunOptions();
dispList.Add(runOptions);
var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);
@ -1967,8 +2010,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
var cyrName = "несуществующийВыход";
var longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, i => i);
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
string[] outputs = ioBinding.GetOutputNames();
Assert.Equal(2, outputs.Length);
Assert.Equal(outputName, outputs[0]);
@ -1982,7 +2025,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
ioBinding.BindInput(inputName, fixeInputBuffer);
ioBinding.BindOutput(outputName, fixedOutputBuffer);
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.First();
@ -2000,7 +2043,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
ioBinding.BindInput(inputName, fixedInputBuffer);
ioBinding.BindOutputToDevice(outputName, allocator.Info);
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.First();
@ -2040,7 +2083,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
}
var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
SessionOptions options = new SessionOptions();
options.AddInitializer("W", sharedInitializer);

View file

@ -1,8 +1,9 @@
using Microsoft.ML.OnnxRuntime.Tensors;
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Xunit;
using static Microsoft.ML.OnnxRuntime.Tests.InferenceTest;
@ -47,6 +48,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests
var inputTensor = tuple.Item3;
var outputData = tuple.Item4;
dispList.Add(session);
var runOptions = new RunOptions();
dispList.Add(runOptions);
var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);
@ -69,7 +73,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
ioBinding.BindInput(inputName, fixedInputBuffer);
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.ElementAt(0);
@ -84,7 +88,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
ioBinding.BindInput(inputName, Tensors.TensorElementType.Float, inputShape, ortAllocationInput);
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.ElementAt(0);