mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-31 23:27:43 +00:00
C#: Add CreateFromMemory to FixedBufferOnnxValue to allow binding user buffers and passing custom binary compatible types (#5886)
Add CreateFromMemory to FixedBufferOnnxValue so users can bind their own custom binary compatible buffers to feed/fetch data.
This commit is contained in:
parent
705d093167
commit
c2d610066a
8 changed files with 173 additions and 34 deletions
|
|
@ -1,3 +1,6 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using System;
|
||||
using System.Buffers;
|
||||
|
|
@ -5,7 +8,7 @@ using System.Buffers;
|
|||
namespace Microsoft.ML.OnnxRuntime
|
||||
{
|
||||
/// <summary>
|
||||
/// Represents an Onnx Value with its underlying buffer pinned
|
||||
/// Represents an OrtValue with its underlying buffer pinned
|
||||
/// </summary>
|
||||
public class FixedBufferOnnxValue : IDisposable
|
||||
{
|
||||
|
|
@ -28,11 +31,14 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
/// </summary>
|
||||
/// <typeparam name="T"></typeparam>
|
||||
/// <param name="value"></param>
|
||||
/// <returns></returns>
|
||||
/// <returns>a disposable instance of FixedBufferOnnxValue</returns>
|
||||
public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
|
||||
{
|
||||
MemoryHandle? memHandle;
|
||||
var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
|
||||
// memHandle will have a value when CreateFromTensorObject() pins managed memory and that will have to be
|
||||
/// disposed (unpinned) when all is said and done. This is the case for blittable types but does not
|
||||
/// happen for string type where each element has its own allocation.
|
||||
if (memHandle.HasValue)
|
||||
{
|
||||
return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
|
||||
|
|
@ -43,6 +49,84 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
|
||||
/// on top of a buffer. Internally, it will pin managed buffer and will create
|
||||
/// an OrtValue containing a tensor that will not own the memory.
|
||||
/// Such instance of FixedBufferOnnxValue can be used both as input and output in InferenceSession.Run()
|
||||
/// overload. As compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
|
||||
/// that are blittable as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types
|
||||
/// I.e. those that have the same binary representation as the original type. This includes all existing types
|
||||
/// but may also allow using custom types for Float16 and BFloat16 providing they have the same layout and size.
|
||||
/// The resulting instance must be disposed of to release pinned memory and deallocate native OrtValue
|
||||
/// See example below.
|
||||
/// </summary>
|
||||
/// <typeparam name="T">Blittable data type, compatible with supported types</typeparam>
|
||||
/// <param name="memoryInfo">memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance</param>
|
||||
/// <param name="memory"></param>
|
||||
/// <param name="elementType">TensorElementType</param>
|
||||
/// <param name="shape">shape of the tensor to be created</param>
|
||||
/// <param name="bytesSize">size of the allocation in bytes</param>
|
||||
/// <returns>a disposable instance of FixedBufferOnnxValue</returns>
|
||||
/// <example>
|
||||
/// Here is an example of using a 3rd party library class for processing float16/bfloat16.
|
||||
/// Currently, to pass tensor data and create a tensor one must copy data to Float16/BFloat16 structures
|
||||
/// so DenseTensor can recognize it.
|
||||
///
|
||||
/// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory representation
|
||||
/// matches native one and its size is 16-bits, you can use the following conceptual example
|
||||
/// to feed/fetch data for inference using Half array. This allows you to avoid copying data from your Half[] to Float16[]
|
||||
///
|
||||
/// \code{.cs}
|
||||
/// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
|
||||
/// Half[] input = new Half[] { 5646, 12345 };
|
||||
/// var input_shape = new long[] {input.Length};
|
||||
/// Half[] output = new Half[40]; // Whatever the expected len/shape is must match
|
||||
/// var output_shape = new long[] {output.Length};
|
||||
///
|
||||
/// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
|
||||
///
|
||||
/// using(var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
|
||||
/// input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
|
||||
/// using(var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
|
||||
/// output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
|
||||
/// {
|
||||
/// FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[]{fixedBufferInput};
|
||||
/// FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[]{fixedBufferOutput};
|
||||
/// session.Run(inputNames, inputValues, outputNames, outputValues);
|
||||
/// // Output is now in output[]
|
||||
/// }
|
||||
/// \endcode
|
||||
/// </example>
|
||||
public static FixedBufferOnnxValue CreateFromMemory<T>(OrtMemoryInfo memoryInfo, Memory<T> memory,
    TensorElementType elementType, long[] shape, long bytesSize)
{
    // String tensors are rejected up front, before anything is pinned.
    if (elementType == TensorElementType.String)
    {
        throw new ArgumentException("String data type is not supported");
    }

    // Pin the caller's buffer so that its address can be handed to the native tensor.
    var memHandle = memory.Pin();
    try
    {
        IntPtr memPtr;
        unsafe
        {
            memPtr = (IntPtr)memHandle.Pointer;
        }

        var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
            elementType,
            shape,
            memPtr, bytesSize);

        // On success the pin handle is passed to the returned instance together with the OrtValue.
        return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
    }
    catch (Exception)
    {
        // Creation failed: release the pin here because no instance will be returned to do it.
        memHandle.Dispose();
        // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
        throw;
    }
}
|
||||
|
||||
#region IDisposable Support
|
||||
|
||||
/// <summary>
|
||||
|
|
@ -51,7 +135,7 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
/// <param name="disposing">true if invoked from Dispose()</param>
|
||||
protected virtual void Dispose(bool disposing)
|
||||
{
|
||||
if(_disposed)
|
||||
if (_disposed)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -455,7 +455,7 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
/// Create OrtIoBinding instance to bind pre-allocated buffers
|
||||
/// to input/output
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
/// <returns>A new instance of OrtIoBinding</returns>
|
||||
public OrtIoBinding CreateIoBinding()
|
||||
{
|
||||
return new OrtIoBinding(this);
|
||||
|
|
@ -469,8 +469,8 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
/// the expense of fetching them and pairing with names.
|
||||
/// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues()
|
||||
/// </summary>
|
||||
/// <param name="runOptions"></param>
|
||||
/// <param name="ioBinding"></param>
|
||||
/// <param name="runOptions">runOptions</param>
|
||||
/// <param name="ioBinding">ioBinding instance to use</param>
|
||||
public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
|
||||
{
|
||||
NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Microsoft.ML.OnnxRuntime
|
||||
{
|
||||
|
|
|
|||
|
|
@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
/// <summary>
|
||||
/// This class enables binding inputs and outputs to pre-allocated
|
||||
/// memory. This enables interesting scenarios. For example, if your input
|
||||
/// already resides in some pre-allocated memory even if on a device you bind
|
||||
/// already resides in some pre-allocated memory like GPU, you can bind
|
||||
/// that piece of memory to an input name and shape and onnxruntime will use that as input.
|
||||
/// Other traditional inputs can also be bound that already exists as Tensors
|
||||
/// Other traditional inputs that already exist as Tensors can also be bound.
|
||||
///
|
||||
/// Note, that this arrangement is designed to minimize data copies and to that effect
|
||||
/// your memory allocations must match what is expected by the model, whether you run on
|
||||
/// CPU or GPU. Data copy will still be made, if your pre-allocated memory location does not
|
||||
/// match the one expected by the model. However, copies with OrtIoBindings are only done once,
|
||||
/// at the time of the binding, not at run time. This means, that if your input data required a copy,
|
||||
/// your further input modifications would not be seen by onnxruntime unless you rebind it, even if it is
|
||||
/// the same buffer. If you require the scenario where data is copied, OrtIOBinding may not be the best match
|
||||
/// for your use case.
|
||||
///
|
||||
/// The fact that data copy is not made during runtime also has performance implications.
|
||||
/// </summary>
|
||||
public class OrtIoBinding : SafeHandle
|
||||
{
|
||||
|
|
|
|||
|
|
@ -90,14 +90,14 @@ namespace Microsoft.ML.OnnxRuntime
|
|||
public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
|
||||
long[] shape,
|
||||
IntPtr dataBuffer,
|
||||
uint bufferLength)
|
||||
long bufferLength)
|
||||
{
|
||||
Type type;
|
||||
int width;
|
||||
TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
|
||||
if(width == 0)
|
||||
if(width < 1)
|
||||
{
|
||||
throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
|
||||
throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
|
||||
}
|
||||
|
||||
var shapeSize = ArrayUtilities.GetSizeForShape(shape);
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@
|
|||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.IO;
|
||||
|
||||
namespace Microsoft.ML.OnnxRuntime
|
||||
{
|
||||
|
|
|
|||
|
|
@ -229,16 +229,26 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
{
|
||||
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
|
||||
|
||||
// Set the graph optimization level for this session.
|
||||
SessionOptions options = new SessionOptions();
|
||||
options.GraphOptimizationLevel = graphOptimizationLevel;
|
||||
if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
|
||||
|
||||
using (var session = new InferenceSession(modelPath, options))
|
||||
using (var cleanUp = new DisposableList<IDisposable>())
|
||||
{
|
||||
// Set the graph optimization level for this session.
|
||||
SessionOptions options = new SessionOptions();
|
||||
options.GraphOptimizationLevel = graphOptimizationLevel;
|
||||
if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
|
||||
cleanUp.Add(options);
|
||||
|
||||
var session = new InferenceSession(modelPath, options);
|
||||
cleanUp.Add(session);
|
||||
|
||||
var inputMeta = session.InputMetadata;
|
||||
var outputMeta = session.OutputMetadata;
|
||||
var container = new List<NamedOnnxValue>();
|
||||
|
||||
float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
|
||||
int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
|
||||
ReadOnlySpan<int> expectedOutputDimensions = expectedDimensions;
|
||||
string[] expectedOutputNames = new string[] { "softmaxout_1" };
|
||||
|
||||
float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model
|
||||
|
||||
foreach (var name in inputMeta.Keys)
|
||||
|
|
@ -249,8 +259,6 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
|
||||
}
|
||||
|
||||
ReadOnlySpan<int> expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
|
||||
string[] expectedOutputNames = new string[] { "softmaxout_1" };
|
||||
|
||||
// Run inference with named inputs and outputs created within Run()
|
||||
using (var results = session.Run(container)) // results is an IReadOnlyList<NamedOnnxValue> container
|
||||
|
|
@ -291,9 +299,40 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
}
|
||||
}
|
||||
|
||||
// Run inference with outputs pinned from buffers
|
||||
using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
|
||||
using(var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
|
||||
{
|
||||
var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
|
||||
|
||||
// Create inputs
|
||||
Assert.Single(inputMeta.Keys);
|
||||
var inputNames = inputMeta.Keys.ToArray();
|
||||
var inputName = inputNames[0];
|
||||
Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
|
||||
Assert.True(inputMeta[inputName].IsTensor);
|
||||
var longShape = Array.ConvertAll<int, long>(inputMeta[inputName].Dimensions, d => d);
|
||||
var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
|
||||
pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
|
||||
TensorElementType.Float, longShape, byteSize));
|
||||
|
||||
|
||||
// Prepare output buffer
|
||||
Assert.Single(outputMeta.Keys);
|
||||
var outputNames = outputMeta.Keys.ToArray();
|
||||
var outputName = outputNames[0];
|
||||
Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
|
||||
Assert.True(outputMeta[outputName].IsTensor);
|
||||
longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
|
||||
byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
|
||||
float[] outputBuffer = new float[expectedOutput.Length];
|
||||
pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputBuffer,
|
||||
TensorElementType.Float, longShape, byteSize));
|
||||
|
||||
session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
|
||||
Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
|
||||
}
|
||||
|
||||
float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
|
||||
int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
|
||||
// Run inference with named inputs and named outputs
|
||||
{
|
||||
// correct pre-allocated outputs
|
||||
|
|
@ -1954,6 +1993,10 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
var inputTensor = tuple.Item3;
|
||||
var outputData = tuple.Item4;
|
||||
dispList.Add(session);
|
||||
var runOptions = new RunOptions();
|
||||
dispList.Add(runOptions);
|
||||
|
||||
var inputMeta = session.InputMetadata;
|
||||
var outputMeta = session.OutputMetadata;
|
||||
var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);
|
||||
|
||||
|
|
@ -1967,8 +2010,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
{
|
||||
var cyrName = "несуществующийВыход";
|
||||
var longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, i => i);
|
||||
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
|
||||
ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
|
||||
ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
|
||||
ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
|
||||
string[] outputs = ioBinding.GetOutputNames();
|
||||
Assert.Equal(2, outputs.Length);
|
||||
Assert.Equal(outputName, outputs[0]);
|
||||
|
|
@ -1982,7 +2025,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
{
|
||||
ioBinding.BindInput(inputName, fixeInputBuffer);
|
||||
ioBinding.BindOutput(outputName, fixedOutputBuffer);
|
||||
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
|
||||
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
|
||||
{
|
||||
Assert.Equal(1, outputs.Count);
|
||||
var output = outputs.First();
|
||||
|
|
@ -2000,7 +2043,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
ioBinding.BindInput(inputName, fixedInputBuffer);
|
||||
ioBinding.BindOutputToDevice(outputName, allocator.Info);
|
||||
|
||||
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
|
||||
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
|
||||
{
|
||||
Assert.Equal(1, outputs.Count);
|
||||
var output = outputs.First();
|
||||
|
|
@ -2040,7 +2083,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
}
|
||||
var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
|
||||
var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
|
||||
dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
|
||||
dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
|
||||
|
||||
SessionOptions options = new SessionOptions();
|
||||
options.AddInitializer("W", sharedInitializer);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using Xunit;
|
||||
using static Microsoft.ML.OnnxRuntime.Tests.InferenceTest;
|
||||
|
||||
|
|
@ -47,6 +48,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
var inputTensor = tuple.Item3;
|
||||
var outputData = tuple.Item4;
|
||||
dispList.Add(session);
|
||||
var runOptions = new RunOptions();
|
||||
dispList.Add(runOptions);
|
||||
|
||||
var inputMeta = session.InputMetadata;
|
||||
var outputMeta = session.OutputMetadata;
|
||||
var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);
|
||||
|
|
@ -69,7 +73,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
{
|
||||
ioBinding.BindInput(inputName, fixedInputBuffer);
|
||||
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
|
||||
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
|
||||
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
|
||||
{
|
||||
Assert.Equal(1, outputs.Count);
|
||||
var output = outputs.ElementAt(0);
|
||||
|
|
@ -84,7 +88,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
|
|||
{
|
||||
ioBinding.BindInput(inputName, Tensors.TensorElementType.Float, inputShape, ortAllocationInput);
|
||||
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
|
||||
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
|
||||
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
|
||||
{
|
||||
Assert.Equal(1, outputs.Count);
|
||||
var output = outputs.ElementAt(0);
|
||||
|
|
|
|||
Loading…
Reference in a new issue