C#: Add CreateFromMemory to FixedBufferOnnxValue to allow binding user buffers and passing custom binary-compatible types (#5886)

Add CreateFromMemory to FixedBufferOnnxValue so users can bind their own custom binary compatible buffers to feed/fetch data.
2026-07-16 18:31:27 +00:00 · 2020-11-24 14:10:14 -08:00 · 2020-11-24 14:10:14 -08:00 · c2d610066a
commit c2d610066a
parent 705d093167
8 changed files with 173 additions and 34 deletions
--- a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 using Microsoft.ML.OnnxRuntime.Tensors;
 using System;
 using System.Buffers;
@ -5,7 +8,7 @@ using System.Buffers;
 namespace Microsoft.ML.OnnxRuntime
 {
    /// <summary>
-    /// Represents an Onnx Value with its underlying buffer pinned
+    /// Represents an OrtValue with its underlying buffer pinned
    /// </summary>
    public class FixedBufferOnnxValue : IDisposable
    {
@ -28,11 +31,14 @@ namespace Microsoft.ML.OnnxRuntime
        /// </summary>
        /// <typeparam name="T"></typeparam>
        /// <param name="value"></param>
-        /// <returns></returns>
+        /// <returns>a disposable instance of FixedBufferOnnxValue</returns>
        public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
        {
            MemoryHandle? memHandle;
            var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
+            // memHandle will have a value when CreateFromTensorObject() pins managed memory and that will have to be
+            // disposed (unpinned) when all is said and done. This is the case for blittable types but does not
+            // happen for string type where each element has its own allocation.
            if (memHandle.HasValue)
            {
                return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
@ -43,6 +49,84 @@ namespace Microsoft.ML.OnnxRuntime
            }
        }

+        /// <summary>
+        /// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
+        /// on top of a buffer. Internally, it will pin managed buffer and will create
+        /// an OrtValue containing a tensor that will not own the memory.
+        /// Such instance of FixedBufferOnnxValue can be used both as input and output in InferenceSession.Run()
+        /// overload. As compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
+        /// that are blittable as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types
+        /// I.e. those that have the same binary representation as the original type. This includes all existing types
+        /// but may also allow using custom types for Float16 and BFloat16 providing they have the same layout and size.
+        /// The resulting instance must be disposed of to release pinned memory and deallocate native OrtValue
+        /// See example below.
+        /// </summary>
+        /// <typeparam name="T">Blittable data type, compatible with supported types</typeparam>
+        /// <param name="memoryInfo">memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance</param>
+        /// <param name="memory">managed buffer that is pinned and used as the tensor's data</param>
+        /// <param name="elementType">TensorElementType</param>
+        /// <param name="shape">shape of the tensor to be created</param>
+        /// <param name="bytesSize">size of the allocation in bytes</param>
+        /// <returns>a disposable instance of FixedBufferOnnxValue</returns>
+        /// <example>
+        /// Here is an example of using a 3rd party library class for processing float16/bfloat16.
+        /// Currently, to pass tensor data and create a tensor one must copy data to Float16/BFloat16 structures
+        /// so DenseTensor can recognize it.
+        /// 
+        /// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory representation
+        /// matches the native one and its size is 16 bits, you can use the following conceptual example
+        /// to feed/fetch data for inference using a Half array. This allows you to avoid copying data from your Half[] to Float16[].
+        ///
+        /// \code{.cs}
+        /// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
+        /// Half[] input = new Half[] { 5646, 12345 };
+        /// var input_shape = new long[] {input.Length};
+        /// Half[] output = new Half[40]; // Whatever the expected len/shape is must match
+        /// var output_shape = new long[] {output.Length};
+        /// 
+        /// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+        ///
+        /// using(var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
+        ///                         input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
+        /// using(var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
+        ///                               output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
+        /// {
+        ///    FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[]{fixedBufferInput};
+        ///    FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[]{fixedBufferOutput};
+        ///    session.Run(inputNames, inputValues, outputNames, outputValues);
+        ///   // Output is now in output[]
+        /// }
+        /// \endcode
+        /// </example>
+        public static FixedBufferOnnxValue CreateFromMemory<T>(OrtMemoryInfo memoryInfo, Memory<T> memory,
+            TensorElementType elementType, long[] shape, long bytesSize)
+        {
+            if (elementType == TensorElementType.String)
+            {
+                throw new ArgumentException("String data type is not supported");
+            }
+
+            var memHandle = memory.Pin(); // pin so the buffer address can be handed to the native tensor
+            try
+            {
+                IntPtr memPtr;
+                unsafe
+                {
+                    memPtr = (IntPtr)memHandle.Pointer;
+                }
+                var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
+                                                        elementType,
+                                                        shape,
+                                                        memPtr, bytesSize);
+                return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
+            }
+            catch (Exception)
+            {
+                memHandle.Dispose(); // unpin on failure; on success the handle is passed to the returned instance
+                throw; // bare rethrow keeps the original stack trace ("throw e" would reset it)
+            }
+        }
+
        #region IDisposable Support

        /// <summary>
@ -51,7 +135,7 @@ namespace Microsoft.ML.OnnxRuntime
        /// <param name="disposing">true if invoked from Dispose()</param>
        protected virtual void Dispose(bool disposing)
        {
-            if(_disposed)
+            if (_disposed)
            {
                return;
            }
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@ -455,7 +455,7 @@ namespace Microsoft.ML.OnnxRuntime
        /// Create OrtIoBinding instance to bind pre-allocated buffers
        /// to input/output
        /// </summary>
-        /// <returns></returns>
+        /// <returns>A new instance of OrtIoBinding</returns>
        public OrtIoBinding CreateIoBinding()
        {
            return new OrtIoBinding(this);
@ -469,8 +469,8 @@ namespace Microsoft.ML.OnnxRuntime
        /// the expense of fetching them and pairing with names.
        /// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues()
        /// </summary>
-        /// <param name="runOptions"></param>
-        /// <param name="ioBinding"></param>
+        /// <param name="runOptions">run options whose handle is passed to the native run call</param>
+        /// <param name="ioBinding">ioBinding instance to use</param>
        public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
        {
            NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs
@ -2,7 +2,6 @@
 // Licensed under the MIT License.

 using System;
-using System.Runtime.InteropServices;

 namespace Microsoft.ML.OnnxRuntime
 {
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
    /// <summary>
    /// This class enable to bind inputs and outputs to pre-allocated
    /// memory. This enables interesting scenarios. For example, if your input
-    /// already resides in some pre-allocated memory even if on a device you bind
+    /// already resides in some pre-allocated memory like GPU, you can bind
    /// that piece of memory to an input name and shape and onnxruntime will use that as input.
-    /// Other traditional inputs can also be bound that already exists as Tensors
+    /// Other traditional inputs can also be bound that already exists as Tensors.
+    ///
+    /// Note, that this arrangement is designed to minimize data copies and to that effect
+    /// your memory allocations must match what is expected by the model, whether you run on
+    /// CPU or GPU. Data copy will still be made, if your pre-allocated memory location does not
+    /// match the one expected by the model. However, copies with OrtIoBinding are only done once,
+    /// at the time of the binding, not at run time. This means that if your input data required a copy,
+    /// your further input modifications would not be seen by onnxruntime unless you rebind it, even if it is
+    /// the same buffer. If you require the scenario where data is copied, OrtIoBinding may not be the best match
+    /// for your use case.
+    ///
+    /// The fact that data copy is not made during runtime also has performance implications.
    /// </summary>
    public class OrtIoBinding : SafeHandle
    {
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
@ -90,14 +90,14 @@ namespace Microsoft.ML.OnnxRuntime
        public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
                                                         long[] shape,
                                                         IntPtr dataBuffer,
-                                                         uint bufferLength)
+                                                         long bufferLength)
        {
            Type type;
            int width;
            TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
-            if(width == 0)
+            if(width < 1)
            {
-                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
            }

            var shapeSize = ArrayUtilities.GetSizeForShape(shape);
--- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@ -4,8 +4,6 @@
 using System;
 using System.Runtime.InteropServices;
 using System.Text;
-using System.Runtime.InteropServices;
-using System.IO;

 namespace Microsoft.ML.OnnxRuntime
 {
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@ -229,16 +229,26 @@ namespace Microsoft.ML.OnnxRuntime.Tests
        {
            string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

-            // Set the graph optimization level for this session.
-            SessionOptions options = new SessionOptions();
-            options.GraphOptimizationLevel = graphOptimizationLevel;
-            if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
-
-            using (var session = new InferenceSession(modelPath, options))
+            using (var cleanUp = new DisposableList<IDisposable>())
            {
+                // Set the graph optimization level for this session.
+                SessionOptions options = new SessionOptions();
+                options.GraphOptimizationLevel = graphOptimizationLevel;
+                if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
+                cleanUp.Add(options);
+
+                var session = new InferenceSession(modelPath, options);
+                cleanUp.Add(session);
+
                var inputMeta = session.InputMetadata;
+                var outputMeta = session.OutputMetadata;
                var container = new List<NamedOnnxValue>();

+                float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
+                int[] expectedDimensions = { 1, 1000, 1, 1 };  // hardcoded for now for the test data
+                ReadOnlySpan<int> expectedOutputDimensions = expectedDimensions;
+                string[] expectedOutputNames = new string[] { "softmaxout_1" };
+
                float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model

                foreach (var name in inputMeta.Keys)
@ -249,8 +259,6 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }

-                ReadOnlySpan<int> expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
-                string[] expectedOutputNames = new string[] { "softmaxout_1" };

                // Run inference with named inputs and outputs created with in Run()
                using (var results = session.Run(container))  // results is an IReadOnlyList<NamedOnnxValue> container
@ -291,9 +299,40 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                    }
                }

+                // Run inference with outputs pinned from buffers
+                using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
+                using(var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
+                {
+                    var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+
+                    // Create inputs
+                    Assert.Single(inputMeta.Keys);
+                    var inputNames = inputMeta.Keys.ToArray();
+                    var inputName = inputNames[0];
+                    Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
+                    Assert.True(inputMeta[inputName].IsTensor);
+                    var longShape = Array.ConvertAll<int, long>(inputMeta[inputName].Dimensions, d => d);
+                    var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+                    pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
+                        TensorElementType.Float, longShape, byteSize));
+
+
+                    // Prepare output buffer
+                    Assert.Single(outputMeta.Keys);
+                    var outputNames = outputMeta.Keys.ToArray();
+                    var outputName = outputNames[0];
+                    Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
+                    Assert.True(outputMeta[outputName].IsTensor);
+                    longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
+                    byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+                    float[] outputBuffer = new float[expectedOutput.Length];
+                    pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputBuffer, 
+                        TensorElementType.Float, longShape, byteSize));
+
+                    session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
+                    Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
+                }

-                float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
-                int[] expectedDimensions = { 1, 1000, 1, 1 };  // hardcoded for now for the test data
                // Run inference with named inputs and named outputs
                {
                    // correct pre-allocated outputs
@ -1954,6 +1993,10 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                var inputTensor = tuple.Item3;
                var outputData = tuple.Item4;
                dispList.Add(session);
+                var runOptions = new RunOptions();
+                dispList.Add(runOptions);
+
+                var inputMeta = session.InputMetadata;
                var outputMeta = session.OutputMetadata;
                var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);

@ -1967,8 +2010,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                {
                    var cyrName = "несуществующийВыход";
                    var longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, i => i);
-                    ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
-                    ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
+                    ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
+                    ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
                    string[] outputs = ioBinding.GetOutputNames();
                    Assert.Equal(2, outputs.Length);
                    Assert.Equal(outputName, outputs[0]);
@ -1982,7 +2025,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                {
                    ioBinding.BindInput(inputName, fixeInputBuffer);
                    ioBinding.BindOutput(outputName, fixedOutputBuffer);
-                    using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                    using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                    {
                        Assert.Equal(1, outputs.Count);
                        var output = outputs.First();
@ -2000,7 +2043,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                    ioBinding.BindInput(inputName, fixedInputBuffer);
                    ioBinding.BindOutputToDevice(outputName, allocator.Info);

-                    using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                    using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                    {
                        Assert.Equal(1, outputs.Count);
                        var output = outputs.First();
@ -2040,7 +2083,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                }
                var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
                var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
-                dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
+                                        dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);

                SessionOptions options = new SessionOptions();
                options.AddInitializer("W", sharedInitializer);
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs
@ -1,8 +1,9 @@
-using Microsoft.ML.OnnxRuntime.Tensors;
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using Microsoft.ML.OnnxRuntime.Tensors;
 using System;
-using System.Collections.Generic;
 using System.Linq;
-using System.Text;
 using Xunit;
 using static Microsoft.ML.OnnxRuntime.Tests.InferenceTest;

@ -47,6 +48,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                var inputTensor = tuple.Item3;
                var outputData = tuple.Item4;
                dispList.Add(session);
+                var runOptions = new RunOptions();
+                dispList.Add(runOptions);
+
                var inputMeta = session.InputMetadata;
                var outputMeta = session.OutputMetadata;
                var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);
@ -69,7 +73,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                {
                    ioBinding.BindInput(inputName, fixedInputBuffer);
                    ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
-                    using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                    using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                    {
                        Assert.Equal(1, outputs.Count);
                        var output = outputs.ElementAt(0);
@ -84,7 +88,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
                {
                    ioBinding.BindInput(inputName, Tensors.TensorElementType.Float, inputShape, ortAllocationInput);
                    ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
-                    using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                    using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                    {
                        Assert.Equal(1, outputs.Count);
                        var output = outputs.ElementAt(0);