diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs index 77994186d8..b9a4705370 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Buffers; @@ -5,7 +8,7 @@ using System.Buffers; namespace Microsoft.ML.OnnxRuntime { /// - /// Represents an Onnx Value with its underlying buffer pinned + /// Represents an OrtValue with its underlying buffer pinned /// public class FixedBufferOnnxValue : IDisposable { @@ -28,11 +31,14 @@ namespace Microsoft.ML.OnnxRuntime /// /// /// - /// + /// a disposable instance of FixedBufferOnnxValue public static FixedBufferOnnxValue CreateFromTensor(Tensor value) { MemoryHandle? memHandle; var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType); + // memHandle will have a value when CreateFromTensorObject() pins managed memory and that will have to be + // disposed (unpinned) when all is said and done. This is the case for blittable types but does not + // happen for string type where each element has its own allocation. if (memHandle.HasValue) { return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); @@ -43,6 +49,84 @@ namespace Microsoft.ML.OnnxRuntime } } + /// + /// This is a factory method that creates a disposable instance of FixedBufferOnnxValue + /// on top of a buffer. Internally, it will pin the managed buffer and will create + /// an OrtValue containing a tensor that will not own the memory. + /// Such an instance of FixedBufferOnnxValue can be used both as input and output in InferenceSession.Run() + /// overload. 
As compared to CreateFromTensor(), this allows you to pass in buffers with custom data types + /// that are blittable as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types + /// I.e. those that have the same binary representation as the original type. This includes all existing types + /// but may also allow using custom types for Float16 and BFloat16 provided they have the same layout and size. + /// The resulting instance must be disposed of to release pinned memory and deallocate the native OrtValue. + /// See example below. + /// + /// Blittable data type, compatible with supported types + /// memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance + /// + /// TensorElementType + /// shape of the tensor to be created + /// size of the allocation in bytes + /// a disposable instance of FixedBufferOnnxValue + /// + /// Here is an example of using a 3rd party library class for processing float16/bfloat16. + /// Currently, to pass tensor data and create a tensor one must copy data to Float16/BFloat16 structures + /// so DenseTensor can recognize it. + /// + /// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory representation + /// matches the native one and its size is 16 bits, you can use the following conceptual example + /// to feed/fetch data for inference using a Half array. 
This allows you to avoid copying data from your Half[] to Float16[]. + /// + /// \code{.cs} + /// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); } + /// Half[] input = new Half[] { 5646, 12345 }; + /// var input_shape = new long[] {input.Length}; + /// Half[] output = new Half[40]; // Whatever the expected len/shape is must match + /// var output_shape = new long[] {output.Length}; + /// + /// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU + /// + /// using(var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory(memInfo, + /// input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort))) + /// using(var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory(memInfo, + /// output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort))) + /// { + /// FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[]{fixedBufferInput}; + /// FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[]{fixedBufferOutput}; + /// session.Run(inputNames, inputValues, outputNames, outputValues); + /// // Output is now in output[] + /// } + /// \endcode + /// + public static FixedBufferOnnxValue CreateFromMemory(OrtMemoryInfo memoryInfo, Memory memory, + TensorElementType elementType, long[] shape, long bytesSize) + { + if(elementType == TensorElementType.String) + { + throw new ArgumentException("String data type is not supported"); + } + + var memHandle = memory.Pin(); + try + { + IntPtr memPtr; + unsafe + { + memPtr = (IntPtr)memHandle.Pointer; + } + var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo, + elementType, + shape, + memPtr, bytesSize); + return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); + } + catch (Exception) + { + memHandle.Dispose(); // unpin on failure so the caller's buffer is not left pinned + throw; // rethrow as-is; "throw e" would reset the original stack trace + } + } + #region IDisposable Support /// @@ -51,7 +135,7 @@ namespace Microsoft.ML.OnnxRuntime /// true if invoked from Dispose() protected virtual void Dispose(bool disposing) { - if(_disposed) + if 
(_disposed) { return; } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index e63efe5ff5..d78d2bfed9 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -455,7 +455,7 @@ namespace Microsoft.ML.OnnxRuntime /// Create OrtIoBinding instance to bind pre-allocated buffers /// to input/output /// - /// + /// A new instance of OrtIoBinding public OrtIoBinding CreateIoBinding() { return new OrtIoBinding(this); @@ -469,8 +469,8 @@ namespace Microsoft.ML.OnnxRuntime /// the expense of fetching them and pairing with names. /// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues() /// - /// - /// + /// runOptions + /// ioBinding instance to use public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding) { NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle)); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs index bbd961309a..699cf4ebbd 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs @@ -2,7 +2,6 @@ // Licensed under the MIT License. using System; -using System.Runtime.InteropServices; namespace Microsoft.ML.OnnxRuntime { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs index 09c86ad330..4bc68e54a0 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs @@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime /// /// This class enable to bind inputs and outputs to pre-allocated /// memory. This enables interesting scenarios. 
For example, if your input - /// already resides in some pre-allocated memory even if on a device you bind + /// already resides in some pre-allocated memory, such as GPU memory, you can bind /// that piece of memory to an input name and shape and onnxruntime will use that as input. - /// Other traditional inputs can also be bound that already exists as Tensors + /// Other traditional inputs that already exist as Tensors can also be bound. + /// + /// Note that this arrangement is designed to minimize data copies and, to that effect, + /// your memory allocations must match what is expected by the model, whether you run on + /// CPU or GPU. A data copy will still be made if your pre-allocated memory location does not + /// match the one expected by the model. However, copies with OrtIoBinding are only done once, + /// at the time of the binding, not at run time. This means that if your input data required a copy, + /// your further input modifications would not be seen by onnxruntime unless you rebind it, even if it is + /// the same buffer. If you require the scenario where data is copied, OrtIoBinding may not be the best match + /// for your use case. + /// + /// The fact that a data copy is not made at run time also has performance implications. 
/// public class OrtIoBinding : SafeHandle { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs index b75d6df1a8..bb0e5b6804 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -90,14 +90,14 @@ namespace Microsoft.ML.OnnxRuntime public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType, long[] shape, IntPtr dataBuffer, - uint bufferLength) + long bufferLength) { Type type; int width; TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width); - if(width == 0) + if(width < 1) { - throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type"); + throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)"); } var shapeSize = ArrayUtilities.GetSizeForShape(shape); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 548359bc29..0d4a9a090f 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -4,8 +4,6 @@ using System; using System.Runtime.InteropServices; using System.Text; -using System.Runtime.InteropServices; -using System.IO; namespace Microsoft.ML.OnnxRuntime { diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index f06135ed1f..4df2a124ff 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -229,16 +229,26 @@ namespace Microsoft.ML.OnnxRuntime.Tests { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - // Set the graph optimization level for this session. 
- SessionOptions options = new SessionOptions(); - options.GraphOptimizationLevel = graphOptimizationLevel; - if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL; - - using (var session = new InferenceSession(modelPath, options)) + using (var cleanUp = new DisposableList()) { + // Set the graph optimization level for this session. + SessionOptions options = new SessionOptions(); + options.GraphOptimizationLevel = graphOptimizationLevel; + if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL; + cleanUp.Add(options); + + var session = new InferenceSession(modelPath, options); + cleanUp.Add(session); + var inputMeta = session.InputMetadata; + var outputMeta = session.OutputMetadata; var container = new List(); + float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out"); + int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data + ReadOnlySpan expectedOutputDimensions = expectedDimensions; + string[] expectedOutputNames = new string[] { "softmaxout_1" }; + float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model foreach (var name in inputMeta.Keys) @@ -249,8 +259,6 @@ namespace Microsoft.ML.OnnxRuntime.Tests container.Add(NamedOnnxValue.CreateFromTensor(name, tensor)); } - ReadOnlySpan expectedOutputDimensions = new int[] { 1, 1000, 1, 1 }; - string[] expectedOutputNames = new string[] { "softmaxout_1" }; // Run inference with named inputs and outputs created with in Run() using (var results = session.Run(container)) // results is an IReadOnlyList container @@ -291,9 +299,40 @@ namespace Microsoft.ML.OnnxRuntime.Tests } } + // Run inference with outputs pinned from buffers + using (var pinnedInputs = new DisposableListTest()) + using(var pinnedOutputs = new DisposableListTest()) + { + var memInfo = OrtMemoryInfo.DefaultInstance; // CPU + + // Create inputs + Assert.Single(inputMeta.Keys); + var inputNames = 
inputMeta.Keys.ToArray(); + var inputName = inputNames[0]; + Assert.Equal(typeof(float), inputMeta[inputName].ElementType); + Assert.True(inputMeta[inputName].IsTensor); + var longShape = Array.ConvertAll(inputMeta[inputName].Dimensions, d => d); + var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float); + pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory(memInfo, inputData, + TensorElementType.Float, longShape, byteSize)); + + + // Prepare output buffer + Assert.Single(outputMeta.Keys); + var outputNames = outputMeta.Keys.ToArray(); + var outputName = outputNames[0]; + Assert.Equal(typeof(float), outputMeta[outputName].ElementType); + Assert.True(outputMeta[outputName].IsTensor); + longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, d => d); + byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float); + float[] outputBuffer = new float[expectedOutput.Length]; + pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory(memInfo, outputBuffer, + TensorElementType.Float, longShape, byteSize)); + + session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs); + Assert.Equal(expectedOutput, outputBuffer, new floatComparer()); + } - float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out"); - int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data // Run inference with named inputs and named outputs { // correct pre-allocated outputs @@ -1954,6 +1993,10 @@ namespace Microsoft.ML.OnnxRuntime.Tests var inputTensor = tuple.Item3; var outputData = tuple.Item4; dispList.Add(session); + var runOptions = new RunOptions(); + dispList.Add(runOptions); + + var inputMeta = session.InputMetadata; var outputMeta = session.OutputMetadata; var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions); @@ -1967,8 +2010,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests { var cyrName = "несуществующийВыход"; var longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, i => i); - 
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput); - ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput); + ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput); + ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput); string[] outputs = ioBinding.GetOutputNames(); Assert.Equal(2, outputs.Length); Assert.Equal(outputName, outputs[0]); @@ -1982,7 +2025,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests { ioBinding.BindInput(inputName, fixeInputBuffer); ioBinding.BindOutput(outputName, fixedOutputBuffer); - using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding)) { Assert.Equal(1, outputs.Count); var output = outputs.First(); @@ -2000,7 +2043,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests ioBinding.BindInput(inputName, fixedInputBuffer); ioBinding.BindOutputToDevice(outputName, allocator.Info); - using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding)) { Assert.Equal(1, outputs.Count); var output = outputs.First(); @@ -2040,7 +2083,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests } var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float); var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float, - dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes); + dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes); SessionOptions options = new SessionOptions(); options.AddInitializer("W", sharedInitializer); diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs index ac28829ad5..eee9599875 100644 --- 
a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs @@ -1,8 +1,9 @@ -using Microsoft.ML.OnnxRuntime.Tensors; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.ML.OnnxRuntime.Tensors; using System; -using System.Collections.Generic; using System.Linq; -using System.Text; using Xunit; using static Microsoft.ML.OnnxRuntime.Tests.InferenceTest; @@ -47,6 +48,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests var inputTensor = tuple.Item3; var outputData = tuple.Item4; dispList.Add(session); + var runOptions = new RunOptions(); + dispList.Add(runOptions); + var inputMeta = session.InputMetadata; var outputMeta = session.OutputMetadata; var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions); @@ -69,7 +73,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests { ioBinding.BindInput(inputName, fixedInputBuffer); ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput); - using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding)) { Assert.Equal(1, outputs.Count); var output = outputs.ElementAt(0); @@ -84,7 +88,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests { ioBinding.BindInput(inputName, Tensors.TensorElementType.Float, inputShape, ortAllocationInput); ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput); - using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding)) { Assert.Equal(1, outputs.Count); var output = outputs.ElementAt(0);