diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
index 77994186d8..b9a4705370 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
@@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Buffers;
@@ -5,7 +8,7 @@ using System.Buffers;
namespace Microsoft.ML.OnnxRuntime
{
///
- /// Represents an Onnx Value with its underlying buffer pinned
+ /// Represents an OrtValue with its underlying buffer pinned
///
public class FixedBufferOnnxValue : IDisposable
{
@@ -28,11 +31,14 @@ namespace Microsoft.ML.OnnxRuntime
///
///
///
- ///
+ /// a disposable instance of FixedBufferOnnxValue
public static FixedBufferOnnxValue CreateFromTensor(Tensor value)
{
MemoryHandle? memHandle;
var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
+ // memHandle will have a value when CreateFromTensorObject() pins managed memory and that will have to be
+ // disposed (unpinned) when all is said and done. This is the case for blittable types but does not
+ // happen for string type where each element has its own allocation.
if (memHandle.HasValue)
{
return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
@@ -43,6 +49,84 @@ namespace Microsoft.ML.OnnxRuntime
}
}
+ ///
+ /// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
+ /// on top of a buffer. Internally, it will pin managed buffer and will create
+ /// an OrtValue containing a tensor that will not own the memory.
+ /// Such instance of FixedBufferOnnxValue can be used both as input and output in InferenceSession.Run()
+ /// overload. As compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
+ /// that are blittable as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types
+ /// I.e. those that have the same binary representation as the original type. This includes all existing types
+ /// but may also allow using custom types for Float16 and BFloat16 providing they have the same layout and size.
+ /// The resulting instance must be disposed of to release pinned memory and deallocate native OrtValue
+ /// See example below.
+ ///
+ /// Blittable data type, compatible with supported types
+ /// memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance
+ ///
+ /// TensorElementType
+ /// shape of the tensor to be created
+ /// size of the allocation in bytes
+ /// a disposable instance of FixedBufferOnnxValue
+ ///
+ /// Here is an example of using a 3rd party library class for processing float16/bfloat16.
+ /// Currently, to pass tensor data and create a tensor one must copy data to Float16/BFloat16 structures
+ /// so DenseTensor can recognize it.
+ ///
+ /// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory representation
+ /// matches the native one and its size is 16 bits, you can use the following conceptual example
+ /// to feed/fetch data for inference using a Half array. This allows you to avoid copying data from your Half[] to Float16[].
+ ///
+ /// \code{.cs}
+ /// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
+ /// Half[] input = new Half[] { 5646, 12345 };
+ /// var input_shape = new long[] {input.Length};
+ /// Half[] output = new Half[40]; // Whatever the expected len/shape is must match
+ /// var output_shape = new long[] {output.Length};
+ ///
+ /// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+ ///
+ /// using(var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory(memInfo,
+ /// input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
+ /// using(var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory(memInfo,
+ /// output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
+ /// {
+ /// FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[]{fixedBufferInput};
+ /// FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[]{fixedBufferOutput};
+ /// session.Run(inputNames, inputValues, outputNames, outputValues);
+ /// // Output is now in output[]
+ /// }
+ /// \endcode
+ ///
+ public static FixedBufferOnnxValue CreateFromMemory(OrtMemoryInfo memoryInfo, Memory memory,
+     TensorElementType elementType, long[] shape, long bytesSize)
+ {
+     if (elementType == TensorElementType.String) // rejected up front, before any memory is pinned
+     {
+         throw new ArgumentException("String data type is not supported");
+     }
+
+     var memHandle = memory.Pin(); // pin the managed buffer; the handle must be disposed to unpin it
+     try
+     {
+         IntPtr memPtr;
+         unsafe
+         {
+             memPtr = (IntPtr)memHandle.Pointer; // raw address of the pinned buffer
+         }
+         var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
+             elementType,
+             shape,
+             memPtr, bytesSize);
+         return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); // handle is passed on to the returned instance
+     }
+     catch (Exception)
+     {
+         memHandle.Dispose(); // unpin on failure; no FixedBufferOnnxValue was returned to do it
+         throw; // rethrow as-is: 'throw e;' would reset the original stack trace
+     }
+ }
+
#region IDisposable Support
///
@@ -51,7 +135,7 @@ namespace Microsoft.ML.OnnxRuntime
/// true if invoked from Dispose()
protected virtual void Dispose(bool disposing)
{
- if(_disposed)
+ if (_disposed)
{
return;
}
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index e63efe5ff5..d78d2bfed9 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -455,7 +455,7 @@ namespace Microsoft.ML.OnnxRuntime
/// Create OrtIoBinding instance to bind pre-allocated buffers
/// to input/output
///
- ///
+ /// A new instance of OrtIoBinding
public OrtIoBinding CreateIoBinding()
{
return new OrtIoBinding(this);
@@ -469,8 +469,8 @@ namespace Microsoft.ML.OnnxRuntime
/// the expense of fetching them and pairing with names.
/// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues()
///
- ///
- ///
+ /// runOptions
+ /// ioBinding instance to use
public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs
index bbd961309a..699cf4ebbd 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs
@@ -2,7 +2,6 @@
// Licensed under the MIT License.
using System;
-using System.Runtime.InteropServices;
namespace Microsoft.ML.OnnxRuntime
{
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
index 09c86ad330..4bc68e54a0 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
@@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
///
/// This class enable to bind inputs and outputs to pre-allocated
/// memory. This enables interesting scenarios. For example, if your input
- /// already resides in some pre-allocated memory even if on a device you bind
+ /// already resides in some pre-allocated memory like GPU, you can bind
/// that piece of memory to an input name and shape and onnxruntime will use that as input.
- /// Other traditional inputs can also be bound that already exists as Tensors
+ /// Other traditional inputs can also be bound that already exists as Tensors.
+ ///
+ /// Note, that this arrangement is designed to minimize data copies and to that effect
+ /// your memory allocations must match what is expected by the model, whether you run on
+ /// CPU or GPU. Data copy will still be made, if your pre-allocated memory location does not
+ /// match the one expected by the model. However, copies with OrtIoBindings are only done once,
+ /// at the time of the binding, not at run time. This means that if your input data required a copy,
+ /// your further input modifications would not be seen by onnxruntime unless you rebind it, even if it is
+ /// the same buffer. If you require the scenario where data is copied, OrtIoBinding may not be the best match
+ /// for your use case.
+ ///
+ /// The fact that data copy is not made during runtime also has performance implications.
///
public class OrtIoBinding : SafeHandle
{
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
index b75d6df1a8..bb0e5b6804 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
@@ -90,14 +90,14 @@ namespace Microsoft.ML.OnnxRuntime
public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
long[] shape,
IntPtr dataBuffer,
- uint bufferLength)
+ long bufferLength)
{
Type type;
int width;
TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
- if(width == 0)
+ if(width < 1)
{
- throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
+ throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
}
var shapeSize = ArrayUtilities.GetSizeForShape(shape);
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
index 548359bc29..0d4a9a090f 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -4,8 +4,6 @@
using System;
using System.Runtime.InteropServices;
using System.Text;
-using System.Runtime.InteropServices;
-using System.IO;
namespace Microsoft.ML.OnnxRuntime
{
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index f06135ed1f..4df2a124ff 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -229,16 +229,26 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
- // Set the graph optimization level for this session.
- SessionOptions options = new SessionOptions();
- options.GraphOptimizationLevel = graphOptimizationLevel;
- if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
-
- using (var session = new InferenceSession(modelPath, options))
+ using (var cleanUp = new DisposableList())
{
+ // Set the graph optimization level for this session.
+ SessionOptions options = new SessionOptions();
+ options.GraphOptimizationLevel = graphOptimizationLevel;
+ if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
+ cleanUp.Add(options);
+
+ var session = new InferenceSession(modelPath, options);
+ cleanUp.Add(session);
+
var inputMeta = session.InputMetadata;
+ var outputMeta = session.OutputMetadata;
var container = new List();
+ float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
+ int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
+ ReadOnlySpan expectedOutputDimensions = expectedDimensions;
+ string[] expectedOutputNames = new string[] { "softmaxout_1" };
+
float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model
foreach (var name in inputMeta.Keys)
@@ -249,8 +259,6 @@ namespace Microsoft.ML.OnnxRuntime.Tests
container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
}
- ReadOnlySpan expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
- string[] expectedOutputNames = new string[] { "softmaxout_1" };
// Run inference with named inputs and outputs created with in Run()
using (var results = session.Run(container)) // results is an IReadOnlyList container
@@ -291,9 +299,40 @@ namespace Microsoft.ML.OnnxRuntime.Tests
}
}
+ // Run inference with outputs pinned from buffers
+ using (var pinnedInputs = new DisposableListTest())
+ using(var pinnedOutputs = new DisposableListTest())
+ {
+ var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+
+ // Create inputs
+ Assert.Single(inputMeta.Keys);
+ var inputNames = inputMeta.Keys.ToArray();
+ var inputName = inputNames[0];
+ Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
+ Assert.True(inputMeta[inputName].IsTensor);
+ var longShape = Array.ConvertAll(inputMeta[inputName].Dimensions, d => d);
+ var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+ pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory(memInfo, inputData,
+ TensorElementType.Float, longShape, byteSize));
+
+
+ // Prepare output buffer
+ Assert.Single(outputMeta.Keys);
+ var outputNames = outputMeta.Keys.ToArray();
+ var outputName = outputNames[0];
+ Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
+ Assert.True(outputMeta[outputName].IsTensor);
+ longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, d => d);
+ byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+ float[] outputBuffer = new float[expectedOutput.Length];
+ pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory(memInfo, outputBuffer,
+ TensorElementType.Float, longShape, byteSize));
+
+ session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
+ Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
+ }
- float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
- int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
// Run inference with named inputs and named outputs
{
// correct pre-allocated outputs
@@ -1954,6 +1993,10 @@ namespace Microsoft.ML.OnnxRuntime.Tests
var inputTensor = tuple.Item3;
var outputData = tuple.Item4;
dispList.Add(session);
+ var runOptions = new RunOptions();
+ dispList.Add(runOptions);
+
+ var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions);
@@ -1967,8 +2010,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
var cyrName = "несуществующийВыход";
var longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, i => i);
- ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
- ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
+ ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
+ ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
string[] outputs = ioBinding.GetOutputNames();
Assert.Equal(2, outputs.Length);
Assert.Equal(outputName, outputs[0]);
@@ -1982,7 +2025,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
ioBinding.BindInput(inputName, fixeInputBuffer);
ioBinding.BindOutput(outputName, fixedOutputBuffer);
- using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+ using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.First();
@@ -2000,7 +2043,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
ioBinding.BindInput(inputName, fixedInputBuffer);
ioBinding.BindOutputToDevice(outputName, allocator.Info);
- using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+ using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.First();
@@ -2040,7 +2083,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
}
var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
- dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
+ dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
SessionOptions options = new SessionOptions();
options.AddInitializer("W", sharedInitializer);
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs
index ac28829ad5..eee9599875 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs
@@ -1,8 +1,9 @@
-using Microsoft.ML.OnnxRuntime.Tensors;
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using Microsoft.ML.OnnxRuntime.Tensors;
using System;
-using System.Collections.Generic;
using System.Linq;
-using System.Text;
using Xunit;
using static Microsoft.ML.OnnxRuntime.Tests.InferenceTest;
@@ -47,6 +48,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests
var inputTensor = tuple.Item3;
var outputData = tuple.Item4;
dispList.Add(session);
+ var runOptions = new RunOptions();
+ dispList.Add(runOptions);
+
var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions);
@@ -69,7 +73,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
ioBinding.BindInput(inputName, fixedInputBuffer);
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
- using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+ using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.ElementAt(0);
@@ -84,7 +88,7 @@ namespace Microsoft.ML.OnnxRuntime.Tests
{
ioBinding.BindInput(inputName, Tensors.TensorElementType.Float, inputShape, ortAllocationInput);
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput);
- using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+ using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.ElementAt(0);