mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-22 22:01:08 +00:00
Allow protobuf format of input data for performance test (#5323)
* Allow protobuf format of input data like onnxruntime_perf_tool * Add OnnxML.cs to fix build failure
This commit is contained in:
parent
e8b9aa1f29
commit
1612934f72
3 changed files with 6095 additions and 50 deletions
|
|
@ -8,18 +8,23 @@
|
|||
<SignAssembly>false</SignAssembly>
|
||||
<Configurations>Debug;Release;RelWithDebInfo</Configurations>
|
||||
<IsLinuxBuild Condition="'$(IsLinuxBuild)' == ''">false</IsLinuxBuild>
|
||||
<ProtoSrc>$(OnnxRuntimeCsharpRoot)\..\onnxruntime\core\protobuf</ProtoSrc>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(IsLinuxBuild)'=='true'">
  <!-- Internal build related properties for Linux. -->
  <OnnxRuntimeBuildDirectory Condition="'$(OnnxRuntimeBuildDirectory)'==''">$(OnnxRuntimeCsharpRoot)\..\build\Linux</OnnxRuntimeBuildDirectory>
  <NativeBuildOutputDir>$(OnnxRuntimeBuildDirectory)\$(Configuration)</NativeBuildOutputDir>
  <!-- Default location of the protobuf compiler; only applied when ProtocDirectory is not already set. -->
  <ProtocDirectory Condition="'$(ProtocDirectory)'==''">$(OnnxRuntimeBuildDirectory)\$(Configuration)\external\protobuf\cmake</ProtocDirectory>
  <!-- Executables built on Linux carry no ".exe" suffix, so the compiler binary is plain "protoc". -->
  <ProtocExe>$(ProtocDirectory)\protoc</ProtocExe>
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(IsLinuxBuild)'=='false'">
|
||||
<!--internal build related properties for Windows -->
|
||||
<OnnxRuntimeBuildDirectory Condition="'$(OnnxRuntimeBuildDirectory)'==''">$(OnnxRuntimeCsharpRoot)\..\build\Windows</OnnxRuntimeBuildDirectory>
|
||||
<NativeBuildOutputDir>$(OnnxRuntimeBuildDirectory)\$(Configuration)\$(Configuration)</NativeBuildOutputDir>
|
||||
<ProtocDirectory Condition="'$(ProtocDirectory)'==''">$(OnnxRuntimeBuildDirectory)\$(Configuration)\external\protobuf\cmake\$(Configuration)</ProtocDirectory>
|
||||
<ProtocExe>$(ProtocDirectory)\protoc.exe</ProtocExe>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
@ -53,7 +58,11 @@
|
|||
<ItemGroup>
|
||||
<ProjectReference Include="$(OnnxRuntimeCSharpRoot)\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj" />
|
||||
<PackageReference Include="CommandLineParser" Version="2.4.3" />
|
||||
<PackageReference Include="Google.Protobuf" Version="3.13.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<!--
  ProtoGen: hooked in before the Build target. Runs the protobuf compiler at $(ProtocExe)
  on $(ProtoSrc)\onnx-ml.proto3 with $(ProtoSrc) as the import path and emits C# sources
  into "." (relative to the directory the command is executed in).
  ContinueOnError is false, so a failing protoc invocation fails the build.
-->
<Target Name="ProtoGen" BeforeTargets="Build">
|
||||
<Exec Command="$(ProtocExe) -I=$(ProtoSrc) --csharp_out=. $(ProtoSrc)\onnx-ml.proto3" ContinueOnError="false"></Exec>
|
||||
</Target>
|
||||
|
||||
</Project>
|
||||
|
|
|
|||
5826
csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/OnnxMl.cs
Normal file
5826
csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/OnnxMl.cs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,11 +1,14 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
using CommandLine;
|
||||
using Google.Protobuf;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using System.Diagnostics;
|
||||
using CommandLine;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
namespace Microsoft.ML.OnnxRuntime.PerfTool
|
||||
{
|
||||
|
|
@ -14,8 +17,9 @@ namespace Microsoft.ML.OnnxRuntime.PerfTool
|
|||
Start = 0,
|
||||
ModelLoaded = 1,
|
||||
InputLoaded = 2,
|
||||
RunComplete = 3,
|
||||
TotalCount = 4
|
||||
WarmUp = 3,
|
||||
RunComplete = 4,
|
||||
TotalCount = 5
|
||||
}
|
||||
|
||||
class CommandOptions
|
||||
|
|
@ -23,12 +27,12 @@ namespace Microsoft.ML.OnnxRuntime.PerfTool
|
|||
[Option('m', "model_file", Required = true, HelpText = "Model Path.")]
|
||||
public string ModelFile { get; set; }
|
||||
|
||||
[Option('i', "input_file", Required = true, HelpText = "Input path.")]
|
||||
public string InputFile { get; set; }
|
||||
|
||||
[Option('c', "iteration_count", Required = true, HelpText = "Iteration to run.")]
|
||||
public int IterationCount { get; set; }
|
||||
|
||||
[Option('i', "input_file", Required = false, HelpText = "Input file.")]
|
||||
public string InputFile { get; set; }
|
||||
|
||||
[Option('p', Required = false, HelpText = "Run with parallel exection. Default is false")]
|
||||
public bool ParallelExecution { get; set; } = false;
|
||||
|
||||
|
|
@ -47,51 +51,40 @@ namespace Microsoft.ML.OnnxRuntime.PerfTool
|
|||
Run(options);
|
||||
});
|
||||
}
|
||||
/// <summary>
/// Runs one performance measurement described by the parsed command line options:
/// echoes the settings, executes the model and prints the timing report.
/// </summary>
/// <param name="options">Parsed command line options (model, input, iteration count, execution settings).</param>
/// <exception cref="ArgumentOutOfRangeException">The requested iteration count is not positive.</exception>
public static void Run(CommandOptions options)
{
    string modelPath = options.ModelFile;
    string inputPath = options.InputFile;
    int iteration = options.IterationCount;
    bool parallelExecution = options.ParallelExecution;
    GraphOptimizationLevel optLevel = options.OptimizationLevel;

    // The per-iteration latency buffer is sized by the iteration count, so a zero or
    // negative value can only fail later with an unrelated-looking error. Reject it here.
    if (iteration <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(options), iteration, "iteration_count must be greater than zero");
    }

    Console.WriteLine("Running model {0} in OnnxRuntime:", modelPath);
    Console.WriteLine("iteration count:{0}", iteration);
    Console.WriteLine("input:{0}", inputPath);
    Console.WriteLine("parallel execution:{0}", parallelExecution);
    Console.WriteLine("optimization level:{0}", optLevel);

    // One timestamp slot per TimingPoint stage, one latency slot per measured iteration.
    DateTime[] timestamps = new DateTime[(int)TimingPoint.TotalCount];
    double[] timecosts = new double[iteration];

    RunModelOnnxRuntime(modelPath, inputPath, iteration, timestamps, timecosts, parallelExecution, optLevel);

    PrintReport(timestamps, timecosts, iteration);
    Console.WriteLine("Done");
}
|
||||
|
||||
|
||||
/// <summary>
/// Reads the float values stored on the second line of a text file.
/// The first line (the input name) is skipped; the second line is split on
/// ',', '[' and ']' and every remaining token is parsed as a float.
/// </summary>
/// <param name="filename">Path of the text file to read.</param>
/// <returns>The parsed values in file order.</returns>
/// <exception cref="System.IO.InvalidDataException">The file has no second (data) line.</exception>
public static float[] LoadTensorFromFile(string filename)
{
    // read data from file
    using (var inputFile = new System.IO.StreamReader(filename))
    {
        inputFile.ReadLine(); //skip the input name

        string dataLine = inputFile.ReadLine();
        if (dataLine == null)
        {
            throw new System.IO.InvalidDataException("Input file '" + filename + "' does not contain a data line");
        }

        string[] dataStr = dataLine.Split(new char[] { ',', '[', ']' }, StringSplitOptions.RemoveEmptyEntries);
        float[] tensorData = new float[dataStr.Length];
        for (int i = 0; i < dataStr.Length; i++)
        {
            // The file is a machine-readable format that already uses ',' as the value
            // delimiter, so '.' must always be the decimal point regardless of the
            // current thread culture.
            tensorData[i] = Single.Parse(dataStr[i], System.Globalization.CultureInfo.InvariantCulture);
        }

        return tensorData;
    }
}
|
||||
|
||||
static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteration, DateTime[] timestamps, bool parallelExecution, GraphOptimizationLevel optLevel)
|
||||
static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteration, DateTime[] timestamps,
|
||||
double[] timecosts, bool parallelExecution, GraphOptimizationLevel optLevel)
|
||||
{
|
||||
if (timestamps.Length != (int)TimingPoint.TotalCount)
|
||||
{
|
||||
throw new ArgumentException("Timestamps array must have " + (int)TimingPoint.TotalCount + " size");
|
||||
}
|
||||
|
||||
Random random = new Random();
|
||||
|
||||
timestamps[(int)TimingPoint.Start] = DateTime.Now;
|
||||
SessionOptions options = new SessionOptions();
|
||||
if (parallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
|
||||
|
|
@ -99,54 +92,271 @@ namespace Microsoft.ML.OnnxRuntime.PerfTool
|
|||
using (var session = new InferenceSession(modelPath, options))
|
||||
{
|
||||
timestamps[(int)TimingPoint.ModelLoaded] = DateTime.Now;
|
||||
var inputMeta = session.InputMetadata;
|
||||
|
||||
var container = new List<NamedOnnxValue>();
|
||||
foreach (var name in inputMeta.Keys)
|
||||
{
|
||||
float[] rawData = LoadTensorFromFile(inputPath);
|
||||
var tensor = new DenseTensor<float>(rawData, inputMeta[name].Dimensions);
|
||||
container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
|
||||
}
|
||||
|
||||
|
||||
|
||||
var containers = LoadTestData(modelPath, inputPath, session.InputMetadata);
|
||||
timestamps[(int)TimingPoint.InputLoaded] = DateTime.Now;
|
||||
|
||||
// Warm-up
|
||||
{
|
||||
var container = containers[random.Next(0, containers.Count)];
|
||||
session.Run(container);
|
||||
}
|
||||
timestamps[(int)TimingPoint.WarmUp] = DateTime.Now;
|
||||
|
||||
// Run the inference
|
||||
for (int i = 0; i < iteration; i++)
|
||||
{
|
||||
var next = random.Next(0, containers.Count);
|
||||
var container = containers[next];
|
||||
var startTime = DateTime.Now;
|
||||
|
||||
var results = session.Run(container); // results is an IReadOnlyList<NamedOnnxValue> container
|
||||
|
||||
timecosts[i] = (DateTime.Now - startTime).TotalMilliseconds;
|
||||
|
||||
Debug.Assert(results != null);
|
||||
Debug.Assert(results.Count == 1);
|
||||
//results = null;
|
||||
//GC.Collect();
|
||||
//GC.WaitForPendingFinalizers();
|
||||
}
|
||||
|
||||
timestamps[(int)TimingPoint.RunComplete] = DateTime.Now;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// If inputPath is given, create a tensor from the text-format data file.
|
||||
// Otherwise, create a tensor from proto files. Multiple input directories can be given at the same path as a model file.
|
||||
// Each input directory must contain the same number of inputs as the model.
|
||||
// For example, if a model has 3 inputs, the layout for a model file and two sets of input data is as follows:
|
||||
// |-- model.onnx
|
||||
// |-- test_data_0
|
||||
// | |-- input_0.pb
|
||||
// | |-- input_1.pb
|
||||
// | |-- input_2.pb
|
||||
// |-- test_data_1
|
||||
// | |-- input_0.pb
|
||||
// | |-- input_1.pb
|
||||
// | |-- input_2.pb
|
||||
/// <summary>
/// Builds the input sets used for the benchmark.
/// When <paramref name="inputPath"/> names an existing file, a single input set is loaded
/// from that text file. Otherwise every directory located next to the model file is scanned
/// for protobuf inputs and each directory that contains any becomes one input set.
/// </summary>
/// <param name="modelPath">Path of the model file; its directory is searched for test data directories.</param>
/// <param name="inputPath">Optional path of a text-format input file.</param>
/// <param name="inputMeta">Input metadata of the model, keyed by input name.</param>
/// <returns>A non-empty list of input sets.</returns>
/// <exception cref="FileNotFoundException">No input data could be located.</exception>
static List<List<NamedOnnxValue>> LoadTestData(string modelPath, string inputPath, IReadOnlyDictionary<string, NodeMetadata> inputMeta)
{
    var containers = new List<List<NamedOnnxValue>>();

    // If inputPath is given, give priority to it
    if (!String.IsNullOrEmpty(inputPath) && File.Exists(inputPath))
    {
        containers.Add(LoadTensorFromText(inputPath, inputMeta));
        return containers;
    }

    // Path.GetDirectoryName yields an empty string for a bare file name such as "model.onnx",
    // which Directory.EnumerateDirectories rejects. Resolve to a full path first.
    string modelDirectory = Path.GetDirectoryName(Path.GetFullPath(modelPath));

    foreach (var dir in Directory.EnumerateDirectories(modelDirectory))
    {
        var container = LoadTestDataFromProtobuf(dir, inputMeta);

        // Directories without any input files yield an empty set; an empty set cannot be fed
        // to the session, so it is not a usable benchmark input.
        if (container.Count > 0)
        {
            containers.Add(container);
        }
    }

    if (containers.Count == 0)
    {
        // Fail here with a description of what was searched instead of letting the caller
        // index into an empty list.
        throw new FileNotFoundException(
            "No input data found: no usable input file was given and no directory under '" +
            modelDirectory + "' contains input_*.pb files");
    }

    return containers;
}
|
||||
|
||||
/// <summary>
/// Creates one float tensor per model input from a text file.
/// The first line of the file (the input name) is skipped; the second line is split on
/// ',', '[' and ']' and parsed as floats. Every model input receives the same values,
/// shaped by that input's own dimensions.
/// </summary>
/// <param name="filename">Path of the text file holding the values.</param>
/// <param name="inputMeta">Input metadata of the model, keyed by input name.</param>
/// <returns>One named tensor per entry of <paramref name="inputMeta"/>.</returns>
/// <exception cref="System.IO.InvalidDataException">The file has no second (data) line.</exception>
static List<NamedOnnxValue> LoadTensorFromText(string filename, IReadOnlyDictionary<string, NodeMetadata> inputMeta)
{
    // The file content does not depend on the input being built, so read and parse it once
    // instead of once per model input.
    float[] values;
    using (var inputFile = new System.IO.StreamReader(filename))
    {
        inputFile.ReadLine(); //skip the input name

        string dataLine = inputFile.ReadLine();
        if (dataLine == null)
        {
            throw new System.IO.InvalidDataException("Input file '" + filename + "' does not contain a data line");
        }

        string[] dataStr = dataLine.Split(new char[] { ',', '[', ']' }, StringSplitOptions.RemoveEmptyEntries);
        values = new float[dataStr.Length];
        for (int i = 0; i < dataStr.Length; i++)
        {
            // ',' is the value delimiter of this format, so the decimal point must be '.'
            // independent of the current thread culture.
            values[i] = Single.Parse(dataStr[i], System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    var container = new List<NamedOnnxValue>();
    foreach (var input in inputMeta)
    {
        // Each tensor gets its own backing array, so tensors never alias each other.
        var tensor = new DenseTensor<float>((float[])values.Clone(), input.Value.Dimensions);
        container.Add(NamedOnnxValue.CreateFromTensor<float>(input.Key, tensor));
    }

    return container;
}
|
||||
|
||||
/// <summary>
/// Loads every file matching "input_*.pb" in a directory as a serialized tensor and
/// converts each one into a named model input.
/// </summary>
/// <param name="testDataPath">Directory that is searched for input files.</param>
/// <param name="inputMeta">Input metadata of the model, keyed by input name.</param>
/// <returns>The inputs found in the directory; empty when no file matches.</returns>
static List<NamedOnnxValue> LoadTestDataFromProtobuf(string testDataPath, IReadOnlyDictionary<string, NodeMetadata> inputMeta)
{
    var inputs = new List<NamedOnnxValue>();

    foreach (var pbFile in Directory.EnumerateFiles(testDataPath, "input_*.pb"))
    {
        Onnx.TensorProto proto;
        using (var stream = File.OpenRead(pbFile))
        {
            // Parse while the stream is open; the message is used after the stream is closed.
            proto = Onnx.TensorProto.Parser.ParseFrom(stream);
        }

        inputs.Add(CreateNamedOnnxValueFromTensorProto(proto, inputMeta));
    }

    return inputs;
}
|
||||
|
||||
/// <summary>
/// Converts a deserialized tensor into a named model input.
/// The tensor's name must match a model input and its element type must equal the type
/// the model declares for that input. The values are taken from the tensor's raw byte payload.
/// </summary>
/// <param name="tensorProto">Deserialized tensor message.</param>
/// <param name="inputMeta">Input metadata of the model, keyed by input name.</param>
/// <returns>A named tensor holding a copy of the serialized values.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// The tensor matches no model input, or its element type differs from the model's.
/// </exception>
/// <exception cref="NotSupportedException">
/// The element type is not handled by this tool, or the tensor carries no raw byte payload.
/// </exception>
static NamedOnnxValue CreateNamedOnnxValueFromTensorProto(Onnx.TensorProto tensorProto, IReadOnlyDictionary<string, NodeMetadata> inputMeta)
{
    Type tensorElemType = null;
    int elemWidth = 0;
    GetElementTypeAndWidth((TensorElementType)tensorProto.DataType, out tensorElemType, out elemWidth);

    string name = tensorProto.Name;

    NodeMetadata nodeMeta = null;
    if (!inputMeta.TryGetValue(name, out nodeMeta))
    {
        throw new System.IO.InvalidDataException(
            "No Matching Tensor found from serialized tensor: the model has no input named '" + name + "'");
    }

    if (nodeMeta.ElementType != tensorElemType)
    {
        throw new System.IO.InvalidDataException(
            "No Matching Tensor found from serialized tensor: input '" + name + "' expects element type " +
            nodeMeta.ElementType + " but the serialized tensor has " + tensorElemType);
    }

    // Dimensions are serialized as 64-bit values; fail loudly instead of silently wrapping
    // when one does not fit the 32-bit dimensions used by the tensor types.
    var dims = tensorProto.Dims.ToList().ConvertAll(x => checked((int)x));

    byte[] rawData = tensorProto.RawData.ToArray();

    // Only the raw byte payload is read here. An empty payload is valid only for a tensor
    // with zero elements (some dimension equal to 0). Note: All() is true for an empty
    // dimension list, i.e. a scalar, which does hold one element.
    if (rawData.Length == 0 && dims.All(d => d != 0))
    {
        throw new NotSupportedException(
            "Serialized tensor '" + name + "' has no raw data; only tensors stored as raw bytes are supported in this tool");
    }

    // From here on nodeMeta.ElementType equals tensorElemType, so elemWidth is the width of
    // the type selected by the matching branch.
    Type elemType = nodeMeta.ElementType;

    if (elemType == typeof(float))
    {
        return CreateNamedOnnxValueFromRawData<float>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(double))
    {
        return CreateNamedOnnxValueFromRawData<double>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(int))
    {
        return CreateNamedOnnxValueFromRawData<int>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(uint))
    {
        return CreateNamedOnnxValueFromRawData<uint>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(long))
    {
        return CreateNamedOnnxValueFromRawData<long>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(ulong))
    {
        return CreateNamedOnnxValueFromRawData<ulong>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(short))
    {
        return CreateNamedOnnxValueFromRawData<short>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(ushort))
    {
        return CreateNamedOnnxValueFromRawData<ushort>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(byte))
    {
        return CreateNamedOnnxValueFromRawData<byte>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(sbyte))
    {
        // Int8 is mapped to sbyte by GetElementTypeAndWidth and needs its own branch.
        return CreateNamedOnnxValueFromRawData<sbyte>(name, rawData, elemWidth, dims);
    }
    else if (elemType == typeof(bool))
    {
        return CreateNamedOnnxValueFromRawData<bool>(name, rawData, elemWidth, dims);
    }
    else
    {
        // Report the actual type; nameof(...) would only print the property name.
        throw new NotSupportedException("Tensors of type " + elemType + " not currently supported in this tool");
    }
}
|
||||
|
||||
/// <summary>
/// Maps a tensor element type to the CLR type used to hold one element and to the
/// size of that CLR type in bytes.
/// </summary>
/// <param name="elemType">Tensor element type to look up.</param>
/// <param name="type">Receives the CLR type, or null when the element type is not mapped.</param>
/// <param name="width">Receives the size in bytes of <paramref name="type"/>, or 0 when not mapped.</param>
static void GetElementTypeAndWidth(TensorElementType elemType, out Type type, out int width)
{
    // Start from the "not mapped" answer; a matching case overwrites both values.
    type = null;
    width = 0;

    switch (elemType)
    {
        case TensorElementType.Float:
            type = typeof(float);
            width = sizeof(float);
            return;

        case TensorElementType.Double:
            type = typeof(double);
            width = sizeof(double);
            return;

        case TensorElementType.Int16:
            type = typeof(short);
            width = sizeof(short);
            return;

        case TensorElementType.UInt16:
            type = typeof(ushort);
            width = sizeof(ushort);
            return;

        case TensorElementType.Int32:
            type = typeof(int);
            width = sizeof(int);
            return;

        case TensorElementType.UInt32:
            type = typeof(uint);
            width = sizeof(uint);
            return;

        case TensorElementType.Int64:
            type = typeof(long);
            width = sizeof(long);
            return;

        case TensorElementType.UInt64:
            type = typeof(ulong);
            width = sizeof(ulong);
            return;

        // String is reported with the same byte type and width as UInt8.
        case TensorElementType.UInt8:
        case TensorElementType.String:
            type = typeof(byte);
            width = sizeof(byte);
            return;

        case TensorElementType.Int8:
            type = typeof(sbyte);
            width = sizeof(sbyte);
            return;

        case TensorElementType.Bool:
            type = typeof(bool);
            width = sizeof(bool);
            return;
    }
}
|
||||
|
||||
/// <summary>
/// Reinterprets a raw byte payload as elements of type <typeparamref name="T"/> and wraps
/// them in a named dense tensor with the given dimensions.
/// </summary>
/// <typeparam name="T">Element type; Buffer.BlockCopy only accepts arrays of primitive types.</typeparam>
/// <param name="name">Name given to the resulting value.</param>
/// <param name="rawData">Serialized element bytes.</param>
/// <param name="elemWidth">Size in bytes of one element of <typeparamref name="T"/>.</param>
/// <param name="dimensions">Tensor dimensions.</param>
/// <returns>A named tensor that owns a copy of the data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="rawData"/> or <paramref name="dimensions"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="elemWidth"/> is not positive.</exception>
/// <exception cref="ArgumentException">The payload length is not a whole number of elements.</exception>
static NamedOnnxValue CreateNamedOnnxValueFromRawData<T>(string name, byte[] rawData, int elemWidth, List<int> dimensions)
{
    if (rawData == null)
    {
        throw new ArgumentNullException(nameof(rawData));
    }

    if (dimensions == null)
    {
        throw new ArgumentNullException(nameof(dimensions));
    }

    if (elemWidth <= 0)
    {
        // Guards the division below.
        throw new ArgumentOutOfRangeException(nameof(elemWidth), elemWidth, "Element width must be positive");
    }

    if (rawData.Length % elemWidth != 0)
    {
        // Otherwise the destination array is too short and BlockCopy fails with a message
        // about offsets and lengths that hides the real cause.
        throw new ArgumentException(
            "Raw data length " + rawData.Length + " is not a multiple of the element width " + elemWidth,
            nameof(rawData));
    }

    T[] data = new T[rawData.Length / elemWidth];

    // Bytes are copied verbatim, i.e. interpreted in the host's byte order.
    // NOTE(review): assumes the payload byte order matches the host — confirm for big-endian targets.
    Buffer.BlockCopy(rawData, 0, data, 0, rawData.Length);

    var denseTensor = new DenseTensor<T>(data, dimensions.ToArray());
    return NamedOnnxValue.CreateFromTensor<T>(name, denseTensor);
}
|
||||
|
||||
/// <summary>
/// Writes the two supported command line forms to the console, one per line.
/// </summary>
static void PrintUsage()
{
    // Each usage form needs its own line; without the "\n" after the first form the two
    // command lines are printed fused together.
    Console.WriteLine("Usage:\n"
        + "dotnet Microsoft.ML.OnnxRuntime.PerfTool <onnx-model-path> <input-file-path> <iteration-count>\n"
        + "dotnet Microsoft.ML.OnnxRuntime.PerfTool -m <onnx-model-path> -i <input-file-path> -c <iteration-count>"
        );
}
|
||||
|
||||
/// <summary>
/// Prints the stage timings and the latency distribution of a finished run.
/// </summary>
/// <param name="timestamps">Wall-clock timestamps indexed by <c>TimingPoint</c>.</param>
/// <param name="timecosts">Latency of every measured iteration in milliseconds; not modified.</param>
/// <param name="iterations">Number of measured iterations, used for the per-iteration average.</param>
static void PrintReport(DateTime[] timestamps, double[] timecosts, int iterations)
{
    Console.WriteLine("Model Load Time = " + (timestamps[(int)TimingPoint.ModelLoaded] - timestamps[(int)TimingPoint.Start]).TotalMilliseconds);
    Console.WriteLine("Input Load Time = " + (timestamps[(int)TimingPoint.InputLoaded] - timestamps[(int)TimingPoint.ModelLoaded]).TotalMilliseconds);
    Console.WriteLine("Warm-up Time = " + (timestamps[(int)TimingPoint.WarmUp] - timestamps[(int)TimingPoint.InputLoaded]).TotalMilliseconds);

    // The measured run starts after the warm-up stage, so warm-up time is excluded from the total.
    double totalRuntime = (timestamps[(int)TimingPoint.RunComplete] - timestamps[(int)TimingPoint.WarmUp]).TotalMilliseconds;
    double perIterationTime = totalRuntime / iterations;

    Console.WriteLine("Total Run time for {0} iterations = {1}", iterations, totalRuntime);
    Console.WriteLine("Per iteration time = {0}", perIterationTime);

    if (timecosts == null || timecosts.Length == 0)
    {
        // Without samples there is no minimum, maximum or percentile to index.
        Console.WriteLine("No latency samples were recorded");
        return;
    }

    // Sort a copy so the caller's samples keep their iteration order.
    double[] sorted = (double[])timecosts.Clone();
    Array.Sort(sorted);

    // Percentile p is read at index floor(count * p); p < 1 keeps the index in range.
    Console.WriteLine("Min Latency: {0}", sorted[0]);
    Console.WriteLine("Max Latency: {0}", sorted[sorted.Length - 1]);
    Console.WriteLine("P50 Latency: {0}", sorted[(int)(sorted.Length * 0.5)]);
    Console.WriteLine("P90 Latency: {0}", sorted[(int)(sorted.Length * 0.9)]);
    Console.WriteLine("P95 Latency: {0}", sorted[(int)(sorted.Length * 0.95)]);
    Console.WriteLine("P99 Latency: {0}", sorted[(int)(sorted.Length * 0.99)]);
    Console.WriteLine("P999 Latency: {0}", sorted[(int)(sorted.Length * 0.999)]);
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue