From 65201e47bf58ce5bbc8389a83082f69ee1ea50b3 Mon Sep 17 00:00:00 2001 From: Ashwini Khade Date: Mon, 5 Dec 2022 14:54:09 -0800 Subject: [PATCH] Enable nuget packages for on device training (#13637) ### Description This PR enables building nuget packages locally for on device training using --build_nuget arg. This PR also enables the C# bindings by default in the managed package. If a user triggers any training APIs when the native binary is not built for training, an exception with message "Training is disabled in the current build. Please build ONNXRuntime from source with the build flags enable_training and enable_training_on_device. " is thrown. Build command for creating nuget packages for on device training: build.bat --enable_training --enable_training_on_device --build_nuget 2 Nuget packages are built 1. Microsoft.ML.OnnxRuntime.Managed 2. Microsoft.ML.OnnxRuntime.Training OR Microsoft.ML.OnnxRuntime.Training.Gpu ### Motivation and Context --- cmake/onnxruntime_csharp.cmake | 4 ++ csharp/OnnxRuntime.CSharp.proj | 16 ++--- .../Microsoft.ML.OnnxRuntime.csproj | 68 ++++++++++--------- .../Training/CheckpointState.shared.cs | 2 +- .../Training/TrainingSession.shared.cs | 2 +- .../TrainingTest.cs | 18 ++++- ...oft.ML.OnnxRuntime.Tests.NetCoreApp.csproj | 20 ++++-- tools/ci_build/build.py | 40 ++++++++--- .../templates/win-ci-vs-2019.yml | 4 +- .../nuget/generate_nuspec_for_native_nuget.py | 14 ++++ 10 files changed, 127 insertions(+), 61 deletions(-) diff --git a/cmake/onnxruntime_csharp.cmake b/cmake/onnxruntime_csharp.cmake index 58a9f894ec..c65acd849f 100644 --- a/cmake/onnxruntime_csharp.cmake +++ b/cmake/onnxruntime_csharp.cmake @@ -50,6 +50,10 @@ if (onnxruntime_USE_XNNPACK) STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_XNNPACK;") endif() +if (onnxruntime_ENABLE_TRAINING_ON_DEVICE) + STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "__TRAINING_ENABLED_NATIVE_BUILD__;") +endif() + include(CSharpUtilities) # generate Directory.Build.props diff --git 
a/csharp/OnnxRuntime.CSharp.proj b/csharp/OnnxRuntime.CSharp.proj index 94635f2dc3..5473246e8b 100644 --- a/csharp/OnnxRuntime.CSharp.proj +++ b/csharp/OnnxRuntime.CSharp.proj @@ -1,12 +1,12 @@ - + Microsoft.ML.OnnxRuntime Debug @@ -54,7 +54,7 @@ CMake creates a target to this project - + @@ -72,7 +72,7 @@ CMake creates a target to this project - + $([System.DateTime]::UtcNow.ToString(yyyyMMdd)) $([System.DateTime]::UtcNow.ToString(hhmm)) @@ -82,12 +82,12 @@ CMake creates a target to this project - + + Properties="NoBuild=true;Platform=AnyCPU;PackageVersion=$(PackageVersion);OrtPackageId=$(OrtPackageId);SelectedTargets=All"/> @@ -105,7 +105,7 @@ CMake creates a target to this project @@ -113,7 +113,7 @@ CMake creates a target to this project - + diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 679361c336..e6a8272fd8 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -4,19 +4,19 @@ Microsoft.ML.OnnxRuntime - @@ -24,18 +24,18 @@ netstandard1.1;netstandard2.0;net5.0;netcoreapp3.1 - - xamarinios10;monoandroid11.0 - net6.0;net6.0-android;net6.0-ios;net6.0-macos @@ -64,7 +64,7 @@ $(ProjectDir)..\..\.. 
$(OnnxRuntimeRoot)\csharp - x64 + x64 Microsoft.ML.OnnxRuntime Microsoft.ML.OnnxRuntime @@ -72,11 +72,15 @@ false portable - - false + + true - + Microsoft.ML.OnnxRuntime.Managed Microsoft 1.0.0 @@ -105,27 +109,35 @@ $(AllowedOutputExtensionsInPackageBuildOutputFolder);.pdb Debug;Release;RelWithDebInfo - true - true + true + true true - - true - true - true true - + + $(OrtConstants);__ENABLE_TRAINING_ON_DEVICE__ + + + @@ -146,7 +158,7 @@ $(OnnxRuntimeBuildDirectory)\$(Configuration) - @@ -179,14 +191,6 @@ $(OrtConstants);__XAMARIN__ - - - $(OrtConstants);__ENABLE_TRAINING_ON_DEVICE__ - - $(DefineConstants);$(OrtConstants) @@ -287,7 +291,7 @@ @@ -346,5 +350,5 @@ DestinationFolder="$(NativeBuildOutputDir)" /> - + diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs index 4cff5bccb1..fec2c18572 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs @@ -33,7 +33,7 @@ namespace Microsoft.ML.OnnxRuntime } else { - throw new InvalidOperationException("Training is disabled in the current build"); + throw new InvalidOperationException("Training is disabled in the current build. Please build ONNXRuntime from source with the build flags enable_training and enable_training_on_device. 
\n"); } } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs index b5b5a7c598..8d804628af 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs @@ -336,7 +336,7 @@ namespace Microsoft.ML.OnnxRuntime { if (!NativeTrainingMethods.TrainingEnabled()) { - throw new InvalidOperationException("Training is disabled in the current build."); + throw new InvalidOperationException("Training is disabled in the current build. Please build ONNXRuntime from source with the build flags enable_training and enable_training_on_device. \n"); } var options = sessOptions; if (sessOptions == null) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs index f5bf62b4b8..f643efa720 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs @@ -23,6 +23,17 @@ namespace Microsoft.ML.OnnxRuntime.Tests this.output = o; } +#if !__TRAINING_ENABLED_NATIVE_BUILD__ + [Fact(DisplayName = "TestLoadCheckpointThrows")] + public void TestLoadCheckpointThrows() + { + string path = Path.Combine(Directory.GetCurrentDirectory(), "checkpoint.ckpt"); + var ex = Assert.Throws(() => { var opt = new CheckpointState(path); }); + Assert.Contains("Training is disabled in the current build.", ex.Message); + } +#endif + +#if __TRAINING_ENABLED_NATIVE_BUILD__ [Fact(DisplayName = "TestLoadCheckpoint")] public void TestLoadCheckpoint() { @@ -122,13 +133,13 @@ namespace Microsoft.ML.OnnxRuntime.Tests outputs = trainingSession.TrainStep(pinnedInputs); var outputBuffer = outputs.ElementAtOrDefault(0); - Assert.Equal("542.loss", outputBuffer.Name); + Assert.Equal("onnx::loss::21273", outputBuffer.Name); 
Assert.Equal(OnnxValueType.ONNX_TYPE_TENSOR, outputBuffer.ValueType); Assert.Equal(TensorElementType.Float, outputBuffer.ElementType); var outLabelTensor = outputBuffer.AsTensor(); Assert.NotNull(outLabelTensor); - Assert.Equal(expectedOutput, outLabelTensor, new FloatComparer()); + Assert.Equal(expectedOutput, outLabelTensor.ToArray(), new FloatComparer()); } } @@ -304,5 +315,6 @@ namespace Microsoft.ML.OnnxRuntime.Tests return x.GetHashCode(); } } - } +#endif + } } diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj index fbb0417215..dafebd9ccb 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj @@ -17,10 +17,18 @@ ..\..\OnnxRuntime.snk Debug;Release;RelWithDebInfo - __NET_CORE_APP__;$(DefineConstants) - - false + + false + + + + $(ExtraDefineConstants);__TRAINING_ENABLED_NATIVE_BUILD__ + + + + __NET_CORE_APP__;$(DefineConstants);$(ExtraDefineConstants) @@ -95,12 +103,12 @@ ArrayTensorExtensionsTests.cs - - - TrainingTest.cs + + + PreserveNewest false diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 6d094c8c6a..a6e6b4cdc7 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2131,7 +2131,17 @@ def derive_linux_build_property(): def build_nuget_package( - source_dir, build_dir, configs, use_cuda, use_openvino, use_tensorrt, use_dnnl, use_tvm, use_winml, use_snpe + source_dir, + build_dir, + configs, + use_cuda, + use_openvino, + use_tensorrt, + use_dnnl, + use_tvm, + use_winml, + use_snpe, + enable_training_on_device, ): if not (is_windows() or is_linux()): raise BuildError( @@ -2150,7 +2160,12 @@ def build_nuget_package( target_name = "/t:CreatePackage" execution_provider = 
'/p:ExecutionProvider="None"' package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime"' - if use_winml: + if enable_training_on_device: + if use_cuda: + package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Training.Gpu"' + else: + package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Training"' + elif use_winml: package_name = '/p:OrtPackageId="Microsoft.AI.MachineLearning"' target_name = "/t:CreateWindowsAIPackage" elif use_openvino: @@ -2210,7 +2225,7 @@ def build_nuget_package( run_subprocess(cmd_args, cwd=csharp_build_dir) if is_windows(): - if use_openvino or use_tvm: + if not use_winml: # user needs to make sure nuget is installed and added to the path variable nuget_exe = "nuget.exe" else: @@ -2237,12 +2252,11 @@ def build_nuget_package( run_subprocess(cmd_args, cwd=csharp_build_dir) -def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt, use_dnnl): +def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt, use_dnnl, enable_training_on_device): # Currently only running tests on windows. 
if not is_windows(): return csharp_source_dir = os.path.join(source_dir, "csharp") - is_linux_build = derive_linux_build_property() # define macros based on build args macros = "" @@ -2254,6 +2268,8 @@ def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt macros += "USE_DNNL;" if use_cuda: macros += "USE_CUDA;" + if enable_training_on_device: + macros += "__TRAINING_ENABLED_NATIVE_BUILD__;" define_constants = "" if macros != "": @@ -2268,10 +2284,9 @@ def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt cmd_args = [ "dotnet", "test", - "test\\Microsoft.ML.OnnxRuntime.Tests\\Microsoft.ML.OnnxRuntime.Tests.csproj", + "test\\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp\\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj", "--filter", "FullyQualifiedName!=Microsoft.ML.OnnxRuntime.Tests.InferenceTest.TestPreTrainedModels", - is_linux_build, define_constants, ort_build_dir, ] @@ -2811,10 +2826,19 @@ def main(): args.use_tvm, args.use_winml, args.use_snpe, + args.enable_training_on_device, ) if args.test and args.build_nuget: - run_csharp_tests(source_dir, build_dir, args.use_cuda, args.use_openvino, args.use_tensorrt, args.use_dnnl) + run_csharp_tests( + source_dir, + build_dir, + args.use_cuda, + args.use_openvino, + args.use_tensorrt, + args.use_dnnl, + args.enable_training_on_device, + ) if args.gen_doc: # special case CI where we create the build config separately to building diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci-vs-2019.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci-vs-2019.yml index 932951c9f7..72d196bb38 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci-vs-2019.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci-vs-2019.yml @@ -192,7 +192,7 @@ jobs: solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' configuration: '${{ parameters.BuildConfig }}' platform: 'Any CPU' - msbuildArguments: 
'-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:DefineConstants=USE_${{ parameters.ORT_EP_NAME }}' + msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId)' workingDirectory: '$(Build.SourcesDirectory)\csharp' # C# test isn't launched by build.py, so models link has to be added. @@ -208,7 +208,7 @@ jobs: command: test projects: '$(Build.SourcesDirectory)\csharp\test\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj' configuration: '${{ parameters.BuildConfig }}' - arguments: '--configuration ${{ parameters.BuildConfig }} -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:DefineConstants=USE_${{ parameters.ORT_EP_NAME }} --blame' + arguments: '--configuration ${{ parameters.BuildConfig }} -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) --blame' workingDirectory: '$(Build.SourcesDirectory)\csharp' - ${{ if eq(parameters.EnablePython, true) }}: diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index c49cfa5723..532ed8d1ef 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -306,6 +306,10 @@ def generate_files(line_list, args): is_dml_package = args.package_name == "Microsoft.ML.OnnxRuntime.DirectML" is_windowsai_package = args.package_name == "Microsoft.AI.MachineLearning" is_snpe_package = args.package_name == "Microsoft.ML.OnnxRuntime.Snpe" + is_training_package = args.package_name in [ + "Microsoft.ML.OnnxRuntime.Training", + "Microsoft.ML.OnnxRuntime.Training.Gpu", + ] includes_winml = is_windowsai_package includes_directml = (is_dml_package or is_windowsai_package) and ( @@ -380,6 +384,16 @@ def generate_files(line_list, args): + '" target="build\\native\\include" />' ) + if 
is_training_package: + files_list.append( + "' + ) + if args.execution_provider == "tvm": files_list.append( "