From 3bc91c2151823830df83dbdc4a5d0a36df932b4e Mon Sep 17 00:00:00 2001 From: Edward Chen <18449977+edgchen1@users.noreply.github.com> Date: Tue, 28 Dec 2021 19:04:20 -0800 Subject: [PATCH] Move reduced ops files into build directory (#10030) In a reduced ops build, some source files get updated. This change moves the updated files into the build directory. This way, it is easier to simultaneously manage different build directories (with possibly different reduced ops configurations) based on a single source directory. --- .gitignore | 8 -- cmake/CMakeLists.txt | 4 - cmake/onnxruntime_providers.cmake | 78 +++++++++++-- docs/Reduced_Operator_Kernel_build.md | 16 ++- .../contrib_ops/cpu/cpu_contrib_kernels.cc | 12 -- .../contrib_ops/cuda/cuda_contrib_kernels.cc | 12 -- .../providers/cpu/cpu_execution_provider.cc | 12 -- .../providers/cuda/cuda_execution_provider.cc | 12 -- .../core/providers/op_kernel_type_control.h | 13 --- .../internal_testing_partitioning_tests.cc | 4 + .../internal_testing_tests.cc | 4 + .../training_ops/cpu/cpu_training_kernels.cc | 12 -- .../cuda/cuda_training_kernels.cc | 12 -- tools/ci_build/build.py | 23 ++-- .../linux-cpu-minimal-build-ci-pipeline.yml | 20 ---- .../build_full_ort_and_create_ort_files.sh | 4 +- tools/ci_build/reduce_op_kernels.py | 110 +++++++++++------- 17 files changed, 173 insertions(+), 183 deletions(-) diff --git a/.gitignore b/.gitignore index 418e30eed1..be2375d256 100644 --- a/.gitignore +++ b/.gitignore @@ -60,11 +60,3 @@ onnxruntime/python/version_info.py .envrc .psenvrc *.csproj.user -# exclude generated reduced kernel registration and type control -onnxruntime/contrib_ops/cpu/cpu_contrib_kernels_reduced_ops.cc -onnxruntime/core/providers/cpu/cpu_execution_provider_reduced_ops.cc -orttraining/orttraining/training_ops/cpu/cpu_training_kernels_reduced_ops.cc -onnxruntime/contrib_ops/cuda/cuda_contrib_kernels_reduced_ops.cc -onnxruntime/core/providers/cuda/cuda_execution_provider_reduced_ops.cc -orttraining/orttraining/training_ops/cuda/cuda_training_kernels_reduced_ops.cc -onnxruntime/core/providers/op_kernel_type_control_overrides_reduced_types.inc diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index adfaa4c11f..3c471ed49e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -118,7 +118,6 @@ cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handlin option(onnxruntime_EXTENDED_MINIMAL_BUILD "onnxruntime_MINIMAL_BUILD with support for execution providers that compile kernels." OFF) option(onnxruntime_MINIMAL_BUILD_CUSTOM_OPS "Add custom operator kernels support to a minimal build." OFF) option(onnxruntime_REDUCED_OPS_BUILD "Reduced set of kernels are registered in build via modification of the kernel registration source files." OFF) -option(onnxruntime_REDUCED_OP_TYPE_SUPPORT "Limit the types individual operators support where possible to further reduce the build size." OFF) option(onnxruntime_DISABLE_EXTERNAL_INITIALIZERS "Don't allow models to load external data" OFF) cmake_dependent_option(onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION "Enable runtime graph optimization of ORT format models. Warning: Not yet ready for general use." @@ -390,9 +389,6 @@ endif() if (onnxruntime_REDUCED_OPS_BUILD) add_compile_definitions(REDUCED_OPS_BUILD) - if (onnxruntime_REDUCED_OP_TYPE_SUPPORT) - add_compile_definitions(REDUCED_OP_TYPE_SUPPORT) - endif() endif() if (onnxruntime_DISABLE_EXTERNAL_INITIALIZERS) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 90240c5f68..8fba42e3e8 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -1,6 +1,64 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +# Reduced ops build helpers + +# In a reduced ops build, the reduction is performed by updating source files. +# Rather than modifying the source files directly, updated versions will be +# saved to another location in the build directory: ${op_reduction_root}. +set(op_reduction_root "${CMAKE_BINARY_DIR}/op_reduction.generated") + +# This helper function replaces the relevant original source files with their +# updated, reduced ops versions in `all_srcs`. +function(substitute_op_reduction_srcs all_srcs) + # files that are potentially updated in a reduced ops build + set(original_srcs + "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/cpu_contrib_kernels.cc" + "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/cuda_contrib_kernels.cc" + "${ONNXRUNTIME_ROOT}/core/providers/cpu/cpu_execution_provider.cc" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_execution_provider.cc" + "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc" + "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/cpu_training_kernels.cc" + "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/cuda_training_kernels.cc" + ) + + set(replacement_srcs) + + foreach(original_src ${original_srcs}) + string(FIND "${${all_srcs}}" "${original_src}" idx) + if(idx EQUAL "-1") + continue() + endif() + + file(RELATIVE_PATH src_relative_path "${REPO_ROOT}" "${original_src}") + set(replacement_src "${op_reduction_root}/${src_relative_path}") + + message("File '${original_src}' substituted with reduced op version '${replacement_src}'.") + + string(REPLACE "${original_src}" "${replacement_src}" ${all_srcs} "${${all_srcs}}") + + list(APPEND replacement_srcs "${replacement_src}") + endforeach() + + if(replacement_srcs) + source_group(TREE "${op_reduction_root}" PREFIX "op_reduction.generated" FILES ${replacement_srcs}) + endif() + + set(${all_srcs} "${${all_srcs}}" PARENT_SCOPE) +endfunction() + +# This helper function adds reduced ops build-specific include directories to +# `target`. +function(add_op_reduction_include_dirs target) + set(op_reduction_include_dirs "${op_reduction_root}/onnxruntime") + if (onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS) + list(APPEND op_reduction_include_dirs "${op_reduction_root}/orttraining") + endif() + # add include directories BEFORE so they are searched first, giving op reduction file paths precedence + target_include_directories(${target} BEFORE PRIVATE ${op_reduction_include_dirs}) +endfunction() + + file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h" "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc" @@ -45,16 +103,10 @@ file(GLOB_RECURSE onnxruntime_rocm_generated_contrib_ops_cu_srcs CONFIGURE_DEPEN "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime/contrib_ops/rocm/*.cuh" ) - file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/*.h" "${ONNXRUNTIME_ROOT}/core/providers/*.cc" - # If we are building with reduced number of kernel registration and types, - # "core/providers/op_kernel_type_control_overrides_reduced_types.inc" - # will be generated with type specifications code. - # For simplicity, we inlcude both .inc files, - # see onnxruntime/core/providers/op_kernel_type_control.h - "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides*.inc" + "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc" ) if(onnxruntime_USE_NUPHAR) @@ -176,7 +228,13 @@ if (onnxruntime_ENABLE_TRAINING) list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs}) endif() +if (onnxruntime_REDUCED_OPS_BUILD) + substitute_op_reduction_srcs(onnxruntime_providers_src) +endif() onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src}) +if (onnxruntime_REDUCED_OPS_BUILD) + add_op_reduction_include_dirs(onnxruntime_providers) +endif() if (MSVC) target_compile_options(onnxruntime_providers PRIVATE "/bigobj") @@ -323,7 +381,13 @@ if (onnxruntime_USE_CUDA) list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs}) endif() + if (onnxruntime_REDUCED_OPS_BUILD) + substitute_op_reduction_srcs(onnxruntime_providers_cuda_src) + endif() onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src}) + if (onnxruntime_REDUCED_OPS_BUILD) + add_op_reduction_include_dirs(onnxruntime_providers_cuda) + endif() #target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler \"/analyze:stacksize 131072\">") if (HAS_GUARD_CF) diff --git a/docs/Reduced_Operator_Kernel_build.md b/docs/Reduced_Operator_Kernel_build.md index 02e761e5c6..e25f7d0478 100644 --- a/docs/Reduced_Operator_Kernel_build.md +++ b/docs/Reduced_Operator_Kernel_build.md @@ -4,17 +4,23 @@ In order to reduce the compiled binary size of ONNX Runtime (ORT), the operator A configuration file must be created with details of the kernels that are required. -Following that, ORT must be manually built, providing the configuration file in the `--include_ops_by_config` parameter. The build process will update the ORT kernel registration source files to exclude the unused kernels. +Following that, ORT must be manually built, providing the configuration file in the [build.py](../tools/ci_build/build.py) `--include_ops_by_config` argument. See the [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#build-instructions) for more details on building ORT. -When building ORT with a reduced set of kernel registrations, `--skip_tests` **MUST** be specified as the kernel reduction will render many of the unit tests invalid. +The build process will generate updated ORT kernel registration and type reduction source files to exclude unused kernel implementations. +The generated files will be under the build directory and the original source files that they are based on are not directly modified. +When building, the generated files will be used instead of the original files. -NOTE: The operator exclusion logic when building with an operator reduction configuration file will only disable kernel registrations each time it runs. It will NOT re-enable previously disabled kernels. If you wish to change the list of kernels included, it is best to revert the repository to a clean state (e.g. via `git reset --hard`) before building ORT again. +The operator exclusion logic only runs during the build file generation (or "update") phase of the build process, i.e., when invoking build.py with no build phase arguments or explicitly with `--update`. + +Note: It is also possible to run the operator exclusion logic independently with [reduce_op_kernels.py](../tools/ci_build/reduce_op_kernels.py). This may be useful when building ORT without using build.py. +As the generated files will go into a build directory, the build directory must be provided with the reduce_op_kernels.py `--cmake_build_dir` argument. +Note that this argument is slightly different from the build.py `--build_dir` argument - build.py will append an additional directory for the build configuration to its `--build_dir` value to get the equivalent of `--cmake_build_dir`. ## Creating a configuration file with the required kernels -The script in `/tools/python/create_reduced_build_config.py` should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models. +The [create_reduced_build_config.py](../tools/python/create_reduced_build_config.py) script should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models. ``` create_reduced_build_config.py --help @@ -35,7 +41,7 @@ optional arguments: ### Type reduction -If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if `--enable_type_reduction` is specified. This can be used to further reduce the build size if `--enable_reduced_operator_type_support` is specified when building ORT. +If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if the `--enable_type_reduction` argument is specified. This can be used to further reduce the build size if the build.py `--enable_reduced_operator_type_support` argument is specified when building ORT. ONNX format models are not guaranteed to include the required per-node type information, so cannot be used with this option. diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index 1f0ee6d17e..e9d22000b0 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "contrib_ops/cpu/cpu_contrib_kernels.h" #include "core/graph/constants.h" #include "core/mlas/inc/mlas.h" @@ -278,5 +268,3 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) { } // namespace contrib } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc index e2be2b8ab4..19b226858a 100644 --- a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/cuda_common.h" @@ -214,5 +204,3 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) { } // namespace cuda } // namespace contrib } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 1732170145..f015b53941 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/cpu/cpu_execution_provider.h" #include "core/framework/op_kernel.h" #include "core/framework/kernel_registry.h" @@ -2074,5 +2064,3 @@ std::unique_ptr CPUExecutionProvider::GetDataTransfer() const { return std::make_unique(); } } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index c676ff43a2..cd5e7ad6b9 100755 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/cuda_execution_provider.h" #include "core/providers/cuda/cuda_common.h" @@ -2363,5 +2353,3 @@ void CUDAExecutionProvider::RegisterAllocator(std::shared_ptr } } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/core/providers/op_kernel_type_control.h b/onnxruntime/core/providers/op_kernel_type_control.h index 5d491d9dc9..446666eaa7 100644 --- a/onnxruntime/core/providers/op_kernel_type_control.h +++ b/onnxruntime/core/providers/op_kernel_type_control.h @@ -469,17 +469,4 @@ struct EnabledTypes { #include "core/framework/data_types.h" // for types that might be used in type specifications // all allowed type specifications should be contained in the following file - -// If we are building with reduced number of kernel registration and types -// will be copied to -// , -// where the type specifications code will be inserted, -// This will prevent, -// 1. Accidental commit of the modified -// 2. If the required ops and types config has changed, user has to revert the changes to -// -#ifndef REDUCED_OP_TYPE_SUPPORT #include "core/providers/op_kernel_type_control_overrides.inc" -#else -#include "core/providers/op_kernel_type_control_overrides_reduced_types.inc" -#endif diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc index 0aacbf14ce..a0acb9518e 100644 --- a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc +++ b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#if !defined(REDUCED_OPS_BUILD) // may not work with excluded op kernel implementations + #include "core/common/logging/logging.h" #include "core/framework/compute_capability.h" #include "core/framework/utils.h" @@ -342,3 +344,5 @@ TEST(InternalTestingEP, DISABLED_TestNnapiPartitioningMlPerfModels) { } // namespace test } // namespace onnxruntime + +#endif // !defined(REDUCED_OPS_BUILD) diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc index f456809f0b..71032d560c 100644 --- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc +++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#if !defined(REDUCED_OPS_BUILD) // may not work with excluded op kernel implementations + #include "core/common/logging/logging.h" #include "core/framework/utils.h" #include "core/session/inference_session.h" @@ -354,3 +356,5 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) { } } // namespace test } // namespace onnxruntime + +#endif // !defined(REDUCED_OPS_BUILD) diff --git a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc index 8754305d95..800fd48221 100644 --- a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc +++ b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "orttraining/training_ops/cpu/cpu_training_kernels.h" #include "core/graph/constants.h" @@ -244,5 +234,3 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) { } // namespace contrib } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc index 7d84cb3f30..8d0882f2ee 100644 --- a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc +++ b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/cuda_fwd.h" #include "core/providers/cuda/cuda_pch.h" @@ -468,5 +458,3 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) { } // namespace cuda } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index f9c900295f..edb2433d7e 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -791,9 +791,6 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home args.minimal_build or args.use_extensions)) else "OFF"), "-Donnxruntime_REDUCED_OPS_BUILD=" + ("ON" if is_reduced_ops_build(args) else "OFF"), - "-Donnxruntime_REDUCED_OP_TYPE_SUPPORT=" + ( - "ON" if is_reduced_ops_build(args) and args.enable_reduced_operator_type_support - else "OFF"), "-Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=" + ("ON" if args.enable_language_interop_ops else "OFF"), "-Donnxruntime_USE_DML=" + ("ON" if args.use_dml else "OFF"), "-Donnxruntime_USE_WINML=" + ("ON" if args.use_winml else "OFF"), @@ -2035,13 +2032,6 @@ def main(): if args.skip_tests: args.test = False - if is_reduced_ops_build(args) and args.update: - from reduce_op_kernels import reduce_ops - reduce_ops( - config_path=args.include_ops_by_config, - enable_type_reduction=args.enable_reduced_operator_type_support, - use_cuda=args.use_cuda) - if args.use_tensorrt: args.use_cuda = True @@ -2127,10 +2117,21 @@ def main(): rocm_home = setup_rocm_build(args, configs) if args.update or args.build: - os.makedirs(build_dir, exist_ok=True) + for config in configs: + os.makedirs(get_config_build_dir(build_dir, config), exist_ok=True) log.info("Build started") + if args.update: + if is_reduced_ops_build(args): + from reduce_op_kernels import reduce_ops + for config in configs: + reduce_ops( + config_path=args.include_ops_by_config, + build_dir=get_config_build_dir(build_dir, config), + enable_type_reduction=args.enable_reduced_operator_type_support, + use_cuda=args.use_cuda) + cmake_extra_args = [] path_to_protoc_exe = args.path_to_protoc_exe if not args.skip_submodule_sync: diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml index f53e688df3..a273c590b6 100644 --- a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml @@ -110,10 +110,6 @@ jobs: --enable-custom-ops workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 3b. Build minimal onnxruntime [exceptions ENABLED, type reduction ENABLED] and run tests inputs: @@ -132,10 +128,6 @@ jobs: --enable-type-reduction workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 4. Build minimal onnxruntime [exceptions ENABLED, type reduction ENABLED (globally allowed types)] and run tests inputs: @@ -159,10 +151,6 @@ jobs: --skip-model-tests workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 5. Build onnxruntime minimal baseline for Android arm64-v8a and report binary size inputs: @@ -207,10 +195,6 @@ jobs: --build_id=$(Build.BuildId) workingDirectory: '$(Build.BinariesDirectory)' - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 6a. Build full onnxruntime with runtime optimizations enabled inputs: @@ -252,10 +236,6 @@ jobs: --cmake_extra_defines onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION=ON workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 7a. Regular build with python and all optional features disabled. inputs: diff --git a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh index 3664d8c114..0f91f1ae0d 100755 --- a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh +++ b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh @@ -17,7 +17,7 @@ python3 /onnxruntime_src/tools/ci_build/op_registration_validator.py # Run a full build of ORT. # We need the ORT python package to generate the ORT format files and the required ops config files. # We do not run tests in this command since those are covered by other CIs. -# We run two full builds here. One for enabling nnapi and the other for enabling coreml. +# Both the NNAPI and CoreML EPs are enabled. python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir ${BUILD_DIR} --cmake_generator Ninja \ --config Debug \ @@ -27,7 +27,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \ --skip_tests \ --enable_training_ops \ --enable_pybind --cmake_extra_defines PYTHON_INCLUDE_DIR=/opt/python/cp37-cp37m/include/python3.7m PYTHON_LIBRARY=/usr/lib64/librt.so \ - --use_nnapi\ + --use_nnapi \ --use_coreml # Run kernel def hash verification test diff --git a/tools/ci_build/reduce_op_kernels.py b/tools/ci_build/reduce_op_kernels.py index ae98907a46..dc5d578c21 100755 --- a/tools/ci_build/reduce_op_kernels.py +++ b/tools/ci_build/reduce_op_kernels.py @@ -4,7 +4,6 @@ import argparse import op_registration_utils -import os import shutil import sys import typing @@ -12,14 +11,13 @@ import typing from logger import get_logger from pathlib import Path -REDUCED_KERNEL_DEF_SUFFIX = '_reduced_ops' -REDUCED_TYPE_CONTROL_SUFFIX = '_reduced_types' +# directory containing the reduced op files, relative to the build directory +OP_REDUCTION_DIR = "op_reduction.generated" # add the path to /tools/python so we can import the config parsing and type reduction processing -script_path = os.path.dirname(os.path.realpath(__file__)) -ort_root = os.path.abspath(os.path.join(script_path, '..', '..', )) -ort_tools_py_path = os.path.abspath(os.path.join(ort_root, 'tools', 'python')) -sys.path.append(ort_tools_py_path) +SCRIPT_DIR = Path(__file__).parent.resolve() +ORT_ROOT = SCRIPT_DIR.parents[1] +sys.path.append(str(ORT_ROOT / 'tools' / 'python')) from util import parse_config # noqa from util.ort_format_model.operator_type_usage_processors import OpTypeImplFilterInterface # noqa @@ -96,24 +94,37 @@ class _ExcludingRegistrationProcessor(op_registration_utils.RegistrationProcesso return True -def _process_provider_registrations( - ort_root: str, use_cuda: bool, +def _get_op_reduction_file_path(ort_root: Path, build_dir: Path, original_path: typing.Optional[Path] = None): + ''' + Return the op reduction file path corresponding to `original_path` or the op reduction file root if unspecified. + Op reduction files are in a subdirectory of `build_dir` but otherwise share the same components of `original_path` + relative to `ort_root`. + ''' + op_reduction_root = Path(build_dir, OP_REDUCTION_DIR) + return (op_reduction_root / original_path.relative_to(ort_root)) if original_path is not None \ + else op_reduction_root + + +def _generate_provider_registrations( + ort_root: Path, build_dir: Path, use_cuda: bool, required_ops: typing.Optional[dict], op_type_impl_filter: typing.Optional[OpTypeImplFilterInterface]): - '''Rewrite provider registration files.''' - kernel_registration_files = op_registration_utils.get_kernel_registration_files(ort_root, use_cuda) + '''Generate provider registration files.''' + kernel_registration_files = [Path(f) for f in + op_registration_utils.get_kernel_registration_files(str(ort_root), use_cuda)] for kernel_registration_file in kernel_registration_files: - if not os.path.isfile(kernel_registration_file): - raise ValueError('Kernel registration file {} does not exist'.format(kernel_registration_file)) + if not kernel_registration_file.is_file(): + raise ValueError(f'Kernel registration file does not exist: {kernel_registration_file}') log.info("Processing {}".format(kernel_registration_file)) - old_path = Path(kernel_registration_file) - reduced_path = Path(old_path.parent, f'{old_path.stem}{REDUCED_KERNEL_DEF_SUFFIX}{old_path.suffix}') + reduced_path = _get_op_reduction_file_path(ort_root, build_dir, kernel_registration_file) - # read from original and create the reduced kernel def file (*_reduced_ops.cc), - # with commented out lines for any kernels that are not required + reduced_path.parent.mkdir(parents=True, exist_ok=True) + + # read from original and create the reduced kernel def file with commented out lines for any kernels that are + # not required with open(reduced_path, 'w') as file_to_write: processor = _ExcludingRegistrationProcessor(required_ops, op_type_impl_filter, file_to_write) @@ -123,29 +134,24 @@ def _process_provider_registrations( # error should have already been logged so just exit sys.exit(-1) - # enable the contents in the *_reduced_ops.cc - with open(reduced_path, 'r+') as file: - file_content = file.read().replace(r'#ifndef REDUCED_OPS_BUILD', r'#ifdef REDUCED_OPS_BUILD') - with open(reduced_path, "w") as file_to_write: - file_to_write.write(file_content) - - -def _insert_type_control_cpp_code(ort_root: str, cpp_lines: typing.Sequence[str]): +def _generate_type_control_overrides(ort_root: Path, build_dir: Path, cpp_lines: typing.Sequence[str]): ''' - Insert the C++ code to specify operator type requirements. + Generate type control overrides. Insert applicable C++ code to specify operator type requirements. :param ort_root: Root of the ONNX Runtime repository + :param build_dir: Path to the build directory :param cpp_lines: The C++ code to insert ''' - src = os.path.join(ort_root, 'onnxruntime', 'core', 'providers', 'op_kernel_type_control_overrides.inc') - if not os.path.exists(src) or not os.path.isfile(src): - log.warning('Could not find {}. Skipping generation of C++ code to reduce the types supported by operators.' - .format(src)) - return + src = Path(ort_root, 'onnxruntime', 'core', 'providers', 'op_kernel_type_control_overrides.inc') + + if not src.is_file(): + raise ValueError(f"Op kernel type control overrides file does not exist: {src}") + + # create a copy of op_kernel_type_control_overrides.inc + target = _get_op_reduction_file_path(ort_root, build_dir, src) + + target.parent.mkdir(parents=True, exist_ok=True) - # create a copy of the op_kernel_type_control_overrides.inc even the cpp_lines is empty - src_path = Path(src) - target = Path(src_path.parent, f'{src_path.stem}{REDUCED_TYPE_CONTROL_SUFFIX}{src_path.suffix}') shutil.copyfile(src, target) if cpp_lines: @@ -173,20 +179,29 @@ def _insert_type_control_cpp_code(ort_root: str, cpp_lines: typing.Sequence[str] raise RuntimeError('Insertion point was not found in {}'.format(target)) -def reduce_ops(config_path: str, enable_type_reduction: bool = False, use_cuda: bool = True): +def reduce_ops(config_path: str, build_dir: str, enable_type_reduction: bool = False, use_cuda: bool = True): ''' Reduce op kernel implementations. :param config_path: Path to configuration file that specifies the ops to include + :param build_dir: Path to the build directory. The op reduction files will be generated under the build directory. :param enable_type_reduction: Whether per operator type reduction is enabled :param use_cuda: Whether to reduce op kernels for the CUDA provider ''' + build_dir = Path(build_dir).resolve() + build_dir.mkdir(parents=True, exist_ok=True) + required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction) - _process_provider_registrations(ort_root, use_cuda, required_ops, op_type_impl_filter) + # delete any existing generated files first + op_reduction_root = _get_op_reduction_file_path(ORT_ROOT, build_dir) + if op_reduction_root.is_dir(): + log.info(f"Deleting existing op reduction file root directory: {op_reduction_root}") + shutil.rmtree(op_reduction_root) - if enable_type_reduction: - type_control_cpp_code = op_type_impl_filter.get_cpp_entries() if op_type_impl_filter is not None else [] - _insert_type_control_cpp_code(ort_root, type_control_cpp_code) + _generate_provider_registrations(ORT_ROOT, build_dir, use_cuda, required_ops, op_type_impl_filter) + + type_control_cpp_code = op_type_impl_filter.get_cpp_entries() if op_type_impl_filter is not None else [] + _generate_type_control_overrides(ORT_ROOT, build_dir, type_control_cpp_code) if __name__ == "__main__": @@ -199,6 +214,19 @@ if __name__ == "__main__": "Create with /tools/python/create_reduced_build_config.py and edit if needed. " "See /docs/ONNX_Runtime_Format_Model_Usage.md for more information.") + parser.add_argument("--cmake_build_dir", type=str, required=True, + help="Path to the build directory. " + "The op reduction files will be generated under the build directory.") + + parser.add_argument("--enable_type_reduction", action="store_true", + help="Whether per operator type reduction is enabled.") + + parser.add_argument("--use_cuda", action="store_true", + help="Whether to reduce op kernels for the CUDA provider.") + args = parser.parse_args() - config_path = os.path.abspath(args.config_path) - reduce_ops(config_path, enable_type_reduction=True, use_cuda=True) + + reduce_ops(config_path=args.config_path, + build_dir=args.cmake_build_dir, + enable_type_reduction=args.enable_type_reduction, + use_cuda=args.use_cuda)