diff --git a/.gitignore b/.gitignore index 418e30eed1..be2375d256 100644 --- a/.gitignore +++ b/.gitignore @@ -60,11 +60,3 @@ onnxruntime/python/version_info.py .envrc .psenvrc *.csproj.user -# exclude generated reduced kernel registration and type control -onnxruntime/contrib_ops/cpu/cpu_contrib_kernels_reduced_ops.cc -onnxruntime/core/providers/cpu/cpu_execution_provider_reduced_ops.cc -orttraining/orttraining/training_ops/cpu/cpu_training_kernels_reduced_ops.cc -onnxruntime/contrib_ops/cuda/cuda_contrib_kernels_reduced_ops.cc -onnxruntime/core/providers/cuda/cuda_execution_provider_reduced_ops.cc -orttraining/orttraining/training_ops/cuda/cuda_training_kernels_reduced_ops.cc -onnxruntime/core/providers/op_kernel_type_control_overrides_reduced_types.inc diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index adfaa4c11f..3c471ed49e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -118,7 +118,6 @@ cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handlin option(onnxruntime_EXTENDED_MINIMAL_BUILD "onnxruntime_MINIMAL_BUILD with support for execution providers that compile kernels." OFF) option(onnxruntime_MINIMAL_BUILD_CUSTOM_OPS "Add custom operator kernels support to a minimal build." OFF) option(onnxruntime_REDUCED_OPS_BUILD "Reduced set of kernels are registered in build via modification of the kernel registration source files." OFF) -option(onnxruntime_REDUCED_OP_TYPE_SUPPORT "Limit the types individual operators support where possible to further reduce the build size." OFF) option(onnxruntime_DISABLE_EXTERNAL_INITIALIZERS "Don't allow models to load external data" OFF) cmake_dependent_option(onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION "Enable runtime graph optimization of ORT format models. Warning: Not yet ready for general use." 
@@ -390,9 +389,6 @@ endif() if (onnxruntime_REDUCED_OPS_BUILD) add_compile_definitions(REDUCED_OPS_BUILD) - if (onnxruntime_REDUCED_OP_TYPE_SUPPORT) - add_compile_definitions(REDUCED_OP_TYPE_SUPPORT) - endif() endif() if (onnxruntime_DISABLE_EXTERNAL_INITIALIZERS) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 90240c5f68..8fba42e3e8 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -1,6 +1,64 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +# Reduced ops build helpers + +# In a reduced ops build, the reduction is performed by updating source files. +# Rather than modifying the source files directly, updated versions will be +# saved to another location in the build directory: ${op_reduction_root}. +set(op_reduction_root "${CMAKE_BINARY_DIR}/op_reduction.generated") + +# This helper function replaces the relevant original source files with their +# updated, reduced ops versions in `all_srcs`. 
+function(substitute_op_reduction_srcs all_srcs)
+  # Swaps original sources for their op reduction counterparts.
+  #
+  # Arguments:
+  #   all_srcs - NAME of a list variable in the caller's scope holding source
+  #              file paths. It is updated in place (via PARENT_SCOPE).
+  #
+  # Every entry of `original_srcs` below that occurs in the list is replaced by
+  # the file at the same path relative to ${REPO_ROOT}, rooted at
+  # ${op_reduction_root} instead. Entries that do not occur are left alone.
+  #
+  # Example:
+  #   substitute_op_reduction_srcs(onnxruntime_providers_src)
+
+  # files that are potentially updated in a reduced ops build
+  set(original_srcs
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/cpu_contrib_kernels.cc"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/cuda_contrib_kernels.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cpu/cpu_execution_provider.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_execution_provider.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/cpu_training_kernels.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/cuda_training_kernels.cc"
+    )
+
+  # replacement files that were actually substituted, for IDE source grouping
+  set(replacement_srcs)
+
+  foreach(original_src ${original_srcs})
+    # The search runs over the list flattened to a single string; -1 means the
+    # file is not part of this source list, so there is nothing to substitute.
+    string(FIND "${${all_srcs}}" "${original_src}" idx)
+    if(idx EQUAL "-1")
+      continue()
+    endif()
+
+    file(RELATIVE_PATH src_relative_path "${REPO_ROOT}" "${original_src}")
+    set(replacement_src "${op_reduction_root}/${src_relative_path}")
+
+    # The replacement files are generated outside of CMake. Report a missing
+    # file here, where the cause is known, rather than letting it surface
+    # later as a generic missing-source error on the target.
+    if(NOT EXISTS "${replacement_src}")
+      message(FATAL_ERROR
+        "Reduced op source file '${replacement_src}' does not exist. "
+        "Generate the op reduction files (e.g., with tools/ci_build/reduce_op_kernels.py "
+        "--cmake_build_dir <build directory>) before configuring a reduced ops build.")
+    endif()
+
+    # STATUS keeps this informational line off stderr
+    message(STATUS "File '${original_src}' substituted with reduced op version '${replacement_src}'.")
+
+    string(REPLACE "${original_src}" "${replacement_src}" ${all_srcs} "${${all_srcs}}")
+
+    list(APPEND replacement_srcs "${replacement_src}")
+  endforeach()
+
+  if(replacement_srcs)
+    source_group(TREE "${op_reduction_root}" PREFIX "op_reduction.generated" FILES ${replacement_srcs})
+  endif()
+
+  # publish the updated list to the caller
+  set(${all_srcs} "${${all_srcs}}" PARENT_SCOPE)
+endfunction()
+
+# This helper function adds reduced ops build-specific include directories to
+# `target`.
+function(add_op_reduction_include_dirs target)
+  # Puts the op reduction include roots at the front of `target`'s private
+  # include directories.
+  #
+  # Arguments:
+  #   target - name of an existing target.
+  #
+  # Example:
+  #   add_op_reduction_include_dirs(onnxruntime_providers)
+  if(onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS)
+    # training builds additionally expose the orttraining subtree
+    set(generated_include_roots
+      "${op_reduction_root}/onnxruntime"
+      "${op_reduction_root}/orttraining"
+    )
+  else()
+    set(generated_include_roots "${op_reduction_root}/onnxruntime")
+  endif()
+
+  # BEFORE prepends the directories so they are searched first, which gives
+  # the op reduction file paths precedence over the original ones
+  target_include_directories(${target} BEFORE PRIVATE ${generated_include_roots})
+endfunction()
+ + file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h" "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc" @@ -45,16 +103,10 @@ file(GLOB_RECURSE onnxruntime_rocm_generated_contrib_ops_cu_srcs CONFIGURE_DEPEN "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime/contrib_ops/rocm/*.cuh" ) - file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/*.h" "${ONNXRUNTIME_ROOT}/core/providers/*.cc" - # If we are building with reduced number of kernel registration and types, - # "core/providers/op_kernel_type_control_overrides_reduced_types.inc" - # will be generated with type specifications code.
- # For simplicity, we inlcude both .inc files, - # see onnxruntime/core/providers/op_kernel_type_control.h - "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides*.inc" + "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc" ) if(onnxruntime_USE_NUPHAR) @@ -176,7 +228,13 @@ if (onnxruntime_ENABLE_TRAINING) list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs}) endif() +if (onnxruntime_REDUCED_OPS_BUILD) + substitute_op_reduction_srcs(onnxruntime_providers_src) +endif() onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src}) +if (onnxruntime_REDUCED_OPS_BUILD) + add_op_reduction_include_dirs(onnxruntime_providers) +endif() if (MSVC) target_compile_options(onnxruntime_providers PRIVATE "/bigobj") @@ -323,7 +381,13 @@ if (onnxruntime_USE_CUDA) list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs}) endif() + if (onnxruntime_REDUCED_OPS_BUILD) + substitute_op_reduction_srcs(onnxruntime_providers_cuda_src) + endif() onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src}) + if (onnxruntime_REDUCED_OPS_BUILD) + add_op_reduction_include_dirs(onnxruntime_providers_cuda) + endif() #target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler \"/analyze:stacksize 131072\">") if (HAS_GUARD_CF) diff --git a/docs/Reduced_Operator_Kernel_build.md b/docs/Reduced_Operator_Kernel_build.md index 02e761e5c6..e25f7d0478 100644 --- a/docs/Reduced_Operator_Kernel_build.md +++ b/docs/Reduced_Operator_Kernel_build.md @@ -4,17 +4,23 @@ In order to reduce the compiled binary size of ONNX Runtime (ORT), the operator A configuration file must be created with details of the kernels that are required. -Following that, ORT must be manually built, providing the configuration file in the `--include_ops_by_config` parameter. 
The build process will update the ORT kernel registration source files to exclude the unused kernels. +Following that, ORT must be manually built, providing the configuration file in the [build.py](../tools/ci_build/build.py) `--include_ops_by_config` argument. See the [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#build-instructions) for more details on building ORT. -When building ORT with a reduced set of kernel registrations, `--skip_tests` **MUST** be specified as the kernel reduction will render many of the unit tests invalid. +The build process will generate updated ORT kernel registration and type reduction source files to exclude unused kernel implementations. +The generated files will be under the build directory and the original source files that they are based on are not directly modified. +When building, the generated files will be used instead of the original files. -NOTE: The operator exclusion logic when building with an operator reduction configuration file will only disable kernel registrations each time it runs. It will NOT re-enable previously disabled kernels. If you wish to change the list of kernels included, it is best to revert the repository to a clean state (e.g. via `git reset --hard`) before building ORT again. +The operator exclusion logic only runs during the build file generation (or "update") phase of the build process, i.e., when invoking build.py with no build phase arguments or explicitly with `--update`. + +Note: It is also possible to run the operator exclusion logic independently with [reduce_op_kernels.py](../tools/ci_build/reduce_op_kernels.py). This may be useful when building ORT without using build.py. +As the generated files will go into a build directory, the build directory must be provided with the reduce_op_kernels.py `--cmake_build_dir` argument. 
+Note that this argument is slightly different from the build.py `--build_dir` argument - build.py will append an additional directory for the build configuration to its `--build_dir` value to get the equivalent of `--cmake_build_dir`. ## Creating a configuration file with the required kernels -The script in `/tools/python/create_reduced_build_config.py` should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models. +The [create_reduced_build_config.py](../tools/python/create_reduced_build_config.py) script should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models. ``` create_reduced_build_config.py --help @@ -35,7 +41,7 @@ optional arguments: ### Type reduction -If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if `--enable_type_reduction` is specified. This can be used to further reduce the build size if `--enable_reduced_operator_type_support` is specified when building ORT. +If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if the `--enable_type_reduction` argument is specified. This can be used to further reduce the build size if the build.py `--enable_reduced_operator_type_support` argument is specified when building ORT. ONNX format models are not guaranteed to include the required per-node type information, so cannot be used with this option. diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index 1f0ee6d17e..e9d22000b0 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "contrib_ops/cpu/cpu_contrib_kernels.h" #include "core/graph/constants.h" #include "core/mlas/inc/mlas.h" @@ -278,5 +268,3 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) { } // namespace contrib } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc index e2be2b8ab4..19b226858a 100644 --- a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. 
If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/cuda_common.h" @@ -214,5 +204,3 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) { } // namespace cuda } // namespace contrib } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 1732170145..f015b53941 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. 
If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/cpu/cpu_execution_provider.h" #include "core/framework/op_kernel.h" #include "core/framework/kernel_registry.h" @@ -2074,5 +2064,3 @@ std::unique_ptr CPUExecutionProvider::GetDataTransfer() const { return std::make_unique(); } } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index c676ff43a2..cd5e7ad6b9 100755 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. 
If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/cuda_execution_provider.h" #include "core/providers/cuda/cuda_common.h" @@ -2363,5 +2353,3 @@ void CUDAExecutionProvider::RegisterAllocator(std::shared_ptr } } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/onnxruntime/core/providers/op_kernel_type_control.h b/onnxruntime/core/providers/op_kernel_type_control.h index 5d491d9dc9..446666eaa7 100644 --- a/onnxruntime/core/providers/op_kernel_type_control.h +++ b/onnxruntime/core/providers/op_kernel_type_control.h @@ -469,17 +469,4 @@ struct EnabledTypes { #include "core/framework/data_types.h" // for types that might be used in type specifications // all allowed type specifications should be contained in the following file - -// If we are building with reduced number of kernel registration and types -// will be copied to -// , -// where the type specifications code will be inserted, -// This will prevent, -// 1. Accidental commit of the modified -// 2. If the required ops and types config has changed, user has to revert the changes to -// -#ifndef REDUCED_OP_TYPE_SUPPORT #include "core/providers/op_kernel_type_control_overrides.inc" -#else -#include "core/providers/op_kernel_type_control_overrides_reduced_types.inc" -#endif diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc index 0aacbf14ce..a0acb9518e 100644 --- a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc +++ b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#if !defined(REDUCED_OPS_BUILD) // may not work with excluded op kernel implementations + #include "core/common/logging/logging.h" #include "core/framework/compute_capability.h" #include "core/framework/utils.h" @@ -342,3 +344,5 @@ TEST(InternalTestingEP, DISABLED_TestNnapiPartitioningMlPerfModels) { } // namespace test } // namespace onnxruntime + +#endif // !defined(REDUCED_OPS_BUILD) diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc index f456809f0b..71032d560c 100644 --- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc +++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#if !defined(REDUCED_OPS_BUILD) // may not work with excluded op kernel implementations + #include "core/common/logging/logging.h" #include "core/framework/utils.h" #include "core/session/inference_session.h" @@ -354,3 +356,5 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) { } } // namespace test } // namespace onnxruntime + +#endif // !defined(REDUCED_OPS_BUILD) diff --git a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc index 8754305d95..800fd48221 100644 --- a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc +++ b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. 
Accidental commit of the reduced kernel registration files -// 2. If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "orttraining/training_ops/cpu/cpu_training_kernels.h" #include "core/graph/constants.h" @@ -244,5 +234,3 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) { } // namespace contrib } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc index 7d84cb3f30..8d0882f2ee 100644 --- a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc +++ b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// If we are building with reduced number of kernel registration, -// this file will be copied to _reduced_ops.cc, -// where the unused kernel registration will be commented out -// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD" -// This will prevent, -// 1. Accidental commit of the reduced kernel registration files -// 2. 
If the required ops config has changed, user has to revert the changes to -// the kernel registration files -#ifndef REDUCED_OPS_BUILD - #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/cuda_fwd.h" #include "core/providers/cuda/cuda_pch.h" @@ -468,5 +458,3 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) { } // namespace cuda } // namespace onnxruntime - -#endif // #ifndef REDUCED_OPS_BUILD diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index f9c900295f..edb2433d7e 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -791,9 +791,6 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home args.minimal_build or args.use_extensions)) else "OFF"), "-Donnxruntime_REDUCED_OPS_BUILD=" + ("ON" if is_reduced_ops_build(args) else "OFF"), - "-Donnxruntime_REDUCED_OP_TYPE_SUPPORT=" + ( - "ON" if is_reduced_ops_build(args) and args.enable_reduced_operator_type_support - else "OFF"), "-Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=" + ("ON" if args.enable_language_interop_ops else "OFF"), "-Donnxruntime_USE_DML=" + ("ON" if args.use_dml else "OFF"), "-Donnxruntime_USE_WINML=" + ("ON" if args.use_winml else "OFF"), @@ -2035,13 +2032,6 @@ def main(): if args.skip_tests: args.test = False - if is_reduced_ops_build(args) and args.update: - from reduce_op_kernels import reduce_ops - reduce_ops( - config_path=args.include_ops_by_config, - enable_type_reduction=args.enable_reduced_operator_type_support, - use_cuda=args.use_cuda) - if args.use_tensorrt: args.use_cuda = True @@ -2127,10 +2117,21 @@ def main(): rocm_home = setup_rocm_build(args, configs) if args.update or args.build: - os.makedirs(build_dir, exist_ok=True) + for config in configs: + os.makedirs(get_config_build_dir(build_dir, config), exist_ok=True) log.info("Build started") + if args.update: + if is_reduced_ops_build(args): + from reduce_op_kernels import reduce_ops + for config in configs: + reduce_ops( + 
config_path=args.include_ops_by_config, + build_dir=get_config_build_dir(build_dir, config), + enable_type_reduction=args.enable_reduced_operator_type_support, + use_cuda=args.use_cuda) + cmake_extra_args = [] path_to_protoc_exe = args.path_to_protoc_exe if not args.skip_submodule_sync: diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml index f53e688df3..a273c590b6 100644 --- a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml @@ -110,10 +110,6 @@ jobs: --enable-custom-ops workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 3b. Build minimal onnxruntime [exceptions ENABLED, type reduction ENABLED] and run tests inputs: @@ -132,10 +128,6 @@ jobs: --enable-type-reduction workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 4. Build minimal onnxruntime [exceptions ENABLED, type reduction ENABLED (globally allowed types)] and run tests inputs: @@ -159,10 +151,6 @@ jobs: --skip-model-tests workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 5. Build onnxruntime minimal baseline for Android arm64-v8a and report binary size inputs: @@ -207,10 +195,6 @@ jobs: --build_id=$(Build.BuildId) workingDirectory: '$(Build.BinariesDirectory)' - - script: git checkout -- . 
- displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 6a. Build full onnxruntime with runtime optimizations enabled inputs: @@ -252,10 +236,6 @@ jobs: --cmake_extra_defines onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION=ON workingDirectory: $(Build.SourcesDirectory) - - script: git checkout -- . - displayName: Discard local changes to Git repository files - workingDirectory: $(Build.SourcesDirectory) - - task: CmdLine@2 displayName: 7a. Regular build with python and all optional features disabled. inputs: diff --git a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh index 3664d8c114..0f91f1ae0d 100755 --- a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh +++ b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh @@ -17,7 +17,7 @@ python3 /onnxruntime_src/tools/ci_build/op_registration_validator.py # Run a full build of ORT. # We need the ORT python package to generate the ORT format files and the required ops config files. # We do not run tests in this command since those are covered by other CIs. -# We run two full builds here. One for enabling nnapi and the other for enabling coreml. +# Both the NNAPI and CoreML EPs are enabled. 
python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir ${BUILD_DIR} --cmake_generator Ninja \ --config Debug \ @@ -27,7 +27,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \ --skip_tests \ --enable_training_ops \ --enable_pybind --cmake_extra_defines PYTHON_INCLUDE_DIR=/opt/python/cp37-cp37m/include/python3.7m PYTHON_LIBRARY=/usr/lib64/librt.so \ - --use_nnapi\ + --use_nnapi \ --use_coreml # Run kernel def hash verification test diff --git a/tools/ci_build/reduce_op_kernels.py b/tools/ci_build/reduce_op_kernels.py index ae98907a46..dc5d578c21 100755 --- a/tools/ci_build/reduce_op_kernels.py +++ b/tools/ci_build/reduce_op_kernels.py @@ -4,7 +4,6 @@ import argparse import op_registration_utils -import os import shutil import sys import typing @@ -12,14 +11,13 @@ import typing from logger import get_logger from pathlib import Path -REDUCED_KERNEL_DEF_SUFFIX = '_reduced_ops' -REDUCED_TYPE_CONTROL_SUFFIX = '_reduced_types' +# directory containing the reduced op files, relative to the build directory +OP_REDUCTION_DIR = "op_reduction.generated" # add the path to /tools/python so we can import the config parsing and type reduction processing -script_path = os.path.dirname(os.path.realpath(__file__)) -ort_root = os.path.abspath(os.path.join(script_path, '..', '..', )) -ort_tools_py_path = os.path.abspath(os.path.join(ort_root, 'tools', 'python')) -sys.path.append(ort_tools_py_path) +SCRIPT_DIR = Path(__file__).parent.resolve() +ORT_ROOT = SCRIPT_DIR.parents[1] +sys.path.append(str(ORT_ROOT / 'tools' / 'python')) from util import parse_config # noqa from util.ort_format_model.operator_type_usage_processors import OpTypeImplFilterInterface # noqa @@ -96,24 +94,37 @@ class _ExcludingRegistrationProcessor(op_registration_utils.RegistrationProcesso return True -def _process_provider_registrations( - ort_root: str, use_cuda: bool, +def _get_op_reduction_file_path(ort_root: Path, build_dir: Path, original_path: typing.Optional[Path] = None): + ''' + Return the op 
reduction file path corresponding to `original_path` or the op reduction file root if unspecified. + Op reduction files are in a subdirectory of `build_dir` but otherwise share the same components of `original_path` + relative to `ort_root`. + ''' + op_reduction_root = Path(build_dir, OP_REDUCTION_DIR) + return (op_reduction_root / original_path.relative_to(ort_root)) if original_path is not None \ + else op_reduction_root + + +def _generate_provider_registrations( + ort_root: Path, build_dir: Path, use_cuda: bool, required_ops: typing.Optional[dict], op_type_impl_filter: typing.Optional[OpTypeImplFilterInterface]): - '''Rewrite provider registration files.''' - kernel_registration_files = op_registration_utils.get_kernel_registration_files(ort_root, use_cuda) + '''Generate provider registration files.''' + kernel_registration_files = [Path(f) for f in + op_registration_utils.get_kernel_registration_files(str(ort_root), use_cuda)] for kernel_registration_file in kernel_registration_files: - if not os.path.isfile(kernel_registration_file): - raise ValueError('Kernel registration file {} does not exist'.format(kernel_registration_file)) + if not kernel_registration_file.is_file(): + raise ValueError(f'Kernel registration file does not exist: {kernel_registration_file}') log.info("Processing {}".format(kernel_registration_file)) - old_path = Path(kernel_registration_file) - reduced_path = Path(old_path.parent, f'{old_path.stem}{REDUCED_KERNEL_DEF_SUFFIX}{old_path.suffix}') + reduced_path = _get_op_reduction_file_path(ort_root, build_dir, kernel_registration_file) - # read from original and create the reduced kernel def file (*_reduced_ops.cc), - # with commented out lines for any kernels that are not required + reduced_path.parent.mkdir(parents=True, exist_ok=True) + + # read from original and create the reduced kernel def file with commented out lines for any kernels that are + # not required with open(reduced_path, 'w') as file_to_write: processor = 
_ExcludingRegistrationProcessor(required_ops, op_type_impl_filter, file_to_write) @@ -123,29 +134,24 @@ def _process_provider_registrations( # error should have already been logged so just exit sys.exit(-1) - # enable the contents in the *_reduced_ops.cc - with open(reduced_path, 'r+') as file: - file_content = file.read().replace(r'#ifndef REDUCED_OPS_BUILD', r'#ifdef REDUCED_OPS_BUILD') - with open(reduced_path, "w") as file_to_write: - file_to_write.write(file_content) - - -def _insert_type_control_cpp_code(ort_root: str, cpp_lines: typing.Sequence[str]): +def _generate_type_control_overrides(ort_root: Path, build_dir: Path, cpp_lines: typing.Sequence[str]): ''' - Insert the C++ code to specify operator type requirements. + Generate type control overrides. Insert applicable C++ code to specify operator type requirements. :param ort_root: Root of the ONNX Runtime repository + :param build_dir: Path to the build directory :param cpp_lines: The C++ code to insert ''' - src = os.path.join(ort_root, 'onnxruntime', 'core', 'providers', 'op_kernel_type_control_overrides.inc') - if not os.path.exists(src) or not os.path.isfile(src): - log.warning('Could not find {}. Skipping generation of C++ code to reduce the types supported by operators.' 
- .format(src)) - return + src = Path(ort_root, 'onnxruntime', 'core', 'providers', 'op_kernel_type_control_overrides.inc') + + if not src.is_file(): + raise ValueError(f"Op kernel type control overrides file does not exist: {src}") + + # create a copy of op_kernel_type_control_overrides.inc + target = _get_op_reduction_file_path(ort_root, build_dir, src) + + target.parent.mkdir(parents=True, exist_ok=True) - # create a copy of the op_kernel_type_control_overrides.inc even the cpp_lines is empty - src_path = Path(src) - target = Path(src_path.parent, f'{src_path.stem}{REDUCED_TYPE_CONTROL_SUFFIX}{src_path.suffix}') shutil.copyfile(src, target) if cpp_lines: @@ -173,20 +179,29 @@ def _insert_type_control_cpp_code(ort_root: str, cpp_lines: typing.Sequence[str] raise RuntimeError('Insertion point was not found in {}'.format(target)) -def reduce_ops(config_path: str, enable_type_reduction: bool = False, use_cuda: bool = True): +def reduce_ops(config_path: str, build_dir: str, enable_type_reduction: bool = False, use_cuda: bool = True): ''' Reduce op kernel implementations. :param config_path: Path to configuration file that specifies the ops to include + :param build_dir: Path to the build directory. The op reduction files will be generated under the build directory. 
:param enable_type_reduction: Whether per operator type reduction is enabled :param use_cuda: Whether to reduce op kernels for the CUDA provider ''' + build_dir = Path(build_dir).resolve() + build_dir.mkdir(parents=True, exist_ok=True) + required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction) - _process_provider_registrations(ort_root, use_cuda, required_ops, op_type_impl_filter) + # delete any existing generated files first + op_reduction_root = _get_op_reduction_file_path(ORT_ROOT, build_dir) + if op_reduction_root.is_dir(): + log.info(f"Deleting existing op reduction file root directory: {op_reduction_root}") + shutil.rmtree(op_reduction_root) - if enable_type_reduction: - type_control_cpp_code = op_type_impl_filter.get_cpp_entries() if op_type_impl_filter is not None else [] - _insert_type_control_cpp_code(ort_root, type_control_cpp_code) + _generate_provider_registrations(ORT_ROOT, build_dir, use_cuda, required_ops, op_type_impl_filter) + + type_control_cpp_code = op_type_impl_filter.get_cpp_entries() if op_type_impl_filter is not None else [] + _generate_type_control_overrides(ORT_ROOT, build_dir, type_control_cpp_code) if __name__ == "__main__": @@ -199,6 +214,19 @@ if __name__ == "__main__": "Create with /tools/python/create_reduced_build_config.py and edit if needed. " "See /docs/ONNX_Runtime_Format_Model_Usage.md for more information.") + parser.add_argument("--cmake_build_dir", type=str, required=True, + help="Path to the build directory. 
" + "The op reduction files will be generated under the build directory.") + + parser.add_argument("--enable_type_reduction", action="store_true", + help="Whether per operator type reduction is enabled.") + + parser.add_argument("--use_cuda", action="store_true", + help="Whether to reduce op kernels for the CUDA provider.") + args = parser.parse_args() - config_path = os.path.abspath(args.config_path) - reduce_ops(config_path, enable_type_reduction=True, use_cuda=True) + + reduce_ops(config_path=args.config_path, + build_dir=args.cmake_build_dir, + enable_type_reduction=args.enable_type_reduction, + use_cuda=args.use_cuda)