diff --git a/.gitignore b/.gitignore
index 418e30eed1..be2375d256 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,11 +60,3 @@ onnxruntime/python/version_info.py
 .envrc
 .psenvrc
 *.csproj.user
-# exclude generated reduced kernel registration and type control
-onnxruntime/contrib_ops/cpu/cpu_contrib_kernels_reduced_ops.cc
-onnxruntime/core/providers/cpu/cpu_execution_provider_reduced_ops.cc
-orttraining/orttraining/training_ops/cpu/cpu_training_kernels_reduced_ops.cc
-onnxruntime/contrib_ops/cuda/cuda_contrib_kernels_reduced_ops.cc
-onnxruntime/core/providers/cuda/cuda_execution_provider_reduced_ops.cc
-orttraining/orttraining/training_ops/cuda/cuda_training_kernels_reduced_ops.cc
-onnxruntime/core/providers/op_kernel_type_control_overrides_reduced_types.inc
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index adfaa4c11f..3c471ed49e 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -118,7 +118,6 @@ cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handlin
 option(onnxruntime_EXTENDED_MINIMAL_BUILD "onnxruntime_MINIMAL_BUILD with support for execution providers that compile kernels." OFF)
 option(onnxruntime_MINIMAL_BUILD_CUSTOM_OPS "Add custom operator kernels support to a minimal build." OFF)
 option(onnxruntime_REDUCED_OPS_BUILD "Reduced set of kernels are registered in build via modification of the kernel registration source files." OFF)
-option(onnxruntime_REDUCED_OP_TYPE_SUPPORT "Limit the types individual operators support where possible to further reduce the build size." OFF)
 option(onnxruntime_DISABLE_EXTERNAL_INITIALIZERS "Don't allow models to load external data" OFF)
 cmake_dependent_option(onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION
                        "Enable runtime graph optimization of ORT format models. Warning: Not yet ready for general use."
@@ -390,9 +389,6 @@ endif()
 
 if (onnxruntime_REDUCED_OPS_BUILD)
   add_compile_definitions(REDUCED_OPS_BUILD)
-  if (onnxruntime_REDUCED_OP_TYPE_SUPPORT)
-    add_compile_definitions(REDUCED_OP_TYPE_SUPPORT)
-  endif()
 endif()
 
 if (onnxruntime_DISABLE_EXTERNAL_INITIALIZERS)
diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
index 90240c5f68..8fba42e3e8 100644
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@@ -1,6 +1,64 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+# Reduced ops build helpers
+
+# In a reduced ops build, the reduction is performed by updating source files.
+# Rather than modifying the source files directly, updated versions will be
+# saved to another location in the build directory: ${op_reduction_root}.
+set(op_reduction_root "${CMAKE_BINARY_DIR}/op_reduction.generated")
+
+# Replaces the relevant original source files in the list variable named by
+# `all_srcs` with their reduced ops versions under ${op_reduction_root}.
+function(substitute_op_reduction_srcs all_srcs)
+  # files that are potentially updated in a reduced ops build
+  set(original_srcs
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/cpu_contrib_kernels.cc"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/cuda_contrib_kernels.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cpu/cpu_execution_provider.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_execution_provider.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/cpu_training_kernels.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/cuda_training_kernels.cc"
+    )
+
+  set(replacement_srcs)
+
+  foreach(original_src ${original_srcs})
+    string(FIND "${${all_srcs}}" "${original_src}" idx)
+    if(idx EQUAL "-1")
+      continue()
+    endif()
+    # mirror the path relative to the repo root under the op reduction root
+    file(RELATIVE_PATH src_relative_path "${REPO_ROOT}" "${original_src}")
+    set(replacement_src "${op_reduction_root}/${src_relative_path}")
+
+    message(STATUS "File '${original_src}' substituted with reduced op version '${replacement_src}'.")
+
+    string(REPLACE "${original_src}" "${replacement_src}" ${all_srcs} "${${all_srcs}}")
+
+    list(APPEND replacement_srcs "${replacement_src}")
+  endforeach()
+  # group the substituted files by their directory layout in IDE project views
+  if(replacement_srcs)
+    source_group(TREE "${op_reduction_root}" PREFIX "op_reduction.generated" FILES ${replacement_srcs})
+  endif()
+  # hand the updated list back to the caller's variable
+  set(${all_srcs} "${${all_srcs}}" PARENT_SCOPE)
+endfunction()
+
+# Prepends the reduced ops build-specific include directories to `target`'s
+# private include directories.
+function(add_op_reduction_include_dirs target)
+  set(include_roots "${op_reduction_root}/onnxruntime")
+  if (onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS)
+    list(APPEND include_roots "${op_reduction_root}/orttraining")
+  endif()
+  # BEFORE places these ahead of existing entries so op reduction file paths are searched first
+  target_include_directories(${target} BEFORE PRIVATE ${include_roots})
+endfunction()
+
+
 file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h"
   "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc"
@@ -45,16 +103,10 @@ file(GLOB_RECURSE onnxruntime_rocm_generated_contrib_ops_cu_srcs CONFIGURE_DEPEN
   "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime/contrib_ops/rocm/*.cuh"
 )
 
-
 file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/core/providers/*.h"
   "${ONNXRUNTIME_ROOT}/core/providers/*.cc"
-  # If we are building with reduced number of kernel registration and types,
-  # "core/providers/op_kernel_type_control_overrides_reduced_types.inc"
-  # will be generated with type specifications code.
-  # For simplicity, we inlcude both .inc files,
-  # see onnxruntime/core/providers/op_kernel_type_control.h
-  "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides*.inc"
+  "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
 )
 
 if(onnxruntime_USE_NUPHAR)
@@ -176,7 +228,13 @@ if (onnxruntime_ENABLE_TRAINING)
   list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
 endif()
 
+if (onnxruntime_REDUCED_OPS_BUILD)
+  substitute_op_reduction_srcs(onnxruntime_providers_src)
+endif()
 onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src})
+if (onnxruntime_REDUCED_OPS_BUILD)
+  add_op_reduction_include_dirs(onnxruntime_providers)
+endif()
 
 if (MSVC)
    target_compile_options(onnxruntime_providers PRIVATE "/bigobj")
@@ -323,7 +381,13 @@ if (onnxruntime_USE_CUDA)
     list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
   endif()
 
+  if (onnxruntime_REDUCED_OPS_BUILD)
+    substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
+  endif()
   onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src})
+  if (onnxruntime_REDUCED_OPS_BUILD)
+    add_op_reduction_include_dirs(onnxruntime_providers_cuda)
+  endif()
 
   #target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler \"/analyze:stacksize 131072\">")
   if (HAS_GUARD_CF)
diff --git a/docs/Reduced_Operator_Kernel_build.md b/docs/Reduced_Operator_Kernel_build.md
index 02e761e5c6..e25f7d0478 100644
--- a/docs/Reduced_Operator_Kernel_build.md
+++ b/docs/Reduced_Operator_Kernel_build.md
@@ -4,17 +4,23 @@ In order to reduce the compiled binary size of ONNX Runtime (ORT), the operator
 
 A configuration file must be created with details of the kernels that are required.
 
-Following that, ORT must be manually built, providing the configuration file in the `--include_ops_by_config` parameter. The build process will update the ORT kernel registration source files to exclude the unused kernels.
+Following that, ORT must be manually built, providing the configuration file in the [build.py](../tools/ci_build/build.py) `--include_ops_by_config` argument.
 
 See the [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#build-instructions) for more details on building ORT.
 
-When building ORT with a reduced set of kernel registrations, `--skip_tests` **MUST** be specified as the kernel reduction will render many of the unit tests invalid.
+The build process will generate updated ORT kernel registration and type reduction source files to exclude unused kernel implementations.
+The generated files will be under the build directory and the original source files that they are based on are not directly modified.
+When building, the generated files will be used instead of the original files.
 
-NOTE: The operator exclusion logic when building with an operator reduction configuration file will only disable kernel registrations each time it runs. It will NOT re-enable previously disabled kernels. If you wish to change the list of kernels included, it is best to revert the repository to a clean state (e.g. via `git reset --hard`) before building ORT again.
+The operator exclusion logic only runs during the build file generation (or "update") phase of the build process, i.e., when invoking build.py with no build phase arguments or explicitly with `--update`.
+
+Note: It is also possible to run the operator exclusion logic independently with [reduce_op_kernels.py](../tools/ci_build/reduce_op_kernels.py). This may be useful when building ORT without using build.py.
+As the generated files will go into a build directory, the build directory must be provided with the reduce_op_kernels.py `--cmake_build_dir` argument.
+Note that this argument is slightly different from the build.py `--build_dir` argument - build.py will append an additional directory for the build configuration to its `--build_dir` value to get the equivalent of `--cmake_build_dir`.
 
 ## Creating a configuration file with the required kernels
 
-The script in `<ORT Root>/tools/python/create_reduced_build_config.py` should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models.
+The [create_reduced_build_config.py](../tools/python/create_reduced_build_config.py) script should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models.
 
 ```
 create_reduced_build_config.py --help
@@ -35,7 +41,7 @@ optional arguments:
 
 ### Type reduction
 
-If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if `--enable_type_reduction` is specified. This can be used to further reduce the build size if `--enable_reduced_operator_type_support` is specified when building ORT.
+If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if the `--enable_type_reduction` argument is specified. This can be used to further reduce the build size if the build.py `--enable_reduced_operator_type_support` argument is specified when building ORT.
 
 ONNX format models are not guaranteed to include the required per-node type information, so cannot be used with this option.
 
diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
index 1f0ee6d17e..e9d22000b0 100644
--- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "contrib_ops/cpu/cpu_contrib_kernels.h"
 #include "core/graph/constants.h"
 #include "core/mlas/inc/mlas.h"
@@ -278,5 +268,3 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
 
 }  // namespace contrib
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
index e2be2b8ab4..19b226858a 100644
--- a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_common.h"
 
@@ -214,5 +204,3 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index 1732170145..f015b53941 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/cpu/cpu_execution_provider.h"
 #include "core/framework/op_kernel.h"
 #include "core/framework/kernel_registry.h"
@@ -2074,5 +2064,3 @@ std::unique_ptr<IDataTransfer> CPUExecutionProvider::GetDataTransfer() const {
   return std::make_unique<CPUDataTransfer>();
 }
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
index c676ff43a2..cd5e7ad6b9 100755
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_execution_provider.h"
 #include "core/providers/cuda/cuda_common.h"
@@ -2363,5 +2353,3 @@ void CUDAExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManager>
 }
 
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/core/providers/op_kernel_type_control.h b/onnxruntime/core/providers/op_kernel_type_control.h
index 5d491d9dc9..446666eaa7 100644
--- a/onnxruntime/core/providers/op_kernel_type_control.h
+++ b/onnxruntime/core/providers/op_kernel_type_control.h
@@ -469,17 +469,4 @@ struct EnabledTypes {
 #include "core/framework/data_types.h"  // for types that might be used in type specifications
 
 // all allowed type specifications should be contained in the following file
-
-// If we are building with reduced number of kernel registration and types
-// <op_kernel_type_control_overrides.inc> will be copied to
-// <core/providers/op_kernel_type_control_overrides_reduced_types.inc>,
-// where the type specifications code will be inserted,
-// This will prevent,
-// 1. Accidental commit of the modified <op_kernel_type_control_overrides.inc>
-// 2. If the required ops and types config has changed, user has to revert the changes to
-//    <op_kernel_type_control_overrides.inc>
-#ifndef REDUCED_OP_TYPE_SUPPORT
 #include "core/providers/op_kernel_type_control_overrides.inc"
-#else
-#include "core/providers/op_kernel_type_control_overrides_reduced_types.inc"
-#endif
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
index 0aacbf14ce..a0acb9518e 100644
--- a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
+++ b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#if !defined(REDUCED_OPS_BUILD)  // may not work with excluded op kernel implementations
+
 #include "core/common/logging/logging.h"
 #include "core/framework/compute_capability.h"
 #include "core/framework/utils.h"
@@ -342,3 +344,5 @@ TEST(InternalTestingEP, DISABLED_TestNnapiPartitioningMlPerfModels) {
 
 }  // namespace test
 }  // namespace onnxruntime
+
+#endif  // !defined(REDUCED_OPS_BUILD)
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
index f456809f0b..71032d560c 100644
--- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
+++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#if !defined(REDUCED_OPS_BUILD)  // may not work with excluded op kernel implementations
+
 #include "core/common/logging/logging.h"
 #include "core/framework/utils.h"
 #include "core/session/inference_session.h"
@@ -354,3 +356,5 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) {
 }
 }  // namespace test
 }  // namespace onnxruntime
+
+#endif  // !defined(REDUCED_OPS_BUILD)
diff --git a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc
index 8754305d95..800fd48221 100644
--- a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc
+++ b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "orttraining/training_ops/cpu/cpu_training_kernels.h"
 #include "core/graph/constants.h"
 
@@ -244,5 +234,3 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) {
 
 }  // namespace contrib
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
index 7d84cb3f30..8d0882f2ee 100644
--- a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
+++ b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_fwd.h"
 #include "core/providers/cuda/cuda_pch.h"
@@ -468,5 +458,3 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {
 
 }  // namespace cuda
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index f9c900295f..edb2433d7e 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -791,9 +791,6 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
                                                      args.minimal_build or args.use_extensions))
                                                      else "OFF"),
         "-Donnxruntime_REDUCED_OPS_BUILD=" + ("ON" if is_reduced_ops_build(args) else "OFF"),
-        "-Donnxruntime_REDUCED_OP_TYPE_SUPPORT=" + (
-            "ON" if is_reduced_ops_build(args) and args.enable_reduced_operator_type_support
-            else "OFF"),
         "-Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=" + ("ON" if args.enable_language_interop_ops else "OFF"),
         "-Donnxruntime_USE_DML=" + ("ON" if args.use_dml else "OFF"),
         "-Donnxruntime_USE_WINML=" + ("ON" if args.use_winml else "OFF"),
@@ -2035,13 +2032,6 @@ def main():
     if args.skip_tests:
         args.test = False
 
-    if is_reduced_ops_build(args) and args.update:
-        from reduce_op_kernels import reduce_ops
-        reduce_ops(
-            config_path=args.include_ops_by_config,
-            enable_type_reduction=args.enable_reduced_operator_type_support,
-            use_cuda=args.use_cuda)
-
     if args.use_tensorrt:
         args.use_cuda = True
 
@@ -2127,10 +2117,21 @@ def main():
     rocm_home = setup_rocm_build(args, configs)
 
     if args.update or args.build:
-        os.makedirs(build_dir, exist_ok=True)
+        for config in configs:
+            os.makedirs(get_config_build_dir(build_dir, config), exist_ok=True)
 
     log.info("Build started")
+
     if args.update:
+        if is_reduced_ops_build(args):
+            from reduce_op_kernels import reduce_ops
+            for config in configs:
+                reduce_ops(
+                    config_path=args.include_ops_by_config,
+                    build_dir=get_config_build_dir(build_dir, config),
+                    enable_type_reduction=args.enable_reduced_operator_type_support,
+                    use_cuda=args.use_cuda)
+
         cmake_extra_args = []
         path_to_protoc_exe = args.path_to_protoc_exe
         if not args.skip_submodule_sync:
diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml
index f53e688df3..a273c590b6 100644
--- a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml
@@ -110,10 +110,6 @@ jobs:
               --enable-custom-ops
       workingDirectory: $(Build.SourcesDirectory)
 
-  - script: git checkout -- .
-    displayName: Discard local changes to Git repository files
-    workingDirectory: $(Build.SourcesDirectory)
-
   - task: CmdLine@2
     displayName: 3b. Build minimal onnxruntime [exceptions ENABLED, type reduction ENABLED] and run tests
     inputs:
@@ -132,10 +128,6 @@ jobs:
               --enable-type-reduction
       workingDirectory: $(Build.SourcesDirectory)
 
-  - script: git checkout -- .
-    displayName: Discard local changes to Git repository files
-    workingDirectory: $(Build.SourcesDirectory)
-
   - task: CmdLine@2
     displayName: 4. Build minimal onnxruntime [exceptions ENABLED, type reduction ENABLED (globally allowed types)] and run tests
     inputs:
@@ -159,10 +151,6 @@ jobs:
               --skip-model-tests
       workingDirectory: $(Build.SourcesDirectory)
 
-  - script: git checkout -- .
-    displayName: Discard local changes to Git repository files
-    workingDirectory: $(Build.SourcesDirectory)
-
   - task: CmdLine@2
     displayName: 5. Build onnxruntime minimal baseline for Android arm64-v8a and report binary size
     inputs:
@@ -207,10 +195,6 @@ jobs:
           --build_id=$(Build.BuildId)
       workingDirectory: '$(Build.BinariesDirectory)'
 
-  - script: git checkout -- .
-    displayName: Discard local changes to Git repository files
-    workingDirectory: $(Build.SourcesDirectory)
-
   - task: CmdLine@2
     displayName: 6a. Build full onnxruntime with runtime optimizations enabled
     inputs:
@@ -252,10 +236,6 @@ jobs:
               --cmake_extra_defines onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION=ON
       workingDirectory: $(Build.SourcesDirectory)
 
-  - script: git checkout -- .
-    displayName: Discard local changes to Git repository files
-    workingDirectory: $(Build.SourcesDirectory)
-
   - task: CmdLine@2
     displayName: 7a. Regular build with python and all optional features disabled. 
     inputs:
diff --git a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh
index 3664d8c114..0f91f1ae0d 100755
--- a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh
+++ b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh
@@ -17,7 +17,7 @@ python3 /onnxruntime_src/tools/ci_build/op_registration_validator.py
 # Run a full build of ORT.
 # We need the ORT python package to generate the ORT format files and the required ops config files.
 # We do not run tests in this command since those are covered by other CIs.
-# We run two full builds here. One for enabling nnapi and the other for enabling coreml.
+# Both the NNAPI and CoreML EPs are enabled.
 python3 /onnxruntime_src/tools/ci_build/build.py \
     --build_dir ${BUILD_DIR} --cmake_generator Ninja \
     --config Debug \
@@ -27,7 +27,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \
     --skip_tests \
     --enable_training_ops \
     --enable_pybind --cmake_extra_defines PYTHON_INCLUDE_DIR=/opt/python/cp37-cp37m/include/python3.7m PYTHON_LIBRARY=/usr/lib64/librt.so \
-    --use_nnapi\
+    --use_nnapi \
     --use_coreml
 
 # Run kernel def hash verification test
diff --git a/tools/ci_build/reduce_op_kernels.py b/tools/ci_build/reduce_op_kernels.py
index ae98907a46..dc5d578c21 100755
--- a/tools/ci_build/reduce_op_kernels.py
+++ b/tools/ci_build/reduce_op_kernels.py
@@ -4,7 +4,6 @@
 
 import argparse
 import op_registration_utils
-import os
 import shutil
 import sys
 import typing
@@ -12,14 +11,13 @@ import typing
 from logger import get_logger
 from pathlib import Path
 
-REDUCED_KERNEL_DEF_SUFFIX = '_reduced_ops'
-REDUCED_TYPE_CONTROL_SUFFIX = '_reduced_types'
+# directory containing the reduced op files, relative to the build directory
+OP_REDUCTION_DIR = "op_reduction.generated"
 
 # add the path to /tools/python so we can import the config parsing and type reduction processing
-script_path = os.path.dirname(os.path.realpath(__file__))
-ort_root = os.path.abspath(os.path.join(script_path, '..', '..', ))
-ort_tools_py_path = os.path.abspath(os.path.join(ort_root, 'tools', 'python'))
-sys.path.append(ort_tools_py_path)
+SCRIPT_DIR = Path(__file__).parent.resolve()
+ORT_ROOT = SCRIPT_DIR.parents[1]
+sys.path.append(str(ORT_ROOT / 'tools' / 'python'))
 
 from util import parse_config  # noqa
 from util.ort_format_model.operator_type_usage_processors import OpTypeImplFilterInterface  # noqa
@@ -96,24 +94,37 @@ class _ExcludingRegistrationProcessor(op_registration_utils.RegistrationProcesso
         return True
 
 
-def _process_provider_registrations(
-        ort_root: str, use_cuda: bool,
+def _get_op_reduction_file_path(ort_root: Path, build_dir: Path, original_path: typing.Optional[Path] = None):
+    '''
+    Map `original_path` (located under `ort_root`) to its op reduction counterpart under `build_dir`.
+    When `original_path` is not given, return the op reduction root directory, `build_dir`/OP_REDUCTION_DIR.
+    '''
+    reduced_root = Path(build_dir) / OP_REDUCTION_DIR
+    if original_path is None:
+        return reduced_root
+    return reduced_root / original_path.relative_to(ort_root)
+
+
+def _generate_provider_registrations(
+        ort_root: Path, build_dir: Path, use_cuda: bool,
         required_ops: typing.Optional[dict],
         op_type_impl_filter: typing.Optional[OpTypeImplFilterInterface]):
-    '''Rewrite provider registration files.'''
-    kernel_registration_files = op_registration_utils.get_kernel_registration_files(ort_root, use_cuda)
+    '''Generate provider registration files.'''
+    kernel_registration_files = [Path(f) for f in
+                                 op_registration_utils.get_kernel_registration_files(str(ort_root), use_cuda)]
 
     for kernel_registration_file in kernel_registration_files:
-        if not os.path.isfile(kernel_registration_file):
-            raise ValueError('Kernel registration file {} does not exist'.format(kernel_registration_file))
+        if not kernel_registration_file.is_file():
+            raise ValueError(f'Kernel registration file does not exist: {kernel_registration_file}')
 
         log.info("Processing {}".format(kernel_registration_file))
 
-        old_path = Path(kernel_registration_file)
-        reduced_path = Path(old_path.parent, f'{old_path.stem}{REDUCED_KERNEL_DEF_SUFFIX}{old_path.suffix}')
+        reduced_path = _get_op_reduction_file_path(ort_root, build_dir, kernel_registration_file)
 
-        # read from original and create the reduced kernel def file (*_reduced_ops.cc),
-        # with commented out lines for any kernels that are not required
+        reduced_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # read from original and create the reduced kernel def file with commented out lines for any kernels that are
+        # not required
         with open(reduced_path, 'w') as file_to_write:
             processor = _ExcludingRegistrationProcessor(required_ops, op_type_impl_filter, file_to_write)
 
@@ -123,29 +134,24 @@ def _process_provider_registrations(
                 # error should have already been logged so just exit
                 sys.exit(-1)
 
-        # enable the contents in the *_reduced_ops.cc
-        with open(reduced_path, 'r+') as file:
-            file_content = file.read().replace(r'#ifndef REDUCED_OPS_BUILD', r'#ifdef REDUCED_OPS_BUILD')
 
-        with open(reduced_path, "w") as file_to_write:
-            file_to_write.write(file_content)
-
-
-def _insert_type_control_cpp_code(ort_root: str, cpp_lines: typing.Sequence[str]):
+def _generate_type_control_overrides(ort_root: Path, build_dir: Path, cpp_lines: typing.Sequence[str]):
     '''
-    Insert the C++ code to specify operator type requirements.
+    Generate the type control overrides file, inserting any given C++ code that specifies operator type requirements.
     :param ort_root: Root of the ONNX Runtime repository
+    :param build_dir: Path to the build directory
     :param cpp_lines: The C++ code to insert
     '''
-    src = os.path.join(ort_root, 'onnxruntime', 'core', 'providers', 'op_kernel_type_control_overrides.inc')
-    if not os.path.exists(src) or not os.path.isfile(src):
-        log.warning('Could not find {}. Skipping generation of C++ code to reduce the types supported by operators.'
-                    .format(src))
-        return
+    src = Path(ort_root, 'onnxruntime', 'core', 'providers', 'op_kernel_type_control_overrides.inc')
+
+    if not src.is_file():
+        raise ValueError(f"Op kernel type control overrides file does not exist: {src}")
+
+    # create a copy of op_kernel_type_control_overrides.inc, even when cpp_lines is empty
+    target = _get_op_reduction_file_path(ort_root, build_dir, src)
+
+    target.parent.mkdir(parents=True, exist_ok=True)
 
-    # create a copy of the op_kernel_type_control_overrides.inc even the cpp_lines is empty
-    src_path = Path(src)
-    target = Path(src_path.parent, f'{src_path.stem}{REDUCED_TYPE_CONTROL_SUFFIX}{src_path.suffix}')
     shutil.copyfile(src, target)
 
     if cpp_lines:
@@ -173,20 +179,29 @@ def _insert_type_control_cpp_code(ort_root: str, cpp_lines: typing.Sequence[str]
             raise RuntimeError('Insertion point was not found in {}'.format(target))
 
 
-def reduce_ops(config_path: str, enable_type_reduction: bool = False, use_cuda: bool = True):
+def reduce_ops(config_path: str, build_dir: str, enable_type_reduction: bool = False, use_cuda: bool = True):
     '''
     Reduce op kernel implementations.
     :param config_path: Path to configuration file that specifies the ops to include
+    :param build_dir: Path to the build directory. The op reduction files will be generated under the build directory.
     :param enable_type_reduction: Whether per operator type reduction is enabled
     :param use_cuda: Whether to reduce op kernels for the CUDA provider
     '''
+    build_dir = Path(build_dir).resolve()
+    build_dir.mkdir(parents=True, exist_ok=True)
+
     required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction)
 
-    _process_provider_registrations(ort_root, use_cuda, required_ops, op_type_impl_filter)
+    # delete any existing generated files first
+    op_reduction_root = _get_op_reduction_file_path(ORT_ROOT, build_dir)
+    if op_reduction_root.is_dir():
+        log.info(f"Deleting existing op reduction file root directory: {op_reduction_root}")
+        shutil.rmtree(op_reduction_root)
 
-    if enable_type_reduction:
-        type_control_cpp_code = op_type_impl_filter.get_cpp_entries() if op_type_impl_filter is not None else []
-        _insert_type_control_cpp_code(ort_root, type_control_cpp_code)
+    _generate_provider_registrations(ORT_ROOT, build_dir, use_cuda, required_ops, op_type_impl_filter)
+
+    type_control_cpp_code = op_type_impl_filter.get_cpp_entries() if op_type_impl_filter is not None else []
+    _generate_type_control_overrides(ORT_ROOT, build_dir, type_control_cpp_code)
 
 
 if __name__ == "__main__":
@@ -199,6 +214,19 @@ if __name__ == "__main__":
                              "Create with <ORT root>/tools/python/create_reduced_build_config.py and edit if needed. "
                              "See /docs/ONNX_Runtime_Format_Model_Usage.md for more information.")
 
+    parser.add_argument("--cmake_build_dir", type=str, required=True,
+                        help="Path to the build directory. "
+                             "The op reduction files will be generated under the build directory.")
+
+    parser.add_argument("--enable_type_reduction", action="store_true",
+                        help="Whether per operator type reduction is enabled.")
+
+    parser.add_argument("--use_cuda", action="store_true",
+                        help="Whether to reduce op kernels for the CUDA provider.")
+
     args = parser.parse_args()
-    config_path = os.path.abspath(args.config_path)
-    reduce_ops(config_path, enable_type_reduction=True, use_cuda=True)
+
+    reduce_ops(config_path=args.config_path,
+               build_dir=args.cmake_build_dir,
+               enable_type_reduction=args.enable_type_reduction,
+               use_cuda=args.use_cuda)