diff --git a/CMakeLists.txt b/CMakeLists.txt index 7462b57904e..10a92dcc7c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,30 +1,27 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -#cmake_policy(SET CMP0022 NEW) -#cmake_policy(SET CMP0023 NEW) +# cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0023 NEW) -# Use compiler ID "AppleClang" instead of "Clang" for XCode. -# Not setting this sometimes makes XCode C compiler gets detected as "Clang", -# even when the C++ one is detected as "AppleClang". +# Use compiler ID "AppleClang" instead of "Clang" for XCode. Not setting this +# sometimes makes XCode C compiler gets detected as "Clang", even when the C++ +# one is detected as "AppleClang". cmake_policy(SET CMP0010 NEW) cmake_policy(SET CMP0025 NEW) # Enables CMake to set LTO on compilers other than Intel. cmake_policy(SET CMP0069 NEW) -# Enable the policy for CMake subprojects. -# protobuf currently causes issues -#set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) +# Enable the policy for CMake subprojects. protobuf currently causes issues +# set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) -# Suppress warning flags in default MSVC configuration. It's not -# mandatory that we do this (and we don't if cmake is old), but it's -# nice when it's possible, and it's possible on our Windows configs. +# Suppress warning flags in default MSVC configuration. It's not mandatory that +# we do this (and we don't if cmake is old), but it's nice when it's possible, +# and it's possible on our Windows configs. cmake_policy(SET CMP0092 NEW) # Prohibit in-source builds if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) -message(FATAL_ERROR "In-source build are not supported") + message(FATAL_ERROR "In-source build are not supported") endif() - # ---[ Project and semantic versioning. 
project(Torch CXX C) @@ -40,33 +37,49 @@ set(CMAKE_INSTALL_MESSAGE NEVER) string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard) if(env_cxx_standard GREATER -1) message( - WARNING "C++ standard version definition detected in environment variable." - "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment.") + WARNING + "C++ standard version definition detected in environment variable." + "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment." + ) endif() -set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.") -set(CMAKE_C_STANDARD 11 CACHE STRING "The C standard whose features are requested to build this target.") +set(CMAKE_CXX_STANDARD + 17 + CACHE STRING + "The C++ standard whose features are requested to build this target.") +set(CMAKE_C_STANDARD + 11 + CACHE STRING + "The C standard whose features are requested to build this target.") # ---[ Utils include(cmake/public/utils.cmake) # --- [ Check that minimal gcc version is 9.3+ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3) - message(FATAL_ERROR "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}") + message( + FATAL_ERROR + "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}" + ) endif() -# This define is needed to preserve behavior given anticpated changes to cccl/thrust +# This define is needed to preserve behavior given anticpated changes to +# cccl/thrust # https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html -string(APPEND CMAKE_CUDA_FLAGS " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS") +string(APPEND CMAKE_CUDA_FLAGS + " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS") if(LINUX) include(cmake/CheckAbi.cmake) - string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") - string(APPEND CMAKE_CUDA_FLAGS " 
-D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") + string(APPEND CMAKE_CXX_FLAGS + " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") + string(APPEND CMAKE_CUDA_FLAGS + " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1) set(CXX_STANDARD_REQUIRED ON) else() - # Please note this is required in order to ensure compatibility between gcc 9 and gcc 7 - # This could be removed when all Linux PyTorch binary builds are compiled by the same toolchain again + # Please note this is required in order to ensure compatibility between gcc + # 9 and gcc 7 This could be removed when all Linux PyTorch binary builds are + # compiled by the same toolchain again append_cxx_flag_if_supported("-fabi-version=11" CMAKE_CXX_FLAGS) endif() endif() @@ -75,12 +88,10 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_LINK_WHAT_YOU_USE TRUE) # One variable that determines whether the current cmake process is being run -# with the main Caffe2 library. This is useful for building modules - if -# modules are built with the main Caffe2 library then one does not need to do -# find caffe2 in the cmake script. One can usually guard it in some way like -# if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) -# find_package(Caffe2 REQUIRED) -# endif() +# with the main Caffe2 library. This is useful for building modules - if modules +# are built with the main Caffe2 library then one does not need to do find +# caffe2 in the cmake script. One can usually guard it in some way like if(NOT +# CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) find_package(Caffe2 REQUIRED) endif() set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON) # Googletest's cmake files are going to set it on once they are processed. 
Let's @@ -94,23 +105,26 @@ if(NOT DEFINED BLAS_SET_BY_USER) message(STATUS "Not forcing any particular BLAS to be found") set(BLAS_SET_BY_USER FALSE) endif() - set(BLAS_SET_BY_USER ${BLAS_SET_BY_USER} CACHE STRING "Marks whether BLAS was manually set by user or auto-detected") + set(BLAS_SET_BY_USER + ${BLAS_SET_BY_USER} + CACHE STRING + "Marks whether BLAS was manually set by user or auto-detected") endif() # Apple specific if(APPLE) - # These lines are an attempt to make find_package(cuda) pick up - # libcuda.dylib, and not cuda.framework. It doesn't work all - # the time, but it seems to help for some users. - # TODO: replace this with a more robust fix + # These lines are an attempt to make find_package(cuda) pick up libcuda.dylib, + # and not cuda.framework. It doesn't work all the time, but it seems to help + # for some users. TODO: replace this with a more robust fix set(CMAKE_FIND_FRAMEWORK LAST) set(CMAKE_FIND_APPBUNDLE LAST) # Get clang version on macOS - execute_process( COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE clang_full_version_string ) - string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2" CLANG_VERSION_STRING ${clang_full_version_string}) - message( STATUS "CLANG_VERSION_STRING: " ${CLANG_VERSION_STRING} ) - + execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE clang_full_version_string) + string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2" + CLANG_VERSION_STRING ${clang_full_version_string}) + message(STATUS "CLANG_VERSION_STRING: " ${CLANG_VERSION_STRING}) # RPATH stuff set(CMAKE_MACOSX_RPATH ON) @@ -123,27 +137,40 @@ if(APPLE) OUTPUT_VARIABLE _macosx_sdk_version OUTPUT_STRIP_TRAILING_WHITESPACE) if(_exit_code EQUAL 0) - set(_MPS_supported_os_version OFF) - if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3) - set(_MPS_supported_os_version ON) - endif() - message(STATUS "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}") - execute_process( - COMMAND 
bash -c "xcrun --sdk macosx --show-sdk-path" - OUTPUT_VARIABLE _macosx_sdk_path - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/") - set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/") + set(_MPS_supported_os_version OFF) + if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3) + set(_MPS_supported_os_version ON) + endif() + message( + STATUS + "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}" + ) + execute_process( + COMMAND bash -c "xcrun --sdk macosx --show-sdk-path" + OUTPUT_VARIABLE _macosx_sdk_path + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/") + set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/") - find_library(_MPS_fwrk_path_ NAMES MetalPerformanceShadersGraph MetalPerformanceShaders PATHS ${_FRAMEWORK_SEARCH_PATH} NO_DEFAULT_PATH) - find_library(_MPS_sdk_path_ NAMES MetalPerformanceShadersGraph MetalPerformanceShaders PATHS ${_SDK_SEARCH_PATH} NO_DEFAULT_PATH) + find_library( + _MPS_fwrk_path_ + NAMES MetalPerformanceShadersGraph MetalPerformanceShaders + PATHS ${_FRAMEWORK_SEARCH_PATH} + NO_DEFAULT_PATH) + find_library( + _MPS_sdk_path_ + NAMES MetalPerformanceShadersGraph MetalPerformanceShaders + PATHS ${_SDK_SEARCH_PATH} + NO_DEFAULT_PATH) - if(_MPS_supported_os_version AND _MPS_fwrk_path_ AND _MPS_sdk_path_) - set(MPS_FOUND ON) - message(STATUS "MPSGraph framework found") - else() - message(STATUS "MPSGraph framework not found") - endif() + if(_MPS_supported_os_version + AND _MPS_fwrk_path_ + AND _MPS_sdk_path_) + set(MPS_FOUND ON) + message(STATUS "MPSGraph framework found") + else() + message(STATUS "MPSGraph framework not found") + endif() else() message(STATUS "MPS: unable to get MacOS sdk version") message(STATUS "MPSGraph framework not found") @@ -160,66 +187,72 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)") set(CPU_AARCH64 ON) endif() - -# For non-supported platforms, turn 
USE_DISTRIBUTED off by default. -# It is not tested and likely won't work without additional changes. +# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not +# tested and likely won't work without additional changes. if(NOT LINUX AND NOT WIN32) - set(USE_DISTRIBUTED OFF CACHE STRING "Use distributed") - # On macOS, if USE_DISTRIBUTED is enabled (specified by the user), - # then make Gloo build with the libuv transport. + set(USE_DISTRIBUTED + OFF + CACHE STRING "Use distributed") + # On macOS, if USE_DISTRIBUTED is enabled (specified by the user), then make + # Gloo build with the libuv transport. if(APPLE AND USE_DISTRIBUTED) - set(USE_LIBUV ON CACHE STRING "") + set(USE_LIBUV + ON + CACHE STRING "") endif() endif() -# ---[ Options. -# Note to developers: if you add an option below, make sure you also add it to -# cmake/Summary.cmake so that the summary prints out the option values. +# ---[ Options. Note to developers: if you add an option below, make sure you +# also add it to cmake/Summary.cmake so that the summary prints out the option +# values. include(CMakeDependentOption) option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) option(BUILD_BINARY "Build C++ binaries" OFF) option(BUILD_DOCS "Build Caffe2 documentation" OFF) -option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON) +option(BUILD_CUSTOM_PROTOBUF + "Build and use Caffe2's own protobuf under third_party" ON) option(BUILD_PYTHON "Build Python binaries" ON) option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF) option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON) cmake_dependent_option( - CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON - "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF) + CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." 
ON + "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF) cmake_dependent_option( - CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON - "NOT BUILD_SHARED_LIBS" OFF) + CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON + "NOT BUILD_SHARED_LIBS" OFF) option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF) option(BUILD_AOT_INDUCTOR_TEST "Build C++ test binaries for aot-inductor" OFF) -option(BUILD_STATIC_RUNTIME_BENCHMARK "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF) -option(BUILD_MOBILE_BENCHMARK "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF) -option(BUILD_MOBILE_TEST "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF) +option(BUILD_STATIC_RUNTIME_BENCHMARK + "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF) +option( + BUILD_MOBILE_BENCHMARK + "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" + OFF) +option( + BUILD_MOBILE_TEST + "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" + OFF) option(BUILD_JNI "Build JNI bindings" OFF) -option(BUILD_MOBILE_AUTOGRAD "Build autograd function in mobile build (in development)" OFF) -cmake_dependent_option( - INSTALL_TEST "Install test binaries if BUILD_TEST is on" ON - "BUILD_TEST" OFF) +option(BUILD_MOBILE_AUTOGRAD + "Build autograd function in mobile build (in development)" OFF) +cmake_dependent_option(INSTALL_TEST "Install test binaries if BUILD_TEST is on" + ON "BUILD_TEST" OFF) option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF) option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON) option(USE_ASAN "Use Address+Undefined Sanitizers" OFF) option(USE_TSAN "Use Thread Sanitizer" OFF) option(USE_CUDA "Use CUDA" ON) +cmake_dependent_option(USE_XPU "Use XPU. Only available on Linux." ON "LINUX" + OFF) cmake_dependent_option( - USE_XPU "Use XPU. 
Only available on Linux." ON - "LINUX" OFF) -cmake_dependent_option( - BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) + BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON + "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) -cmake_dependent_option( - USE_CUDNN "Use cuDNN" ON - "USE_CUDA" OFF) -cmake_dependent_option( - USE_STATIC_CUDNN "Use cuDNN static libraries" OFF - "USE_CUDNN" OFF) -cmake_dependent_option( - USE_CUSPARSELT "Use cuSPARSELt" ON - "USE_CUDA" OFF) +cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) +cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF + "USE_CUDNN" OFF) +cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF) option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON) option(USE_KINETO "Use Kineto profiling library" ON) option(USE_CUPTI_SO "Use CUPTI as a shared library" ON) @@ -231,33 +264,25 @@ option(USE_MAGMA "Use MAGMA" ON) option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) -cmake_dependent_option( - USE_MPS "Use MPS for macOS build" ON - "MPS_FOUND" OFF) -cmake_dependent_option( - USE_NCCL "Use NCCL" ON - "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) -cmake_dependent_option(USE_RCCL "Use RCCL" ON - USE_NCCL OFF) -cmake_dependent_option( - USE_STATIC_NCCL "Use static NCCL" OFF - "USE_NCCL" OFF) -cmake_dependent_option( - USE_SYSTEM_NCCL "Use system-wide NCCL" OFF - "USE_NCCL" OFF) +cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) +cmake_dependent_option(USE_NCCL "Use NCCL" ON + "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) +cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) 
+cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) +cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL" + OFF) option(USE_NNAPI "Use NNAPI" OFF) option(USE_NNPACK "Use NNPACK" ON) -cmake_dependent_option( - USE_NUMA "Use NUMA. Only available on Linux." ON - "LINUX" OFF) -cmake_dependent_option( - USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." OFF - "USE_CUDA" OFF) +cmake_dependent_option(USE_NUMA "Use NUMA. Only available on Linux." ON "LINUX" + OFF) +cmake_dependent_option(USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." + OFF "USE_CUDA" OFF) option(USE_NUMPY "Use NumPy" ON) option(USE_OBSERVERS "Use observers module." OFF) option(USE_OPENCL "Use OpenCL" OFF) option(USE_OPENMP "Use OpenMP for parallel code" ON) -option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build." OFF) +option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build." + OFF) option(USE_PROF "Use profiling" OFF) option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON) @@ -269,9 +294,7 @@ cmake_dependent_option( "LINUX" OFF) if(NOT DEFINED USE_VULKAN) - cmake_dependent_option( - USE_VULKAN "Use Vulkan GPU backend" ON - "ANDROID" OFF) + cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF) endif() option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF) @@ -281,39 +304,49 @@ cmake_dependent_option( USE_LITE_AOTI "Include AOTI sources" OFF "BUILD_LITE_INTERPRETER" OFF) option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF) -option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF) +option(USE_VULKAN_RELAXED_PRECISION + "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF) # option USE_XNNPACK: try to enable xnnpack by default. 
option(USE_XNNPACK "Use XNNPACK" ON) option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF) # Ensure that an ITT build is the default for x86 CPUs +cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality" + ON "CPU_INTEL" OFF) +# Ensure that an MKLDNN build is the default for x86 CPUs but optional for +# AArch64 (dependent on -DUSE_MKLDNN). cmake_dependent_option( - USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON - "CPU_INTEL" OFF) -# Ensure that an MKLDNN build is the default for x86 CPUs -# but optional for AArch64 (dependent on -DUSE_MKLDNN). -cmake_dependent_option( - USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." "${CPU_INTEL}" - "CPU_INTEL OR CPU_AARCH64" OFF) + USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." + "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF) cmake_dependent_option( USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF "USE_MKLDNN AND CPU_AARCH64" OFF) set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN}) -cmake_dependent_option( - USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF - "USE_MKLDNN" OFF) +cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN" + OFF) option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF) option(USE_DISTRIBUTED "Use distributed" ON) cmake_dependent_option( - USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON - "USE_DISTRIBUTED" OFF) + USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON + "USE_DISTRIBUTED" OFF) cmake_dependent_option( - USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." OFF - "USE_DISTRIBUTED" OFF) + USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." 
OFF + "USE_DISTRIBUTED" OFF) +cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF) +cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC" + OFF) cmake_dependent_option( - USE_SYSTEM_UCC "Use system-wide UCC" OFF - "USE_UCC" OFF) + USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON + "USE_DISTRIBUTED" OFF) cmake_dependent_option( - USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC" OFF) + USE_GLOO_WITH_OPENSSL + "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF + "USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF) +cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON + "USE_DISTRIBUTED;USE_GLOO" OFF) +cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON + "USE_DISTRIBUTED;USE_NCCL" OFF) +cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" + OFF) cmake_dependent_option( USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON "USE_DISTRIBUTED" OFF) @@ -334,22 +367,29 @@ cmake_dependent_option( USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." 
ON) option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF) -option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF) -cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) +option(BUILD_LIBTORCH_CPU_WITH_DEBUG + "Enable RelWithDebInfo for libtorch_cpu target only" OFF) +cmake_dependent_option( + USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) option(WERROR "Build with -Werror supported by the compiler" OFF) -option(DEBUG_CUDA "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)" OFF) +option( + DEBUG_CUDA + "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)" + OFF) option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF) -option(USE_PER_OPERATOR_HEADERS "Whether ATen should generate separate headers for each operator" ON) +option(USE_PER_OPERATOR_HEADERS + "Whether ATen should generate separate headers for each operator" ON) cmake_dependent_option( - BUILD_LAZY_TS_BACKEND "Build the lazy Torchscript backend, not compatible with mobile builds" ON - "NOT INTERN_BUILD_MOBILE" OFF) -cmake_dependent_option( - BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) -cmake_dependent_option( - BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" OFF "USE_CUDA" OFF) + BUILD_LAZY_TS_BACKEND + "Build the lazy Torchscript backend, not compatible with mobile builds" ON + "NOT INTERN_BUILD_MOBILE" OFF) +cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) +cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" + OFF "USE_CUDA" OFF) option(USE_MIMALLOC "Use mimalloc" OFF) -# Enable third party mimalloc library to improve memory allocation performance on Windows. +# Enable third party mimalloc library to improve memory allocation performance +# on Windows. 
if(WIN32) set(USE_MIMALLOC ON) endif() @@ -357,11 +397,20 @@ endif() if(USE_CCACHE) find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) - set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "C compiler launcher") - set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "CXX compiler launcher") - set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "CUDA compiler launcher") + set(CMAKE_C_COMPILER_LAUNCHER + "${CCACHE_PROGRAM}" + CACHE STRING "C compiler launcher") + set(CMAKE_CXX_COMPILER_LAUNCHER + "${CCACHE_PROGRAM}" + CACHE STRING "CXX compiler launcher") + set(CMAKE_CUDA_COMPILER_LAUNCHER + "${CCACHE_PROGRAM}" + CACHE STRING "CUDA compiler launcher") else() - message(STATUS "Could not find ccache. Consider installing ccache to speed up compilation.") + message( + STATUS + "Could not find ccache. Consider installing ccache to speed up compilation." + ) endif() endif() @@ -383,8 +432,10 @@ if(WIN32) set(USE_DISTRIBUTED OFF) set(USE_GLOO OFF) message( - WARNING "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " - "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv.") + WARNING + "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " + "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." + ) else() set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../) endif() @@ -392,12 +443,13 @@ if(WIN32) endif() if(USE_GLOO_WITH_OPENSSL) - set(USE_TCP_OPENSSL_LOAD ON CACHE STRING "") + set(USE_TCP_OPENSSL_LOAD + ON + CACHE STRING "") endif() # Linux distributions do not want too many embedded sources, in that sense we -# need to be able to build pytorch with an (almost) empty third_party -# directory. +# need to be able to build pytorch with an (almost) empty third_party directory. # USE_SYSTEM_LIBS is a shortcut variable to toggle all the # USE_SYSTEM_* # variables on. 
Individual USE_SYSTEM_* variables can be toggled with # USE_SYSTEM_LIBS being "OFF". @@ -437,61 +489,91 @@ if(USE_SYSTEM_LIBS) endif() # Used when building Caffe2 through setup.py -option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" ON) +option(BUILDING_WITH_TORCH_LIBS + "Tell cmake if Caffe2 is being built alongside torch libs" ON) -# /Z7 override option -# When generating debug symbols, CMake default to use the flag /Zi. -# However, it is not compatible with sccache. So we rewrite it off. +# /Z7 override option When generating debug symbols, CMake default to use the +# flag /Zi. However, it is not compatible with sccache. So we rewrite it off. # But some users don't use sccache; this override is for them. cmake_dependent_option( - MSVC_Z7_OVERRIDE "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)" ON - "MSVC" OFF) + MSVC_Z7_OVERRIDE + "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)" + ON + "MSVC" + OFF) if(NOT USE_SYSTEM_ONNX) - set(ONNX_NAMESPACE "onnx_torch" CACHE STRING "A namespace for ONNX; needed to build with other frameworks that share ONNX.") + set(ONNX_NAMESPACE + "onnx_torch" + CACHE + STRING + "A namespace for ONNX; needed to build with other frameworks that share ONNX." + ) else() - set(ONNX_NAMESPACE "onnx" CACHE STRING "A namespace for ONNX; needed to build with other frameworks that share ONNX.") + set(ONNX_NAMESPACE + "onnx" + CACHE + STRING + "A namespace for ONNX; needed to build with other frameworks that share ONNX." + ) endif() -set(SELECTED_OP_LIST "" CACHE STRING - "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default.") +set(SELECTED_OP_LIST + "" + CACHE + STRING + "Path to the yaml file that contains the list of operators to include for custom build. 
Include all operators by default." +) option( - STATIC_DISPATCH_BACKEND - "Name of the backend for which static dispatch code is generated, e.g.: CPU." - "") -option(USE_LIGHTWEIGHT_DISPATCH "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly." OFF) -if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND) - message(FATAL_ERROR "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.") -endif() + STATIC_DISPATCH_BACKEND + "Name of the backend for which static dispatch code is generated, e.g.: CPU." + "") option( - TRACING_BASED - "Master flag to build Lite Interpreter with tracing build option" + USE_LIGHTWEIGHT_DISPATCH + "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly." OFF) +if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND) + message( + FATAL_ERROR + "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.") +endif() +option(TRACING_BASED + "Master flag to build Lite Interpreter with tracing build option" OFF) option(BUILD_EXECUTORCH "Master flag to build Executorch" ON) -# This is a fix for a rare build issue on Ubuntu: -# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk +# This is a fix for a rare build issue on Ubuntu: symbol lookup error: +# miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: +# mkl_blas_dsyrk # https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu if(LINUX) - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") endif() if(MSVC) - # MSVC by default does not apply the correct __cplusplus version as specified by the C++ standard - # because MSVC is not a completely compliant implementation. 
This option forces MSVC to use the - # appropriate value given the requested --std option. This fixes a compilation issue mismatch - # between GCC/Clang and MSVC. + # MSVC by default does not apply the correct __cplusplus version as specified + # by the C++ standard because MSVC is not a completely compliant + # implementation. This option forces MSVC to use the appropriate value given + # the requested --std option. This fixes a compilation issue mismatch between + # GCC/Clang and MSVC. # - # See: - # * https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170 + # See: * + # https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170 # * https://en.cppreference.com/w/cpp/preprocessor/replace#Predefined_macros set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /Zc:__cplusplus") set(CMAKE_NINJA_CMCLDEPS_RC OFF) - foreach(flag_var - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + foreach( + flag_var + CMAKE_C_FLAGS + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELWITHDEBINFO) # Replace /Zi and /ZI with /Z7 if(MSVC_Z7_OVERRIDE) if(${flag_var} MATCHES "/Z[iI]") @@ -510,12 +592,12 @@ if(MSVC) endif() # /bigobj increases number of sections in .obj file, which is needed to link - # against libraries in Python 2.7 under Windows - # For Visual Studio generators, if /MP is not added, then we may need - # to add /MP to the flags. + # against libraries in Python 2.7 under Windows For Visual Studio + # generators, if /MP is not added, then we may need to add /MP to the flags. 
# For other generators like ninja, we don't need to add /MP because it is # already handled by the generator itself. - if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flag_var} MATCHES "/MP") + if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flag_var} MATCHES + "/MP") set(${flag_var} "${${flag_var}} /MP /bigobj") else() set(${flag_var} "${${flag_var}} /bigobj") @@ -523,37 +605,44 @@ if(MSVC) endforeach(flag_var) foreach(flag_var - CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL) + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL) if(${flag_var} MATCHES "/Z[iI7]") string(REGEX REPLACE "/Z[iI7]" "" ${flag_var} "${${flag_var}}") endif() endforeach(flag_var) - foreach(flag_var - CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO - CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO - CMAKE_SHARED_LINKER_FLAGS_DEBUG CMAKE_STATIC_LINKER_FLAGS_DEBUG - CMAKE_EXE_LINKER_FLAGS_DEBUG CMAKE_MODULE_LINKER_FLAGS_DEBUG) + foreach( + flag_var + CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_SHARED_LINKER_FLAGS_DEBUG + CMAKE_STATIC_LINKER_FLAGS_DEBUG + CMAKE_EXE_LINKER_FLAGS_DEBUG + CMAKE_MODULE_LINKER_FLAGS_DEBUG) # Switch off incremental linking in debug/relwithdebinfo builds - if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES "/INCREMENTAL:NO") - string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} "${${flag_var}}") + if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES + "/INCREMENTAL:NO") + string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} + "${${flag_var}}") endif() endforeach(flag_var) - foreach(flag_var - CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS - 
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS) + foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS + CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS) string(APPEND ${flag_var} " /ignore:4049 /ignore:4217 /ignore:4099") endforeach(flag_var) - foreach(flag_var - CMAKE_SHARED_LINKER_FLAGS) - # https://github.com/pytorch/pytorch/issues/91933: Don't set the manifest filename - # explicitly helps fix the linker error when linking torch_python.dll. The manifest - # file would still be there in the correct format torch_python.dll.manifest + foreach(flag_var CMAKE_SHARED_LINKER_FLAGS) + # https://github.com/pytorch/pytorch/issues/91933: Don't set the manifest + # filename explicitly helps fix the linker error when linking + # torch_python.dll. The manifest file would still be there in the correct + # format torch_python.dll.manifest if(${flag_var} MATCHES "/MANIFESTFILE:.*\\.manifest") - string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" "" ${flag_var} "${${flag_var}}") + string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" "" ${flag_var} + "${${flag_var}}") endif() endforeach(flag_var) @@ -567,11 +656,12 @@ endif(MSVC) string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not -# applicable to mobile are disabled by this variable. -# Setting `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can -# force it to do mobile build with host toolchain - which is useful for testing -# purpose. -if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) +# applicable to mobile are disabled by this variable. Setting +# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it +# to do mobile build with host toolchain - which is useful for testing purpose. 
+if(ANDROID + OR IOS + OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) set(INTERN_BUILD_MOBILE ON) message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND") set(BUILD_LAZY_TS_BACKEND OFF) @@ -585,12 +675,11 @@ if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) string(APPEND CMAKE_C_FLAGS " -fdata-sections") # Please note that the use of the following flags is required when linking - # against libtorch_cpu.a for mobile builds. - # -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive + # against libtorch_cpu.a for mobile builds. -Wl,--whole-archive -ltorch_cpu + # -Wl,--no-whole-archive # - # This allows global constructors to be included and run. Global - # constructors are used for operator/kernel registration with the - # PyTorch Dispatcher. + # This allows global constructors to be included and run. Global constructors + # are used for operator/kernel registration with the PyTorch Dispatcher. if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) # C10_MOBILE is derived from Android/iOS toolchain macros in @@ -599,10 +688,10 @@ if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) endif() if(DEFINED ENV{PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET}) - # If PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET is defined (env var), - # then define C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the - # number of dispatch keys in OperatorEntry::dispatchTable_ - # to reduce peak memory during library initialization. + # If PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET is defined (env var), then define + # C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the number of dispatch keys in + # OperatorEntry::dispatchTable_ to reduce peak memory during library + # initialization. 
string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE_TRIM_DISPATCH_KEYS") endif() endif() @@ -614,8 +703,8 @@ if(NOT DEFINED USE_BLAS) set(USE_BLAS ON) endif() -# Build libtorch mobile library, which contains ATen/TH ops and native support for -# TorchScript model, but doesn't contain not-yet-unified caffe2 ops; +# Build libtorch mobile library, which contains ATen/TH ops and native support +# for TorchScript model, but doesn't contain not-yet-unified caffe2 ops; if(INTERN_BUILD_MOBILE) if(NOT BUILD_SHARED_LIBS AND NOT "${SELECTED_OP_LIST}" STREQUAL "") string(APPEND CMAKE_CXX_FLAGS " -DNO_EXPORT") @@ -636,8 +725,8 @@ if(INTERN_BUILD_MOBILE) else() set(INTERN_USE_EIGEN_BLAS OFF) endif() - # Disable developing mobile interpreter for actual mobile build. - # Enable it elsewhere to capture build error. + # Disable developing mobile interpreter for actual mobile build. Enable it + # elsewhere to capture build error. set(INTERN_DISABLE_MOBILE_INTERP ON) endif() @@ -647,19 +736,23 @@ file(READ version.txt TORCH_DEFAULT_VERSION) string(REGEX REPLACE "\n$" "" TORCH_DEFAULT_VERSION "${TORCH_DEFAULT_VERSION}") if("${TORCH_DEFAULT_VERSION} " STREQUAL " ") message(WARNING "Could not get version from base 'version.txt'") - # If we can't get the version from the version file we should probably - # set it to something non-sensical like 0.0.0 + # If we can't get the version from the version file we should probably set it + # to something non-sensical like 0.0.0 set(TORCH_DEFAULT_VERSION, "0.0.0") endif() -set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" CACHE STRING "Torch build version") +set(TORCH_BUILD_VERSION + "${TORCH_DEFAULT_VERSION}" + CACHE STRING "Torch build version") if(DEFINED ENV{PYTORCH_BUILD_VERSION}) - set(TORCH_BUILD_VERSION "$ENV{PYTORCH_BUILD_VERSION}" - CACHE STRING "Torch build version" FORCE) + set(TORCH_BUILD_VERSION + "$ENV{PYTORCH_BUILD_VERSION}" + CACHE STRING "Torch build version" FORCE) endif() if(NOT TORCH_BUILD_VERSION) # An empty string was specified so 
force version to the default - set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" - CACHE STRING "Torch build version" FORCE) + set(TORCH_BUILD_VERSION + "${TORCH_DEFAULT_VERSION}" + CACHE STRING "Torch build version" FORCE) endif() caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION}) caffe2_parse_version_str(CAFFE2 ${TORCH_BUILD_VERSION}) @@ -677,32 +770,53 @@ enable_testing() # ---[ Build variables set within the cmake tree include(cmake/BuildVariables.cmake) -set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should build.") +set(CAFFE2_ALLOWLIST + "" + CACHE STRING "A allowlist file of files that one should build.") # Set default build type if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - defaulting to Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) + message(STATUS "Build type not set - defaulting to Release") + set(CMAKE_BUILD_TYPE + "Release" + CACHE + STRING + "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." 
+ FORCE) endif() # The below means we are cross compiling for arm64 or x86_64 on MacOSX -if(NOT IOS AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$") +if(NOT IOS + AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" + AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$") set(CROSS_COMPILING_MACOSX TRUE) - # We need to compile a universal protoc to not fail protobuf build - # We set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed the cmake compiler check for cross-compiling - set(protoc_build_command "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1") - # We write to a temp scriptfile because CMake COMMAND dislikes double quotes in commands - file(WRITE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh "#!/bin/bash\n${protoc_build_command}") - file(COPY ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh DESTINATION ${PROJECT_SOURCE_DIR}/scripts/ FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ) - execute_process(COMMAND ./scripts/tmp_protoc_script.sh - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT) - file(REMOVE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh ${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh) + # We need to compile a universal protoc to not fail protobuf build We set + # CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed + # the cmake compiler check for cross-compiling + set(protoc_build_command + "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1" + ) + # We write to a temp scriptfile because CMake COMMAND dislikes double quotes + # in commands + file(WRITE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh + "#!/bin/bash\n${protoc_build_command}") + file( + COPY 
${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh + DESTINATION ${PROJECT_SOURCE_DIR}/scripts/ + FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ) + execute_process( + COMMAND ./scripts/tmp_protoc_script.sh + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT) + file(REMOVE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh + ${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh) if(NOT BUILD_HOST_PROTOC_RESULT EQUAL "0") message(FATAL_ERROR "Could not compile universal protoc.") endif() - set(PROTOBUF_PROTOC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") - set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") + set(PROTOBUF_PROTOC_EXECUTABLE + "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") + set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE + "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") endif() # ---[ Misc checks to cope with various compiler modes @@ -711,9 +825,12 @@ include(cmake/MiscCheck.cmake) # External projects include(ExternalProject) -# ---[ Dependencies -# ---[ FBGEMM doesn't work on x86 32bit and CMAKE_SYSTEM_PROCESSOR thinks its 64bit -if(USE_FBGEMM AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL 4) OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")) +# ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and +# CMAKE_SYSTEM_PROCESSOR thinks its 64bit +if(USE_FBGEMM + AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL + 4) + OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")) set(USE_FBGEMM OFF) endif() @@ -724,16 +841,15 @@ if(MSVC) append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS) endif() -# Note for ROCM platform: -# 1. USE_ROCM is always ON until include(cmake/Dependencies.cmake) -# 2. 
USE_CUDA will become OFF during re-configuration -# Truth Table: -# CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default -# CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, FLASH evaluates to ON by default -# ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default -# ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, FLASH evaluates to ON by default -# CPU 1st pass: USE_CUDA=False(Cmd Option);USE_ROCM=True, FLASH evaluates to OFF by default -# CPU 2nd pass: USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default +# Note for ROCM platform: 1. USE_ROCM is always ON until +# include(cmake/Dependencies.cmake) 2. USE_CUDA will become OFF during +# re-configuration Truth Table: CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, +# FLASH evaluates to ON by default CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, +# FLASH evaluates to ON by default ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, +# FLASH evaluates to ON by default ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, +# FLASH evaluates to ON by default CPU 1st pass: USE_CUDA=False(Cmd +# Option);USE_ROCM=True, FLASH evaluates to OFF by default CPU 2nd pass: +# USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default # Thus we cannot tell ROCM 2nd pass and CPU 1st pass # # The only solution is to include(cmake/Dependencies.cmake), and defer the @@ -744,35 +860,34 @@ include(cmake/Dependencies.cmake) cmake_dependent_option( USE_FLASH_ATTENTION "Whether to build the flash_attention kernel for scaled dot product attention.\ - Will be disabled if not supported by the platform" ON - "USE_CUDA OR USE_ROCM;NOT MSVC" OFF) + Will be disabled if not supported by the platform" + ON + "USE_CUDA OR USE_ROCM;NOT MSVC" + OFF) -# We are currenlty not using alibi attention for Flash -# So we disable this feature by default -# We dont currently document this feature because we don't +# We are currenlty not using alibi attention for Flash So we disable this +# feature by 
default We dont currently document this feature because we don't # Suspect users building from source will need this add_definitions(-DFLASHATTENTION_DISABLE_ALIBI) -# CAVEAT: Again, do not check USE_ROCM here -# Flash Attention2 will error while building for sm52 while Mem Eff Attention won't +# CAVEAT: Again, do not check USE_ROCM here Flash Attention2 will error while +# building for sm52 while Mem Eff Attention won't cmake_dependent_option( USE_MEM_EFF_ATTENTION "Enable memory-efficient attention for scaled dot product attention.\ - Will be disabled if not supported by the platform" ON - "USE_CUDA" OFF) + Will be disabled if not supported by the platform" ON "USE_CUDA" OFF) if(DEBUG_CUDA) string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -lineinfo") string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -lineinfo") - # CUDA-12.1 crashes when trying to compile with --source-in-ptx - # See https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893 + # CUDA-12.1 crashes when trying to compile with --source-in-ptx See + # https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893 if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.1) string(APPEND CMAKE_CUDA_FLAGS_DEBUG " --source-in-ptx") string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " --source-in-ptx") endif() endif(DEBUG_CUDA) - if(USE_FBGEMM) string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM") endif() @@ -836,12 +951,15 @@ include(cmake/Allowlist.cmake) # ---[ Set link flag, handle additional deps for gcc 4.8 and above if(CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID) - message(STATUS "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line") + message( + STATUS + "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line" + ) list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc) endif() -# ---[ Build flags -# Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM +# ---[ Build flags Re-include to override append_cxx_flag_if_supported from +# third_party/FBGEMM 
include(cmake/public/utils.cmake) if(NOT MSVC) string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC") @@ -855,7 +973,8 @@ if(NOT MSVC) append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Werror=bool-operation" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-missing-field-initializers" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-missing-field-initializers" + CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-array-bounds" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-unknown-pragmas" CMAKE_CXX_FLAGS) @@ -868,13 +987,16 @@ if(NOT MSVC) append_cxx_flag_if_supported("-Wvla-extension" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wsuggest-override" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wnewline-eof" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Winconsistent-missing-override" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Winconsistent-missing-destructor-override" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Winconsistent-missing-override" + CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Winconsistent-missing-destructor-override" + CMAKE_CXX_FLAGS) if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") string(APPEND CMAKE_CXX_FLAGS " -Wno-pass-failed") endif() if(CMAKE_COMPILER_IS_GNUCXX) - # Suppress "The ABI for passing parameters with 64-byte alignment has changed in GCC 4.6" + # Suppress "The ABI for passing parameters with 64-byte alignment has + # changed in GCC 4.6" string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi") endif() @@ -886,37 +1008,46 @@ if(NOT MSVC) message(WARNING "Refusing to use gold when USE_MPI=1") else() execute_process( - COMMAND - "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version - ERROR_QUIET - OUTPUT_VARIABLE LD_VERSION) + COMMAND "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version + ERROR_QUIET + OUTPUT_VARIABLE LD_VERSION) if(NOT 
"${LD_VERSION}" MATCHES "GNU gold") - message(WARNING "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off") + message( + WARNING + "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off" + ) set(USE_GOLD_LINKER OFF) else() message(STATUS "ld.gold is available, using it to link") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold") - set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=gold") + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold") + set(CMAKE_MODULE_LINKER_FLAGS + "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=gold") endif() endif() endif() append_cxx_flag_if_supported("-Wno-error=pedantic" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-error=old-style-cast" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-error=inconsistent-missing-override" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-error=inconsistent-missing-destructor-override" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-error=inconsistent-missing-override" + CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported( + "-Wno-error=inconsistent-missing-destructor-override" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-invalid-partial-specialization" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-invalid-partial-specialization" + CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable" + CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS) if(${USE_COLORIZE_OUTPUT}) - # Why compiler checks are necessary even when `try_compile` is used - # Because of the bug in ccache that can incorrectly identify `-fcolor-diagnostics` - # As 
supported by GCC, see https://github.com/ccache/ccache/issues/740 (for older ccache) - # and https://github.com/ccache/ccache/issues/1275 (for newer ones) + # Why compiler checks are necessary even when `try_compile` is used Because + # of the bug in ccache that can incorrectly identify `-fcolor-diagnostics` + # As supported by GCC, see https://github.com/ccache/ccache/issues/740 (for + # older ccache) and https://github.com/ccache/ccache/issues/1275 (for newer + # ones) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") append_cxx_flag_if_supported("-fdiagnostics-color=always" CMAKE_CXX_FLAGS) else() @@ -948,8 +1079,8 @@ else() add_compile_definitions(_UCRT_LEGACY_INFINITY) # disable min/max macros add_compile_definitions(NOMINMAX) - # Turn off these warnings on Windows. - # destructor was implicitly defined as delete + # Turn off these warnings on Windows. destructor was implicitly defined as + # delete append_cxx_flag_if_supported("/wd4624" CMAKE_CXX_FLAGS) # unknown pragma append_cxx_flag_if_supported("/wd4068" CMAKE_CXX_FLAGS) @@ -969,10 +1100,10 @@ else() append_cxx_flag_if_supported("/wd4273" CMAKE_CXX_FLAGS) endif() - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") include(CheckCSourceCompiles) - check_c_source_compiles("#include + check_c_source_compiles( + "#include int main() { float a[] = {1.0, 1.0}; float32x4x2_t v; @@ -980,7 +1111,8 @@ int main() { v.val[1] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL)); vst1q_f32_x2(a, v); return 0; -}" HAS_VST1) +}" + HAS_VST1) if(NOT HAS_VST1) string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VST1") @@ -989,47 +1121,60 @@ endif() if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") include(CheckCSourceCompiles) - check_c_source_compiles("#include + check_c_source_compiles( + "#include int main() { float a[] = {1.0, 1.0}; vld1q_f32_x2(a); return 0; -}" HAS_VLD1) +}" + HAS_VLD1) if(NOT HAS_VLD1) string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1") endif() endif() - # Add code coverage flags to supported compilers 
if(USE_CPP_CODE_COVERAGE) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path") - string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path") + string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path") + string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path") elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping") - string(APPEND CMAKE_CXX_FLAGS " -fprofile-instr-generate -fcoverage-mapping") + string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping") + string(APPEND CMAKE_CXX_FLAGS + " -fprofile-instr-generate -fcoverage-mapping") else() - message(ERROR "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported") + message( + ERROR + "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported") endif() endif() if(APPLE) - if(USE_MPS) - string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc") - string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS") - string(APPEND CMAKE_SHARED_LINKER_FLAGS " -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal") - # To suppress MPSGraph availability warnings - append_cxx_flag_if_supported("-Wno-unguarded-availability-new" CMAKE_OBJCXX_FLAGS) - endif() - append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS) + if(USE_MPS) + string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc") + string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS") + string( + APPEND + CMAKE_SHARED_LINKER_FLAGS + " -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal" + ) + # To suppress MPSGraph availability warnings + append_cxx_flag_if_supported("-Wno-unguarded-availability-new" + CMAKE_OBJCXX_FLAGS) + endif() + 
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS) endif() if(EMSCRIPTEN) - string(APPEND CMAKE_CXX_FLAGS " -Wno-implicit-function-declaration -DEMSCRIPTEN -s DISABLE_EXCEPTION_CATCHING=0") + string( + APPEND + CMAKE_CXX_FLAGS + " -Wno-implicit-function-declaration -DEMSCRIPTEN -s DISABLE_EXCEPTION_CATCHING=0" + ) endif() append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS) @@ -1048,15 +1193,13 @@ if(NOT APPLE AND UNIX) list(APPEND Caffe2_DEPENDENCY_LIBS dl) endif() -# Prefix path to Caffe2 headers. -# If a directory containing installed Caffe2 headers was inadvertently -# added to the list of include directories, prefixing +# Prefix path to Caffe2 headers. If a directory containing installed Caffe2 +# headers was inadvertently added to the list of include directories, prefixing # PROJECT_SOURCE_DIR means this source tree always takes precedence. include_directories(BEFORE ${PROJECT_SOURCE_DIR}) -# Prefix path to generated Caffe2 headers. -# These need to take precedence over their empty counterparts located -# in PROJECT_SOURCE_DIR. +# Prefix path to generated Caffe2 headers. These need to take precedence over +# their empty counterparts located in PROJECT_SOURCE_DIR. 
include_directories(BEFORE ${PROJECT_BINARY_DIR}) include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/) @@ -1096,107 +1239,108 @@ if(BUILD_DOCS) configure_file(${DOXYGEN_C_IN} ${DOXYGEN_C_OUT} @ONLY) configure_file(${DOXYGEN_P_IN} ${DOXYGEN_P_OUT} @ONLY) - add_custom_target(doc_doxygen_c ALL - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Generating C++ API documentation with Doxygen" - VERBATIM) + add_custom_target( + doc_doxygen_c ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generating C++ API documentation with Doxygen" + VERBATIM) - add_custom_target(doc_doxygen_python ALL - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Generating Python API documentation with Doxygen" - VERBATIM) + add_custom_target( + doc_doxygen_python ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generating Python API documentation with Doxygen" + VERBATIM) else() - message(FATAL_ERROR "Doxygen needs to be installed to generate the documentation") + message( + FATAL_ERROR "Doxygen needs to be installed to generate the documentation") endif() endif() -# ---[ CMake related files -# Uninistall option. +# ---[ CMake related files Uninistall option. 
if(NOT TARGET caffe2_uninstall) configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake - IMMEDIATE @ONLY) + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake IMMEDIATE @ONLY) - add_custom_target(caffe2_uninstall - COMMAND ${CMAKE_COMMAND} -P - ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) + add_custom_target( + caffe2_uninstall COMMAND ${CMAKE_COMMAND} -P + ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) endif() -# ---[ Make configuration files for cmake to allow dependent libraries -# easier access to Caffe2. +# ---[ Make configuration files for cmake to allow dependent libraries easier +# access to Caffe2. -if((NOT USE_GLOG) OR (NOT USE_GFLAGS) OR BUILD_CUSTOM_PROTOBUF) - message(WARNING - "Generated cmake files are only fully tested if one builds " - "with system glog, gflags, and protobuf. Other settings may " - "generate files that are not well tested.") +if((NOT USE_GLOG) + OR(NOT USE_GFLAGS) + OR BUILD_CUSTOM_PROTOBUF) + message(WARNING "Generated cmake files are only fully tested if one builds " + "with system glog, gflags, and protobuf. Other settings may " + "generate files that are not well tested.") endif() if(USE_CUDA OR USE_ROCM) - # TODO: check if we should include other cuda dependency libraries - # to the interface as well. + # TODO: check if we should include other cuda dependency libraries to the + # interface as well. endif() -# Note(jiayq): when building static libraries, all PRIVATE dependencies -# will also become interface libraries, and as a result if there are any -# dependency libraries that are not exported, the following install export -# script will fail. As a result, we will only provide the targets cmake -# files for shared lib installation. 
For more info, read: +# Note(jiayq): when building static libraries, all PRIVATE dependencies will +# also become interface libraries, and as a result if there are any dependency +# libraries that are not exported, the following install export script will +# fail. As a result, we will only provide the targets cmake files for shared lib +# installation. For more info, read: # https://cmake.org/pipermail/cmake/2016-May/063400.html if(BUILD_SHARED_LIBS) - configure_file( - ${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in - ${PROJECT_BINARY_DIR}/Caffe2Config.cmake - @ONLY) - install(FILES - ${PROJECT_BINARY_DIR}/Caffe2Config.cmake + configure_file(${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in + ${PROJECT_BINARY_DIR}/Caffe2Config.cmake @ONLY) + install( + FILES ${PROJECT_BINARY_DIR}/Caffe2Config.cmake + DESTINATION share/cmake/Caffe2 + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake + DESTINATION share/cmake/Caffe2/public + COMPONENT dev) + install( + DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + if(NOT BUILD_LIBTORCHLESS) + install( + EXPORT Caffe2Targets DESTINATION share/cmake/Caffe2 - COMPONENT dev) - 
install(FILES - ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake - DESTINATION share/cmake/Caffe2/public - COMPONENT dev) - install(DIRECTORY - ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - install(FILES - ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - install(FILES - ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - install(FILES - ${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - - install(EXPORT Caffe2Targets DESTINATION share/cmake/Caffe2 FILE Caffe2Targets.cmake COMPONENT dev) + endif() else() - message(WARNING - "Generated cmake files are only available when building " - "shared libs.") + message(WARNING "Generated cmake files are only available when building " + "shared libs.") endif() -# ---[ Binaries -# Binaries will be built after the Caffe2 main libraries and the modules -# are built. For the binaries, they will be linked to the Caffe2 main +# ---[ Binaries Binaries will be built after the Caffe2 main libraries and the +# modules are built. For the binaries, they will be linked to the Caffe2 main # libraries, as well as all the modules that are built with Caffe2 (the ones # built in the previous Modules section above). 
if(BUILD_BINARY) @@ -1222,29 +1366,38 @@ endif() # Parse custom debug info if(DEFINED USE_CUSTOM_DEBINFO) - string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}") - message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}") + string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}") + message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}") - string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}") + string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}") - # Set the COMPILE_FLAGS property for each source file - foreach(SOURCE_FILE ${SOURCE_FILES_LIST}) - # We have to specify the scope here. We do this by specifying the - # targets we care about and caffe2/ for all test targets defined there - set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch") - set_source_files_properties(${SOURCE_FILE} DIRECTORY "caffe2/" TARGET_DIRECTORY ${ALL_PT_TARGETS} PROPERTIES COMPILE_FLAGS "-g") - endforeach() + # Set the COMPILE_FLAGS property for each source file + foreach(SOURCE_FILE ${SOURCE_FILES_LIST}) + # We have to specify the scope here. 
We do this by specifying the targets we + # care about and caffe2/ for all test targets defined there + if(BUILD_LIBTORCHLESS) + set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}") + else() + # @todo test if we can remove this + set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch") + endif() + set_source_files_properties( + ${SOURCE_FILE} DIRECTORY "caffe2/" TARGET_DIRECTORY ${ALL_PT_TARGETS} + PROPERTIES COMPILE_FLAGS "-g") + endforeach() - # Link everything with debug info when any file is in debug mode - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -g") + # Link everything with debug info when any file is in debug mode + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -g") endif() # Bundle PTXAS if needed if(BUILD_BUNDLE_PTXAS AND USE_CUDA) - if(NOT EXISTS "${PROJECT_SOURCE_DIR}/build/bin/ptxas") - message(STATUS "Copying PTXAS into the bin folder") - file(COPY "${CUDAToolkit_BIN_DIR}/ptxas" DESTINATION "${PROJECT_BINARY_DIR}") - endif() - install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" DESTINATION "${CMAKE_INSTALL_BINDIR}") + if(NOT EXISTS "${PROJECT_SOURCE_DIR}/build/bin/ptxas") + message(STATUS "Copying PTXAS into the bin folder") + file(COPY "${CUDAToolkit_BIN_DIR}/ptxas" + DESTINATION "${PROJECT_BINARY_DIR}") + endif() + install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" + DESTINATION "${CMAKE_INSTALL_BINDIR}") endif() diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt index 1f742f4c176..82eb9690383 100644 --- a/c10/CMakeLists.txt +++ b/c10/CMakeLists.txt @@ -12,120 +12,128 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # protobuf header files, because protobuf header files will transitively force # one to link against a specific protobuf version. -# ---[ Configure macro file. 
-set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in -set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in -set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in -set(C10_USE_NUMA ${USE_NUMA}) -set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) -set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) -configure_file( - ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in - ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) - -# Note: if you want to add ANY dependency to the c10 library, make sure you -# check with the core PyTorch developers as the dependency will be -# transitively passed on to all libraries dependent on PyTorch. -file(GLOB C10_SRCS - *.cpp - core/*.cpp - core/impl/*.cpp - mobile/*.cpp - macros/*.cpp - util/*.cpp - ) -file(GLOB C10_HEADERS - *.h - core/*.h - core/impl/*.h - mobile/*.h - macros/*.h - util/*.h - ) -add_library(c10 ${C10_SRCS} ${C10_HEADERS}) -target_compile_options_if_supported(c10 "-Wdeprecated") -if(HAVE_SOVERSION) - set_target_properties(c10 PROPERTIES - VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) -endif() -# If building shared library, set dllimport/dllexport proper. -target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB") -# Enable hidden visibility if compiler supports it. 
-if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) - target_compile_options(c10 PRIVATE "-fvisibility=hidden") +if(BUILD_LIBTORCHLESS) + find_library(C10_LIB c10 PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) +else() + set(C10_LIB c10) endif() -option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF) -if(C10_USE_IWYU) - find_program(iwyu NAMES include-what-you-use) - if(iwyu) - set(iwyu_cmd - "include-what-you-use" - "-Xiwyu" - "--transitive_includes_only" - "-Xiwyu" - "--no_fwd_decls" - "-Xiwyu" - "--prefix_header_includes=keep" - "-Xiwyu" - "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp" - ) - set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd}) + # ---[ Configure macro file. + set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in + set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in + set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in + set(C10_USE_NUMA ${USE_NUMA}) + set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) + set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) + configure_file( + ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in + ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) + + # Note: if you want to add ANY dependency to the c10 library, make sure you + # check with the core PyTorch developers as the dependency will be + # transitively passed on to all libraries dependent on PyTorch. + file(GLOB C10_SRCS + *.cpp + core/*.cpp + core/impl/*.cpp + mobile/*.cpp + macros/*.cpp + util/*.cpp + ) + file(GLOB C10_HEADERS + *.h + core/*.h + core/impl/*.h + mobile/*.h + macros/*.h + util/*.h + ) +if(NOT BUILD_LIBTORCHLESS) + add_library(c10 ${C10_SRCS} ${C10_HEADERS}) + target_compile_options_if_supported(c10 "-Wdeprecated") + if(HAVE_SOVERSION) + set_target_properties(c10 PROPERTIES + VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) + endif() + # If building shared library, set dllimport/dllexport proper. 
+ target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB") + # Enable hidden visibility if compiler supports it. + if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10 PRIVATE "-fvisibility=hidden") endif() -endif() -if(WERROR) - target_compile_options_if_supported(c10 PRIVATE "-Werror=sign-compare") - target_compile_options_if_supported(c10 PRIVATE "-Werror=shadow") -endif() + option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF) + if(C10_USE_IWYU) + find_program(iwyu NAMES include-what-you-use) + if(iwyu) + set(iwyu_cmd + "include-what-you-use" + "-Xiwyu" + "--transitive_includes_only" + "-Xiwyu" + "--no_fwd_decls" + "-Xiwyu" + "--prefix_header_includes=keep" + "-Xiwyu" + "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp" + ) + set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd}) + endif() + endif() -# ---[ Dependency of c10 -if(C10_USE_GFLAGS) - target_link_libraries(c10 PUBLIC gflags) -endif() + if(WERROR) + target_compile_options_if_supported(c10 PRIVATE "-Werror=sign-compare") + target_compile_options_if_supported(c10 PRIVATE "-Werror=shadow") + endif() -if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) -endif() -target_link_libraries(c10 PRIVATE fmt::fmt-header-only) + # ---[ Dependency of c10 + if(C10_USE_GFLAGS) + target_link_libraries(c10 PUBLIC gflags) + endif() -if(C10_USE_NUMA) - message(STATUS "NUMA paths:") - message(STATUS ${Numa_INCLUDE_DIR}) - message(STATUS ${Numa_LIBRARIES}) - target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) - target_link_libraries(c10 PRIVATE ${Numa_LIBRARIES}) -else() - message(STATUS "don't use NUMA") -endif() + if(C10_USE_GLOG) + target_link_libraries(c10 PUBLIC glog::glog) + endif() + target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") - target_link_libraries(c10 PRIVATE cpuinfo) -endif() + if(C10_USE_NUMA) + 
message(STATUS "NUMA paths:") + message(STATUS ${Numa_INCLUDE_DIR}) + message(STATUS ${Numa_LIBRARIES}) + target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) + target_link_libraries(c10 PRIVATE ${Numa_LIBRARIES}) + else() + message(STATUS "don't use NUMA") + endif() -find_package(Backtrace) -if(Backtrace_FOUND) - target_include_directories(c10 PRIVATE ${Backtrace_INCLUDE_DIRS}) - target_link_libraries(c10 PRIVATE ${Backtrace_LIBRARIES}) - target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=1) -else() - target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=0) -endif() + if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") + target_link_libraries(c10 PRIVATE cpuinfo) + endif() -if(USE_MIMALLOC) - target_link_libraries(c10 PRIVATE "mimalloc-static") - add_dependencies(c10 mimalloc-static) -endif() + find_package(Backtrace) + if(Backtrace_FOUND) + target_include_directories(c10 PRIVATE ${Backtrace_INCLUDE_DIRS}) + target_link_libraries(c10 PRIVATE ${Backtrace_LIBRARIES}) + target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=1) + else() + target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=0) + endif() -if(ANDROID) - target_link_libraries(c10 PRIVATE log) -endif() + if(USE_MIMALLOC) + target_link_libraries(c10 PRIVATE "mimalloc-static") + add_dependencies(c10 mimalloc-static) + endif() -target_include_directories( - c10 PUBLIC - $ - $ - $) + if(ANDROID) + target_link_libraries(c10 PRIVATE log) + endif() + + target_include_directories( + c10 PUBLIC + $ + $ + $) +endif() add_subdirectory(test) add_subdirectory(benchmark) @@ -144,11 +152,14 @@ if(USE_XPU) add_subdirectory(xpu) endif() -# ---[ Installation -# Note: for now, we will put all export path into one single Caffe2Targets group -# to deal with the cmake deployment need. Inside the Caffe2Targets set, the -# individual libraries like libc10.so and libcaffe2.so are still self-contained. 
-install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib) +if(NOT BUILD_LIBTORCHLESS) + # ---[ Installation + # Note: for now, we will put all export path into one single Caffe2Targets group + # to deal with the cmake deployment need. Inside the Caffe2Targets set, the + # individual libraries like libc10.so and libcaffe2.so are still self-contained. + install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib) +endif() + install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/c10/benchmark/CMakeLists.txt b/c10/benchmark/CMakeLists.txt index 9658e603761..16b268e3800 100644 --- a/c10/benchmark/CMakeLists.txt +++ b/c10/benchmark/CMakeLists.txt @@ -6,7 +6,7 @@ if(BUILD_TEST) get_filename_component(bench_file_name ${bench_src} NAME_WE) set(bench_name "c10_${bench_file_name}") add_executable(${bench_name} "${bench_src}") - target_link_libraries(${bench_name} c10 benchmark) + target_link_libraries(${bench_name} ${C10_LIB} benchmark) if(INSTALL_TEST) install(TARGETS ${bench_name} DESTINATION test) endif() diff --git a/c10/cuda/CMakeLists.txt b/c10/cuda/CMakeLists.txt index c5c45c68d8f..893a8556297 100644 --- a/c10/cuda/CMakeLists.txt +++ b/c10/cuda/CMakeLists.txt @@ -12,6 +12,10 @@ configure_file( ${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in ${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h) +if(BUILD_LIBTORCHLESS) + find_library(C10_CUDA_LIB c10_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) +endif() + # Note: if you want to add ANY dependency to the c10 library, make sure you # check with the core PyTorch developers as the dependency will be # transitively passed on to all libraries dependent on PyTorch. @@ -47,36 +51,42 @@ set(C10_CUDA_HEADERS impl/CUDATest.h ) set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE) -torch_cuda_based_add_library(c10_cuda ${C10_CUDA_SRCS} ${C10_CUDA_HEADERS}) -set(CUDA_LINK_LIBRARIES_KEYWORD) -# If building shared library, set dllimport/dllexport proper. 
-target_compile_options(c10_cuda PRIVATE "-DC10_CUDA_BUILD_MAIN_LIB") -# Enable hidden visibility if compiler supports it. -if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) - target_compile_options(c10_cuda PRIVATE "-fvisibility=hidden") -endif() -# ---[ Dependency of c10_cuda -target_link_libraries(c10_cuda PUBLIC c10 torch::cudart) +if(NOT BUILD_LIBTORCHLESS) + torch_cuda_based_add_library(c10_cuda ${C10_CUDA_SRCS} ${C10_CUDA_HEADERS}) + set(CUDA_LINK_LIBRARIES_KEYWORD) + # If building shared library, set dllimport/dllexport proper. + target_compile_options(c10_cuda PRIVATE "-DC10_CUDA_BUILD_MAIN_LIB") + # Enable hidden visibility if compiler supports it. + if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10_cuda PRIVATE "-fvisibility=hidden") + endif() -if(NOT WIN32) -target_link_libraries(c10_cuda PRIVATE dl) -target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED") -endif() + # ---[ Dependency of c10_cuda + target_link_libraries(c10_cuda PUBLIC ${C10_LIB} torch::cudart) -target_include_directories( - c10_cuda PUBLIC - $ - $ - $) + if(NOT WIN32) + target_link_libraries(c10_cuda PRIVATE dl) + target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED") + endif() -add_subdirectory(test) + target_include_directories( + c10_cuda PUBLIC + $ + $ + $) + set(C10_CUDA_LIB c10_cuda) # ---[ Installation # Note: for now, we will put all export path into one single Caffe2Targets group # to deal with the cmake deployment need. Inside the Caffe2Targets set, the # individual libraries like libc10.so and libcaffe2.so are still self-contained. 
install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib) + +endif() + +add_subdirectory(test) + foreach(file ${C10_CUDA_HEADERS}) get_filename_component( dir ${file} DIRECTORY ) install( FILES ${file} DESTINATION include/c10/cuda/${dir} ) diff --git a/c10/cuda/test/CMakeLists.txt b/c10/cuda/test/CMakeLists.txt index eed7fdff42c..7a93087f5ee 100644 --- a/c10/cuda/test/CMakeLists.txt +++ b/c10/cuda/test/CMakeLists.txt @@ -15,7 +15,7 @@ if(BUILD_TEST) get_filename_component(test_file_name ${test_src} NAME_WE) set(test_name "c10_cuda_${test_file_name}") add_executable(${test_name} "${test_src}") - target_link_libraries(${test_name} c10_cuda gtest_main) + target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gtest_main) add_test(NAME ${test_name} COMMAND $) if(INSTALL_TEST) install(TARGETS ${test_name} DESTINATION test) diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt index f4273ca2055..a6442e01d2e 100644 --- a/c10/hip/CMakeLists.txt +++ b/c10/hip/CMakeLists.txt @@ -5,6 +5,10 @@ include(../../cmake/public/utils.cmake) +if(BUILD_LIBTORCHLESS) + find_library(C10_HIP_LIB c10_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) +endif() + # ---[ Configure macro file. set(C10_HIP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in configure_file( @@ -26,36 +30,40 @@ file(GLOB __c10_hip_srcs_cpp *.cc impl/*.cc) set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) file(GLOB_RECURSE C10_HIP_HEADERS *.h) -hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS}) -# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake -target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS}) +if(NOT BUILD_LIBTORCHLESS) + hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS}) -# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be -# minimal. 
I'm not sure if we need hip_hcc or not; for now leave it out + # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake + target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS}) -# If building shared library, set dllimport/dllexport proper. -target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB") -# Enable hidden visibility if compiler supports it. -if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) - target_compile_options(c10_hip PRIVATE "-fvisibility=hidden") + # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be + # minimal. I'm not sure if we need hip_hcc or not; for now leave it out + + # If building shared library, set dllimport/dllexport proper. + target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB") + # Enable hidden visibility if compiler supports it. + if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10_hip PRIVATE "-fvisibility=hidden") + endif() + + # ---[ Dependency of c10_hip + target_link_libraries(c10_hip PUBLIC c10) + + target_link_libraries(c10_hip PUBLIC ${PYTORCH_HIP_LIBRARIES}) + + target_include_directories( + c10_hip PUBLIC + $ + $ + $) + install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib) + set(C10_HIP_LIB c10_hip) endif() -# ---[ Dependency of c10_hip -target_link_libraries(c10_hip PUBLIC c10) - -target_link_libraries(c10_hip PUBLIC ${PYTORCH_HIP_LIBRARIES}) - -target_include_directories( - c10_hip PUBLIC - $ - $ - $) - add_subdirectory(test) # ---[ Installation -install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib) install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/c10/test/CMakeLists.txt b/c10/test/CMakeLists.txt index a1ca4bb51b7..7f2a61246c6 100644 --- a/c10/test/CMakeLists.txt +++ b/c10/test/CMakeLists.txt @@ -9,7 +9,7 @@ if(BUILD_TEST) if(NOT MSVC) target_compile_options(${test_name} PRIVATE -Wno-unused-variable) endif() - target_link_libraries(${test_name} c10 gmock gtest 
gtest_main)
+    target_link_libraries(${test_name} ${C10_LIB} gmock gtest gtest_main)
     add_test(NAME ${test_name} COMMAND $)
     if(INSTALL_TEST)
       install(TARGETS ${test_name} DESTINATION test)
diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt
index c14f1790d9d..d06d0f0aa92 100644
--- a/c10/xpu/CMakeLists.txt
+++ b/c10/xpu/CMakeLists.txt
@@ -4,6 +4,10 @@
 include(../../cmake/public/xpu.cmake)
 
+if(BUILD_LIBTORCHLESS)
+  find_library(C10_XPU_LIB c10_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+endif()
+
 set(C10_XPU_SRCS
   XPUCachingAllocator.cpp
   XPUFunctions.cpp
@@ -19,7 +23,7 @@ set(C10_XPU_HEADERS
   XPUStream.h
   impl/XPUGuardImpl.h
 )
-
+if(NOT BUILD_LIBTORCHLESS)
 add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
 target_compile_options(c10_xpu PRIVATE "-DC10_XPU_BUILD_MAIN_LIB")
 # Enable hidden visibility if compiler supports it.
@@ -35,11 +39,13 @@ target_include_directories(
   $
   $
 )
-
-add_subdirectory(test)
+  install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
+  set(C10_XPU_LIB c10_xpu)
+  add_subdirectory(test)
+endif()
 
 # ---[ Installation
-install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
+
 foreach(file ${C10_XPU_HEADERS})
   get_filename_component(dir ${file} DIRECTORY)
   install(FILES ${file} DESTINATION include/c10/xpu/${dir})
diff --git a/c10/xpu/test/CMakeLists.txt b/c10/xpu/test/CMakeLists.txt
index fa91cc9d171..0f0c85c68c8 100644
--- a/c10/xpu/test/CMakeLists.txt
+++ b/c10/xpu/test/CMakeLists.txt
@@ -11,7 +11,7 @@ if(BUILD_TEST)
     get_filename_component(test_file_name ${test_src} NAME_WE)
     set(test_name "c10_xpu_${test_file_name}")
     add_executable(${test_name} "${test_src}")
-    target_link_libraries(${test_name} c10_xpu gtest_main)
+    target_link_libraries(${test_name} ${C10_XPU_LIB} gtest_main)
     add_test(NAME ${test_name} COMMAND $)
     if(INSTALL_TEST)
       install(TARGETS ${test_name} DESTINATION test)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 5b8fce16a42..8366f065923 100644
--- a/caffe2/CMakeLists.txt
+++ 
b/caffe2/CMakeLists.txt @@ -785,6 +785,32 @@ set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS_NON_AVX} ${Caffe2_CPU_SRCS_AVX2} ${Caffe2_ # END formerly-libtorch sources # ========================================================== +if(BUILD_LIBTORCHLESS) + find_library(TORCH_LIB torch PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + find_library(TORCH_CPU_LIB torch_cpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + + if(USE_CUDA) + find_library(TORCH_CUDA_LIB torch_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + endif() + + if(USE_ROCM) + find_library(TORCH_HIP_LIB torch_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + endif() + + if(USE_XPU) + find_library(TORCH_XPU_LIB torch_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + endif() + add_subdirectory(../torch torch) +else() + set(TORCH_LIB torch) + set(TORCH_CPU_LIB torch_cpu) + set(TORCH_CUDA_LIB torch_cuda) + set(TORCH_HIP_LIB torch_hip) + set(TORCH_XPU_LIB torch_xpu) +endif() + + +if(NOT BUILD_LIBTORCHLESS) add_library(torch_cpu ${Caffe2_CPU_SRCS}) if(HAVE_SOVERSION) set_target_properties(torch_cpu PROPERTIES @@ -1622,6 +1648,7 @@ endif() # ---[ XPU library. if(USE_XPU) target_link_libraries(torch_xpu INTERFACE torch::xpurt) + target_link_libraries(torch_xpu PUBLIC c10_xpu) target_include_directories( @@ -1701,6 +1728,7 @@ if(USE_ROCM) # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added. 
target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS}) # experiment + target_link_libraries(torch_hip PUBLIC c10_hip) if(NOT INTERN_BUILD_MOBILE) @@ -1921,6 +1949,7 @@ if(MSVC) set_source_files_properties(${tmp_path} PROPERTIES COMPILE_FLAGS "-Xcompiler /Zc:lambda") endforeach() endif() +endif() # Note: we only install the caffe2 python files if BUILD_CAFFE2_OPS is ON # This is because the build rules here written in such a way that they always diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt index 1fa28c8aee8..bdfa4bfe455 100644 --- a/functorch/CMakeLists.txt +++ b/functorch/CMakeLists.txt @@ -16,7 +16,12 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_API_INCLUDE_EXTENSION_H target_compile_options(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS}) target_compile_options_if_supported(${PROJECT_NAME} "-Wmissing-prototypes") target_compile_options_if_supported(${PROJECT_NAME} "-Werror=missing-prototypes") -target_link_libraries(${PROJECT_NAME} PRIVATE torch torch_python) +if(BUILD_LIBTORCHLESS) + target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIB} torch_python) +else() + # functorch cannot use the alias to build on windows + target_link_libraries(${PROJECT_NAME} PRIVATE torch torch_python) +endif() target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11) set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY diff --git a/setup.py b/setup.py index 6c02c8207bc..62051612367 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ # Welcome to the PyTorch setup.py. 
-#
 # Environment variables you are probably interested in:
 #
 #   DEBUG
@@ -199,7 +198,15 @@
 #
 #   USE_PRIORITIZED_TEXT_FOR_LD
 #     Uses prioritized text form cmake/prioritized_text.txt for LD
+#
+#   BUILD_LIBTORCH_WHL
+#     Builds libtorch.so and its dependencies as a wheel
+#
+#   BUILD_PYTHON_ONLY
+#     Builds pytorch as a wheel using libtorch.so from a separate wheel
+import os
+import pkgutil
 import sys
 
 if sys.platform == "win32" and sys.maxsize.bit_length() == 31:
@@ -210,6 +217,34 @@ if sys.platform == "win32" and sys.maxsize.bit_length() == 31:
 
 import platform
 
+
+def _get_package_path(package_name):
+    loader = pkgutil.find_loader(package_name)
+    if loader:
+        # The package might be a namespace package, so get_filename may fail
+        try:
+            file_path = loader.get_filename()
+            return os.path.dirname(file_path)
+        except AttributeError:
+            pass
+    return None
+
+
+BUILD_LIBTORCH_WHL = os.getenv("BUILD_LIBTORCH_WHL", "0") == "1"
+BUILD_PYTHON_ONLY = os.getenv("BUILD_PYTHON_ONLY", "0") == "1"
+
+
+# set up appropriate env variables
+if BUILD_LIBTORCH_WHL:
+    # Set up environment variables for ONLY building libtorch.so and not libtorch_python.so
+    # functorch is not supported without python
+    os.environ["BUILD_FUNCTORCH"] = "OFF"
+
+
+if BUILD_PYTHON_ONLY:
+    os.environ["BUILD_LIBTORCHLESS"] = "ON"
+    os.environ["LIBTORCH_LIB_PATH"] = f"{_get_package_path('libtorch')}/lib"
+
 python_min_version = (3, 8, 0)
 python_min_version_str = ".".join(map(str, python_min_version))
 if sys.version_info < python_min_version:
@@ -222,7 +257,6 @@ import filecmp
 import glob
 import importlib
 import json
-import os
 import shutil
 import subprocess
 import sysconfig
@@ -314,7 +348,10 @@ cmake_python_include_dir = sysconfig.get_path("include")
 ################################################################################
 # Version, create_version_file, and package_name
 ################################################################################
-package_name = os.getenv("TORCH_PACKAGE_NAME", "torch")
+
+DEFAULT_PACKAGE_NAME = "libtorch" if BUILD_LIBTORCH_WHL else "torch" + +package_name = os.getenv("TORCH_PACKAGE_NAME", DEFAULT_PACKAGE_NAME) package_type = os.getenv("PACKAGE_TYPE", "wheel") version = get_torch_version() report(f"Building wheel {package_name}-{version}") @@ -437,11 +474,12 @@ def build_deps(): check_submodules() check_pydep("yaml", "pyyaml") + build_python = not BUILD_LIBTORCH_WHL build_caffe2( version=version, cmake_python_library=cmake_python_library, - build_python=True, + build_python=build_python, rerun_cmake=RERUN_CMAKE, cmake_only=CMAKE_ONLY, cmake=cmake, @@ -698,6 +736,8 @@ class build_ext(setuptools.command.build_ext.build_ext): "caffe2.python.caffe2_pybind11_state_gpu", "caffe2.python.caffe2_pybind11_state_hip", ] + if BUILD_LIBTORCH_WHL: + caffe2_pybind_exts = [] i = 0 while i < len(self.extensions): ext = self.extensions[i] @@ -929,9 +969,14 @@ def configure_extension_build(): main_compile_args = [] main_libraries = ["torch_python"] + main_link_args = [] main_sources = ["torch/csrc/stub.c"] + if BUILD_LIBTORCH_WHL: + main_libraries = ["torch"] + main_sources = [] + if cmake_cache_vars["USE_CUDA"]: library_dirs.append(os.path.dirname(cmake_cache_vars["CUDA_CUDA_LIB"])) @@ -1053,7 +1098,6 @@ def configure_extension_build(): "default = torch.distributed.elastic.multiprocessing:DefaultLogsSpecs", ], } - return extensions, cmdclass, packages, entry_points, extra_install_requires @@ -1080,6 +1124,11 @@ def print_box(msg): def main(): + if BUILD_LIBTORCH_WHL and BUILD_PYTHON_ONLY: + raise RuntimeError( + "Conflict: 'BUILD_LIBTORCH_WHL' and 'BUILD_PYTHON_ONLY' can't both be 1. Set one to 0 and rerun." 
+ ) + # the list of runtime dependencies required by this built package install_requires = [ "filelock", @@ -1091,6 +1140,9 @@ def main(): 'mkl>=2021.1.1,<=2021.4.0; platform_system == "Windows"', ] + if BUILD_PYTHON_ONLY: + install_requires.append("libtorch") + use_prioritized_text = str(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD", "")) if ( use_prioritized_text == "" @@ -1166,10 +1218,6 @@ def main(): "nn/parallel/*.pyi", "utils/data/*.pyi", "utils/data/datapipes/*.pyi", - "lib/*.so*", - "lib/*.dylib*", - "lib/*.dll", - "lib/*.lib", "lib/*.pdb", "lib/torch_shm_manager", "lib/*.h", @@ -1335,6 +1383,23 @@ def main(): "utils/model_dump/*.mjs", ] + if BUILD_PYTHON_ONLY: + torch_package_data.extend( + [ + "lib/libtorch_python*", + "lib/*shm*", + "lib/libtorch_global_deps*", + ] + ) + else: + torch_package_data.extend( + [ + "lib/*.so*", + "lib/*.dylib*", + "lib/*.dll", + "lib/*.lib", + ] + ) if get_cmake_cache_vars()["BUILD_CAFFE2"]: torch_package_data.extend( [ @@ -1377,6 +1442,29 @@ def main(): "packaged/autograd/*", "packaged/autograd/templates/*", ] + + if BUILD_LIBTORCH_WHL: + modified_packages = [] + for package in packages: + parts = package.split(".") + if parts[0] == "torch": + modified_packages.append(DEFAULT_PACKAGE_NAME + package[len("torch") :]) + packages = modified_packages + package_dir = {"libtorch": "torch"} + torch_package_dir_name = "libtorch" + package_data = {"libtorch": torch_package_data} + extensions = [] + else: + torch_package_dir_name = "torch" + package_dir = {} + package_data = { + "torch": torch_package_data, + "torchgen": torchgen_package_data, + "caffe2": [ + "python/serialized_test/data/operator_test/*.zip", + ], + } + setup( name=package_name, version=version, @@ -1392,13 +1480,8 @@ def main(): entry_points=entry_points, install_requires=install_requires, extras_require=extras_require, - package_data={ - "torch": torch_package_data, - "torchgen": torchgen_package_data, - "caffe2": [ - "python/serialized_test/data/operator_test/*.zip", - 
], - }, + package_data=package_data, + package_dir=package_dir, url="https://pytorch.org/", download_url="https://github.com/pytorch/pytorch/tags", author="PyTorch Team", diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 8f879a8ecc7..3a3cf23495b 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -296,6 +296,8 @@ endif() add_library(torch_python SHARED ${TORCH_PYTHON_SRCS}) +add_dependencies(torch_python Caffe2_PROTO) +add_dependencies(torch_python onnx_proto) # Avoid numpy for the DEPLOY build if(USE_NUMPY) target_link_libraries(torch_python PRIVATE numpy::numpy) @@ -344,7 +346,7 @@ endif() target_compile_definitions(torch_python PRIVATE "-DTHP_BUILD_MAIN_LIB") -target_link_libraries(torch_python PRIVATE torch_library ${TORCH_PYTHON_LINK_LIBRARIES}) +target_link_libraries(torch_python PRIVATE ${TORCH_LIB} ${TORCH_PYTHON_LINK_LIBRARIES}) target_compile_definitions(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS}) @@ -359,7 +361,9 @@ endif() if(BUILD_ONEDNN_GRAPH) target_compile_definitions(torch_python PRIVATE "-DBUILD_ONEDNN_GRAPH") - target_compile_definitions(torch_cpu PRIVATE "-DBUILD_ONEDNN_GRAPH") + if(NOT BUILD_LIBTORCHLESS) + target_compile_definitions(torch_cpu PRIVATE "-DBUILD_ONEDNN_GRAPH") + endif() endif() if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") @@ -401,7 +405,11 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") ${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp ) # Pybind11 requires explicit linking of the torch_python library - target_link_libraries(nnapi_backend PRIVATE torch torch_python pybind::pybind11) + if(BUILD_LIBTORCHLESS) + target_link_libraries(nnapi_backend PRIVATE ${TORCH_LIB} torch_python pybind::pybind11) + else() + target_link_libraries(nnapi_backend PRIVATE torch torch_python pybind::pybind11) + endif() endif() set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE) diff --git a/torch/lib/libshm/CMakeLists.txt b/torch/lib/libshm/CMakeLists.txt index 
a3b41d0a0b0..8a7329ddab7 100644 --- a/torch/lib/libshm/CMakeLists.txt +++ b/torch/lib/libshm/CMakeLists.txt @@ -22,7 +22,7 @@ set_target_properties(shm PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib" CXX_STANDARD 17) -target_link_libraries(shm PRIVATE torch_cpu) +target_link_libraries(shm PRIVATE ${TORCH_CPU_LIB}) if(UNIX AND NOT APPLE) include(CheckLibraryExists) @@ -60,7 +60,12 @@ if(UNIX AND NOT APPLE) endif() add_executable(torch_shm_manager manager.cpp) -target_link_libraries(torch_shm_manager PRIVATE shm c10) +if(BUILD_LIBTORCHLESS) + target_link_libraries(torch_shm_manager PRIVATE shm ${C10_LIB}) +else() + # we need to link directly to c10 here otherwise we miss symbols + target_link_libraries(torch_shm_manager PRIVATE shm c10) +endif() set_target_properties(torch_shm_manager PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../lib")