diff --git a/CMakeLists.txt b/CMakeLists.txt index 7462b57904e..10a92dcc7c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,30 +1,27 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -#cmake_policy(SET CMP0022 NEW) -#cmake_policy(SET CMP0023 NEW) +# cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0023 NEW) -# Use compiler ID "AppleClang" instead of "Clang" for XCode. -# Not setting this sometimes makes XCode C compiler gets detected as "Clang", -# even when the C++ one is detected as "AppleClang". +# Use compiler ID "AppleClang" instead of "Clang" for XCode. Not setting this +# sometimes makes XCode C compiler gets detected as "Clang", even when the C++ +# one is detected as "AppleClang". cmake_policy(SET CMP0010 NEW) cmake_policy(SET CMP0025 NEW) # Enables CMake to set LTO on compilers other than Intel. cmake_policy(SET CMP0069 NEW) -# Enable the policy for CMake subprojects. -# protobuf currently causes issues -#set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) +# Enable the policy for CMake subprojects. protobuf currently causes issues +# set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) -# Suppress warning flags in default MSVC configuration. It's not -# mandatory that we do this (and we don't if cmake is old), but it's -# nice when it's possible, and it's possible on our Windows configs. +# Suppress warning flags in default MSVC configuration. It's not mandatory that +# we do this (and we don't if cmake is old), but it's nice when it's possible, +# and it's possible on our Windows configs. cmake_policy(SET CMP0092 NEW) # Prohibit in-source builds if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) -message(FATAL_ERROR "In-source build are not supported") + message(FATAL_ERROR "In-source build are not supported") endif() - # ---[ Project and semantic versioning. 
project(Torch CXX C) @@ -40,33 +37,49 @@ set(CMAKE_INSTALL_MESSAGE NEVER) string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard) if(env_cxx_standard GREATER -1) message( - WARNING "C++ standard version definition detected in environment variable." - "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment.") + WARNING + "C++ standard version definition detected in environment variable." + "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment." + ) endif() -set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.") -set(CMAKE_C_STANDARD 11 CACHE STRING "The C standard whose features are requested to build this target.") +set(CMAKE_CXX_STANDARD + 17 + CACHE STRING + "The C++ standard whose features are requested to build this target.") +set(CMAKE_C_STANDARD + 11 + CACHE STRING + "The C standard whose features are requested to build this target.") # ---[ Utils include(cmake/public/utils.cmake) # --- [ Check that minimal gcc version is 9.3+ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3) - message(FATAL_ERROR "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}") + message( + FATAL_ERROR + "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}" + ) endif() -# This define is needed to preserve behavior given anticpated changes to cccl/thrust +# This define is needed to preserve behavior given anticpated changes to +# cccl/thrust # https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html -string(APPEND CMAKE_CUDA_FLAGS " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS") +string(APPEND CMAKE_CUDA_FLAGS + " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS") if(LINUX) include(cmake/CheckAbi.cmake) - string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") - string(APPEND CMAKE_CUDA_FLAGS " 
-D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") + string(APPEND CMAKE_CXX_FLAGS + " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") + string(APPEND CMAKE_CUDA_FLAGS + " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}") if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1) set(CXX_STANDARD_REQUIRED ON) else() - # Please note this is required in order to ensure compatibility between gcc 9 and gcc 7 - # This could be removed when all Linux PyTorch binary builds are compiled by the same toolchain again + # Please note this is required in order to ensure compatibility between gcc + # 9 and gcc 7 This could be removed when all Linux PyTorch binary builds are + # compiled by the same toolchain again append_cxx_flag_if_supported("-fabi-version=11" CMAKE_CXX_FLAGS) endif() endif() @@ -75,12 +88,10 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_LINK_WHAT_YOU_USE TRUE) # One variable that determines whether the current cmake process is being run -# with the main Caffe2 library. This is useful for building modules - if -# modules are built with the main Caffe2 library then one does not need to do -# find caffe2 in the cmake script. One can usually guard it in some way like -# if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) -# find_package(Caffe2 REQUIRED) -# endif() +# with the main Caffe2 library. This is useful for building modules - if modules +# are built with the main Caffe2 library then one does not need to do find +# caffe2 in the cmake script. One can usually guard it in some way like if(NOT +# CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) find_package(Caffe2 REQUIRED) endif() set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON) # Googletest's cmake files are going to set it on once they are processed. 
Let's @@ -94,23 +105,26 @@ if(NOT DEFINED BLAS_SET_BY_USER) message(STATUS "Not forcing any particular BLAS to be found") set(BLAS_SET_BY_USER FALSE) endif() - set(BLAS_SET_BY_USER ${BLAS_SET_BY_USER} CACHE STRING "Marks whether BLAS was manually set by user or auto-detected") + set(BLAS_SET_BY_USER + ${BLAS_SET_BY_USER} + CACHE STRING + "Marks whether BLAS was manually set by user or auto-detected") endif() # Apple specific if(APPLE) - # These lines are an attempt to make find_package(cuda) pick up - # libcuda.dylib, and not cuda.framework. It doesn't work all - # the time, but it seems to help for some users. - # TODO: replace this with a more robust fix + # These lines are an attempt to make find_package(cuda) pick up libcuda.dylib, + # and not cuda.framework. It doesn't work all the time, but it seems to help + # for some users. TODO: replace this with a more robust fix set(CMAKE_FIND_FRAMEWORK LAST) set(CMAKE_FIND_APPBUNDLE LAST) # Get clang version on macOS - execute_process( COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE clang_full_version_string ) - string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2" CLANG_VERSION_STRING ${clang_full_version_string}) - message( STATUS "CLANG_VERSION_STRING: " ${CLANG_VERSION_STRING} ) - + execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE clang_full_version_string) + string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2" + CLANG_VERSION_STRING ${clang_full_version_string}) + message(STATUS "CLANG_VERSION_STRING: " ${CLANG_VERSION_STRING}) # RPATH stuff set(CMAKE_MACOSX_RPATH ON) @@ -123,27 +137,40 @@ if(APPLE) OUTPUT_VARIABLE _macosx_sdk_version OUTPUT_STRIP_TRAILING_WHITESPACE) if(_exit_code EQUAL 0) - set(_MPS_supported_os_version OFF) - if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3) - set(_MPS_supported_os_version ON) - endif() - message(STATUS "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}") - execute_process( - COMMAND 
bash -c "xcrun --sdk macosx --show-sdk-path" - OUTPUT_VARIABLE _macosx_sdk_path - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/") - set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/") + set(_MPS_supported_os_version OFF) + if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3) + set(_MPS_supported_os_version ON) + endif() + message( + STATUS + "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}" + ) + execute_process( + COMMAND bash -c "xcrun --sdk macosx --show-sdk-path" + OUTPUT_VARIABLE _macosx_sdk_path + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/") + set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/") - find_library(_MPS_fwrk_path_ NAMES MetalPerformanceShadersGraph MetalPerformanceShaders PATHS ${_FRAMEWORK_SEARCH_PATH} NO_DEFAULT_PATH) - find_library(_MPS_sdk_path_ NAMES MetalPerformanceShadersGraph MetalPerformanceShaders PATHS ${_SDK_SEARCH_PATH} NO_DEFAULT_PATH) + find_library( + _MPS_fwrk_path_ + NAMES MetalPerformanceShadersGraph MetalPerformanceShaders + PATHS ${_FRAMEWORK_SEARCH_PATH} + NO_DEFAULT_PATH) + find_library( + _MPS_sdk_path_ + NAMES MetalPerformanceShadersGraph MetalPerformanceShaders + PATHS ${_SDK_SEARCH_PATH} + NO_DEFAULT_PATH) - if(_MPS_supported_os_version AND _MPS_fwrk_path_ AND _MPS_sdk_path_) - set(MPS_FOUND ON) - message(STATUS "MPSGraph framework found") - else() - message(STATUS "MPSGraph framework not found") - endif() + if(_MPS_supported_os_version + AND _MPS_fwrk_path_ + AND _MPS_sdk_path_) + set(MPS_FOUND ON) + message(STATUS "MPSGraph framework found") + else() + message(STATUS "MPSGraph framework not found") + endif() else() message(STATUS "MPS: unable to get MacOS sdk version") message(STATUS "MPSGraph framework not found") @@ -160,66 +187,72 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)") set(CPU_AARCH64 ON) endif() - -# For non-supported platforms, turn 
USE_DISTRIBUTED off by default. -# It is not tested and likely won't work without additional changes. +# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not +# tested and likely won't work without additional changes. if(NOT LINUX AND NOT WIN32) - set(USE_DISTRIBUTED OFF CACHE STRING "Use distributed") - # On macOS, if USE_DISTRIBUTED is enabled (specified by the user), - # then make Gloo build with the libuv transport. + set(USE_DISTRIBUTED + OFF + CACHE STRING "Use distributed") + # On macOS, if USE_DISTRIBUTED is enabled (specified by the user), then make + # Gloo build with the libuv transport. if(APPLE AND USE_DISTRIBUTED) - set(USE_LIBUV ON CACHE STRING "") + set(USE_LIBUV + ON + CACHE STRING "") endif() endif() -# ---[ Options. -# Note to developers: if you add an option below, make sure you also add it to -# cmake/Summary.cmake so that the summary prints out the option values. +# ---[ Options. Note to developers: if you add an option below, make sure you +# also add it to cmake/Summary.cmake so that the summary prints out the option +# values. include(CMakeDependentOption) option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) option(BUILD_BINARY "Build C++ binaries" OFF) option(BUILD_DOCS "Build Caffe2 documentation" OFF) -option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON) +option(BUILD_CUSTOM_PROTOBUF + "Build and use Caffe2's own protobuf under third_party" ON) option(BUILD_PYTHON "Build Python binaries" ON) option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF) option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON) cmake_dependent_option( - CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON - "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF) + CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." 
ON + "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF) cmake_dependent_option( - CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON - "NOT BUILD_SHARED_LIBS" OFF) + CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON + "NOT BUILD_SHARED_LIBS" OFF) option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF) option(BUILD_AOT_INDUCTOR_TEST "Build C++ test binaries for aot-inductor" OFF) -option(BUILD_STATIC_RUNTIME_BENCHMARK "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF) -option(BUILD_MOBILE_BENCHMARK "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF) -option(BUILD_MOBILE_TEST "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF) +option(BUILD_STATIC_RUNTIME_BENCHMARK + "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF) +option( + BUILD_MOBILE_BENCHMARK + "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" + OFF) +option( + BUILD_MOBILE_TEST + "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" + OFF) option(BUILD_JNI "Build JNI bindings" OFF) -option(BUILD_MOBILE_AUTOGRAD "Build autograd function in mobile build (in development)" OFF) -cmake_dependent_option( - INSTALL_TEST "Install test binaries if BUILD_TEST is on" ON - "BUILD_TEST" OFF) +option(BUILD_MOBILE_AUTOGRAD + "Build autograd function in mobile build (in development)" OFF) +cmake_dependent_option(INSTALL_TEST "Install test binaries if BUILD_TEST is on" + ON "BUILD_TEST" OFF) option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF) option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON) option(USE_ASAN "Use Address+Undefined Sanitizers" OFF) option(USE_TSAN "Use Thread Sanitizer" OFF) option(USE_CUDA "Use CUDA" ON) +cmake_dependent_option(USE_XPU "Use XPU. Only available on Linux." ON "LINUX" + OFF) cmake_dependent_option( - USE_XPU "Use XPU. 
Only available on Linux." ON - "LINUX" OFF) -cmake_dependent_option( - BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) + BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON + "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) -cmake_dependent_option( - USE_CUDNN "Use cuDNN" ON - "USE_CUDA" OFF) -cmake_dependent_option( - USE_STATIC_CUDNN "Use cuDNN static libraries" OFF - "USE_CUDNN" OFF) -cmake_dependent_option( - USE_CUSPARSELT "Use cuSPARSELt" ON - "USE_CUDA" OFF) +cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) +cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF + "USE_CUDNN" OFF) +cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF) option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON) option(USE_KINETO "Use Kineto profiling library" ON) option(USE_CUPTI_SO "Use CUPTI as a shared library" ON) @@ -231,33 +264,25 @@ option(USE_MAGMA "Use MAGMA" ON) option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) -cmake_dependent_option( - USE_MPS "Use MPS for macOS build" ON - "MPS_FOUND" OFF) -cmake_dependent_option( - USE_NCCL "Use NCCL" ON - "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) -cmake_dependent_option(USE_RCCL "Use RCCL" ON - USE_NCCL OFF) -cmake_dependent_option( - USE_STATIC_NCCL "Use static NCCL" OFF - "USE_NCCL" OFF) -cmake_dependent_option( - USE_SYSTEM_NCCL "Use system-wide NCCL" OFF - "USE_NCCL" OFF) +cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) +cmake_dependent_option(USE_NCCL "Use NCCL" ON + "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) +cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) 
+cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) +cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL" + OFF) option(USE_NNAPI "Use NNAPI" OFF) option(USE_NNPACK "Use NNPACK" ON) -cmake_dependent_option( - USE_NUMA "Use NUMA. Only available on Linux." ON - "LINUX" OFF) -cmake_dependent_option( - USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." OFF - "USE_CUDA" OFF) +cmake_dependent_option(USE_NUMA "Use NUMA. Only available on Linux." ON "LINUX" + OFF) +cmake_dependent_option(USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." + OFF "USE_CUDA" OFF) option(USE_NUMPY "Use NumPy" ON) option(USE_OBSERVERS "Use observers module." OFF) option(USE_OPENCL "Use OpenCL" OFF) option(USE_OPENMP "Use OpenMP for parallel code" ON) -option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build." OFF) +option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build." + OFF) option(USE_PROF "Use profiling" OFF) option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON) @@ -269,9 +294,7 @@ cmake_dependent_option( "LINUX" OFF) if(NOT DEFINED USE_VULKAN) - cmake_dependent_option( - USE_VULKAN "Use Vulkan GPU backend" ON - "ANDROID" OFF) + cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF) endif() option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF) @@ -281,39 +304,49 @@ cmake_dependent_option( USE_LITE_AOTI "Include AOTI sources" OFF "BUILD_LITE_INTERPRETER" OFF) option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF) -option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF) +option(USE_VULKAN_RELAXED_PRECISION + "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF) # option USE_XNNPACK: try to enable xnnpack by default. 
option(USE_XNNPACK "Use XNNPACK" ON) option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF) # Ensure that an ITT build is the default for x86 CPUs +cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality" + ON "CPU_INTEL" OFF) +# Ensure that an MKLDNN build is the default for x86 CPUs but optional for +# AArch64 (dependent on -DUSE_MKLDNN). cmake_dependent_option( - USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON - "CPU_INTEL" OFF) -# Ensure that an MKLDNN build is the default for x86 CPUs -# but optional for AArch64 (dependent on -DUSE_MKLDNN). -cmake_dependent_option( - USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." "${CPU_INTEL}" - "CPU_INTEL OR CPU_AARCH64" OFF) + USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." + "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF) cmake_dependent_option( USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF "USE_MKLDNN AND CPU_AARCH64" OFF) set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN}) -cmake_dependent_option( - USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF - "USE_MKLDNN" OFF) +cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN" + OFF) option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF) option(USE_DISTRIBUTED "Use distributed" ON) cmake_dependent_option( - USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON - "USE_DISTRIBUTED" OFF) + USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON + "USE_DISTRIBUTED" OFF) cmake_dependent_option( - USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." OFF - "USE_DISTRIBUTED" OFF) + USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." 
OFF + "USE_DISTRIBUTED" OFF) +cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF) +cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC" + OFF) cmake_dependent_option( - USE_SYSTEM_UCC "Use system-wide UCC" OFF - "USE_UCC" OFF) + USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON + "USE_DISTRIBUTED" OFF) cmake_dependent_option( - USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC" OFF) + USE_GLOO_WITH_OPENSSL + "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF + "USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF) +cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON + "USE_DISTRIBUTED;USE_GLOO" OFF) +cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON + "USE_DISTRIBUTED;USE_NCCL" OFF) +cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" + OFF) cmake_dependent_option( USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON "USE_DISTRIBUTED" OFF) @@ -334,22 +367,29 @@ cmake_dependent_option( USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." 
ON) option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF) -option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF) -cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) +option(BUILD_LIBTORCH_CPU_WITH_DEBUG + "Enable RelWithDebInfo for libtorch_cpu target only" OFF) +cmake_dependent_option( + USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) option(WERROR "Build with -Werror supported by the compiler" OFF) -option(DEBUG_CUDA "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)" OFF) +option( + DEBUG_CUDA + "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)" + OFF) option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF) -option(USE_PER_OPERATOR_HEADERS "Whether ATen should generate separate headers for each operator" ON) +option(USE_PER_OPERATOR_HEADERS + "Whether ATen should generate separate headers for each operator" ON) cmake_dependent_option( - BUILD_LAZY_TS_BACKEND "Build the lazy Torchscript backend, not compatible with mobile builds" ON - "NOT INTERN_BUILD_MOBILE" OFF) -cmake_dependent_option( - BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) -cmake_dependent_option( - BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" OFF "USE_CUDA" OFF) + BUILD_LAZY_TS_BACKEND + "Build the lazy Torchscript backend, not compatible with mobile builds" ON + "NOT INTERN_BUILD_MOBILE" OFF) +cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) +cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" + OFF "USE_CUDA" OFF) option(USE_MIMALLOC "Use mimalloc" OFF) -# Enable third party mimalloc library to improve memory allocation performance on Windows. +# Enable third party mimalloc library to improve memory allocation performance +# on Windows. 
if(WIN32) set(USE_MIMALLOC ON) endif() @@ -357,11 +397,20 @@ endif() if(USE_CCACHE) find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) - set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "C compiler launcher") - set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "CXX compiler launcher") - set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "CUDA compiler launcher") + set(CMAKE_C_COMPILER_LAUNCHER + "${CCACHE_PROGRAM}" + CACHE STRING "C compiler launcher") + set(CMAKE_CXX_COMPILER_LAUNCHER + "${CCACHE_PROGRAM}" + CACHE STRING "CXX compiler launcher") + set(CMAKE_CUDA_COMPILER_LAUNCHER + "${CCACHE_PROGRAM}" + CACHE STRING "CUDA compiler launcher") else() - message(STATUS "Could not find ccache. Consider installing ccache to speed up compilation.") + message( + STATUS + "Could not find ccache. Consider installing ccache to speed up compilation." + ) endif() endif() @@ -383,8 +432,10 @@ if(WIN32) set(USE_DISTRIBUTED OFF) set(USE_GLOO OFF) message( - WARNING "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " - "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv.") + WARNING + "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " + "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." + ) else() set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../) endif() @@ -392,12 +443,13 @@ if(WIN32) endif() if(USE_GLOO_WITH_OPENSSL) - set(USE_TCP_OPENSSL_LOAD ON CACHE STRING "") + set(USE_TCP_OPENSSL_LOAD + ON + CACHE STRING "") endif() # Linux distributions do not want too many embedded sources, in that sense we -# need to be able to build pytorch with an (almost) empty third_party -# directory. +# need to be able to build pytorch with an (almost) empty third_party directory. # USE_SYSTEM_LIBS is a shortcut variable to toggle all the # USE_SYSTEM_* # variables on. 
Individual USE_SYSTEM_* variables can be toggled with # USE_SYSTEM_LIBS being "OFF". @@ -437,61 +489,91 @@ if(USE_SYSTEM_LIBS) endif() # Used when building Caffe2 through setup.py -option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" ON) +option(BUILDING_WITH_TORCH_LIBS + "Tell cmake if Caffe2 is being built alongside torch libs" ON) -# /Z7 override option -# When generating debug symbols, CMake default to use the flag /Zi. -# However, it is not compatible with sccache. So we rewrite it off. +# /Z7 override option When generating debug symbols, CMake default to use the +# flag /Zi. However, it is not compatible with sccache. So we rewrite it off. # But some users don't use sccache; this override is for them. cmake_dependent_option( - MSVC_Z7_OVERRIDE "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)" ON - "MSVC" OFF) + MSVC_Z7_OVERRIDE + "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)" + ON + "MSVC" + OFF) if(NOT USE_SYSTEM_ONNX) - set(ONNX_NAMESPACE "onnx_torch" CACHE STRING "A namespace for ONNX; needed to build with other frameworks that share ONNX.") + set(ONNX_NAMESPACE + "onnx_torch" + CACHE + STRING + "A namespace for ONNX; needed to build with other frameworks that share ONNX." + ) else() - set(ONNX_NAMESPACE "onnx" CACHE STRING "A namespace for ONNX; needed to build with other frameworks that share ONNX.") + set(ONNX_NAMESPACE + "onnx" + CACHE + STRING + "A namespace for ONNX; needed to build with other frameworks that share ONNX." + ) endif() -set(SELECTED_OP_LIST "" CACHE STRING - "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default.") +set(SELECTED_OP_LIST + "" + CACHE + STRING + "Path to the yaml file that contains the list of operators to include for custom build. 
Include all operators by default." +) option( - STATIC_DISPATCH_BACKEND - "Name of the backend for which static dispatch code is generated, e.g.: CPU." - "") -option(USE_LIGHTWEIGHT_DISPATCH "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly." OFF) -if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND) - message(FATAL_ERROR "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.") -endif() + STATIC_DISPATCH_BACKEND + "Name of the backend for which static dispatch code is generated, e.g.: CPU." + "") option( - TRACING_BASED - "Master flag to build Lite Interpreter with tracing build option" + USE_LIGHTWEIGHT_DISPATCH + "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly." OFF) +if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND) + message( + FATAL_ERROR + "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.") +endif() +option(TRACING_BASED + "Master flag to build Lite Interpreter with tracing build option" OFF) option(BUILD_EXECUTORCH "Master flag to build Executorch" ON) -# This is a fix for a rare build issue on Ubuntu: -# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk +# This is a fix for a rare build issue on Ubuntu: symbol lookup error: +# miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: +# mkl_blas_dsyrk # https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu if(LINUX) - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") endif() if(MSVC) - # MSVC by default does not apply the correct __cplusplus version as specified by the C++ standard - # because MSVC is not a completely compliant implementation. 
This option forces MSVC to use the - # appropriate value given the requested --std option. This fixes a compilation issue mismatch - # between GCC/Clang and MSVC. + # MSVC by default does not apply the correct __cplusplus version as specified + # by the C++ standard because MSVC is not a completely compliant + # implementation. This option forces MSVC to use the appropriate value given + # the requested --std option. This fixes a compilation issue mismatch between + # GCC/Clang and MSVC. # - # See: - # * https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170 + # See: * + # https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170 # * https://en.cppreference.com/w/cpp/preprocessor/replace#Predefined_macros set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /Zc:__cplusplus") set(CMAKE_NINJA_CMCLDEPS_RC OFF) - foreach(flag_var - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + foreach( + flag_var + CMAKE_C_FLAGS + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELWITHDEBINFO) # Replace /Zi and /ZI with /Z7 if(MSVC_Z7_OVERRIDE) if(${flag_var} MATCHES "/Z[iI]") @@ -510,12 +592,12 @@ if(MSVC) endif() # /bigobj increases number of sections in .obj file, which is needed to link - # against libraries in Python 2.7 under Windows - # For Visual Studio generators, if /MP is not added, then we may need - # to add /MP to the flags. + # against libraries in Python 2.7 under Windows For Visual Studio + # generators, if /MP is not added, then we may need to add /MP to the flags. 
# For other generators like ninja, we don't need to add /MP because it is # already handled by the generator itself. - if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flag_var} MATCHES "/MP") + if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flag_var} MATCHES + "/MP") set(${flag_var} "${${flag_var}} /MP /bigobj") else() set(${flag_var} "${${flag_var}} /bigobj") @@ -523,37 +605,44 @@ if(MSVC) endforeach(flag_var) foreach(flag_var - CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL) + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL) if(${flag_var} MATCHES "/Z[iI7]") string(REGEX REPLACE "/Z[iI7]" "" ${flag_var} "${${flag_var}}") endif() endforeach(flag_var) - foreach(flag_var - CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO - CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO - CMAKE_SHARED_LINKER_FLAGS_DEBUG CMAKE_STATIC_LINKER_FLAGS_DEBUG - CMAKE_EXE_LINKER_FLAGS_DEBUG CMAKE_MODULE_LINKER_FLAGS_DEBUG) + foreach( + flag_var + CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_SHARED_LINKER_FLAGS_DEBUG + CMAKE_STATIC_LINKER_FLAGS_DEBUG + CMAKE_EXE_LINKER_FLAGS_DEBUG + CMAKE_MODULE_LINKER_FLAGS_DEBUG) # Switch off incremental linking in debug/relwithdebinfo builds - if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES "/INCREMENTAL:NO") - string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} "${${flag_var}}") + if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES + "/INCREMENTAL:NO") + string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} + "${${flag_var}}") endif() endforeach(flag_var) - foreach(flag_var - CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS - 
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS) + foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS + CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS) string(APPEND ${flag_var} " /ignore:4049 /ignore:4217 /ignore:4099") endforeach(flag_var) - foreach(flag_var - CMAKE_SHARED_LINKER_FLAGS) - # https://github.com/pytorch/pytorch/issues/91933: Don't set the manifest filename - # explicitly helps fix the linker error when linking torch_python.dll. The manifest - # file would still be there in the correct format torch_python.dll.manifest + foreach(flag_var CMAKE_SHARED_LINKER_FLAGS) + # https://github.com/pytorch/pytorch/issues/91933: Don't set the manifest + # filename explicitly helps fix the linker error when linking + # torch_python.dll. The manifest file would still be there in the correct + # format torch_python.dll.manifest if(${flag_var} MATCHES "/MANIFESTFILE:.*\\.manifest") - string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" "" ${flag_var} "${${flag_var}}") + string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" "" ${flag_var} + "${${flag_var}}") endif() endforeach(flag_var) @@ -567,11 +656,12 @@ endif(MSVC) string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not -# applicable to mobile are disabled by this variable. -# Setting `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can -# force it to do mobile build with host toolchain - which is useful for testing -# purpose. -if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) +# applicable to mobile are disabled by this variable. Setting +# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it +# to do mobile build with host toolchain - which is useful for testing purpose. 
+if(ANDROID + OR IOS + OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) set(INTERN_BUILD_MOBILE ON) message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND") set(BUILD_LAZY_TS_BACKEND OFF) @@ -585,12 +675,11 @@ if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) string(APPEND CMAKE_C_FLAGS " -fdata-sections") # Please note that the use of the following flags is required when linking - # against libtorch_cpu.a for mobile builds. - # -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive + # against libtorch_cpu.a for mobile builds. -Wl,--whole-archive -ltorch_cpu + # -Wl,--no-whole-archive # - # This allows global constructors to be included and run. Global - # constructors are used for operator/kernel registration with the - # PyTorch Dispatcher. + # This allows global constructors to be included and run. Global constructors + # are used for operator/kernel registration with the PyTorch Dispatcher. if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) # C10_MOBILE is derived from Android/iOS toolchain macros in @@ -599,10 +688,10 @@ if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) endif() if(DEFINED ENV{PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET}) - # If PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET is defined (env var), - # then define C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the - # number of dispatch keys in OperatorEntry::dispatchTable_ - # to reduce peak memory during library initialization. + # If PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET is defined (env var), then define + # C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the number of dispatch keys in + # OperatorEntry::dispatchTable_ to reduce peak memory during library + # initialization. 
string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE_TRIM_DISPATCH_KEYS") endif() endif() @@ -614,8 +703,8 @@ if(NOT DEFINED USE_BLAS) set(USE_BLAS ON) endif() -# Build libtorch mobile library, which contains ATen/TH ops and native support for -# TorchScript model, but doesn't contain not-yet-unified caffe2 ops; +# Build libtorch mobile library, which contains ATen/TH ops and native support +# for TorchScript model, but doesn't contain not-yet-unified caffe2 ops; if(INTERN_BUILD_MOBILE) if(NOT BUILD_SHARED_LIBS AND NOT "${SELECTED_OP_LIST}" STREQUAL "") string(APPEND CMAKE_CXX_FLAGS " -DNO_EXPORT") @@ -636,8 +725,8 @@ if(INTERN_BUILD_MOBILE) else() set(INTERN_USE_EIGEN_BLAS OFF) endif() - # Disable developing mobile interpreter for actual mobile build. - # Enable it elsewhere to capture build error. + # Disable developing mobile interpreter for actual mobile build. Enable it + # elsewhere to capture build error. set(INTERN_DISABLE_MOBILE_INTERP ON) endif() @@ -647,19 +736,23 @@ file(READ version.txt TORCH_DEFAULT_VERSION) string(REGEX REPLACE "\n$" "" TORCH_DEFAULT_VERSION "${TORCH_DEFAULT_VERSION}") if("${TORCH_DEFAULT_VERSION} " STREQUAL " ") message(WARNING "Could not get version from base 'version.txt'") - # If we can't get the version from the version file we should probably - # set it to something non-sensical like 0.0.0 + # If we can't get the version from the version file we should probably set it + # to something non-sensical like 0.0.0 set(TORCH_DEFAULT_VERSION, "0.0.0") endif() -set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" CACHE STRING "Torch build version") +set(TORCH_BUILD_VERSION + "${TORCH_DEFAULT_VERSION}" + CACHE STRING "Torch build version") if(DEFINED ENV{PYTORCH_BUILD_VERSION}) - set(TORCH_BUILD_VERSION "$ENV{PYTORCH_BUILD_VERSION}" - CACHE STRING "Torch build version" FORCE) + set(TORCH_BUILD_VERSION + "$ENV{PYTORCH_BUILD_VERSION}" + CACHE STRING "Torch build version" FORCE) endif() if(NOT TORCH_BUILD_VERSION) # An empty string was specified so 
force version to the default - set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" - CACHE STRING "Torch build version" FORCE) + set(TORCH_BUILD_VERSION + "${TORCH_DEFAULT_VERSION}" + CACHE STRING "Torch build version" FORCE) endif() caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION}) caffe2_parse_version_str(CAFFE2 ${TORCH_BUILD_VERSION}) @@ -677,32 +770,53 @@ enable_testing() # ---[ Build variables set within the cmake tree include(cmake/BuildVariables.cmake) -set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should build.") +set(CAFFE2_ALLOWLIST + "" + CACHE STRING "A allowlist file of files that one should build.") # Set default build type if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - defaulting to Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) + message(STATUS "Build type not set - defaulting to Release") + set(CMAKE_BUILD_TYPE + "Release" + CACHE + STRING + "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." 
+ FORCE) endif() # The below means we are cross compiling for arm64 or x86_64 on MacOSX -if(NOT IOS AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$") +if(NOT IOS + AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" + AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$") set(CROSS_COMPILING_MACOSX TRUE) - # We need to compile a universal protoc to not fail protobuf build - # We set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed the cmake compiler check for cross-compiling - set(protoc_build_command "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1") - # We write to a temp scriptfile because CMake COMMAND dislikes double quotes in commands - file(WRITE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh "#!/bin/bash\n${protoc_build_command}") - file(COPY ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh DESTINATION ${PROJECT_SOURCE_DIR}/scripts/ FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ) - execute_process(COMMAND ./scripts/tmp_protoc_script.sh - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT) - file(REMOVE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh ${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh) + # We need to compile a universal protoc to not fail protobuf build We set + # CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed + # the cmake compiler check for cross-compiling + set(protoc_build_command + "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1" + ) + # We write to a temp scriptfile because CMake COMMAND dislikes double quotes + # in commands + file(WRITE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh + "#!/bin/bash\n${protoc_build_command}") + file( + COPY 
${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh + DESTINATION ${PROJECT_SOURCE_DIR}/scripts/ + FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ) + execute_process( + COMMAND ./scripts/tmp_protoc_script.sh + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT) + file(REMOVE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh + ${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh) if(NOT BUILD_HOST_PROTOC_RESULT EQUAL "0") message(FATAL_ERROR "Could not compile universal protoc.") endif() - set(PROTOBUF_PROTOC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") - set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") + set(PROTOBUF_PROTOC_EXECUTABLE + "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") + set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE + "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc") endif() # ---[ Misc checks to cope with various compiler modes @@ -711,9 +825,12 @@ include(cmake/MiscCheck.cmake) # External projects include(ExternalProject) -# ---[ Dependencies -# ---[ FBGEMM doesn't work on x86 32bit and CMAKE_SYSTEM_PROCESSOR thinks its 64bit -if(USE_FBGEMM AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL 4) OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")) +# ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and +# CMAKE_SYSTEM_PROCESSOR thinks its 64bit +if(USE_FBGEMM + AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL + 4) + OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")) set(USE_FBGEMM OFF) endif() @@ -724,16 +841,15 @@ if(MSVC) append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS) endif() -# Note for ROCM platform: -# 1. USE_ROCM is always ON until include(cmake/Dependencies.cmake) -# 2. 
USE_CUDA will become OFF during re-configuration -# Truth Table: -# CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default -# CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, FLASH evaluates to ON by default -# ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default -# ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, FLASH evaluates to ON by default -# CPU 1st pass: USE_CUDA=False(Cmd Option);USE_ROCM=True, FLASH evaluates to OFF by default -# CPU 2nd pass: USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default +# Note for ROCM platform: 1. USE_ROCM is always ON until +# include(cmake/Dependencies.cmake) 2. USE_CUDA will become OFF during +# re-configuration Truth Table: CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, +# FLASH evaluates to ON by default CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, +# FLASH evaluates to ON by default ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, +# FLASH evaluates to ON by default ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, +# FLASH evaluates to ON by default CPU 1st pass: USE_CUDA=False(Cmd +# Option);USE_ROCM=True, FLASH evaluates to OFF by default CPU 2nd pass: +# USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default # Thus we cannot tell ROCM 2nd pass and CPU 1st pass # # The only solution is to include(cmake/Dependencies.cmake), and defer the @@ -744,35 +860,34 @@ include(cmake/Dependencies.cmake) cmake_dependent_option( USE_FLASH_ATTENTION "Whether to build the flash_attention kernel for scaled dot product attention.\ - Will be disabled if not supported by the platform" ON - "USE_CUDA OR USE_ROCM;NOT MSVC" OFF) + Will be disabled if not supported by the platform" + ON + "USE_CUDA OR USE_ROCM;NOT MSVC" + OFF) -# We are currenlty not using alibi attention for Flash -# So we disable this feature by default -# We dont currently document this feature because we don't +# We are currenlty not using alibi attention for Flash So we disable this +# feature by 
default We dont currently document this feature because we don't # Suspect users building from source will need this add_definitions(-DFLASHATTENTION_DISABLE_ALIBI) -# CAVEAT: Again, do not check USE_ROCM here -# Flash Attention2 will error while building for sm52 while Mem Eff Attention won't +# CAVEAT: Again, do not check USE_ROCM here Flash Attention2 will error while +# building for sm52 while Mem Eff Attention won't cmake_dependent_option( USE_MEM_EFF_ATTENTION "Enable memory-efficient attention for scaled dot product attention.\ - Will be disabled if not supported by the platform" ON - "USE_CUDA" OFF) + Will be disabled if not supported by the platform" ON "USE_CUDA" OFF) if(DEBUG_CUDA) string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -lineinfo") string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -lineinfo") - # CUDA-12.1 crashes when trying to compile with --source-in-ptx - # See https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893 + # CUDA-12.1 crashes when trying to compile with --source-in-ptx See + # https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893 if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.1) string(APPEND CMAKE_CUDA_FLAGS_DEBUG " --source-in-ptx") string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " --source-in-ptx") endif() endif(DEBUG_CUDA) - if(USE_FBGEMM) string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM") endif() @@ -836,12 +951,15 @@ include(cmake/Allowlist.cmake) # ---[ Set link flag, handle additional deps for gcc 4.8 and above if(CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID) - message(STATUS "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line") + message( + STATUS + "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line" + ) list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc) endif() -# ---[ Build flags -# Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM +# ---[ Build flags Re-include to override append_cxx_flag_if_supported from +# third_party/FBGEMM 
include(cmake/public/utils.cmake) if(NOT MSVC) string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC") @@ -855,7 +973,8 @@ if(NOT MSVC) append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Werror=bool-operation" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-missing-field-initializers" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-missing-field-initializers" + CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-array-bounds" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-unknown-pragmas" CMAKE_CXX_FLAGS) @@ -868,13 +987,16 @@ if(NOT MSVC) append_cxx_flag_if_supported("-Wvla-extension" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wsuggest-override" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wnewline-eof" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Winconsistent-missing-override" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Winconsistent-missing-destructor-override" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Winconsistent-missing-override" + CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Winconsistent-missing-destructor-override" + CMAKE_CXX_FLAGS) if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") string(APPEND CMAKE_CXX_FLAGS " -Wno-pass-failed") endif() if(CMAKE_COMPILER_IS_GNUCXX) - # Suppress "The ABI for passing parameters with 64-byte alignment has changed in GCC 4.6" + # Suppress "The ABI for passing parameters with 64-byte alignment has + # changed in GCC 4.6" string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi") endif() @@ -886,37 +1008,46 @@ if(NOT MSVC) message(WARNING "Refusing to use gold when USE_MPI=1") else() execute_process( - COMMAND - "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version - ERROR_QUIET - OUTPUT_VARIABLE LD_VERSION) + COMMAND "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version + ERROR_QUIET + OUTPUT_VARIABLE LD_VERSION) if(NOT 
"${LD_VERSION}" MATCHES "GNU gold") - message(WARNING "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off") + message( + WARNING + "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off" + ) set(USE_GOLD_LINKER OFF) else() message(STATUS "ld.gold is available, using it to link") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold") - set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=gold") + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold") + set(CMAKE_MODULE_LINKER_FLAGS + "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=gold") endif() endif() endif() append_cxx_flag_if_supported("-Wno-error=pedantic" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-error=old-style-cast" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-error=inconsistent-missing-override" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-error=inconsistent-missing-destructor-override" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-error=inconsistent-missing-override" + CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported( + "-Wno-error=inconsistent-missing-destructor-override" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-invalid-partial-specialization" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-invalid-partial-specialization" + CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable" + CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS) if(${USE_COLORIZE_OUTPUT}) - # Why compiler checks are necessary even when `try_compile` is used - # Because of the bug in ccache that can incorrectly identify `-fcolor-diagnostics` - # As 
supported by GCC, see https://github.com/ccache/ccache/issues/740 (for older ccache) - # and https://github.com/ccache/ccache/issues/1275 (for newer ones) + # Why compiler checks are necessary even when `try_compile` is used Because + # of the bug in ccache that can incorrectly identify `-fcolor-diagnostics` + # As supported by GCC, see https://github.com/ccache/ccache/issues/740 (for + # older ccache) and https://github.com/ccache/ccache/issues/1275 (for newer + # ones) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") append_cxx_flag_if_supported("-fdiagnostics-color=always" CMAKE_CXX_FLAGS) else() @@ -948,8 +1079,8 @@ else() add_compile_definitions(_UCRT_LEGACY_INFINITY) # disable min/max macros add_compile_definitions(NOMINMAX) - # Turn off these warnings on Windows. - # destructor was implicitly defined as delete + # Turn off these warnings on Windows. destructor was implicitly defined as + # delete append_cxx_flag_if_supported("/wd4624" CMAKE_CXX_FLAGS) # unknown pragma append_cxx_flag_if_supported("/wd4068" CMAKE_CXX_FLAGS) @@ -969,10 +1100,10 @@ else() append_cxx_flag_if_supported("/wd4273" CMAKE_CXX_FLAGS) endif() - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") include(CheckCSourceCompiles) - check_c_source_compiles("#include + check_c_source_compiles( + "#include int main() { float a[] = {1.0, 1.0}; float32x4x2_t v; @@ -980,7 +1111,8 @@ int main() { v.val[1] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL)); vst1q_f32_x2(a, v); return 0; -}" HAS_VST1) +}" + HAS_VST1) if(NOT HAS_VST1) string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VST1") @@ -989,47 +1121,60 @@ endif() if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") include(CheckCSourceCompiles) - check_c_source_compiles("#include + check_c_source_compiles( + "#include int main() { float a[] = {1.0, 1.0}; vld1q_f32_x2(a); return 0; -}" HAS_VLD1) +}" + HAS_VLD1) if(NOT HAS_VLD1) string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1") endif() endif() - # Add code coverage flags to supported compilers 
if(USE_CPP_CODE_COVERAGE) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path") - string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path") + string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path") + string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path") elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping") - string(APPEND CMAKE_CXX_FLAGS " -fprofile-instr-generate -fcoverage-mapping") + string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping") + string(APPEND CMAKE_CXX_FLAGS + " -fprofile-instr-generate -fcoverage-mapping") else() - message(ERROR "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported") + message( + ERROR + "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported") endif() endif() if(APPLE) - if(USE_MPS) - string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc") - string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS") - string(APPEND CMAKE_SHARED_LINKER_FLAGS " -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal") - # To suppress MPSGraph availability warnings - append_cxx_flag_if_supported("-Wno-unguarded-availability-new" CMAKE_OBJCXX_FLAGS) - endif() - append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS) + if(USE_MPS) + string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc") + string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS") + string( + APPEND + CMAKE_SHARED_LINKER_FLAGS + " -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal" + ) + # To suppress MPSGraph availability warnings + append_cxx_flag_if_supported("-Wno-unguarded-availability-new" + CMAKE_OBJCXX_FLAGS) + endif() + 
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS) endif() if(EMSCRIPTEN) - string(APPEND CMAKE_CXX_FLAGS " -Wno-implicit-function-declaration -DEMSCRIPTEN -s DISABLE_EXCEPTION_CATCHING=0") + string( + APPEND + CMAKE_CXX_FLAGS + " -Wno-implicit-function-declaration -DEMSCRIPTEN -s DISABLE_EXCEPTION_CATCHING=0" + ) endif() append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS) @@ -1048,15 +1193,13 @@ if(NOT APPLE AND UNIX) list(APPEND Caffe2_DEPENDENCY_LIBS dl) endif() -# Prefix path to Caffe2 headers. -# If a directory containing installed Caffe2 headers was inadvertently -# added to the list of include directories, prefixing +# Prefix path to Caffe2 headers. If a directory containing installed Caffe2 +# headers was inadvertently added to the list of include directories, prefixing # PROJECT_SOURCE_DIR means this source tree always takes precedence. include_directories(BEFORE ${PROJECT_SOURCE_DIR}) -# Prefix path to generated Caffe2 headers. -# These need to take precedence over their empty counterparts located -# in PROJECT_SOURCE_DIR. +# Prefix path to generated Caffe2 headers. These need to take precedence over +# their empty counterparts located in PROJECT_SOURCE_DIR. 
include_directories(BEFORE ${PROJECT_BINARY_DIR}) include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/) @@ -1096,107 +1239,108 @@ if(BUILD_DOCS) configure_file(${DOXYGEN_C_IN} ${DOXYGEN_C_OUT} @ONLY) configure_file(${DOXYGEN_P_IN} ${DOXYGEN_P_OUT} @ONLY) - add_custom_target(doc_doxygen_c ALL - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Generating C++ API documentation with Doxygen" - VERBATIM) + add_custom_target( + doc_doxygen_c ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generating C++ API documentation with Doxygen" + VERBATIM) - add_custom_target(doc_doxygen_python ALL - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Generating Python API documentation with Doxygen" - VERBATIM) + add_custom_target( + doc_doxygen_python ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generating Python API documentation with Doxygen" + VERBATIM) else() - message(FATAL_ERROR "Doxygen needs to be installed to generate the documentation") + message( + FATAL_ERROR "Doxygen needs to be installed to generate the documentation") endif() endif() -# ---[ CMake related files -# Uninistall option. +# ---[ CMake related files Uninistall option. 
if(NOT TARGET caffe2_uninstall) configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake - IMMEDIATE @ONLY) + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake IMMEDIATE @ONLY) - add_custom_target(caffe2_uninstall - COMMAND ${CMAKE_COMMAND} -P - ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) + add_custom_target( + caffe2_uninstall COMMAND ${CMAKE_COMMAND} -P + ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) endif() -# ---[ Make configuration files for cmake to allow dependent libraries -# easier access to Caffe2. +# ---[ Make configuration files for cmake to allow dependent libraries easier +# access to Caffe2. -if((NOT USE_GLOG) OR (NOT USE_GFLAGS) OR BUILD_CUSTOM_PROTOBUF) - message(WARNING - "Generated cmake files are only fully tested if one builds " - "with system glog, gflags, and protobuf. Other settings may " - "generate files that are not well tested.") +if((NOT USE_GLOG) + OR(NOT USE_GFLAGS) + OR BUILD_CUSTOM_PROTOBUF) + message(WARNING "Generated cmake files are only fully tested if one builds " + "with system glog, gflags, and protobuf. Other settings may " + "generate files that are not well tested.") endif() if(USE_CUDA OR USE_ROCM) - # TODO: check if we should include other cuda dependency libraries - # to the interface as well. + # TODO: check if we should include other cuda dependency libraries to the + # interface as well. endif() -# Note(jiayq): when building static libraries, all PRIVATE dependencies -# will also become interface libraries, and as a result if there are any -# dependency libraries that are not exported, the following install export -# script will fail. As a result, we will only provide the targets cmake -# files for shared lib installation. 
For more info, read: +# Note(jiayq): when building static libraries, all PRIVATE dependencies will +# also become interface libraries, and as a result if there are any dependency +# libraries that are not exported, the following install export script will +# fail. As a result, we will only provide the targets cmake files for shared lib +# installation. For more info, read: # https://cmake.org/pipermail/cmake/2016-May/063400.html if(BUILD_SHARED_LIBS) - configure_file( - ${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in - ${PROJECT_BINARY_DIR}/Caffe2Config.cmake - @ONLY) - install(FILES - ${PROJECT_BINARY_DIR}/Caffe2Config.cmake + configure_file(${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in + ${PROJECT_BINARY_DIR}/Caffe2Config.cmake @ONLY) + install( + FILES ${PROJECT_BINARY_DIR}/Caffe2Config.cmake + DESTINATION share/cmake/Caffe2 + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake + ${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake + DESTINATION share/cmake/Caffe2/public + COMPONENT dev) + install( + DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) + if(NOT BUILD_LIBTORCHLESS) + install( + EXPORT Caffe2Targets DESTINATION share/cmake/Caffe2 - COMPONENT dev) - 
install(FILES - ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake - ${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake - DESTINATION share/cmake/Caffe2/public - COMPONENT dev) - install(DIRECTORY - ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - install(FILES - ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - install(FILES - ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - install(FILES - ${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake - DESTINATION share/cmake/Caffe2/ - COMPONENT dev) - - install(EXPORT Caffe2Targets DESTINATION share/cmake/Caffe2 FILE Caffe2Targets.cmake COMPONENT dev) + endif() else() - message(WARNING - "Generated cmake files are only available when building " - "shared libs.") + message(WARNING "Generated cmake files are only available when building " + "shared libs.") endif() -# ---[ Binaries -# Binaries will be built after the Caffe2 main libraries and the modules -# are built. For the binaries, they will be linked to the Caffe2 main +# ---[ Binaries Binaries will be built after the Caffe2 main libraries and the +# modules are built. For the binaries, they will be linked to the Caffe2 main # libraries, as well as all the modules that are built with Caffe2 (the ones # built in the previous Modules section above). 
if(BUILD_BINARY) @@ -1222,29 +1366,38 @@ endif() # Parse custom debug info if(DEFINED USE_CUSTOM_DEBINFO) - string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}") - message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}") + string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}") + message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}") - string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}") + string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}") - # Set the COMPILE_FLAGS property for each source file - foreach(SOURCE_FILE ${SOURCE_FILES_LIST}) - # We have to specify the scope here. We do this by specifying the - # targets we care about and caffe2/ for all test targets defined there - set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch") - set_source_files_properties(${SOURCE_FILE} DIRECTORY "caffe2/" TARGET_DIRECTORY ${ALL_PT_TARGETS} PROPERTIES COMPILE_FLAGS "-g") - endforeach() + # Set the COMPILE_FLAGS property for each source file + foreach(SOURCE_FILE ${SOURCE_FILES_LIST}) + # We have to specify the scope here. 
We do this by specifying the targets we + # care about and caffe2/ for all test targets defined there + if(BUILD_LIBTORCHLESS) + set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}") + else() + # @todo test if we can remove this + set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch") + endif() + set_source_files_properties( + ${SOURCE_FILE} DIRECTORY "caffe2/" TARGET_DIRECTORY ${ALL_PT_TARGETS} + PROPERTIES COMPILE_FLAGS "-g") + endforeach() - # Link everything with debug info when any file is in debug mode - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -g") + # Link everything with debug info when any file is in debug mode + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -g") endif() # Bundle PTXAS if needed if(BUILD_BUNDLE_PTXAS AND USE_CUDA) - if(NOT EXISTS "${PROJECT_SOURCE_DIR}/build/bin/ptxas") - message(STATUS "Copying PTXAS into the bin folder") - file(COPY "${CUDAToolkit_BIN_DIR}/ptxas" DESTINATION "${PROJECT_BINARY_DIR}") - endif() - install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" DESTINATION "${CMAKE_INSTALL_BINDIR}") + if(NOT EXISTS "${PROJECT_SOURCE_DIR}/build/bin/ptxas") + message(STATUS "Copying PTXAS into the bin folder") + file(COPY "${CUDAToolkit_BIN_DIR}/ptxas" + DESTINATION "${PROJECT_BINARY_DIR}") + endif() + install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" + DESTINATION "${CMAKE_INSTALL_BINDIR}") endif() diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt index 1f742f4c176..82eb9690383 100644 --- a/c10/CMakeLists.txt +++ b/c10/CMakeLists.txt @@ -12,120 +12,128 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # protobuf header files, because protobuf header files will transitively force # one to link against a specific protobuf version. -# ---[ Configure macro file. 
-set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in -set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in -set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in -set(C10_USE_NUMA ${USE_NUMA}) -set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) -set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) -configure_file( - ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in - ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) - -# Note: if you want to add ANY dependency to the c10 library, make sure you -# check with the core PyTorch developers as the dependency will be -# transitively passed on to all libraries dependent on PyTorch. -file(GLOB C10_SRCS - *.cpp - core/*.cpp - core/impl/*.cpp - mobile/*.cpp - macros/*.cpp - util/*.cpp - ) -file(GLOB C10_HEADERS - *.h - core/*.h - core/impl/*.h - mobile/*.h - macros/*.h - util/*.h - ) -add_library(c10 ${C10_SRCS} ${C10_HEADERS}) -target_compile_options_if_supported(c10 "-Wdeprecated") -if(HAVE_SOVERSION) - set_target_properties(c10 PROPERTIES - VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) -endif() -# If building shared library, set dllimport/dllexport proper. -target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB") -# Enable hidden visibility if compiler supports it. 
-if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) - target_compile_options(c10 PRIVATE "-fvisibility=hidden") +if(BUILD_LIBTORCHLESS) + find_library(C10_LIB c10 PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) +else() + set(C10_LIB c10) endif() -option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF) -if(C10_USE_IWYU) - find_program(iwyu NAMES include-what-you-use) - if(iwyu) - set(iwyu_cmd - "include-what-you-use" - "-Xiwyu" - "--transitive_includes_only" - "-Xiwyu" - "--no_fwd_decls" - "-Xiwyu" - "--prefix_header_includes=keep" - "-Xiwyu" - "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp" - ) - set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd}) + # ---[ Configure macro file. + set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in + set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in + set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in + set(C10_USE_NUMA ${USE_NUMA}) + set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) + set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) + configure_file( + ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in + ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) + + # Note: if you want to add ANY dependency to the c10 library, make sure you + # check with the core PyTorch developers as the dependency will be + # transitively passed on to all libraries dependent on PyTorch. + file(GLOB C10_SRCS + *.cpp + core/*.cpp + core/impl/*.cpp + mobile/*.cpp + macros/*.cpp + util/*.cpp + ) + file(GLOB C10_HEADERS + *.h + core/*.h + core/impl/*.h + mobile/*.h + macros/*.h + util/*.h + ) +if(NOT BUILD_LIBTORCHLESS) + add_library(c10 ${C10_SRCS} ${C10_HEADERS}) + target_compile_options_if_supported(c10 "-Wdeprecated") + if(HAVE_SOVERSION) + set_target_properties(c10 PROPERTIES + VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) + endif() + # If building shared library, set dllimport/dllexport proper. 
+ target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB") + # Enable hidden visibility if compiler supports it. + if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10 PRIVATE "-fvisibility=hidden") endif() -endif() -if(WERROR) - target_compile_options_if_supported(c10 PRIVATE "-Werror=sign-compare") - target_compile_options_if_supported(c10 PRIVATE "-Werror=shadow") -endif() + option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF) + if(C10_USE_IWYU) + find_program(iwyu NAMES include-what-you-use) + if(iwyu) + set(iwyu_cmd + "include-what-you-use" + "-Xiwyu" + "--transitive_includes_only" + "-Xiwyu" + "--no_fwd_decls" + "-Xiwyu" + "--prefix_header_includes=keep" + "-Xiwyu" + "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp" + ) + set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd}) + endif() + endif() -# ---[ Dependency of c10 -if(C10_USE_GFLAGS) - target_link_libraries(c10 PUBLIC gflags) -endif() + if(WERROR) + target_compile_options_if_supported(c10 PRIVATE "-Werror=sign-compare") + target_compile_options_if_supported(c10 PRIVATE "-Werror=shadow") + endif() -if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) -endif() -target_link_libraries(c10 PRIVATE fmt::fmt-header-only) + # ---[ Dependency of c10 + if(C10_USE_GFLAGS) + target_link_libraries(c10 PUBLIC gflags) + endif() -if(C10_USE_NUMA) - message(STATUS "NUMA paths:") - message(STATUS ${Numa_INCLUDE_DIR}) - message(STATUS ${Numa_LIBRARIES}) - target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) - target_link_libraries(c10 PRIVATE ${Numa_LIBRARIES}) -else() - message(STATUS "don't use NUMA") -endif() + if(C10_USE_GLOG) + target_link_libraries(c10 PUBLIC glog::glog) + endif() + target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") - target_link_libraries(c10 PRIVATE cpuinfo) -endif() + if(C10_USE_NUMA) + 
message(STATUS "NUMA paths:") + message(STATUS ${Numa_INCLUDE_DIR}) + message(STATUS ${Numa_LIBRARIES}) + target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) + target_link_libraries(c10 PRIVATE ${Numa_LIBRARIES}) + else() + message(STATUS "don't use NUMA") + endif() -find_package(Backtrace) -if(Backtrace_FOUND) - target_include_directories(c10 PRIVATE ${Backtrace_INCLUDE_DIRS}) - target_link_libraries(c10 PRIVATE ${Backtrace_LIBRARIES}) - target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=1) -else() - target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=0) -endif() + if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") + target_link_libraries(c10 PRIVATE cpuinfo) + endif() -if(USE_MIMALLOC) - target_link_libraries(c10 PRIVATE "mimalloc-static") - add_dependencies(c10 mimalloc-static) -endif() + find_package(Backtrace) + if(Backtrace_FOUND) + target_include_directories(c10 PRIVATE ${Backtrace_INCLUDE_DIRS}) + target_link_libraries(c10 PRIVATE ${Backtrace_LIBRARIES}) + target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=1) + else() + target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=0) + endif() -if(ANDROID) - target_link_libraries(c10 PRIVATE log) -endif() + if(USE_MIMALLOC) + target_link_libraries(c10 PRIVATE "mimalloc-static") + add_dependencies(c10 mimalloc-static) + endif() -target_include_directories( - c10 PUBLIC - $ - $ - $) + if(ANDROID) + target_link_libraries(c10 PRIVATE log) + endif() + + target_include_directories( + c10 PUBLIC + $ + $ + $) +endif() add_subdirectory(test) add_subdirectory(benchmark) @@ -144,11 +152,14 @@ if(USE_XPU) add_subdirectory(xpu) endif() -# ---[ Installation -# Note: for now, we will put all export path into one single Caffe2Targets group -# to deal with the cmake deployment need. Inside the Caffe2Targets set, the -# individual libraries like libc10.so and libcaffe2.so are still self-contained. 
-install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib) +if(NOT BUILD_LIBTORCHLESS) + # ---[ Installation + # Note: for now, we will put all export path into one single Caffe2Targets group + # to deal with the cmake deployment need. Inside the Caffe2Targets set, the + # individual libraries like libc10.so and libcaffe2.so are still self-contained. + install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib) +endif() + install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/c10/benchmark/CMakeLists.txt b/c10/benchmark/CMakeLists.txt index 9658e603761..16b268e3800 100644 --- a/c10/benchmark/CMakeLists.txt +++ b/c10/benchmark/CMakeLists.txt @@ -6,7 +6,7 @@ if(BUILD_TEST) get_filename_component(bench_file_name ${bench_src} NAME_WE) set(bench_name "c10_${bench_file_name}") add_executable(${bench_name} "${bench_src}") - target_link_libraries(${bench_name} c10 benchmark) + target_link_libraries(${bench_name} ${C10_LIB} benchmark) if(INSTALL_TEST) install(TARGETS ${bench_name} DESTINATION test) endif() diff --git a/c10/cuda/CMakeLists.txt b/c10/cuda/CMakeLists.txt index c5c45c68d8f..893a8556297 100644 --- a/c10/cuda/CMakeLists.txt +++ b/c10/cuda/CMakeLists.txt @@ -12,6 +12,10 @@ configure_file( ${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in ${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h) +if(BUILD_LIBTORCHLESS) + find_library(C10_CUDA_LIB c10_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) +endif() + # Note: if you want to add ANY dependency to the c10 library, make sure you # check with the core PyTorch developers as the dependency will be # transitively passed on to all libraries dependent on PyTorch. @@ -47,36 +51,42 @@ set(C10_CUDA_HEADERS impl/CUDATest.h ) set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE) -torch_cuda_based_add_library(c10_cuda ${C10_CUDA_SRCS} ${C10_CUDA_HEADERS}) -set(CUDA_LINK_LIBRARIES_KEYWORD) -# If building shared library, set dllimport/dllexport proper. 
-target_compile_options(c10_cuda PRIVATE "-DC10_CUDA_BUILD_MAIN_LIB") -# Enable hidden visibility if compiler supports it. -if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) - target_compile_options(c10_cuda PRIVATE "-fvisibility=hidden") -endif() -# ---[ Dependency of c10_cuda -target_link_libraries(c10_cuda PUBLIC c10 torch::cudart) +if(NOT BUILD_LIBTORCHLESS) + torch_cuda_based_add_library(c10_cuda ${C10_CUDA_SRCS} ${C10_CUDA_HEADERS}) + set(CUDA_LINK_LIBRARIES_KEYWORD) + # If building shared library, set dllimport/dllexport proper. + target_compile_options(c10_cuda PRIVATE "-DC10_CUDA_BUILD_MAIN_LIB") + # Enable hidden visibility if compiler supports it. + if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10_cuda PRIVATE "-fvisibility=hidden") + endif() -if(NOT WIN32) -target_link_libraries(c10_cuda PRIVATE dl) -target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED") -endif() + # ---[ Dependency of c10_cuda + target_link_libraries(c10_cuda PUBLIC ${C10_LIB} torch::cudart) -target_include_directories( - c10_cuda PUBLIC - $ - $ - $) + if(NOT WIN32) + target_link_libraries(c10_cuda PRIVATE dl) + target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED") + endif() -add_subdirectory(test) + target_include_directories( + c10_cuda PUBLIC + $ + $ + $) + set(C10_CUDA_LIB c10_cuda) # ---[ Installation # Note: for now, we will put all export path into one single Caffe2Targets group # to deal with the cmake deployment need. Inside the Caffe2Targets set, the # individual libraries like libc10.so and libcaffe2.so are still self-contained. 
install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib) + +endif() + +add_subdirectory(test) + foreach(file ${C10_CUDA_HEADERS}) get_filename_component( dir ${file} DIRECTORY ) install( FILES ${file} DESTINATION include/c10/cuda/${dir} ) diff --git a/c10/cuda/test/CMakeLists.txt b/c10/cuda/test/CMakeLists.txt index eed7fdff42c..7a93087f5ee 100644 --- a/c10/cuda/test/CMakeLists.txt +++ b/c10/cuda/test/CMakeLists.txt @@ -15,7 +15,7 @@ if(BUILD_TEST) get_filename_component(test_file_name ${test_src} NAME_WE) set(test_name "c10_cuda_${test_file_name}") add_executable(${test_name} "${test_src}") - target_link_libraries(${test_name} c10_cuda gtest_main) + target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gtest_main) add_test(NAME ${test_name} COMMAND $) if(INSTALL_TEST) install(TARGETS ${test_name} DESTINATION test) diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt index f4273ca2055..a6442e01d2e 100644 --- a/c10/hip/CMakeLists.txt +++ b/c10/hip/CMakeLists.txt @@ -5,6 +5,10 @@ include(../../cmake/public/utils.cmake) +if(BUILD_LIBTORCHLESS) + find_library(C10_HIP_LIB c10_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) +endif() + # ---[ Configure macro file. set(C10_HIP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in configure_file( @@ -26,36 +30,40 @@ file(GLOB __c10_hip_srcs_cpp *.cc impl/*.cc) set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) file(GLOB_RECURSE C10_HIP_HEADERS *.h) -hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS}) -# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake -target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS}) +if(NOT BUILD_LIBTORCHLESS) + hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS}) -# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be -# minimal. 
I'm not sure if we need hip_hcc or not; for now leave it out + # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake + target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS}) -# If building shared library, set dllimport/dllexport proper. -target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB") -# Enable hidden visibility if compiler supports it. -if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) - target_compile_options(c10_hip PRIVATE "-fvisibility=hidden") + # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be + # minimal. I'm not sure if we need hip_hcc or not; for now leave it out + + # If building shared library, set dllimport/dllexport proper. + target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB") + # Enable hidden visibility if compiler supports it. + if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10_hip PRIVATE "-fvisibility=hidden") + endif() + + # ---[ Dependency of c10_hip + target_link_libraries(c10_hip PUBLIC c10) + + target_link_libraries(c10_hip PUBLIC ${PYTORCH_HIP_LIBRARIES}) + + target_include_directories( + c10_hip PUBLIC + $ + $ + $) + install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib) + set(C10_HIP_LIB c10_hip) endif() -# ---[ Dependency of c10_hip -target_link_libraries(c10_hip PUBLIC c10) - -target_link_libraries(c10_hip PUBLIC ${PYTORCH_HIP_LIBRARIES}) - -target_include_directories( - c10_hip PUBLIC - $ - $ - $) - add_subdirectory(test) # ---[ Installation -install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib) install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/c10/test/CMakeLists.txt b/c10/test/CMakeLists.txt index a1ca4bb51b7..7f2a61246c6 100644 --- a/c10/test/CMakeLists.txt +++ b/c10/test/CMakeLists.txt @@ -9,7 +9,7 @@ if(BUILD_TEST) if(NOT MSVC) target_compile_options(${test_name} PRIVATE -Wno-unused-variable) endif() - target_link_libraries(${test_name} c10 gmock gtest 
gtest_main)
+    target_link_libraries(${test_name} ${C10_LIB} gmock gtest gtest_main)
     add_test(NAME ${test_name} COMMAND $)
     if(INSTALL_TEST)
       install(TARGETS ${test_name} DESTINATION test)
diff --git a/c10/xpu/CMakeLists.txt b/c10/xpu/CMakeLists.txt
index c14f1790d9d..d06d0f0aa92 100644
--- a/c10/xpu/CMakeLists.txt
+++ b/c10/xpu/CMakeLists.txt
@@ -4,6 +4,10 @@
 include(../../cmake/public/xpu.cmake)
 
+if(BUILD_LIBTORCHLESS)
+  find_library(C10_XPU_LIB c10_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+endif()
+
 set(C10_XPU_SRCS
   XPUCachingAllocator.cpp
   XPUFunctions.cpp
@@ -19,7 +23,7 @@ set(C10_XPU_HEADERS
   XPUStream.h
   impl/XPUGuardImpl.h
 )
-
+if(NOT BUILD_LIBTORCHLESS)
 add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
 target_compile_options(c10_xpu PRIVATE "-DC10_XPU_BUILD_MAIN_LIB")
 # Enable hidden visibility if compiler supports it.
@@ -35,11 +39,13 @@ target_include_directories(
   $
   $
 )
-
-add_subdirectory(test)
+  install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
+  set(C10_XPU_LIB c10_xpu)
+  add_subdirectory(test)
+endif()
 
 # ---[ Installation
-install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
+
 foreach(file ${C10_XPU_HEADERS})
   get_filename_component(dir ${file} DIRECTORY)
   install(FILES ${file} DESTINATION include/c10/xpu/${dir})
diff --git a/c10/xpu/test/CMakeLists.txt b/c10/xpu/test/CMakeLists.txt
index fa91cc9d171..0f0c85c68c8 100644
--- a/c10/xpu/test/CMakeLists.txt
+++ b/c10/xpu/test/CMakeLists.txt
@@ -11,7 +11,7 @@ if(BUILD_TEST)
     get_filename_component(test_file_name ${test_src} NAME_WE)
     set(test_name "c10_xpu_${test_file_name}")
     add_executable(${test_name} "${test_src}")
-    target_link_libraries(${test_name} c10_xpu gtest_main)
+    target_link_libraries(${test_name} ${C10_XPU_LIB} gtest_main)
     add_test(NAME ${test_name} COMMAND $)
     if(INSTALL_TEST)
       install(TARGETS ${test_name} DESTINATION test)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 5b8fce16a42..8366f065923 100644
--- a/caffe2/CMakeLists.txt
+++ 
b/caffe2/CMakeLists.txt @@ -785,6 +785,32 @@ set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS_NON_AVX} ${Caffe2_CPU_SRCS_AVX2} ${Caffe2_ # END formerly-libtorch sources # ========================================================== +if(BUILD_LIBTORCHLESS) + find_library(TORCH_LIB torch PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + find_library(TORCH_CPU_LIB torch_cpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + + if(USE_CUDA) + find_library(TORCH_CUDA_LIB torch_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + endif() + + if(USE_ROCM) + find_library(TORCH_HIP_LIB torch_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + endif() + + if(USE_XPU) + find_library(TORCH_XPU_LIB torch_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) + endif() + add_subdirectory(../torch torch) +else() + set(TORCH_LIB torch) + set(TORCH_CPU_LIB torch_cpu) + set(TORCH_CUDA_LIB torch_cuda) + set(TORCH_HIP_LIB torch_hip) + set(TORCH_XPU_LIB torch_xpu) +endif() + + +if(NOT BUILD_LIBTORCHLESS) add_library(torch_cpu ${Caffe2_CPU_SRCS}) if(HAVE_SOVERSION) set_target_properties(torch_cpu PROPERTIES @@ -1622,6 +1648,7 @@ endif() # ---[ XPU library. if(USE_XPU) target_link_libraries(torch_xpu INTERFACE torch::xpurt) + target_link_libraries(torch_xpu PUBLIC c10_xpu) target_include_directories( @@ -1701,6 +1728,7 @@ if(USE_ROCM) # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added. 
target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS}) # experiment + target_link_libraries(torch_hip PUBLIC c10_hip) if(NOT INTERN_BUILD_MOBILE) @@ -1921,6 +1949,7 @@ if(MSVC) set_source_files_properties(${tmp_path} PROPERTIES COMPILE_FLAGS "-Xcompiler /Zc:lambda") endforeach() endif() +endif() # Note: we only install the caffe2 python files if BUILD_CAFFE2_OPS is ON # This is because the build rules here written in such a way that they always diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt index 1fa28c8aee8..bdfa4bfe455 100644 --- a/functorch/CMakeLists.txt +++ b/functorch/CMakeLists.txt @@ -16,7 +16,12 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_API_INCLUDE_EXTENSION_H target_compile_options(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS}) target_compile_options_if_supported(${PROJECT_NAME} "-Wmissing-prototypes") target_compile_options_if_supported(${PROJECT_NAME} "-Werror=missing-prototypes") -target_link_libraries(${PROJECT_NAME} PRIVATE torch torch_python) +if(BUILD_LIBTORCHLESS) + target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIB} torch_python) +else() + # functorch cannot use the alias to build on windows + target_link_libraries(${PROJECT_NAME} PRIVATE torch torch_python) +endif() target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11) set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY diff --git a/setup.py b/setup.py index 6c02c8207bc..62051612367 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ # Welcome to the PyTorch setup.py. 
-#
 # Environment variables you are probably interested in:
 #
 #   DEBUG
@@ -199,7 +198,15 @@
 #
 #   USE_PRIORITIZED_TEXT_FOR_LD
 #     Uses prioritized text form cmake/prioritized_text.txt for LD
+#
+#   BUILD_LIBTORCH_WHL
+#     Builds libtorch.so and its dependencies as a wheel
+#
+#   BUILD_PYTHON_ONLY
+#     Builds pytorch as a wheel using libtorch.so from a separate wheel
+import os
+import pkgutil
 import sys
 
 if sys.platform == "win32" and sys.maxsize.bit_length() == 31:
@@ -210,6 +217,34 @@ if sys.platform == "win32" and sys.maxsize.bit_length() == 31:
 
 import platform
 
+
+def _get_package_path(package_name):
+    loader = pkgutil.find_loader(package_name)
+    if loader:
+        # The package might be a namespace package, so get_filename may fail
+        try:
+            file_path = loader.get_filename()
+            return os.path.dirname(file_path)
+        except AttributeError:
+            pass
+    return None
+
+
+BUILD_LIBTORCH_WHL = os.getenv("BUILD_LIBTORCH_WHL", "0") == "1"
+BUILD_PYTHON_ONLY = os.getenv("BUILD_PYTHON_ONLY", "0") == "1"
+
+
+# set up appropriate env variables
+if BUILD_LIBTORCH_WHL:
+    # Set up environment variables for ONLY building libtorch.so and not libtorch_python.so
+    # functorch is not supported without python
+    os.environ["BUILD_FUNCTORCH"] = "OFF"
+
+
+if BUILD_PYTHON_ONLY:
+    os.environ["BUILD_LIBTORCHLESS"] = "ON"
+    os.environ["LIBTORCH_LIB_PATH"] = f"{_get_package_path('libtorch')}/lib"
+
 python_min_version = (3, 8, 0)
 python_min_version_str = ".".join(map(str, python_min_version))
 if sys.version_info < python_min_version:
@@ -222,7 +257,6 @@ import filecmp
 import glob
 import importlib
 import json
-import os
 import shutil
 import subprocess
 import sysconfig
@@ -314,7 +348,10 @@ cmake_python_include_dir = sysconfig.get_path("include")
 ################################################################################
 # Version, create_version_file, and package_name
 ################################################################################
-package_name = os.getenv("TORCH_PACKAGE_NAME", "torch")
+
+DEFAULT_PACKAGE_NAME = "libtorch" if BUILD_LIBTORCH_WHL else "torch" + +package_name = os.getenv("TORCH_PACKAGE_NAME", DEFAULT_PACKAGE_NAME) package_type = os.getenv("PACKAGE_TYPE", "wheel") version = get_torch_version() report(f"Building wheel {package_name}-{version}") @@ -437,11 +474,12 @@ def build_deps(): check_submodules() check_pydep("yaml", "pyyaml") + build_python = not BUILD_LIBTORCH_WHL build_caffe2( version=version, cmake_python_library=cmake_python_library, - build_python=True, + build_python=build_python, rerun_cmake=RERUN_CMAKE, cmake_only=CMAKE_ONLY, cmake=cmake, @@ -698,6 +736,8 @@ class build_ext(setuptools.command.build_ext.build_ext): "caffe2.python.caffe2_pybind11_state_gpu", "caffe2.python.caffe2_pybind11_state_hip", ] + if BUILD_LIBTORCH_WHL: + caffe2_pybind_exts = [] i = 0 while i < len(self.extensions): ext = self.extensions[i] @@ -929,9 +969,14 @@ def configure_extension_build(): main_compile_args = [] main_libraries = ["torch_python"] + main_link_args = [] main_sources = ["torch/csrc/stub.c"] + if BUILD_LIBTORCH_WHL: + main_libraries = ["torch"] + main_sources = [] + if cmake_cache_vars["USE_CUDA"]: library_dirs.append(os.path.dirname(cmake_cache_vars["CUDA_CUDA_LIB"])) @@ -1053,7 +1098,6 @@ def configure_extension_build(): "default = torch.distributed.elastic.multiprocessing:DefaultLogsSpecs", ], } - return extensions, cmdclass, packages, entry_points, extra_install_requires @@ -1080,6 +1124,11 @@ def print_box(msg): def main(): + if BUILD_LIBTORCH_WHL and BUILD_PYTHON_ONLY: + raise RuntimeError( + "Conflict: 'BUILD_LIBTORCH_WHL' and 'BUILD_PYTHON_ONLY' can't both be 1. Set one to 0 and rerun." 
+ ) + # the list of runtime dependencies required by this built package install_requires = [ "filelock", @@ -1091,6 +1140,9 @@ def main(): 'mkl>=2021.1.1,<=2021.4.0; platform_system == "Windows"', ] + if BUILD_PYTHON_ONLY: + install_requires.append("libtorch") + use_prioritized_text = str(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD", "")) if ( use_prioritized_text == "" @@ -1166,10 +1218,6 @@ def main(): "nn/parallel/*.pyi", "utils/data/*.pyi", "utils/data/datapipes/*.pyi", - "lib/*.so*", - "lib/*.dylib*", - "lib/*.dll", - "lib/*.lib", "lib/*.pdb", "lib/torch_shm_manager", "lib/*.h", @@ -1335,6 +1383,23 @@ def main(): "utils/model_dump/*.mjs", ] + if BUILD_PYTHON_ONLY: + torch_package_data.extend( + [ + "lib/libtorch_python*", + "lib/*shm*", + "lib/libtorch_global_deps*", + ] + ) + else: + torch_package_data.extend( + [ + "lib/*.so*", + "lib/*.dylib*", + "lib/*.dll", + "lib/*.lib", + ] + ) if get_cmake_cache_vars()["BUILD_CAFFE2"]: torch_package_data.extend( [ @@ -1377,6 +1442,29 @@ def main(): "packaged/autograd/*", "packaged/autograd/templates/*", ] + + if BUILD_LIBTORCH_WHL: + modified_packages = [] + for package in packages: + parts = package.split(".") + if parts[0] == "torch": + modified_packages.append(DEFAULT_PACKAGE_NAME + package[len("torch") :]) + packages = modified_packages + package_dir = {"libtorch": "torch"} + torch_package_dir_name = "libtorch" + package_data = {"libtorch": torch_package_data} + extensions = [] + else: + torch_package_dir_name = "torch" + package_dir = {} + package_data = { + "torch": torch_package_data, + "torchgen": torchgen_package_data, + "caffe2": [ + "python/serialized_test/data/operator_test/*.zip", + ], + } + setup( name=package_name, version=version, @@ -1392,13 +1480,8 @@ def main(): entry_points=entry_points, install_requires=install_requires, extras_require=extras_require, - package_data={ - "torch": torch_package_data, - "torchgen": torchgen_package_data, - "caffe2": [ - "python/serialized_test/data/operator_test/*.zip", - 
], - }, + package_data=package_data, + package_dir=package_dir, url="https://pytorch.org/", download_url="https://github.com/pytorch/pytorch/tags", author="PyTorch Team", diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 8f879a8ecc7..3a3cf23495b 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -296,6 +296,8 @@ endif() add_library(torch_python SHARED ${TORCH_PYTHON_SRCS}) +add_dependencies(torch_python Caffe2_PROTO) +add_dependencies(torch_python onnx_proto) # Avoid numpy for the DEPLOY build if(USE_NUMPY) target_link_libraries(torch_python PRIVATE numpy::numpy) @@ -344,7 +346,7 @@ endif() target_compile_definitions(torch_python PRIVATE "-DTHP_BUILD_MAIN_LIB") -target_link_libraries(torch_python PRIVATE torch_library ${TORCH_PYTHON_LINK_LIBRARIES}) +target_link_libraries(torch_python PRIVATE ${TORCH_LIB} ${TORCH_PYTHON_LINK_LIBRARIES}) target_compile_definitions(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS}) @@ -359,7 +361,9 @@ endif() if(BUILD_ONEDNN_GRAPH) target_compile_definitions(torch_python PRIVATE "-DBUILD_ONEDNN_GRAPH") - target_compile_definitions(torch_cpu PRIVATE "-DBUILD_ONEDNN_GRAPH") + if(NOT BUILD_LIBTORCHLESS) + target_compile_definitions(torch_cpu PRIVATE "-DBUILD_ONEDNN_GRAPH") + endif() endif() if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "") @@ -401,7 +405,11 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") ${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp ) # Pybind11 requires explicit linking of the torch_python library - target_link_libraries(nnapi_backend PRIVATE torch torch_python pybind::pybind11) + if(BUILD_LIBTORCHLESS) + target_link_libraries(nnapi_backend PRIVATE ${TORCH_LIB} torch_python pybind::pybind11) + else() + target_link_libraries(nnapi_backend PRIVATE torch torch_python pybind::pybind11) + endif() endif() set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE) diff --git a/torch/lib/libshm/CMakeLists.txt b/torch/lib/libshm/CMakeLists.txt index 
a3b41d0a0b0..8a7329ddab7 100644 --- a/torch/lib/libshm/CMakeLists.txt +++ b/torch/lib/libshm/CMakeLists.txt @@ -22,7 +22,7 @@ set_target_properties(shm PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib" CXX_STANDARD 17) -target_link_libraries(shm PRIVATE torch_cpu) +target_link_libraries(shm PRIVATE ${TORCH_CPU_LIB}) if(UNIX AND NOT APPLE) include(CheckLibraryExists) @@ -60,7 +60,12 @@ if(UNIX AND NOT APPLE) endif() add_executable(torch_shm_manager manager.cpp) -target_link_libraries(torch_shm_manager PRIVATE shm c10) +if(BUILD_LIBTORCHLESS) + target_link_libraries(torch_shm_manager PRIVATE shm ${C10_LIB}) +else() + # we need to link directly to c10 here otherwise we miss symbols + target_link_libraries(torch_shm_manager PRIVATE shm c10) +endif() set_target_properties(torch_shm_manager PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../lib")